86 changes: 72 additions & 14 deletions riscv.c
@@ -1,4 +1,5 @@
 #include <stdio.h>
+#include <string.h>
 
 #include "common.h"
 #include "device.h"
@@ -180,11 +181,17 @@ static inline uint32_t read_rs2(const hart_t *vm, uint32_t insn)
     return vm->x_regs[decode_rs2(insn)];
 }
 
+static inline void icache_invalidate_all(hart_t *vm)
+{
+    memset(&vm->icache, 0, sizeof(vm->icache));
+}
+
 /* virtual addressing */
 
 void mmu_invalidate(hart_t *vm)
 {
-    vm->cache_fetch.n_pages = 0xFFFFFFFF;
+    vm->cache_fetch[0].n_pages = 0xFFFFFFFF;
+    vm->cache_fetch[1].n_pages = 0xFFFFFFFF;
     /* Invalidate all 8 sets × 2 ways for load cache */
     for (int set = 0; set < 8; set++) {
         for (int way = 0; way < 2; way++)
@@ -197,6 +204,7 @@ void mmu_invalidate(hart_t *vm)
             vm->cache_store[set].ways[way].n_pages = 0xFFFFFFFF;
         vm->cache_store[set].lru = 0; /* Reset LRU to way 0 */
     }
+    icache_invalidate_all(vm);
 }

 /* Invalidate MMU caches for a specific virtual address range.
@@ -227,9 +235,11 @@ void mmu_invalidate_range(hart_t *vm, uint32_t start_addr, uint32_t size)
     uint32_t end_vpn = (uint32_t) end_addr >> RV_PAGE_SHIFT;
 
     /* Cache invalidation for fetch cache */
-    if (vm->cache_fetch.n_pages >= start_vpn &&
-        vm->cache_fetch.n_pages <= end_vpn)
-        vm->cache_fetch.n_pages = 0xFFFFFFFF;
+    for (int i = 0; i < 2; i++) {
+        if (vm->cache_fetch[i].n_pages >= start_vpn &&
+            vm->cache_fetch[i].n_pages <= end_vpn)
+            vm->cache_fetch[i].n_pages = 0xFFFFFFFF;
+    }
 
     /* Invalidate load cache: 8 sets × 2 ways */
     for (int set = 0; set < 8; set++) {
@@ -361,11 +371,47 @@ static void mmu_fence(hart_t *vm, uint32_t insn UNUSED)
 
 static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
 {
-    uint32_t vpn = addr >> RV_PAGE_SHIFT;
-    if (unlikely(vpn != vm->cache_fetch.n_pages)) {
+    uint32_t idx = (addr >> ICACHE_OFFSET_BITS) & ICACHE_INDEX_MASK;
+    uint32_t tag = addr >> (ICACHE_OFFSET_BITS + ICACHE_INDEX_BITS);
+    icache_block_t *blk = &vm->icache.i_block[idx];
+
+    /* icache hit */
+    if (likely(blk->valid && blk->tag == tag)) {
+#ifdef MMU_CACHE_STATS
+        vm->cache_fetch[0].hits++; /* aggregate fetch stats kept on entry 0 */
+#endif
+        uint32_t ofs = addr & ICACHE_BLOCK_MASK;
+        *value = *(const uint32_t *) (blk->base + ofs);
+        return;
+    }
+
+    /* search the victim cache */
+    uint32_t vcache_key = addr >> ICACHE_OFFSET_BITS;
+    for (int i = 0; i < VCACHE_BLOCKS; i++) {
+        victim_cache_block_t *vblk = &vm->icache.v_block[i];
+
+        /* victim cache hit: swap the VC block with the conflicting icache block */
+        if (vblk->valid && vblk->tag == vcache_key) {
+            icache_block_t tmp = *blk;
+            *blk = *vblk;
+            *vblk = tmp;
+            blk->tag = tag;
Collaborator commented:

    This code looks suspicious to me.

    When you move the evicted I-cache block (tmp) back into the victim cache,
    you are setting vblk->tag to tmp.tag, which is the 16-bit I-cache tag.

    Won't this corrupt the victim cache entry? The VC search logic requires a
    24-bit tag ([ICache Tag | ICache Index]) to function. Because you are only
    storing the 16-bit tag, this VCache entry will never be hit again.

Author replied:

    Thank you for pointing that out. I have added the following expression to
    ensure correctness:

    +            vblk->tag = (tmp.tag << ICACHE_INDEX_BITS) | idx;

+            vblk->tag = (tmp.tag << ICACHE_INDEX_BITS) | idx;
+
+            uint32_t ofs = addr & ICACHE_BLOCK_MASK;
+            *value = *(const uint32_t *) (blk->base + ofs);
+            return;
+        }
+    }
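
The invariant this thread settles on: a victim-cache tag is the concatenation [ICache Tag | ICache Index], i.e. the full block address above the offset bits, which is exactly what the lookup compares against addr >> ICACHE_OFFSET_BITS. A minimal sketch of the two conversions (hypothetical helpers, not part of this diff; macro names as defined in riscv.h):

    /* Compose a victim-cache tag from an icache tag and its set index. */
    static inline uint32_t vc_tag_make(uint32_t ic_tag, uint32_t idx)
    {
        return (ic_tag << ICACHE_INDEX_BITS) | idx;
    }

    /* Recover the icache tag from a victim-cache tag. */
    static inline uint32_t vc_tag_to_ic_tag(uint32_t vc_tag)
    {
        return vc_tag >> ICACHE_INDEX_BITS;
    }

With ICACHE_INDEX_BITS == 8, vc_tag_make(0x8000, 0x12) yields 0x800012, matching 0x80001234 >> ICACHE_OFFSET_BITS.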

 #ifdef MMU_CACHE_STATS
-    vm->cache_fetch.misses++;
+    vm->cache_fetch[0].misses++; /* aggregate fetch stats kept on entry 0 */
 #endif
 
+    /* icache miss: fall back to the original VA->PA translation path */
+    uint32_t vpn = addr >> RV_PAGE_SHIFT;
+    uint32_t index = __builtin_parity(vpn) & 0x1;
+    if (unlikely(vpn != vm->cache_fetch[index].n_pages)) {
         mmu_translate(vm, &addr, (1 << 3), (1 << 6), false, RV_EXC_FETCH_FAULT,
                       RV_EXC_FETCH_PFAULT);
         if (vm->error)
@@ -374,15 +420,27 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
         vm->mem_fetch(vm, addr >> RV_PAGE_SHIFT, &page_addr);
         if (vm->error)
             return;
-        vm->cache_fetch.n_pages = vpn;
-        vm->cache_fetch.page_addr = page_addr;
+        vm->cache_fetch[index].n_pages = vpn;
+        vm->cache_fetch[index].page_addr = page_addr;
     }
-#ifdef MMU_CACHE_STATS
-    else {
-        vm->cache_fetch.hits++;
-    }
-#endif
-    *value = vm->cache_fetch.page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)];
+
+    *value =
+        vm->cache_fetch[index].page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)];
+
+    /* Move the current icache block into the victim cache before replacement */
+    if (blk->valid) {
+        victim_cache_block_t *vblk = &vm->icache.v_block[vm->icache.v_next];
+        *vblk = *blk;
+        vblk->tag = (blk->tag << ICACHE_INDEX_BITS) | idx;
+        vblk->valid = true;
+        vm->icache.v_next = (vm->icache.v_next + 1) % VCACHE_BLOCKS;
+    }
+
+    /* fill the fetched block into the icache */
+    uint32_t block_off = (addr & RV_PAGE_MASK) & ~ICACHE_BLOCK_MASK;
+    blk->base = (const uint8_t *) vm->cache_fetch[index].page_addr + block_off;
+    blk->tag = tag;
+    blk->valid = true;
 }

static void mmu_load(hart_t *vm,
51 changes: 50 additions & 1 deletion riscv.h
@@ -75,7 +75,55 @@ typedef struct {
typedef struct __hart_internal hart_t;
typedef struct __vm_internel vm_t;

+/* ICACHE_BLOCKS_SIZE: size of one instruction-cache block (line) in bytes.
+ * ICACHE_BLOCKS: number of blocks (lines) in the instruction cache.
+ *
+ * A cache address is decomposed into [ tag | index | offset ] fields:
+ *   - block-offset bits = log2(ICACHE_BLOCKS_SIZE)
+ *   - index bits        = log2(ICACHE_BLOCKS)
+ *
+ * For power-of-two values, log2(x) equals the number of trailing zero bits
+ * in x, so __builtin_ctz(x) (count trailing zeros) computes these log2
+ * values at compile time.
+ */
+#define ICACHE_BLOCKS_SIZE 256
+#define ICACHE_BLOCKS 256
+#define ICACHE_OFFSET_BITS (__builtin_ctz((ICACHE_BLOCKS_SIZE)))
+#define ICACHE_INDEX_BITS (__builtin_ctz((ICACHE_BLOCKS)))

jserv (Collaborator) commented on Nov 6, 2025:

    It is not worthwhile to invoke __builtin_ctz for constants.

Author replied:

    It was designed this way because I initially needed to experiment with
    various cache-size combinations. Do you mean it is better to fill in the
    log2 values directly?

    -#define ICACHE_OFFSET_BITS (__builtin_ctz((ICACHE_BLOCKS_SIZE)))
    -#define ICACHE_INDEX_BITS (__builtin_ctz((ICACHE_BLOCKS)))
    +#define ICACHE_OFFSET_BITS 8
    +#define ICACHE_INDEX_BITS 8
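
Either way, the derived field widths only make sense for power-of-two sizes. A compile-time guard can document that assumption (a sketch, not part of this diff; relies only on C11 _Static_assert):

    /* __builtin_ctz(x) equals log2(x) only when x is a power of two,
     * e.g. __builtin_ctz(256) == 8. Reject other configurations early. */
    _Static_assert((ICACHE_BLOCKS_SIZE & (ICACHE_BLOCKS_SIZE - 1)) == 0,
                   "ICACHE_BLOCKS_SIZE must be a power of two");
    _Static_assert((ICACHE_BLOCKS & (ICACHE_BLOCKS - 1)) == 0,
                   "ICACHE_BLOCKS must be a power of two");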

+/* Define the victim cache.
+ *
+ * The block size of the victim cache is identical to that of the primary
+ * instruction cache (IC), ensuring full block compatibility. However, the
+ * number of blocks is smaller: the VC holds only a few recently evicted
+ * cache lines, which is enough to absorb many conflict misses.
+ */
+#define VCACHE_BLOCK_SIZE ICACHE_BLOCKS_SIZE
+#define VCACHE_BLOCKS 16
+
+/* For power-of-two sizes, (size - 1) sets all low bits to 1, so each
+ * address field can be extracted with a single bitwise AND.
+ */
+#define ICACHE_INDEX_MASK (ICACHE_BLOCKS - 1)
+#define ICACHE_BLOCK_MASK (ICACHE_BLOCKS_SIZE - 1)
+#define RV_PAGE_MASK (RV_PAGE_SIZE - 1)
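
As a worked example of the mask-based extraction (values assume the default 256-byte blocks and 256 sets defined above, i.e. 8 offset bits and 8 index bits):

    /* addr = 0x80001234
     * offset = addr & ICACHE_BLOCK_MASK                         -> 0x34
     * index  = (addr >> ICACHE_OFFSET_BITS) & ICACHE_INDEX_MASK -> 0x12
     * tag    = addr >> (ICACHE_OFFSET_BITS + ICACHE_INDEX_BITS) -> 0x8000
     */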

+typedef struct {
+    uint32_t tag;        /* icache: tag bits only; VC: [tag | index] */
+    const uint8_t *base; /* host pointer to the first byte of the block */
+    bool valid;
+} icache_block_t;
+
+typedef icache_block_t victim_cache_block_t;
+
+typedef struct {
+    icache_block_t i_block[ICACHE_BLOCKS];
+    victim_cache_block_t v_block[VCACHE_BLOCKS];
+    uint32_t v_next; /* FIFO replacement cursor for the victim cache */
+} icache_t;

 struct __hart_internal {
+    icache_t icache;
     uint32_t x_regs[32];
 
     /* LR reservation virtual address. last bit is 1 if valid */
@@ -106,7 +154,8 @@ struct __hart_internal {
      */
     uint32_t exc_cause, exc_val;
 
-    mmu_fetch_cache_t cache_fetch;
+    /* 2-entry direct-mapped fetch cache with hash-based indexing */
+    mmu_fetch_cache_t cache_fetch[2];
     /* 8-set × 2-way set-associative cache with 3-bit parity hash indexing */
     mmu_cache_set_t cache_load[8];
     /* 8-set × 2-way set-associative cache for store operations */
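
For reference, the fetch-cache index used in mmu_fetch comes from the parity of the virtual page number, so VPNs differing in any single bit map to different entries. A standalone sketch of that hash (hypothetical helper, not part of this diff; RV_PAGE_SHIFT as used elsewhere in the code):

    /* Pick one of the two cache_fetch entries from the VPN's bit parity. */
    static inline uint32_t fetch_cache_index(uint32_t addr)
    {
        uint32_t vpn = addr >> RV_PAGE_SHIFT; /* virtual page number */
        return __builtin_parity(vpn) & 0x1;   /* 0 or 1 */
    }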