From a2c1bd003b987fde1694dd04844c0ed0845a6820 Mon Sep 17 00:00:00 2001
From: Po-Ju Chen
Date: Thu, 30 Oct 2025 02:39:25 +0800
Subject: [PATCH 1/3] Implement direct-mapped instruction cache

Extend the existing architecture with a direct-mapped instruction cache
that stores recently fetched instructions. Add the related constants and
macros for the cache geometry and address fields.
---
 riscv.c | 41 +++++++++++++++++++++++++++++++++--------
 riscv.h | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 8 deletions(-)

diff --git a/riscv.c b/riscv.c
index 3e00751..1af0a64 100644
--- a/riscv.c
+++ b/riscv.c
@@ -1,4 +1,5 @@
 #include
+#include <string.h>
 #include "common.h"
 #include "device.h"
@@ -180,6 +181,11 @@ static inline uint32_t read_rs2(const hart_t *vm, uint32_t insn)
     return vm->x_regs[decode_rs2(insn)];
 }
 
+static inline void icache_invalidate_all(hart_t *vm)
+{
+    memset(&vm->icache, 0, sizeof(vm->icache));
+}
+
 /* virtual addressing */
 
 void mmu_invalidate(hart_t *vm)
@@ -197,6 +203,7 @@ void mmu_invalidate(hart_t *vm)
         vm->cache_store[set].ways[way].n_pages = 0xFFFFFFFF;
         vm->cache_store[set].lru = 0; /* Reset LRU to way 0 */
     }
+    icache_invalidate_all(vm);
 }
 
 /* Invalidate MMU caches for a specific virtual address range.
@@ -361,11 +368,27 @@ static void mmu_fence(hart_t *vm, uint32_t insn UNUSED)
 
 static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
 {
-    uint32_t vpn = addr >> RV_PAGE_SHIFT;
-    if (unlikely(vpn != vm->cache_fetch.n_pages)) {
+    /* I-cache lookup */
+    uint32_t idx = (addr >> ICACHE_OFFSET_BITS) & ICACHE_INDEX_MASK;
+    uint32_t tag = addr >> (ICACHE_OFFSET_BITS + ICACHE_INDEX_BITS);
+    icache_block_t *blk = &vm->icache.block[idx];
+
+    if (likely(blk->valid && blk->tag == tag)) {
 #ifdef MMU_CACHE_STATS
-        vm->cache_fetch.misses++;
+        vm->cache_fetch.hits++;
 #endif
+        uint32_t ofs = addr & ICACHE_BLOCK_MASK;
+        *value = *(const uint32_t *) (blk->base + ofs);
+        return;
+    }
+
+#ifdef MMU_CACHE_STATS
+    vm->cache_fetch.misses++;
+#endif
+
+    /* cache miss: fall back to the original VA->PA fetch path */
+    uint32_t vpn = addr >> RV_PAGE_SHIFT;
+    if (unlikely(vpn != vm->cache_fetch.n_pages)) {
         mmu_translate(vm, &addr, (1 << 3), (1 << 6), false, RV_EXC_FETCH_FAULT,
                       RV_EXC_FETCH_PFAULT);
         if (vm->error)
@@ -377,12 +400,14 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
         vm->cache_fetch.n_pages = vpn;
         vm->cache_fetch.page_addr = page_addr;
     }
-#ifdef MMU_CACHE_STATS
-    else {
-        vm->cache_fetch.hits++;
-    }
-#endif
+
     *value = vm->cache_fetch.page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)];
+
+    /* refill the cache block */
+    uint32_t block_off = (addr & RV_PAGE_MASK) & ~ICACHE_BLOCK_MASK;
+    blk->base = (const uint8_t *) vm->cache_fetch.page_addr + block_off;
+    blk->tag = tag;
+    blk->valid = true;
 }
 
 static void mmu_load(hart_t *vm,
diff --git a/riscv.h b/riscv.h
index 86619a1..1872143 100644
--- a/riscv.h
+++ b/riscv.h
@@ -75,7 +75,41 @@ typedef struct {
 typedef struct __hart_internal hart_t;
 typedef struct __vm_internel vm_t;
 
+/* ICACHE_BLOCKS_SIZE: Size in bytes of one instruction-cache block (line).
+ * ICACHE_BLOCKS: Number of blocks (lines) in the instruction cache.
+ *
+ * The cache address is decomposed into [ tag | index | offset ] fields:
+ *   - block-offset bits = log2(ICACHE_BLOCKS_SIZE)
+ *   - index bits = log2(ICACHE_BLOCKS)
+ *
+ * For power-of-two values, log2(x) equals the number of trailing zero bits
+ * in x, so __builtin_ctz(x) (count trailing zeros) computes these log2
+ * values at compile time.
+ */
+#define ICACHE_BLOCKS_SIZE 256
+#define ICACHE_BLOCKS 256
+#define ICACHE_OFFSET_BITS (__builtin_ctz((ICACHE_BLOCKS_SIZE)))
+#define ICACHE_INDEX_BITS (__builtin_ctz((ICACHE_BLOCKS)))
+
+/* For power-of-two sizes, (size - 1) sets all low bits to 1, allowing the
+ * address fields to be extracted with a single bitwise AND.
+ */
+#define ICACHE_INDEX_MASK (ICACHE_BLOCKS - 1)
+#define ICACHE_BLOCK_MASK (ICACHE_BLOCKS_SIZE - 1)
+#define RV_PAGE_MASK (RV_PAGE_SIZE - 1)
+
+typedef struct {
+    uint32_t tag;
+    const uint8_t *base;
+    bool valid;
+} icache_block_t;
+
+typedef struct {
+    icache_block_t block[ICACHE_BLOCKS];
+} icache_t;
+
 struct __hart_internal {
+    icache_t icache;
     uint32_t x_regs[32];
 
     /* LR reservation virtual address. last bit is 1 if valid */
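
A quick illustration of the [ tag | index | offset ] split defined above:
with ICACHE_BLOCKS_SIZE = 256 and ICACHE_BLOCKS = 256, a fetch address
carries 8 offset bits, 8 index bits, and a 16-bit tag. The standalone
sketch below is not part of the patch; it merely replays the arithmetic
from mmu_fetch() on an arbitrary example address.

    /* Minimal sketch: decompose a fetch address the way mmu_fetch() does.
     * Constants mirror riscv.h; the address is an arbitrary example. */
    #include <stdint.h>
    #include <stdio.h>

    #define ICACHE_BLOCKS_SIZE 256
    #define ICACHE_BLOCKS 256
    #define ICACHE_OFFSET_BITS (__builtin_ctz(ICACHE_BLOCKS_SIZE)) /* 8 */
    #define ICACHE_INDEX_BITS (__builtin_ctz(ICACHE_BLOCKS))       /* 8 */
    #define ICACHE_INDEX_MASK (ICACHE_BLOCKS - 1)
    #define ICACHE_BLOCK_MASK (ICACHE_BLOCKS_SIZE - 1)

    int main(void)
    {
        uint32_t addr = 0x80401234;
        uint32_t ofs = addr & ICACHE_BLOCK_MASK;                         /* 0x34 */
        uint32_t idx = (addr >> ICACHE_OFFSET_BITS) & ICACHE_INDEX_MASK; /* 0x12 */
        uint32_t tag = addr >> (ICACHE_OFFSET_BITS + ICACHE_INDEX_BITS); /* 0x8040 */
        printf("tag=%#x idx=%#x ofs=%#x\n", tag, idx, ofs);
        return 0;
    }
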
From 4404e48784ef6ed7c5cf2ae9a8c6cfa278735902 Mon Sep 17 00:00:00 2001
From: Po-Ju Chen
Date: Sat, 1 Nov 2025 02:21:19 +0800
Subject: [PATCH 2/3] Adopt 2-entry direct-mapped page cache

Replace the previous 1-entry fetch-page cache with a 2-entry
direct-mapped cache using hash-based indexing (the same parity hash as
cache_load). This allows two hot virtual pages to coexist without
thrashing.

Measurement shows that the number of virtual-to-physical translations
during instruction fetch (mmu_translate() calls) decreased by ~10%.
---
 riscv.c | 23 ++++++++++++++---------
 riscv.h |  3 ++-
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/riscv.c b/riscv.c
index 1af0a64..d22d2e7 100644
--- a/riscv.c
+++ b/riscv.c
@@ -190,7 +190,8 @@ static inline void icache_invalidate_all(hart_t *vm)
 
 void mmu_invalidate(hart_t *vm)
 {
-    vm->cache_fetch.n_pages = 0xFFFFFFFF;
+    vm->cache_fetch[0].n_pages = 0xFFFFFFFF;
+    vm->cache_fetch[1].n_pages = 0xFFFFFFFF;
     /* Invalidate all 8 sets × 2 ways for load cache */
     for (int set = 0; set < 8; set++) {
         for (int way = 0; way < 2; way++)
@@ -234,9 +235,11 @@ void mmu_invalidate_range(hart_t *vm, uint32_t start_addr, uint32_t size)
     uint32_t end_vpn = (uint32_t) end_addr >> RV_PAGE_SHIFT;
 
     /* Cache invalidation for fetch cache */
-    if (vm->cache_fetch.n_pages >= start_vpn &&
-        vm->cache_fetch.n_pages <= end_vpn)
-        vm->cache_fetch.n_pages = 0xFFFFFFFF;
+    for (int i = 0; i < 2; i++) {
+        if (vm->cache_fetch[i].n_pages >= start_vpn &&
+            vm->cache_fetch[i].n_pages <= end_vpn)
+            vm->cache_fetch[i].n_pages = 0xFFFFFFFF;
+    }
 
     /* Invalidate load cache: 8 sets × 2 ways */
     for (int set = 0; set < 8; set++) {
@@ -388,7 +391,8 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
 
     /* cache miss: fall back to the original VA->PA fetch path */
     uint32_t vpn = addr >> RV_PAGE_SHIFT;
-    if (unlikely(vpn != vm->cache_fetch.n_pages)) {
+    uint32_t index = __builtin_parity(vpn) & 0x1;
+    if (unlikely(vpn != vm->cache_fetch[index].n_pages)) {
         mmu_translate(vm, &addr, (1 << 3), (1 << 6), false, RV_EXC_FETCH_FAULT,
                       RV_EXC_FETCH_PFAULT);
         if (vm->error)
@@ -397,15 +401,16 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
         vm->mem_fetch(vm, addr >> RV_PAGE_SHIFT, &page_addr);
         if (vm->error)
             return;
-        vm->cache_fetch.n_pages = vpn;
-        vm->cache_fetch.page_addr = page_addr;
+        vm->cache_fetch[index].n_pages = vpn;
+        vm->cache_fetch[index].page_addr = page_addr;
     }
-    *value = vm->cache_fetch.page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)];
+    *value =
+        vm->cache_fetch[index].page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)];
 
     /* refill the cache block */
     uint32_t block_off = (addr & RV_PAGE_MASK) & ~ICACHE_BLOCK_MASK;
-    blk->base = (const uint8_t *) vm->cache_fetch.page_addr + block_off;
+    blk->base = (const uint8_t *) vm->cache_fetch[index].page_addr + block_off;
     blk->tag = tag;
     blk->valid = true;
 }
diff --git a/riscv.h b/riscv.h
index 1872143..faff4c1 100644
--- a/riscv.h
+++ b/riscv.h
@@ -140,7 +140,8 @@ struct __hart_internal {
      */
     uint32_t exc_cause, exc_val;
 
-    mmu_fetch_cache_t cache_fetch;
+    /* 2-entry direct-mapped fetch cache with hash-based indexing */
+    mmu_fetch_cache_t cache_fetch[2];
     /* 8-set × 2-way set-associative cache with 3-bit parity hash indexing */
     mmu_cache_set_t cache_load[8];
     /* 8-set × 2-way set-associative cache for store operations */
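
Why the parity hash helps: __builtin_parity(vpn) is the XOR of all bits of
the VPN, so an even VPN and the odd VPN immediately after it (the common
case of code straddling two adjacent pages) always hash to different
entries. The standalone sketch below is not part of the patch; the VPN is
an arbitrary example.

    /* Minimal sketch of the 2-entry parity-hash indexing used by
     * mmu_fetch(). Two adjacent pages map to different entries here. */
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t fetch_index(uint32_t vpn)
    {
        /* __builtin_parity() already yields 0 or 1; the & 0x1 in the
         * patch merely makes that explicit. */
        return __builtin_parity(vpn) & 0x1;
    }

    int main(void)
    {
        uint32_t vpn = 0x80400; /* example: first of two adjacent code pages */
        printf("index(%#x)=%u index(%#x)=%u\n", vpn, fetch_index(vpn),
               vpn + 1, fetch_index(vpn + 1));
        return 0;
    }
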
From f270dca500dd6b866c083f2d21298969bb308338 Mon Sep 17 00:00:00 2001
From: Po-Ju Chen
Date: Sun, 2 Nov 2025 16:56:01 +0800
Subject: [PATCH 3/3] Add victim cache for I-cache

Introduce a small victim cache to reduce conflict misses in the
direct-mapped instruction cache. On an I-cache miss, probe the victim
cache; on a hit there, swap the victim block with the conflicting
I-cache block and return the data.

Measurement shows that the number of virtual-to-physical translations
during instruction fetch (mmu_translate() calls) decreased by ~8%.
---
 riscv.c | 34 +++++++++++++++++++++++++++++++---
 riscv.h | 16 +++++++++++++++-
 2 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/riscv.c b/riscv.c
index d22d2e7..910595c 100644
--- a/riscv.c
+++ b/riscv.c
@@ -374,7 +374,7 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
     /* I-cache lookup */
     uint32_t idx = (addr >> ICACHE_OFFSET_BITS) & ICACHE_INDEX_MASK;
     uint32_t tag = addr >> (ICACHE_OFFSET_BITS + ICACHE_INDEX_BITS);
-    icache_block_t *blk = &vm->icache.block[idx];
+    icache_block_t *blk = &vm->icache.i_block[idx];
 
     if (likely(blk->valid && blk->tag == tag)) {
 #ifdef MMU_CACHE_STATS
@@ -385,11 +385,30 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
         return;
     }
 
+    /* search the victim cache */
+    uint32_t vcache_key = addr >> ICACHE_OFFSET_BITS;
+    for (int i = 0; i < VCACHE_BLOCKS; i++) {
+        victim_cache_block_t *vblk = &vm->icache.v_block[i];
+
+        /* victim cache hit: swap the two blocks */
+        if (vblk->valid && vblk->tag == vcache_key) {
+            icache_block_t tmp = *blk;
+            *blk = *vblk;
+            *vblk = tmp;
+            blk->tag = tag;
+            vblk->tag = (tmp.tag << ICACHE_INDEX_BITS) | idx;
+
+            uint32_t ofs = addr & ICACHE_BLOCK_MASK;
+            *value = *(const uint32_t *) (blk->base + ofs);
+            return;
+        }
+    }
+
 #ifdef MMU_CACHE_STATS
     vm->cache_fetch.misses++;
 #endif
 
-    /* cache miss: fall back to the original VA->PA fetch path */
+    /* I-cache miss: fall back to the original VA->PA fetch path */
     uint32_t vpn = addr >> RV_PAGE_SHIFT;
     uint32_t index = __builtin_parity(vpn) & 0x1;
     if (unlikely(vpn != vm->cache_fetch[index].n_pages)) {
@@ -408,7 +427,16 @@ static void mmu_fetch(hart_t *vm, uint32_t addr, uint32_t *value)
     *value =
         vm->cache_fetch[index].page_addr[(addr >> 2) & MASK(RV_PAGE_SHIFT - 2)];
 
-    /* refill the cache block */
+    /* Move the evicted I-cache block into the victim cache before refill */
+    if (blk->valid) {
+        victim_cache_block_t *vblk = &vm->icache.v_block[vm->icache.v_next];
+        *vblk = *blk;
+        vblk->tag = (blk->tag << ICACHE_INDEX_BITS) | idx;
+        vblk->valid = true;
+        vm->icache.v_next = (vm->icache.v_next + 1) % VCACHE_BLOCKS;
+    }
+
+    /* refill the I-cache block */
     uint32_t block_off = (addr & RV_PAGE_MASK) & ~ICACHE_BLOCK_MASK;
     blk->base = (const uint8_t *) vm->cache_fetch[index].page_addr + block_off;
     blk->tag = tag;
diff --git a/riscv.h b/riscv.h
index faff4c1..26da457 100644
--- a/riscv.h
+++ b/riscv.h
@@ -91,6 +91,16 @@
 #define ICACHE_OFFSET_BITS (__builtin_ctz((ICACHE_BLOCKS_SIZE)))
 #define ICACHE_INDEX_BITS (__builtin_ctz((ICACHE_BLOCKS)))
 
+/* Victim cache parameters.
+ *
+ * The victim cache uses the same block size as the primary instruction
+ * cache, so blocks can be swapped between the two caches directly.
+ * It has far fewer blocks: it only needs to hold a handful of recently
+ * evicted lines to absorb conflict misses.
+ */
+#define VCACHE_BLOCK_SIZE ICACHE_BLOCKS_SIZE
+#define VCACHE_BLOCKS 16
+
 /* For power-of-two sizes, (size - 1) sets all low bits to 1, allowing the
  * address fields to be extracted with a single bitwise AND.
  */
@@ -104,8 +114,12 @@ typedef struct {
     bool valid;
 } icache_block_t;
 
+typedef icache_block_t victim_cache_block_t;
+
 typedef struct {
-    icache_block_t block[ICACHE_BLOCKS];
+    icache_block_t i_block[ICACHE_BLOCKS];
+    victim_cache_block_t v_block[VCACHE_BLOCKS];
+    uint32_t v_next;
 } icache_t;
 
 struct __hart_internal {
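
A note on the tag widening above: the victim cache is probed
associatively, so each entry must be keyed by the full block number
(addr >> ICACHE_OFFSET_BITS) rather than by the I-cache tag alone, which
is why eviction stores (tag << ICACHE_INDEX_BITS) | idx. The standalone
sketch below is not part of the patch; it just checks that identity on an
arbitrary example address.

    /* Sketch: the victim-cache key is the I-cache tag widened with the
     * set index, i.e. exactly the block number used to probe it later. */
    #include <assert.h>
    #include <stdint.h>

    #define ICACHE_OFFSET_BITS 8 /* log2(ICACHE_BLOCKS_SIZE) in riscv.h */
    #define ICACHE_INDEX_BITS 8  /* log2(ICACHE_BLOCKS) in riscv.h */
    #define ICACHE_INDEX_MASK ((1u << ICACHE_INDEX_BITS) - 1)

    int main(void)
    {
        uint32_t addr = 0x80401234; /* arbitrary example address */
        uint32_t idx = (addr >> ICACHE_OFFSET_BITS) & ICACHE_INDEX_MASK;
        uint32_t tag = addr >> (ICACHE_OFFSET_BITS + ICACHE_INDEX_BITS);

        /* key stored on eviction == key probed on a later miss */
        uint32_t vcache_key = addr >> ICACHE_OFFSET_BITS;
        assert(((tag << ICACHE_INDEX_BITS) | idx) == vcache_key);
        return 0;
    }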