From a5ddb51a3b3320b6107f38872af335f4d11628d5 Mon Sep 17 00:00:00 2001 From: Maxim Poliakovski Date: Sun, 16 May 2021 00:53:15 +0200 Subject: [PATCH 01/14] ppcmmu: initial TLB implementation for reads. --- cpu/ppc/ppcexceptions.cpp | 3 + cpu/ppc/ppcmmu.cpp | 351 ++++++++++++++++++++++++++++++++++++++ cpu/ppc/ppcmmu.h | 15 ++ cpu/ppc/ppcopcodes.cpp | 3 + 4 files changed, 372 insertions(+) diff --git a/cpu/ppc/ppcexceptions.cpp b/cpu/ppc/ppcexceptions.cpp index 73bf1ff..015da7f 100644 --- a/cpu/ppc/ppcexceptions.cpp +++ b/cpu/ppc/ppcexceptions.cpp @@ -22,6 +22,7 @@ along with this program. If not, see . /** @file Handling of low-level PPC exceptions. */ #include "ppcemu.h" +#include "ppcmmu.h" #include #include #include @@ -109,6 +110,8 @@ jmp_buf exc_env; /* Global exception environment. */ ppc_next_instruction_address |= 0xFFF00000; } + mmu_change_mode(); + longjmp(exc_env, 2); /* return to the main execution loop. */ } diff --git a/cpu/ppc/ppcmmu.cpp b/cpu/ppc/ppcmmu.cpp index 18ccb50..a6a43eb 100644 --- a/cpu/ppc/ppcmmu.cpp +++ b/cpu/ppc/ppcmmu.cpp @@ -135,6 +135,13 @@ public: }; #endif +/** Temporary TLB test variables. */ +bool MemAccessType; // true - memory, false - I/O +uint64_t MemAddr = 0; +MMIODevice *Device = 0; +uint32_t DevOffset = 0; + + /** remember recently used physical memory regions for quicker translation. */ AddressMapEntry last_read_area = {0xFFFFFFFF, 0xFFFFFFFF}; AddressMapEntry last_write_area = {0xFFFFFFFF, 0xFFFFFFFF}; @@ -160,6 +167,17 @@ static inline T read_phys_mem(AddressMapEntry *mru_rgn, uint32_t addr) #ifdef MMU_PROFILING dmem_reads_total++; #endif + + if (!MemAccessType) { + LOG_F(ERROR, "TLB real memory access expected!"); + } + + if ((mru_rgn->mem_ptr + (addr - mru_rgn->start)) != (uint8_t *)MemAddr) { + LOG_F(ERROR, "TLB address mismatch! Expected: 0x%llu, got: 0x%llu", + (uint64_t)(mru_rgn->mem_ptr + (addr - mru_rgn->start)), + (uint64_t)MemAddr); + } + switch(sizeof(T)) { case 1: return *(mru_rgn->mem_ptr + (addr - mru_rgn->start)); @@ -187,6 +205,15 @@ static inline T read_phys_mem(AddressMapEntry *mru_rgn, uint32_t addr) #ifdef MMU_PROFILING iomem_reads_total++; #endif + if (MemAccessType) { + LOG_F(ERROR, "TLB I/O memory access expected!"); + } + + if (mru_rgn->devobj != Device || (addr - mru_rgn->start) != DevOffset) { + LOG_F(ERROR, "TLB MMIO access mismatch! Expected: 0x%X, got: 0x%X", + addr - mru_rgn->start, DevOffset); + } + return (mru_rgn->devobj->read(mru_rgn->start, addr - mru_rgn->start, sizeof(T))); } else { @@ -311,6 +338,43 @@ void dbat_update(uint32_t bat_reg) { } } +/** PowerPC-style block address translation. */ +template +static BATResult ppc_block_address_translation(uint32_t la) +{ + uint32_t pa; // translated physical address + uint8_t prot; // protection bits for the translated address + PPC_BAT_entry *bat_array; + + bool bat_hit = false; + unsigned msr_pr = !!(ppc_state.msr & 0x4000); + + bat_array = (type == BATType::Instruction) ? 
ibat_array : dbat_array; + + // Format: %XY + // X - supervisor access bit, Y - problem/user access bit + // Those bits are mutually exclusive + unsigned access_bits = ((msr_pr ^ 1) << 1) | msr_pr; + + for (int bat_index = 0; bat_index < 4; bat_index++) { + PPC_BAT_entry* bat_entry = &bat_array[bat_index]; + + if ((bat_entry->access & access_bits) && ((la & bat_entry->hi_mask) == bat_entry->bepi)) { + bat_hit = true; + +#ifdef MMU_PROFILING + bat_transl_total++; +#endif + // logical to physical translation + pa = bat_entry->phys_hi | (la & ~bat_entry->hi_mask); + prot = bat_entry->prot; + break; + } + } + + return BATResult{bat_hit, prot, pa}; +} + static inline uint8_t* calc_pteg_addr(uint32_t hash) { uint32_t sdr1_val, pteg_addr; @@ -672,8 +736,244 @@ static uint32_t mem_grab_unaligned(uint32_t addr, uint32_t size) { return ret; } +#define PAGE_SIZE_BITS 12 +#define TLB_SIZE 4096 +#define TLB2_WAYS 4 +#define TLB_INVALID_TAG 0xFFFFFFFF + +typedef struct TLBEntry { + uint32_t tag; + uint16_t flags; + uint16_t lru_bits; + union { + int64_t host_va_offset; + AddressMapEntry* reg_desc; + }; +} TLBEntry; + +// primary TLB for all MMU modes +static std::array mode1_tlb1; +static std::array mode2_tlb1; +static std::array mode3_tlb1; + +// secondary TLB for all MMU modes +static std::array mode1_tlb2; +static std::array mode2_tlb2; +static std::array mode3_tlb2; + +TLBEntry *pCurTLB1; // current primary TLB +TLBEntry *pCurTLB2; // current secondary TLB + +uint32_t tlb_size_mask = TLB_SIZE - 1; + +// fake TLB entry for handling of unmapped memory accesses +uint64_t UnmappedVal = -1ULL; +TLBEntry UnmappedMem = {TLB_INVALID_TAG, 0, 0, 0}; + +uint8_t MMUMode = {0xFF}; + +void mmu_change_mode() +{ + uint8_t mmu_mode = ((ppc_state.msr >> 3) & 0x2) | ((ppc_state.msr >> 14) & 1); + + if (MMUMode != mmu_mode) { + switch(mmu_mode) { + case 0: // real address mode + pCurTLB1 = &mode1_tlb1[0]; + pCurTLB2 = &mode1_tlb2[0]; + break; + case 2: // supervisor mode with data translation enabled + pCurTLB1 = &mode2_tlb1[0]; + pCurTLB2 = &mode2_tlb2[0]; + break; + case 3: // user mode with data translation enabled + pCurTLB1 = &mode3_tlb1[0]; + pCurTLB2 = &mode3_tlb2[0]; + break; + } + MMUMode = mmu_mode; + } +} + +static TLBEntry* tlb2_target_entry(uint32_t gp_va) +{ + TLBEntry *tlb_entry; + + tlb_entry = &pCurTLB2[((gp_va >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS]; + + // select the target from invalid blocks first + if (tlb_entry[0].tag == TLB_INVALID_TAG) { + // update LRU bits + tlb_entry[0].lru_bits = 0x3; + tlb_entry[1].lru_bits = 0x2; + tlb_entry[2].lru_bits &= 0x1; + tlb_entry[3].lru_bits &= 0x1; + return tlb_entry; + } else if (tlb_entry[1].tag == TLB_INVALID_TAG) { + // update LRU bits + tlb_entry[0].lru_bits = 0x2; + tlb_entry[1].lru_bits = 0x3; + tlb_entry[2].lru_bits &= 0x1; + tlb_entry[3].lru_bits &= 0x1; + return &tlb_entry[1]; + } else if (tlb_entry[2].tag == TLB_INVALID_TAG) { + // update LRU bits + tlb_entry[0].lru_bits &= 0x1; + tlb_entry[1].lru_bits &= 0x1; + tlb_entry[2].lru_bits = 0x3; + tlb_entry[3].lru_bits = 0x2; + return &tlb_entry[2]; + } else if (tlb_entry[3].tag == TLB_INVALID_TAG) { + // update LRU bits + tlb_entry[0].lru_bits &= 0x1; + tlb_entry[1].lru_bits &= 0x1; + tlb_entry[2].lru_bits = 0x2; + tlb_entry[3].lru_bits = 0x3; + return &tlb_entry[3]; + } else { // no invalid blocks, replace an existing one according with the hLRU policy + if (tlb_entry[0].lru_bits == 0) { + // update LRU bits + tlb_entry[0].lru_bits = 0x3; + tlb_entry[1].lru_bits = 0x2; + tlb_entry[2].lru_bits 
&= 0x1; + tlb_entry[3].lru_bits &= 0x1; + return tlb_entry; + } else if (tlb_entry[1].lru_bits == 0) { + // update LRU bits + tlb_entry[0].lru_bits = 0x2; + tlb_entry[1].lru_bits = 0x3; + tlb_entry[2].lru_bits &= 0x1; + tlb_entry[3].lru_bits &= 0x1; + return &tlb_entry[1]; + } else if (tlb_entry[2].lru_bits == 0) { + // update LRU bits + tlb_entry[0].lru_bits &= 0x1; + tlb_entry[1].lru_bits &= 0x1; + tlb_entry[2].lru_bits = 0x3; + tlb_entry[3].lru_bits = 0x2; + return &tlb_entry[2]; + } else { + // update LRU bits + tlb_entry[0].lru_bits &= 0x1; + tlb_entry[1].lru_bits &= 0x1; + tlb_entry[2].lru_bits = 0x2; + tlb_entry[3].lru_bits = 0x3; + return &tlb_entry[3]; + } + } +} + +static TLBEntry* tlb2_refill(uint32_t guest_va, int is_write) +{ + uint32_t phys_addr; + TLBEntry *tlb_entry; + + const uint32_t tag = guest_va & ~0xFFFUL; + + // attempt block address translation first + BATResult bat_res = ppc_block_address_translation(guest_va); + if (bat_res.hit) { + // check block protection + if (!bat_res.prot || ((bat_res.prot & 1) && is_write)) { + ppc_state.spr[SPR::DSISR] = 0x08000000 | (is_write << 25); + ppc_state.spr[SPR::DAR] = guest_va; + mmu_exception_handler(Except_Type::EXC_DSI, 0); + } + phys_addr = bat_res.phys; + } else { + // page address translation + phys_addr = page_address_translate(guest_va, false, !!(ppc_state.msr & 0x4000), is_write); + } + + // look up host virtual address + AddressMapEntry* reg_desc = mem_ctrl_instance->find_range(phys_addr); + if (reg_desc) { + // refill the secondary TLB + tlb_entry = tlb2_target_entry(tag); + tlb_entry->tag = tag; + if (reg_desc->type & RT_MMIO) { + tlb_entry->flags = 2; // MMIO region + tlb_entry->reg_desc = reg_desc; + } else { + tlb_entry->flags = 1; // memory region backed by host memory + tlb_entry->host_va_offset = (int64_t)reg_desc->mem_ptr - reg_desc->start; + } + return tlb_entry; + } else { + LOG_F(ERROR, "Read from unmapped memory at 0x%08X!\n", phys_addr); + UnmappedMem.tag = tag; + UnmappedMem.host_va_offset = (int64_t)(&UnmappedVal) - guest_va; + return &UnmappedMem; + } +} + +static inline uint64_t tlb_translate_addr(uint32_t guest_va) +{ + TLBEntry *tlb1_entry, *tlb2_entry; + + const uint32_t tag = guest_va & ~0xFFFUL; + + // look up address in the primary TLB + tlb1_entry = &pCurTLB1[(guest_va >> PAGE_SIZE_BITS) & tlb_size_mask]; + if (tlb1_entry->tag == tag) { // primary TLB hit -> fast path + MemAccessType = true; + MemAddr = tlb1_entry->host_va_offset + guest_va; + return tlb1_entry->host_va_offset + guest_va; + } else { // primary TLB miss -> look up address in the secondary TLB + tlb2_entry = &pCurTLB2[((guest_va >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS]; + if (tlb2_entry->tag == tag) { + // update LRU bits + tlb2_entry[0].lru_bits = 0x3; + tlb2_entry[1].lru_bits = 0x2; + tlb2_entry[2].lru_bits &= 0x1; + tlb2_entry[3].lru_bits &= 0x1; + } else if (tlb2_entry[1].tag == tag) { + tlb2_entry = &tlb2_entry[1]; + // update LRU bits + tlb2_entry[0].lru_bits = 0x2; + tlb2_entry[1].lru_bits = 0x3; + tlb2_entry[2].lru_bits &= 0x1; + tlb2_entry[3].lru_bits &= 0x1; + } else if (tlb2_entry[2].tag == tag) { + tlb2_entry = &tlb2_entry[2]; + // update LRU bits + tlb2_entry[0].lru_bits &= 0x1; + tlb2_entry[1].lru_bits &= 0x1; + tlb2_entry[2].lru_bits = 0x3; + tlb2_entry[3].lru_bits = 0x2; + } else if (tlb2_entry[3].tag == tag) { + tlb2_entry = &tlb2_entry[3]; + // update LRU bits + tlb2_entry[0].lru_bits &= 0x1; + tlb2_entry[1].lru_bits &= 0x1; + tlb2_entry[2].lru_bits = 0x2; + tlb2_entry[3].lru_bits = 0x3; + } else { // 
secondary TLB miss -> + // perform full address translation and refill the secondary TLB + tlb2_entry = tlb2_refill(guest_va, 0); + } + + if (tlb2_entry->flags & 1) { // is it a real memory region? + // refill the primary TLB + tlb1_entry->tag = tag; + tlb1_entry->flags = 1; + tlb1_entry->host_va_offset = tlb2_entry->host_va_offset; + MemAccessType = true; + MemAddr = tlb1_entry->host_va_offset + guest_va; + return tlb1_entry->host_va_offset + guest_va; + } else { // an attempt to access a memory-mapped device + MemAccessType = false; + Device = tlb2_entry->reg_desc->devobj; + DevOffset = guest_va - tlb2_entry->reg_desc->start; + return guest_va - tlb2_entry->reg_desc->start; + } + } +} + /** Grab a value from memory into a register */ uint8_t mem_grab_byte(uint32_t addr) { + tlb_translate_addr(addr); + /* data address translation if enabled */ if (ppc_state.msr & 0x10) { addr = ppc_mmu_addr_translate(addr, 0); @@ -683,6 +983,8 @@ uint8_t mem_grab_byte(uint32_t addr) { } uint16_t mem_grab_word(uint32_t addr) { + tlb_translate_addr(addr); + if (addr & 1) { return mem_grab_unaligned(addr, 2); } @@ -696,6 +998,8 @@ uint16_t mem_grab_word(uint32_t addr) { } uint32_t mem_grab_dword(uint32_t addr) { + tlb_translate_addr(addr); + if (addr & 3) { return mem_grab_unaligned(addr, 4); } @@ -709,6 +1013,8 @@ uint32_t mem_grab_dword(uint32_t addr) { } uint64_t mem_grab_qword(uint32_t addr) { + tlb_translate_addr(addr); + if (addr & 7) { LOG_F(ERROR, "SOS! Attempt to read unaligned QWORD at 0x%08X\n", addr); exit(-1); // FIXME! @@ -801,6 +1107,51 @@ uint64_t mem_read_dbg(uint32_t virt_addr, uint32_t size) { void ppc_mmu_init() { mmu_exception_handler = ppc_exception_handler; + // invalidate all TLB entries + for(auto &tlb_el : mode1_tlb1) { + tlb_el.tag = TLB_INVALID_TAG; + tlb_el.flags = 0; + tlb_el.lru_bits = 0; + tlb_el.host_va_offset = 0; + } + + for(auto &tlb_el : mode2_tlb1) { + tlb_el.tag = TLB_INVALID_TAG; + tlb_el.flags = 0; + tlb_el.lru_bits = 0; + tlb_el.host_va_offset = 0; + } + + for(auto &tlb_el : mode3_tlb1) { + tlb_el.tag = TLB_INVALID_TAG; + tlb_el.flags = 0; + tlb_el.lru_bits = 0; + tlb_el.host_va_offset = 0; + } + + for(auto &tlb_el : mode1_tlb2) { + tlb_el.tag = TLB_INVALID_TAG; + tlb_el.flags = 0; + tlb_el.lru_bits = 0; + tlb_el.host_va_offset = 0; + } + + for(auto &tlb_el : mode2_tlb2) { + tlb_el.tag = TLB_INVALID_TAG; + tlb_el.flags = 0; + tlb_el.lru_bits = 0; + tlb_el.host_va_offset = 0; + } + + for(auto &tlb_el : mode3_tlb2) { + tlb_el.tag = TLB_INVALID_TAG; + tlb_el.flags = 0; + tlb_el.lru_bits = 0; + tlb_el.host_va_offset = 0; + } + + mmu_change_mode(); + #ifdef MMU_PROFILING gProfilerObj->register_profile("PPC_MMU", std::unique_ptr(new MMUProfile())); diff --git a/cpu/ppc/ppcmmu.h b/cpu/ppc/ppcmmu.h index d8910da..f132301 100644 --- a/cpu/ppc/ppcmmu.h +++ b/cpu/ppc/ppcmmu.h @@ -40,12 +40,27 @@ typedef struct PPC_BAT_entry { uint32_t bepi; /* copy of Block effective page index */ } PPC_BAT_entry; +/** Block address translation types. */ +enum BATType : int { + Instruction, + Data +}; + +/** Result of the block address translation. 
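    The hit flag tells whether one of the four BAT entries matched; prot carries
    the matched entry's protection bits and phys the translated physical address.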
*/ +typedef struct BATResult { + bool hit; + uint8_t prot; + uint32_t phys; +} BATResult; + extern void ibat_update(uint32_t bat_reg); extern void dbat_update(uint32_t bat_reg); extern uint8_t* mmu_get_dma_mem(uint32_t addr, uint32_t size); +extern void mmu_change_mode(void); + extern void ppc_set_cur_instruction(const uint8_t* ptr); extern void mem_write_byte(uint32_t addr, uint8_t value); extern void mem_write_word(uint32_t addr, uint16_t value); diff --git a/cpu/ppc/ppcopcodes.cpp b/cpu/ppc/ppcopcodes.cpp index 48a8e40..34d7a2c 100644 --- a/cpu/ppc/ppcopcodes.cpp +++ b/cpu/ppc/ppcopcodes.cpp @@ -824,6 +824,7 @@ void dppc_interpreter::ppc_mtmsr() { } reg_s = (ppc_cur_instruction >> 21) & 31; ppc_state.msr = ppc_state.gpr[reg_s]; + mmu_change_mode(); } void dppc_interpreter::ppc_mfspr() { @@ -1278,6 +1279,8 @@ void dppc_interpreter::ppc_rfi() { ppc_state.msr = (new_msr_val | new_srr1_val) & 0xFFFBFFFFUL; ppc_next_instruction_address = ppc_state.spr[SPR::SRR0] & 0xFFFFFFFCUL; + mmu_change_mode(); + grab_return = true; bb_kind = BB_end_kind::BB_RFI; } From 7d8f4d4e61d4a0d3edbaa8924f7b353ee7187a9c Mon Sep 17 00:00:00 2001 From: Maxim Poliakovski Date: Sun, 16 May 2021 13:32:21 +0200 Subject: [PATCH 02/14] Finalize SoftTLB for reads. --- cpu/ppc/ppcmmu.cpp | 173 ++++++++++++++++++++++++++--------------- cpu/ppc/ppcmmu.h | 1 + cpu/ppc/ppcopcodes.cpp | 3 +- 3 files changed, 114 insertions(+), 63 deletions(-) diff --git a/cpu/ppc/ppcmmu.cpp b/cpu/ppc/ppcmmu.cpp index a6a43eb..afd1373 100644 --- a/cpu/ppc/ppcmmu.cpp +++ b/cpu/ppc/ppcmmu.cpp @@ -173,7 +173,7 @@ static inline T read_phys_mem(AddressMapEntry *mru_rgn, uint32_t addr) } if ((mru_rgn->mem_ptr + (addr - mru_rgn->start)) != (uint8_t *)MemAddr) { - LOG_F(ERROR, "TLB address mismatch! Expected: 0x%llu, got: 0x%llu", + LOG_F(ERROR, "TLB address mismatch! Expected: 0x%llx, got: 0x%llx", (uint64_t)(mru_rgn->mem_ptr + (addr - mru_rgn->start)), (uint64_t)MemAddr); } @@ -686,55 +686,6 @@ void mem_write_qword(uint32_t addr, uint64_t value) { write_phys_mem(&last_write_area, addr, value); } -static uint32_t mem_grab_unaligned(uint32_t addr, uint32_t size) { - uint32_t ret = 0; - -#ifdef MMU_DEBUG - LOG_F(WARNING, "Attempt to read unaligned %d bytes from 0x%08X\n", size, addr); -#endif - - if (((addr & 0xFFF) + size) > 0x1000) { - // Special case: misaligned cross-page reads -#ifdef MMU_PROFILING - unaligned_crossp_r++; -#endif - - uint32_t phys_addr; - uint32_t res = 0; - - // Break misaligned memory accesses into multiple, bytewise accesses - // and retranslate on page boundary. - // Because such accesses suffer a performance penalty, they will be - // presumably very rare so don't care much about performance. 
- for (int i = 0; i < size; addr++, phys_addr++, i++) { - if ((ppc_state.msr & 0x10) && (!i || !(addr & 0xFFF))) { - phys_addr = ppc_mmu_addr_translate(addr, 0); - } - - res = (res << 8) | - read_phys_mem(&last_read_area, phys_addr); - } - return res; - - } else { - /* data address translation if enabled */ - if (ppc_state.msr & 0x10) { - addr = ppc_mmu_addr_translate(addr, 0); - } - - if (size == 2) { - return read_phys_mem(&last_read_area, addr); - } else { - return read_phys_mem(&last_read_area, addr); - } - -#ifdef MMU_PROFILING - unaligned_reads++; -#endif - } - - return ret; -} #define PAGE_SIZE_BITS 12 #define TLB_SIZE 4096 @@ -870,19 +821,25 @@ static TLBEntry* tlb2_refill(uint32_t guest_va, int is_write) const uint32_t tag = guest_va & ~0xFFFUL; - // attempt block address translation first - BATResult bat_res = ppc_block_address_translation(guest_va); - if (bat_res.hit) { - // check block protection - if (!bat_res.prot || ((bat_res.prot & 1) && is_write)) { - ppc_state.spr[SPR::DSISR] = 0x08000000 | (is_write << 25); - ppc_state.spr[SPR::DAR] = guest_va; - mmu_exception_handler(Except_Type::EXC_DSI, 0); + /* data address translation if enabled */ + if (ppc_state.msr & 0x10) { + // attempt block address translation first + BATResult bat_res = ppc_block_address_translation(guest_va); + if (bat_res.hit) { + // check block protection + if (!bat_res.prot || ((bat_res.prot & 1) && is_write)) { + ppc_state.spr[SPR::DSISR] = 0x08000000 | (is_write << 25); + ppc_state.spr[SPR::DAR] = guest_va; + mmu_exception_handler(Except_Type::EXC_DSI, 0); + } + phys_addr = bat_res.phys; + } else { + // page address translation + phys_addr = page_address_translate(guest_va, false, + !!(ppc_state.msr & 0x4000), is_write); } - phys_addr = bat_res.phys; } else { - // page address translation - phys_addr = page_address_translate(guest_va, false, !!(ppc_state.msr & 0x4000), is_write); + phys_addr = guest_va; } // look up host virtual address @@ -896,7 +853,8 @@ static TLBEntry* tlb2_refill(uint32_t guest_va, int is_write) tlb_entry->reg_desc = reg_desc; } else { tlb_entry->flags = 1; // memory region backed by host memory - tlb_entry->host_va_offset = (int64_t)reg_desc->mem_ptr - reg_desc->start; + tlb_entry->host_va_offset = (int64_t)reg_desc->mem_ptr - guest_va + + (phys_addr - reg_desc->start); } return tlb_entry; } else { @@ -907,6 +865,46 @@ static TLBEntry* tlb2_refill(uint32_t guest_va, int is_write) } } +void flush_tlb_entry(uint32_t ea) +{ + TLBEntry *tlb_entry, *tlb1, *tlb2; + + const uint32_t tag = ea & ~0xFFFUL; + + for (int m = 0; m < 3; m++) { + switch (m) { + case 0: + tlb1 = &mode1_tlb1[0]; + tlb2 = &mode1_tlb2[0]; + break; + case 1: + tlb1 = &mode1_tlb1[0]; + tlb2 = &mode1_tlb2[0]; + break; + case 2: + tlb1 = &mode1_tlb1[0]; + tlb2 = &mode1_tlb2[0]; + break; + } + + // flush primary TLB + tlb_entry = &tlb1[(ea >> PAGE_SIZE_BITS) & tlb_size_mask]; + if (tlb_entry->tag == tag) { + tlb_entry->tag = TLB_INVALID_TAG; + //LOG_F(INFO, "Invalidated primary TLB entry at 0x%X", ea); + } + + // flush secondary TLB + tlb_entry = &tlb2[((ea >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS]; + for (int i = 0; i < TLB2_WAYS; i++) { + if (tlb_entry[i].tag == tag) { + tlb_entry[i].tag = TLB_INVALID_TAG; + //LOG_F(INFO, "Invalidated secondary TLB entry at 0x%X", ea); + } + } + } +} + static inline uint64_t tlb_translate_addr(uint32_t guest_va) { TLBEntry *tlb1_entry, *tlb2_entry; @@ -970,6 +968,57 @@ static inline uint64_t tlb_translate_addr(uint32_t guest_va) } } +static uint32_t mem_grab_unaligned(uint32_t 
addr, uint32_t size) { + uint32_t ret = 0; + +#ifdef MMU_DEBUG + LOG_F(WARNING, "Attempt to read unaligned %d bytes from 0x%08X\n", size, addr); +#endif + + if (((addr & 0xFFF) + size) > 0x1000) { + // Special case: misaligned cross-page reads +#ifdef MMU_PROFILING + unaligned_crossp_r++; +#endif + + uint32_t phys_addr; + uint32_t res = 0; + + // Break misaligned memory accesses into multiple, bytewise accesses + // and retranslate on page boundary. + // Because such accesses suffer a performance penalty, they will be + // presumably very rare so don't care much about performance. + for (int i = 0; i < size; addr++, phys_addr++, i++) { + tlb_translate_addr(addr); + if ((ppc_state.msr & 0x10) && (!i || !(addr & 0xFFF))) { + phys_addr = ppc_mmu_addr_translate(addr, 0); + } + + res = (res << 8) | + read_phys_mem(&last_read_area, phys_addr); + } + return res; + + } else { + /* data address translation if enabled */ + if (ppc_state.msr & 0x10) { + addr = ppc_mmu_addr_translate(addr, 0); + } + + if (size == 2) { + return read_phys_mem(&last_read_area, addr); + } else { + return read_phys_mem(&last_read_area, addr); + } + +#ifdef MMU_PROFILING + unaligned_reads++; +#endif + } + + return ret; +} + /** Grab a value from memory into a register */ uint8_t mem_grab_byte(uint32_t addr) { tlb_translate_addr(addr); diff --git a/cpu/ppc/ppcmmu.h b/cpu/ppc/ppcmmu.h index f132301..5f6d958 100644 --- a/cpu/ppc/ppcmmu.h +++ b/cpu/ppc/ppcmmu.h @@ -60,6 +60,7 @@ extern void dbat_update(uint32_t bat_reg); extern uint8_t* mmu_get_dma_mem(uint32_t addr, uint32_t size); extern void mmu_change_mode(void); +extern void flush_tlb_entry(uint32_t ea); extern void ppc_set_cur_instruction(const uint8_t* ptr); extern void mem_write_byte(uint32_t addr, uint8_t value); diff --git a/cpu/ppc/ppcopcodes.cpp b/cpu/ppc/ppcopcodes.cpp index 34d7a2c..5b05917 100644 --- a/cpu/ppc/ppcopcodes.cpp +++ b/cpu/ppc/ppcopcodes.cpp @@ -2014,7 +2014,8 @@ void dppc_interpreter::ppc_tlbie() { #ifdef CPU_PROFILING num_supervisor_instrs++; #endif - /* placeholder */ + + flush_tlb_entry(ppc_state.gpr[(ppc_cur_instruction >> 11) & 31]); } void dppc_interpreter::ppc_tlbia() { From 592d32017e847a284f2dccaee13b858fbca4c0ea Mon Sep 17 00:00:00 2001 From: Maxim Poliakovski Date: Sun, 20 Jun 2021 22:28:48 +0200 Subject: [PATCH 03/14] memaccess: fix [-Wshift-count-overflow] compiler warning. --- memaccess.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/memaccess.h b/memaccess.h index 76de00d..0e05f6f 100644 --- a/memaccess.h +++ b/memaccess.h @@ -34,9 +34,10 @@ #define READ_DWORD_BE_U(addr) (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3]) /* read an unaligned big-endian QWORD (32bit) */ -#define READ_QWORD_BE_U(addr) \ - (((addr)[0] << 56) | ((addr)[1] << 48) | ((addr)[2] << 40) | ((addr)[3] << 32) | \ - ((addr)[4] << 24) | ((addr)[5] << 16) | ((addr)[6] << 8) | (addr)[7]) +#define READ_QWORD_BE_U(addr) \ + ((uint64_t((addr)[0]) << 56) | (uint64_t((addr)[1]) << 48) | \ + (uint64_t((addr)[2]) << 40) | (uint64_t((addr)[3]) << 32) | \ + ((addr)[4] << 24) | ((addr)[5] << 16) | ((addr)[6] << 8) | (addr)[7]) /* read an unaligned little-endian WORD (16bit) */ #define READ_WORD_LE_U(addr) (((addr)[1] << 8) | (addr)[0]) From 4da95a66d7b315c53c66e2ed1dab02f54bfc3499 Mon Sep 17 00:00:00 2001 From: Maxim Poliakovski Date: Sun, 20 Jun 2021 22:33:03 +0200 Subject: [PATCH 04/14] Make emulated memory loads to use SoftTLB. 
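
The previous two commits added the SoftTLB read machinery; this one routes the
interpreter's load handlers through it. mmu_read_vmem<T>() probes the per-mode
primary TLB, falls back to the 4-way set-associative secondary TLB and only
performs a full BAT/page-table translation via tlb2_refill() when both miss.
The fast path relies on the host_va_offset trick: every TLB entry stores
"host pointer of the page minus guest page address", so a hit turns a guest
load into one 64-bit addition plus a host memory access.

The self-contained sketch below illustrates that trick with a single
direct-mapped entry and an identity guest-to-physical mapping. All names in it
(SketchEntry, guest_ram, read_byte) are invented for the example; it is an
illustration of the idea, not code from this series.

    #include <cstdint>

    struct SketchEntry {
        uint32_t tag;            // guest page address, 0xFFFFFFFF means invalid
        int64_t  host_va_offset; // host page pointer minus guest page address
    };

    static uint8_t     guest_ram[0x2000];       // stand-in for guest memory
    static SketchEntry entry = {0xFFFFFFFFU, 0};

    uint8_t read_byte(uint32_t guest_va) {      // assumes guest_va < 0x2000
        uint32_t tag = guest_va & ~0xFFFU;
        if (entry.tag != tag) {                 // miss: translate once per page
            uint32_t guest_pa = guest_va;       // identity mapping in this sketch
            entry.tag = tag;
            entry.host_va_offset = (int64_t)(guest_ram + guest_pa) - guest_va;
        }
        // hit: a single addition yields the host address
        return *(uint8_t*)(entry.host_va_offset + guest_va);
    }
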
--- cpu/ppc/ppcfpopcodes.cpp | 12 ++-- cpu/ppc/ppcmmu.cpp | 123 +++++++++++++++++++++++++++++++++++++++ cpu/ppc/ppcmmu.h | 3 + cpu/ppc/ppcopcodes.cpp | 68 ++++++++++++++-------- 4 files changed, 179 insertions(+), 27 deletions(-) diff --git a/cpu/ppc/ppcfpopcodes.cpp b/cpu/ppc/ppcfpopcodes.cpp index b509cad..e3078bf 100644 --- a/cpu/ppc/ppcfpopcodes.cpp +++ b/cpu/ppc/ppcfpopcodes.cpp @@ -776,7 +776,8 @@ void dppc_interpreter::ppc_lfd() { ppc_grab_regsfpdia(); ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); ppc_effective_address += (reg_a) ? val_reg_a : 0; - ppc_result64_d = mem_grab_qword(ppc_effective_address); + //ppc_result64_d = mem_grab_qword(ppc_effective_address); + ppc_result64_d = mmu_read_vmem(ppc_effective_address); ppc_store_dfpresult_int(reg_d); } @@ -785,7 +786,8 @@ void dppc_interpreter::ppc_lfdu() { if (reg_a != 0) { ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); ppc_effective_address += val_reg_a; - ppc_result64_d = mem_grab_qword(ppc_effective_address); + //ppc_result64_d = mem_grab_qword(ppc_effective_address); + ppc_result64_d = mmu_read_vmem(ppc_effective_address); ppc_store_dfpresult_int(reg_d); ppc_state.gpr[reg_a] = ppc_effective_address; } else { @@ -796,7 +798,8 @@ void dppc_interpreter::ppc_lfdu() { void dppc_interpreter::ppc_lfdx() { ppc_grab_regsfpdiab(); ppc_effective_address = (reg_a) ? val_reg_a + val_reg_b : val_reg_b; - ppc_result64_d = mem_grab_qword(ppc_effective_address); + //ppc_result64_d = mem_grab_qword(ppc_effective_address); + ppc_result64_d = mmu_read_vmem(ppc_effective_address); ppc_store_dfpresult_int(reg_d); } @@ -804,7 +807,8 @@ void dppc_interpreter::ppc_lfdux() { ppc_grab_regsfpdiab(); if (reg_a) { ppc_effective_address = val_reg_a + val_reg_b; - ppc_result64_d = mem_grab_qword(ppc_effective_address); + //ppc_result64_d = mem_grab_qword(ppc_effective_address); + ppc_result64_d = mmu_read_vmem(ppc_effective_address); ppc_store_dfpresult_int(reg_d); ppc_state.gpr[reg_a] = ppc_effective_address; } else { diff --git a/cpu/ppc/ppcmmu.cpp b/cpu/ppc/ppcmmu.cpp index afd1373..f9f8755 100644 --- a/cpu/ppc/ppcmmu.cpp +++ b/cpu/ppc/ppcmmu.cpp @@ -1019,6 +1019,129 @@ static uint32_t mem_grab_unaligned(uint32_t addr, uint32_t size) { return ret; } +static inline TLBEntry * lookup_secondary_tlb(uint32_t guest_va, uint32_t tag) { + TLBEntry *tlb_entry; + + tlb_entry = &pCurTLB2[((guest_va >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS]; + if (tlb_entry->tag == tag) { + // update LRU bits + tlb_entry[0].lru_bits = 0x3; + tlb_entry[1].lru_bits = 0x2; + tlb_entry[2].lru_bits &= 0x1; + tlb_entry[3].lru_bits &= 0x1; + } else if (tlb_entry[1].tag == tag) { + tlb_entry = &tlb_entry[1]; + // update LRU bits + tlb_entry[0].lru_bits = 0x2; + tlb_entry[1].lru_bits = 0x3; + tlb_entry[2].lru_bits &= 0x1; + tlb_entry[3].lru_bits &= 0x1; + } else if (tlb_entry[2].tag == tag) { + tlb_entry = &tlb_entry[2]; + // update LRU bits + tlb_entry[0].lru_bits &= 0x1; + tlb_entry[1].lru_bits &= 0x1; + tlb_entry[2].lru_bits = 0x3; + tlb_entry[3].lru_bits = 0x2; + } else if (tlb_entry[3].tag == tag) { + tlb_entry = &tlb_entry[3]; + // update LRU bits + tlb_entry[0].lru_bits &= 0x1; + tlb_entry[1].lru_bits &= 0x1; + tlb_entry[2].lru_bits = 0x2; + tlb_entry[3].lru_bits = 0x3; + } else { + return nullptr; + } + return tlb_entry; +} + +static uint32_t read_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t size); + +template +inline T mmu_read_vmem(uint32_t guest_va) { + TLBEntry *tlb1_entry, *tlb2_entry; + uint8_t *host_va; 
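+    // Lookup order: primary TLB (direct-mapped, per MMU mode), then the 4-way
+    // secondary TLB, then a full translation via tlb2_refill(). MMIO pages are
+    // forwarded to devobj->read(); unaligned accesses take read_unaligned().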
+ + const uint32_t tag = guest_va & ~0xFFFUL; + + // look up guest virtual address in the primary TLB + tlb1_entry = &pCurTLB1[(guest_va >> PAGE_SIZE_BITS) & tlb_size_mask]; + if (tlb1_entry->tag == tag) { // primary TLB hit -> fast path + host_va = (uint8_t *)(tlb1_entry->host_va_offset + guest_va); + } else { + // primary TLB miss -> look up address in the secondary TLB + tlb2_entry = lookup_secondary_tlb(guest_va, tag); + if (tlb2_entry == nullptr) { + // secondary TLB miss -> + // perform full address translation and refill the secondary TLB + tlb2_entry = tlb2_refill(guest_va, 0); + } + + if (tlb2_entry->flags & 1) { // is it a real memory region? + // refill the primary TLB + tlb1_entry->tag = tag; + tlb1_entry->flags = 1; + tlb1_entry->host_va_offset = tlb2_entry->host_va_offset; + host_va = (uint8_t *)(tlb1_entry->host_va_offset + guest_va); + } else { // otherwise, it's an access to a memory-mapped device + return ( + tlb2_entry->reg_desc->devobj->read(tlb2_entry->reg_desc->start, + guest_va - tlb2_entry->reg_desc->start, sizeof(T)) + ); + } + } + + // handle unaligned memory accesses + if (sizeof(T) > 1 && (guest_va & (sizeof(T) - 1))) { + return read_unaligned(guest_va, host_va, sizeof(T)); + } + + // handle aligned memory accesses + switch(sizeof(T)) { + case 1: + return *host_va; + case 2: + return READ_WORD_BE_A(host_va); + case 4: + return READ_DWORD_BE_A(host_va); + case 8: + return READ_QWORD_BE_A(host_va); + } +} + +// explicitely instantiate all required mmu_read_vmem variants +// to avoid linking errors +template uint8_t mmu_read_vmem(uint32_t guest_va); +template uint16_t mmu_read_vmem(uint32_t guest_va); +template uint32_t mmu_read_vmem(uint32_t guest_va); +template uint64_t mmu_read_vmem(uint32_t guest_va); + +static uint32_t read_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t size) +{ + uint32_t result = 0; + + // is it a misaligned cross-page read? + if (((guest_va & 0xFFF) + size) > 0x1000) { + // Break such a memory access into multiple, bytewise accesses. + // Because such accesses suffer a performance penalty, they will be + // presumably very rare so don't waste time optimizing the code below. + for (int i = 0; i < size; guest_va++, i++) { + result = (result << 8) | mmu_read_vmem(guest_va); + } + } else { + switch(size) { + case 2: + return READ_WORD_BE_U(host_va); + case 4: + return READ_DWORD_BE_U(host_va); + case 8: // FIXME: should we raise alignment exception here? + return READ_QWORD_BE_U(host_va); + } + } + return result; +} + /** Grab a value from memory into a register */ uint8_t mem_grab_byte(uint32_t addr) { tlb_translate_addr(addr); diff --git a/cpu/ppc/ppcmmu.h b/cpu/ppc/ppcmmu.h index 5f6d958..dbf5fd0 100644 --- a/cpu/ppc/ppcmmu.h +++ b/cpu/ppc/ppcmmu.h @@ -74,4 +74,7 @@ extern uint64_t mem_grab_qword(uint32_t addr); extern uint64_t mem_read_dbg(uint32_t virt_addr, uint32_t size); extern uint8_t* quickinstruction_translate(uint32_t address_grab); +template +extern inline T mmu_read_vmem(uint32_t guest_va); + #endif // PPCMEMORY_H diff --git a/cpu/ppc/ppcopcodes.cpp b/cpu/ppc/ppcopcodes.cpp index 5b05917..1bf96c0 100644 --- a/cpu/ppc/ppcopcodes.cpp +++ b/cpu/ppc/ppcopcodes.cpp @@ -1580,7 +1580,8 @@ void dppc_interpreter::ppc_lbz() { ppc_grab_regsda(); ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); ppc_effective_address += (reg_a > 0) ? 
ppc_result_a : 0; - ppc_result_d = mem_grab_byte(ppc_effective_address); + //ppc_result_d = mem_grab_byte(ppc_effective_address); + ppc_result_d = mmu_read_vmem(ppc_effective_address); ppc_store_result_regd(); } @@ -1592,7 +1593,8 @@ void dppc_interpreter::ppc_lbzu() { ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); if ((reg_a != reg_d) || reg_a != 0) { ppc_effective_address += ppc_result_a; - ppc_result_d = mem_grab_byte(ppc_effective_address); + //ppc_result_d = mem_grab_byte(ppc_effective_address); + ppc_result_d = mmu_read_vmem(ppc_effective_address); ppc_result_a = ppc_effective_address; ppc_store_result_regd(); ppc_store_result_rega(); @@ -1607,7 +1609,8 @@ void dppc_interpreter::ppc_lbzx() { #endif ppc_grab_regsdab(); ppc_effective_address = reg_a ? (ppc_result_a + ppc_result_b) : ppc_result_b; - ppc_result_d = mem_grab_byte(ppc_effective_address); + //ppc_result_d = mem_grab_byte(ppc_effective_address); + ppc_result_d = mmu_read_vmem(ppc_effective_address); ppc_store_result_regd(); } @@ -1618,7 +1621,8 @@ void dppc_interpreter::ppc_lbzux() { ppc_grab_regsdab(); if ((reg_a != reg_d) || reg_a != 0) { ppc_effective_address = ppc_result_a + ppc_result_b; - ppc_result_d = mem_grab_byte(ppc_effective_address); + //ppc_result_d = mem_grab_byte(ppc_effective_address); + ppc_result_d = mmu_read_vmem(ppc_effective_address); ppc_result_a = ppc_effective_address; ppc_store_result_regd(); ppc_store_result_rega(); @@ -1635,7 +1639,8 @@ void dppc_interpreter::ppc_lhz() { ppc_grab_regsda(); ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); ppc_effective_address += reg_a ? ppc_result_a : 0; - ppc_result_d = mem_grab_word(ppc_effective_address); + //ppc_result_d = mem_grab_word(ppc_effective_address); + ppc_result_d = mmu_read_vmem(ppc_effective_address); ppc_store_result_regd(); } @@ -1647,7 +1652,8 @@ void dppc_interpreter::ppc_lhzu() { if ((reg_a != reg_d) || reg_a != 0) { ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); ppc_effective_address += ppc_result_a; - ppc_result_d = mem_grab_word(ppc_effective_address); + //ppc_result_d = mem_grab_word(ppc_effective_address); + ppc_result_d = mmu_read_vmem(ppc_effective_address); ppc_result_a = ppc_effective_address; ppc_store_result_regd(); ppc_store_result_rega(); @@ -1662,7 +1668,8 @@ void dppc_interpreter::ppc_lhzx() { #endif ppc_grab_regsdab(); ppc_effective_address = reg_a ? (ppc_result_a + ppc_result_b) : ppc_result_b; - ppc_result_d = mem_grab_word(ppc_effective_address); + //ppc_result_d = mem_grab_word(ppc_effective_address); + ppc_result_d = mmu_read_vmem(ppc_effective_address); ppc_store_result_regd(); } @@ -1673,8 +1680,9 @@ void dppc_interpreter::ppc_lhzux() { ppc_grab_regsdab(); if ((reg_a != reg_d) || reg_a != 0) { ppc_effective_address = ppc_result_a + ppc_result_b; - ppc_result_d = mem_grab_word(ppc_effective_address); - ppc_result_a = ppc_effective_address; + //ppc_result_d = mem_grab_word(ppc_effective_address); + ppc_result_d = mmu_read_vmem(ppc_effective_address); + ppc_result_a = ppc_effective_address; ppc_store_result_regd(); ppc_store_result_rega(); } else { @@ -1689,7 +1697,8 @@ void dppc_interpreter::ppc_lha() { ppc_grab_regsda(); ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); ppc_effective_address += (reg_a > 0) ? 
ppc_result_a : 0; - uint16_t val = mem_grab_word(ppc_effective_address); + //uint16_t val = mem_grab_word(ppc_effective_address); + uint16_t val = mmu_read_vmem(ppc_effective_address); if (val & 0x8000) { ppc_result_d = 0xFFFF0000UL | (uint32_t)val; } else { @@ -1706,7 +1715,8 @@ void dppc_interpreter::ppc_lhau() { if ((reg_a != reg_d) || reg_a != 0) { ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); ppc_effective_address += ppc_result_a; - uint16_t val = mem_grab_word(ppc_effective_address); + //uint16_t val = mem_grab_word(ppc_effective_address); + uint16_t val = mmu_read_vmem(ppc_effective_address); if (val & 0x8000) { ppc_result_d = 0xFFFF0000UL | (uint32_t)val; } else { @@ -1726,7 +1736,8 @@ void dppc_interpreter::ppc_lhaux() { #endif ppc_grab_regsdab(); ppc_effective_address = reg_a ? (ppc_result_a + ppc_result_b) : ppc_result_b; - uint16_t val = mem_grab_word(ppc_effective_address); + //uint16_t val = mem_grab_word(ppc_effective_address); + uint16_t val = mmu_read_vmem(ppc_effective_address); if (val & 0x8000) { ppc_result_d = 0xFFFF0000UL | (uint32_t)val; } else { @@ -1743,7 +1754,8 @@ void dppc_interpreter::ppc_lhax() { #endif ppc_grab_regsdab(); ppc_effective_address = reg_a ? (ppc_result_a + ppc_result_b) : ppc_result_b; - uint16_t val = mem_grab_word(ppc_effective_address); + //uint16_t val = mem_grab_word(ppc_effective_address); + uint16_t val = mmu_read_vmem(ppc_effective_address); if (val & 0x8000) { ppc_result_d = 0xFFFF0000UL | (uint32_t)val; } else { @@ -1758,7 +1770,8 @@ void dppc_interpreter::ppc_lhbrx() { #endif ppc_grab_regsdab(); ppc_effective_address = reg_a ? (ppc_result_a + ppc_result_b) : ppc_result_b; - ppc_result_d = (uint32_t)(BYTESWAP_16(mem_grab_word(ppc_effective_address))); + //ppc_result_d = (uint32_t)(BYTESWAP_16(mem_grab_word(ppc_effective_address))); + ppc_result_d = (uint32_t)(BYTESWAP_16(mmu_read_vmem(ppc_effective_address))); ppc_store_result_regd(); } @@ -1769,7 +1782,8 @@ void dppc_interpreter::ppc_lwz() { ppc_grab_regsda(); ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); ppc_effective_address += (reg_a > 0) ? ppc_result_a : 0; - ppc_result_d = mem_grab_dword(ppc_effective_address); + //ppc_result_d = mem_grab_dword(ppc_effective_address); + ppc_result_d = mmu_read_vmem(ppc_effective_address); ppc_store_result_regd(); } @@ -1779,7 +1793,8 @@ void dppc_interpreter::ppc_lwbrx() { #endif ppc_grab_regsdab(); ppc_effective_address = reg_a ? (ppc_result_a + ppc_result_b) : ppc_result_b; - ppc_result_d = BYTESWAP_32(mem_grab_dword(ppc_effective_address)); + //ppc_result_d = BYTESWAP_32(mem_grab_dword(ppc_effective_address)); + ppc_result_d = BYTESWAP_32(mmu_read_vmem(ppc_effective_address)); ppc_store_result_regd(); } @@ -1791,7 +1806,8 @@ void dppc_interpreter::ppc_lwzu() { ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); if ((reg_a != reg_d) || reg_a != 0) { ppc_effective_address += ppc_result_a; - ppc_result_d = mem_grab_dword(ppc_effective_address); + //ppc_result_d = mem_grab_dword(ppc_effective_address); + ppc_result_d = mmu_read_vmem(ppc_effective_address); ppc_store_result_regd(); ppc_result_a = ppc_effective_address; ppc_store_result_rega(); @@ -1806,7 +1822,8 @@ void dppc_interpreter::ppc_lwzx() { #endif ppc_grab_regsdab(); ppc_effective_address = reg_a ? 
(ppc_result_a + ppc_result_b) : ppc_result_b; - ppc_result_d = mem_grab_dword(ppc_effective_address); + //ppc_result_d = mem_grab_dword(ppc_effective_address); + ppc_result_d = mmu_read_vmem(ppc_effective_address); ppc_store_result_regd(); } @@ -1820,7 +1837,8 @@ void dppc_interpreter::ppc_lwzux() { } else { ppc_exception_handler(Except_Type::EXC_PROGRAM, Exc_Cause::ILLEGAL_OP); } - ppc_result_d = mem_grab_dword(ppc_effective_address); + //ppc_result_d = mem_grab_dword(ppc_effective_address); + ppc_result_d = mmu_read_vmem(ppc_effective_address); ppc_result_a = ppc_effective_address; ppc_store_result_regd(); ppc_store_result_rega(); @@ -1834,7 +1852,8 @@ void dppc_interpreter::ppc_lwarx() { ppc_grab_regsdab(); ppc_effective_address = (reg_a == 0) ? ppc_result_b : (ppc_result_a + ppc_result_b); ppc_state.reserve = true; - ppc_result_d = mem_grab_dword(ppc_effective_address); + //ppc_result_d = mem_grab_dword(ppc_effective_address); + ppc_result_d = mmu_read_vmem(ppc_effective_address); ppc_store_result_regd(); } @@ -1847,7 +1866,8 @@ void dppc_interpreter::ppc_lmw() { ppc_effective_address += (reg_a > 0) ? ppc_result_a : 0; // How many words to load in memory - using a do-while for this do { - ppc_state.gpr[reg_d] = mem_grab_dword(ppc_effective_address); + //ppc_state.gpr[reg_d] = mem_grab_dword(ppc_effective_address); + ppc_state.gpr[reg_d] = mmu_read_vmem(ppc_effective_address); ppc_effective_address += 4; reg_d++; } while (reg_d < 32); @@ -1884,7 +1904,8 @@ void dppc_interpreter::ppc_lswi() { grab_inb = 0; break; default: - ppc_state.gpr[reg_d] = mem_grab_word(ppc_effective_address); + //ppc_state.gpr[reg_d] = mem_grab_word(ppc_effective_address); + ppc_state.gpr[reg_d] = mmu_read_vmem(ppc_effective_address); reg_d++; ppc_effective_address += 4; grab_inb -= 4; @@ -1931,7 +1952,8 @@ void dppc_interpreter::ppc_lswx() { grab_inb = 0; break; default: - ppc_state.gpr[reg_d] = mem_grab_word(ppc_effective_address); + //ppc_state.gpr[reg_d] = mem_grab_word(ppc_effective_address); + ppc_state.gpr[reg_d] = mmu_read_vmem(ppc_effective_address); reg_d++; ppc_effective_address += 4; grab_inb -= 4; From 094d9a9c2ff292a574f42c8359f7c7bf7c2c8338 Mon Sep 17 00:00:00 2001 From: Maxim Poliakovski Date: Mon, 21 Jun 2021 00:11:00 +0200 Subject: [PATCH 05/14] Remove inline to fix compiler warnings. --- cpu/ppc/ppcmmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpu/ppc/ppcmmu.h b/cpu/ppc/ppcmmu.h index dbf5fd0..c05cc89 100644 --- a/cpu/ppc/ppcmmu.h +++ b/cpu/ppc/ppcmmu.h @@ -75,6 +75,6 @@ extern uint64_t mem_read_dbg(uint32_t virt_addr, uint32_t size); extern uint8_t* quickinstruction_translate(uint32_t address_grab); template -extern inline T mmu_read_vmem(uint32_t guest_va); +extern T mmu_read_vmem(uint32_t guest_va); #endif // PPCMEMORY_H From 4f3dd797bee69b6fd9f934463537897a95957764 Mon Sep 17 00:00:00 2001 From: Maxim Poliakovski Date: Tue, 27 Jul 2021 12:58:42 +0200 Subject: [PATCH 06/14] Clean up memaccess header. --- memaccess.h | 69 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 42 insertions(+), 27 deletions(-) diff --git a/memaccess.h b/memaccess.h index 0e05f6f..8f25d12 100644 --- a/memaccess.h +++ b/memaccess.h @@ -1,4 +1,4 @@ -/** @file Set of macros for accessing host memory units of various sizes +/** @file Set of macros for accessing host memory in units of various sizes and endianness. 
*/ @@ -31,7 +31,8 @@ #define READ_WORD_BE_U(addr) (((addr)[0] << 8) | (addr)[1]) /* read an unaligned big-endian DWORD (32bit) */ -#define READ_DWORD_BE_U(addr) (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3]) +#define READ_DWORD_BE_U(addr) \ + (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3]) /* read an unaligned big-endian QWORD (32bit) */ #define READ_QWORD_BE_U(addr) \ @@ -43,14 +44,15 @@ #define READ_WORD_LE_U(addr) (((addr)[1] << 8) | (addr)[0]) /* read an unaligned little-endian DWORD (32bit) */ -#define READ_DWORD_LE_U(addr) (((addr)[3] << 24) | ((addr)[2] << 16) | ((addr)[1] << 8) | (addr)[0]) +#define READ_DWORD_LE_U(addr) \ + (((addr)[3] << 24) | ((addr)[2] << 16) | ((addr)[1] << 8) | (addr)[0]) -/* read an unaligned little-endian DWORD (32bit) */ -#define READ_QWORD_LE_U(addr) \ - (((addr)[7] << 56) | ((addr)[6] << 48) | ((addr)[5] << 40) | ((addr)[4] << 32) | \ +/* read an unaligned little-endian DWORD (64bit) */ +#define READ_QWORD_LE_U(addr) \ + ((uint64_t((addr)[7]) << 56) | (uint64_t((addr)[6]) << 48) | \ + (uint64_t((addr)[5]) << 40) | (uint64_t((addr)[4]) << 32) | \ ((addr)[3] << 24) | ((addr)[2] << 16) | ((addr)[1] << 8) | (addr)[0]) - /* write an aligned big-endian WORD (16bit) */ #define WRITE_WORD_BE_A(addr, val) (*((uint16_t*)((addr))) = BYTESWAP_16(val)) @@ -61,19 +63,32 @@ #define WRITE_QWORD_BE_A(addr, val) (*((uint64_t*)((addr))) = BYTESWAP_64(val)) /* write an unaligned big-endian WORD (16bit) */ -#define WRITE_WORD_BE_U(addr, val) \ - do { \ - (addr)[0] = ((val) >> 8) & 0xFF; \ - (addr)[1] = (val)&0xFF; \ +#define WRITE_WORD_BE_U(addr, val) \ + do { \ + (addr)[0] = ((val) >> 8) & 0xFF; \ + (addr)[1] = (val) & 0xFF; \ } while (0) /* write an unaligned big-endian DWORD (32bit) */ -#define WRITE_DWORD_BE_U(addr, val) \ - do { \ - (addr)[0] = ((val) >> 24) & 0xFF; \ - (addr)[1] = ((val) >> 16) & 0xFF; \ - (addr)[2] = ((val) >> 8) & 0xFF; \ - (addr)[3] = (val)&0xFF; \ +#define WRITE_DWORD_BE_U(addr, val) \ + do { \ + (addr)[0] = ((val) >> 24) & 0xFF; \ + (addr)[1] = ((val) >> 16) & 0xFF; \ + (addr)[2] = ((val) >> 8) & 0xFF; \ + (addr)[3] = (val) & 0xFF; \ + } while (0) + +/* write an unaligned big-endian DWORD (64bit) */ +#define WRITE_QWORD_BE_U(addr, val) \ + do { \ + (addr)[0] = ((uint64_t)(val) >> 56) & 0xFF; \ + (addr)[1] = ((uint64_t)(val) >> 48) & 0xFF; \ + (addr)[2] = ((uint64_t)(val) >> 40) & 0xFF; \ + (addr)[3] = ((uint64_t)(val) >> 32) & 0xFF; \ + (addr)[4] = ((val) >> 24) & 0xFF; \ + (addr)[5] = ((val) >> 16) & 0xFF; \ + (addr)[6] = ((val) >> 8) & 0xFF; \ + (addr)[7] = (val) & 0xFF; \ } while (0) /* write an aligned little-endian WORD (16bit) */ @@ -86,19 +101,19 @@ #define WRITE_QWORD_LE_A(addr, val) (*((uint64_t*)((addr))) = (val)) /* write an unaligned little-endian WORD (16bit) */ -#define WRITE_WORD_LE_U(addr, val) \ - do { \ - (addr)[0] = (val)&0xFF; \ - (addr)[1] = ((val) >> 8) & 0xFF; \ +#define WRITE_WORD_LE_U(addr, val) \ + do { \ + (addr)[0] = (val)&0xFF; \ + (addr)[1] = ((val) >> 8) & 0xFF; \ } while (0) /* write an unaligned little-endian DWORD (32bit) */ -#define WRITE_DWORD_LE_U(addr, val) \ - do { \ - (addr)[0] = (val)&0xFF; \ - (addr)[1] = ((val) >> 8) & 0xFF; \ - (addr)[2] = ((val) >> 16) & 0xFF; \ - (addr)[3] = ((val) >> 24) & 0xFF; \ +#define WRITE_DWORD_LE_U(addr, val) \ + do { \ + (addr)[0] = (val)&0xFF; \ + (addr)[1] = ((val) >> 8) & 0xFF; \ + (addr)[2] = ((val) >> 16) & 0xFF; \ + (addr)[3] = ((val) >> 24) & 0xFF; \ } while (0) /* read value of the specified size from memory starting at 
addr, From 089645e830746580e39fb45f607bf4ccc7223604 Mon Sep 17 00:00:00 2001 From: Maxim Poliakovski Date: Tue, 3 Aug 2021 16:01:32 +0200 Subject: [PATCH 07/14] Implement SoftTLB for writes. --- cpu/ppc/poweropcodes.cpp | 14 +- cpu/ppc/ppcemu.h | 4 + cpu/ppc/ppcfpopcodes.cpp | 51 +++-- cpu/ppc/ppcmmu.cpp | 437 +++++++++++++++++++++++++++++++-------- cpu/ppc/ppcmmu.h | 38 +++- cpu/ppc/ppcopcodes.cpp | 163 ++++++++++----- main.cpp | 4 +- 7 files changed, 546 insertions(+), 165 deletions(-) diff --git a/cpu/ppc/poweropcodes.cpp b/cpu/ppc/poweropcodes.cpp index 6e2b3c1..3ce0e2c 100644 --- a/cpu/ppc/poweropcodes.cpp +++ b/cpu/ppc/poweropcodes.cpp @@ -148,22 +148,26 @@ void dppc_interpreter::power_lscbx() { if (match_found == false) { switch (shift_amount) { case 0: - return_value = mem_grab_byte(ppc_effective_address); + return_value = mmu_read_vmem(ppc_effective_address); + //return_value = mem_grab_byte(ppc_effective_address); ppc_result_d = (ppc_result_d & 0x00FFFFFF) | (return_value << 24); ppc_store_result_regd(); break; case 1: - return_value = mem_grab_byte(ppc_effective_address); + return_value = mmu_read_vmem(ppc_effective_address); + //return_value = mem_grab_byte(ppc_effective_address); ppc_result_d = (ppc_result_d & 0xFF00FFFF) | (return_value << 16); ppc_store_result_regd(); break; case 2: - return_value = mem_grab_byte(ppc_effective_address); + return_value = mmu_read_vmem(ppc_effective_address); + //return_value = mem_grab_byte(ppc_effective_address); ppc_result_d = (ppc_result_d & 0xFFFF00FF) | (return_value << 8); ppc_store_result_regd(); break; case 3: - return_value = mem_grab_byte(ppc_effective_address); + return_value = mmu_read_vmem(ppc_effective_address); + //return_value = mem_grab_byte(ppc_effective_address); ppc_result_d = (ppc_result_d & 0xFFFFFF00) | return_value; ppc_store_result_regd(); break; @@ -494,4 +498,4 @@ void dppc_interpreter::power_srlq() { void dppc_interpreter::power_srq() { LOG_F(WARNING, "OOPS! Placeholder for srq!!! \n"); -} \ No newline at end of file +} diff --git a/cpu/ppc/ppcemu.h b/cpu/ppc/ppcemu.h index d396fb2..903f911 100644 --- a/cpu/ppc/ppcemu.h +++ b/cpu/ppc/ppcemu.h @@ -25,6 +25,7 @@ along with this program. If not, see . 
#include "devices/memctrlbase.h" #include "endianswap.h" #include +#include #include #include @@ -301,6 +302,9 @@ void ppc_fp_changecrf1(); // MEMORY DECLARATIONS extern MemCtrlBase* mem_ctrl_instance; +//typedef std::function CtxSyncCallback; +extern void add_ctx_sync_action(const std::function &); + // The functions used by the PowerPC processor namespace dppc_interpreter { extern void ppc_bcctr(); diff --git a/cpu/ppc/ppcfpopcodes.cpp b/cpu/ppc/ppcfpopcodes.cpp index e3078bf..b4d6d43 100644 --- a/cpu/ppc/ppcfpopcodes.cpp +++ b/cpu/ppc/ppcfpopcodes.cpp @@ -674,11 +674,11 @@ void dppc_interpreter::ppc_fctiw() { if (std::isnan(val_reg_b)) { ppc_state.fpr[reg_d].int64_r = 0x80000000; ppc_state.fpscr |= 0x1000100; - } + } else if (val_reg_b > static_cast(0x7fffffff)) { ppc_state.fpr[reg_d].int64_r = 0x7fffffff; ppc_state.fpscr |= 0x100; - } + } else if (val_reg_b < -static_cast(0x80000000)) { ppc_state.fpr[reg_d].int64_r = 0x80000000; ppc_state.fpscr |= 0x100; @@ -696,7 +696,7 @@ void dppc_interpreter::ppc_fctiw() { } ppc_store_dfpresult_int(reg_d); - + } if (rc_flag) @@ -710,15 +710,15 @@ void dppc_interpreter::ppc_fctiwz() { if (std::isnan(val_reg_b)) { ppc_state.fpr[reg_d].int64_r = 0x80000000; ppc_state.fpscr |= 0x1000100; - } + } else if (val_reg_b > static_cast(0x7fffffff)) { ppc_state.fpr[reg_d].int64_r = 0x7fffffff; ppc_state.fpscr |= 0x100; - } + } else if (val_reg_b < -static_cast(0x80000000)) { ppc_state.fpr[reg_d].int64_r = 0x80000000; ppc_state.fpscr |= 0x100; - } + } else { ppc_result64_d = round_to_zero(val_reg_b); @@ -735,7 +735,8 @@ void dppc_interpreter::ppc_lfs() { ppc_grab_regsfpdia(); ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); ppc_effective_address += (reg_a) ? val_reg_a : 0; - ppc_result64_d = mem_grab_dword(ppc_effective_address); + ppc_result64_d = mmu_read_vmem(ppc_effective_address); + //ppc_result64_d = mem_grab_dword(ppc_effective_address); ppc_store_sfpresult_int(reg_d); } @@ -745,7 +746,8 @@ void dppc_interpreter::ppc_lfsu() { if (reg_a) { ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); ppc_effective_address += (reg_a) ? val_reg_a : 0; - ppc_result64_d = mem_grab_dword(ppc_effective_address); + ppc_result64_d = mmu_read_vmem(ppc_effective_address); + //ppc_result64_d = mem_grab_dword(ppc_effective_address); ppc_store_sfpresult_int(reg_d); ppc_state.gpr[reg_a] = ppc_effective_address; } else { @@ -756,7 +758,8 @@ void dppc_interpreter::ppc_lfsu() { void dppc_interpreter::ppc_lfsx() { ppc_grab_regsfpdiab(); ppc_effective_address = (reg_a) ? val_reg_a + val_reg_b : val_reg_b; - ppc_result64_d = mem_grab_dword(ppc_effective_address); + ppc_result64_d = mmu_read_vmem(ppc_effective_address); + //ppc_result64_d = mem_grab_dword(ppc_effective_address); ppc_store_sfpresult_int(reg_d); } @@ -764,7 +767,8 @@ void dppc_interpreter::ppc_lfsux() { ppc_grab_regsfpdiab(); if (reg_a) { ppc_effective_address = val_reg_a + val_reg_b; - ppc_result64_d = mem_grab_dword(ppc_effective_address); + ppc_result64_d = mmu_read_vmem(ppc_effective_address); + //ppc_result64_d = mem_grab_dword(ppc_effective_address); ppc_store_sfpresult_int(reg_d); ppc_state.gpr[reg_a] = ppc_effective_address; } else { @@ -820,7 +824,8 @@ void dppc_interpreter::ppc_stfs() { ppc_grab_regsfpsia(); ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); ppc_effective_address += (reg_a) ? 
val_reg_a : 0; - mem_write_dword(ppc_effective_address, uint32_t(ppc_state.fpr[reg_s].int64_r)); + mmu_write_vmem(ppc_effective_address, uint32_t(ppc_state.fpr[reg_s].int64_r)); + //mem_write_dword(ppc_effective_address, uint32_t(ppc_state.fpr[reg_s].int64_r)); } void dppc_interpreter::ppc_stfsu() { @@ -828,7 +833,8 @@ void dppc_interpreter::ppc_stfsu() { if (reg_a != 0) { ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); ppc_effective_address += val_reg_a; - mem_write_dword(ppc_effective_address, uint32_t(ppc_state.fpr[reg_s].int64_r)); + mmu_write_vmem(ppc_effective_address, uint32_t(ppc_state.fpr[reg_s].int64_r)); + //mem_write_dword(ppc_effective_address, uint32_t(ppc_state.fpr[reg_s].int64_r)); ppc_state.gpr[reg_a] = ppc_effective_address; } else { ppc_exception_handler(Except_Type::EXC_PROGRAM, Exc_Cause::ILLEGAL_OP); @@ -838,14 +844,16 @@ void dppc_interpreter::ppc_stfsu() { void dppc_interpreter::ppc_stfsx() { ppc_grab_regsfpsiab(); ppc_effective_address = (reg_a) ? val_reg_a + val_reg_b : val_reg_b; - mem_write_dword(ppc_effective_address, uint32_t(ppc_state.fpr[reg_s].int64_r)); + mmu_write_vmem(ppc_effective_address, uint32_t(ppc_state.fpr[reg_s].int64_r)); + //mem_write_dword(ppc_effective_address, uint32_t(ppc_state.fpr[reg_s].int64_r)); } void dppc_interpreter::ppc_stfsux() { ppc_grab_regsfpsiab(); if (reg_a) { ppc_effective_address = val_reg_a + val_reg_b; - mem_write_dword(ppc_effective_address, uint32_t(ppc_state.fpr[reg_s].int64_r)); + mmu_write_vmem(ppc_effective_address, uint32_t(ppc_state.fpr[reg_s].int64_r)); + //mem_write_dword(ppc_effective_address, uint32_t(ppc_state.fpr[reg_s].int64_r)); ppc_state.gpr[reg_a] = ppc_effective_address; } else { ppc_exception_handler(Except_Type::EXC_PROGRAM, Exc_Cause::ILLEGAL_OP); @@ -856,7 +864,8 @@ void dppc_interpreter::ppc_stfd() { ppc_grab_regsfpsia(); ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); ppc_effective_address += (reg_a) ? val_reg_a : 0; - mem_write_qword(ppc_effective_address, ppc_state.fpr[reg_s].int64_r); + mmu_write_vmem(ppc_effective_address, ppc_state.fpr[reg_s].int64_r); + //mem_write_qword(ppc_effective_address, ppc_state.fpr[reg_s].int64_r); } void dppc_interpreter::ppc_stfdu() { @@ -864,7 +873,8 @@ void dppc_interpreter::ppc_stfdu() { if (reg_a != 0) { ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); ppc_effective_address += val_reg_a; - mem_write_qword(ppc_effective_address, ppc_state.fpr[reg_s].int64_r); + mmu_write_vmem(ppc_effective_address, ppc_state.fpr[reg_s].int64_r); + //mem_write_qword(ppc_effective_address, ppc_state.fpr[reg_s].int64_r); ppc_state.gpr[reg_a] = ppc_effective_address; } else { ppc_exception_handler(Except_Type::EXC_PROGRAM, Exc_Cause::ILLEGAL_OP); @@ -874,14 +884,16 @@ void dppc_interpreter::ppc_stfdu() { void dppc_interpreter::ppc_stfdx() { ppc_grab_regsfpsiab(); ppc_effective_address = (reg_a) ? 
val_reg_a + val_reg_b : val_reg_b; - mem_write_qword(ppc_effective_address, ppc_state.fpr[reg_s].int64_r); + mmu_write_vmem(ppc_effective_address, ppc_state.fpr[reg_s].int64_r); + //mem_write_qword(ppc_effective_address, ppc_state.fpr[reg_s].int64_r); } void dppc_interpreter::ppc_stfdux() { ppc_grab_regsfpsiab(); if (reg_a != 0) { ppc_effective_address = val_reg_a + val_reg_b; - mem_write_qword(ppc_effective_address, ppc_state.fpr[reg_s].int64_r); + mmu_write_vmem(ppc_effective_address, ppc_state.fpr[reg_s].int64_r); + //mem_write_qword(ppc_effective_address, ppc_state.fpr[reg_s].int64_r); ppc_state.gpr[reg_a] = ppc_effective_address; } else { ppc_exception_handler(Except_Type::EXC_PROGRAM, Exc_Cause::ILLEGAL_OP); @@ -891,7 +903,8 @@ void dppc_interpreter::ppc_stfdux() { void dppc_interpreter::ppc_stfiwx() { ppc_grab_regsfpsiab(); ppc_effective_address = (reg_a) ? val_reg_a + val_reg_b : val_reg_b; - mem_write_dword(ppc_effective_address, (uint32_t)(ppc_state.fpr[reg_s].int64_r)); + mmu_write_vmem(ppc_effective_address, (uint32_t)(ppc_state.fpr[reg_s].int64_r)); + //mem_write_dword(ppc_effective_address, (uint32_t)(ppc_state.fpr[reg_s].int64_r)); } // Floating Point Register Transfer diff --git a/cpu/ppc/ppcmmu.cpp b/cpu/ppc/ppcmmu.cpp index f9f8755..12d656f 100644 --- a/cpu/ppc/ppcmmu.cpp +++ b/cpu/ppc/ppcmmu.cpp @@ -137,6 +137,7 @@ public: /** Temporary TLB test variables. */ bool MemAccessType; // true - memory, false - I/O +bool Unaligned_crosspage = false; uint64_t MemAddr = 0; MMIODevice *Device = 0; uint32_t DevOffset = 0; @@ -239,6 +240,21 @@ static inline void write_phys_mem(AddressMapEntry *mru_rgn, uint32_t addr, T val #ifdef MMU_PROFILING dmem_writes_total++; #endif + +#if 1 + if (!MemAccessType) { + LOG_F(ERROR, "TLB real memory access expected!"); + } + + if (!is_aligned && Unaligned_crosspage) { + LOG_F(WARNING, "Unaligned cross-page access ignored!"); + } else if ((mru_rgn->mem_ptr + (addr - mru_rgn->start)) != (uint8_t *)MemAddr) { + LOG_F(ERROR, "TLB address mismatch! Expected: 0x%llx, got: 0x%llx", + (uint64_t)(mru_rgn->mem_ptr + (addr - mru_rgn->start)), + (uint64_t)MemAddr); + } +#endif + switch(sizeof(T)) { case 1: *(mru_rgn->mem_ptr + (addr - mru_rgn->start)) = value; @@ -270,6 +286,18 @@ static inline void write_phys_mem(AddressMapEntry *mru_rgn, uint32_t addr, T val #ifdef MMU_PROFILING iomem_writes_total++; #endif + +#if 1 + if (MemAccessType) { + LOG_F(ERROR, "TLB I/O memory access expected!"); + } + + if (mru_rgn->devobj != Device || (addr - mru_rgn->start) != DevOffset) { + LOG_F(ERROR, "TLB MMIO access mismatch! 
Expected: 0x%X, got: 0x%X", + addr - mru_rgn->start, DevOffset); + } +#endif + mru_rgn->devobj->write(mru_rgn->start, addr - mru_rgn->start, value, sizeof(T)); } else { @@ -298,6 +326,9 @@ void ppc_set_cur_instruction(const uint8_t* ptr) { ppc_cur_instruction = READ_DWORD_BE_A(ptr); } +bool gTLBFlushBatEntries = false; +bool gTLBFlushPatEntries = false; + void ibat_update(uint32_t bat_reg) { int upper_reg_num; uint32_t bl, hi_mask; @@ -335,6 +366,19 @@ void dbat_update(uint32_t bat_reg) { bat_entry->hi_mask = hi_mask; bat_entry->phys_hi = ppc_state.spr[upper_reg_num + 1] & hi_mask; bat_entry->bepi = ppc_state.spr[upper_reg_num] & hi_mask; + + if (!gTLBFlushBatEntries) { + gTLBFlushBatEntries = true; + add_ctx_sync_action(&tlb_flush_bat_entries); + } + } +} + +void mmu_pat_ctx_changed() +{ + if (!gTLBFlushPatEntries) { + gTLBFlushPatEntries = true; + add_ctx_sync_action(&tlb_flush_pat_entries); } } @@ -437,7 +481,9 @@ static bool search_pteg( return false; } -static uint32_t page_address_translate(uint32_t la, bool is_instr_fetch, unsigned msr_pr, int is_write) { +static PATResult page_address_translate(uint32_t la, bool is_instr_fetch, + unsigned msr_pr, int is_write) +{ uint32_t sr_val, page_index, pteg_hash1, vsid, pte_word2; unsigned key, pp; uint8_t* pte_addr; @@ -497,8 +543,12 @@ static uint32_t page_address_translate(uint32_t la, bool is_instr_fetch, unsigne pte_addr[7] |= 0x80; } - /* return physical address */ - return ((pte_word2 & 0xFFFFF000) | (la & 0x00000FFF)); + /* return physical address, access protection and C status */ + return PATResult{ + ((pte_word2 & 0xFFFFF000) | (la & 0x00000FFF)), + static_cast((key << 2) | pp), + static_cast(pte_word2 & 0x80) + }; } /** PowerPC-style MMU instruction address translation. */ @@ -535,7 +585,8 @@ static uint32_t ppc_mmu_instr_translate(uint32_t la) { /* page address translation */ if (!bat_hit) { - pa = page_address_translate(la, true, msr_pr, 0); + PATResult pat_res = page_address_translate(la, true, msr_pr, 0); + pa = pat_res.phys; #ifdef MMU_PROFILING ptab_transl_total++; @@ -581,7 +632,8 @@ static uint32_t ppc_mmu_addr_translate(uint32_t la, int is_write) { /* page address translation */ if (!bat_hit) { - pa = page_address_translate(la, false, msr_pr, is_write); + PATResult pat_res = page_address_translate(la, false, msr_pr, is_write); + pa = pat_res.phys; #ifdef MMU_PROFILING ptab_transl_total++; @@ -635,73 +687,6 @@ static void mem_write_unaligned(uint32_t addr, uint32_t value, uint32_t size) { } } -void mem_write_byte(uint32_t addr, uint8_t value) { - /* data address translation if enabled */ - if (ppc_state.msr & 0x10) { - addr = ppc_mmu_addr_translate(addr, 1); - } - - write_phys_mem(&last_write_area, addr, value); -} - -void mem_write_word(uint32_t addr, uint16_t value) { - if (addr & 1) { - mem_write_unaligned(addr, value, 2); - return; - } - - /* data address translation if enabled */ - if (ppc_state.msr & 0x10) { - addr = ppc_mmu_addr_translate(addr, 1); - } - - write_phys_mem(&last_write_area, addr, value); -} - -void mem_write_dword(uint32_t addr, uint32_t value) { - if (addr & 3) { - mem_write_unaligned(addr, value, 4); - return; - } - - /* data address translation if enabled */ - if (ppc_state.msr & 0x10) { - addr = ppc_mmu_addr_translate(addr, 1); - } - - write_phys_mem(&last_write_area, addr, value); -} - -void mem_write_qword(uint32_t addr, uint64_t value) { - if (addr & 7) { - LOG_F(ERROR, "SOS! Attempt to write unaligned QWORD to 0x%08X\n", addr); - exit(-1); // FIXME! 
- } - - /* data address translation if enabled */ - if (ppc_state.msr & 0x10) { - addr = ppc_mmu_addr_translate(addr, 1); - } - - write_phys_mem(&last_write_area, addr, value); -} - - -#define PAGE_SIZE_BITS 12 -#define TLB_SIZE 4096 -#define TLB2_WAYS 4 -#define TLB_INVALID_TAG 0xFFFFFFFF - -typedef struct TLBEntry { - uint32_t tag; - uint16_t flags; - uint16_t lru_bits; - union { - int64_t host_va_offset; - AddressMapEntry* reg_desc; - }; -} TLBEntry; - // primary TLB for all MMU modes static std::array mode1_tlb1; static std::array mode2_tlb1; @@ -721,13 +706,13 @@ uint32_t tlb_size_mask = TLB_SIZE - 1; uint64_t UnmappedVal = -1ULL; TLBEntry UnmappedMem = {TLB_INVALID_TAG, 0, 0, 0}; -uint8_t MMUMode = {0xFF}; +uint8_t CurMMUMode = {0xFF}; // current MMU mode void mmu_change_mode() { uint8_t mmu_mode = ((ppc_state.msr >> 3) & 0x2) | ((ppc_state.msr >> 14) & 1); - if (MMUMode != mmu_mode) { + if (CurMMUMode != mmu_mode) { switch(mmu_mode) { case 0: // real address mode pCurTLB1 = &mode1_tlb1[0]; @@ -742,7 +727,7 @@ void mmu_change_mode() pCurTLB2 = &mode3_tlb2[0]; break; } - MMUMode = mmu_mode; + CurMMUMode = mmu_mode; } } @@ -817,6 +802,7 @@ static TLBEntry* tlb2_target_entry(uint32_t gp_va) static TLBEntry* tlb2_refill(uint32_t guest_va, int is_write) { uint32_t phys_addr; + uint16_t flags = 0; TLBEntry *tlb_entry; const uint32_t tag = guest_va & ~0xFFFUL; @@ -828,18 +814,40 @@ static TLBEntry* tlb2_refill(uint32_t guest_va, int is_write) if (bat_res.hit) { // check block protection if (!bat_res.prot || ((bat_res.prot & 1) && is_write)) { + LOG_F(WARNING, "BAT DSI exception in TLB2 refill!"); + LOG_F(WARNING, "Attempt to write to read-only region, LA=0x%08X, PC=0x%08X!", guest_va, ppc_state.pc); + //UnmappedMem.tag = tag; + //UnmappedMem.host_va_offset = (int64_t)(&UnmappedVal) - guest_va; + //return &UnmappedMem; ppc_state.spr[SPR::DSISR] = 0x08000000 | (is_write << 25); ppc_state.spr[SPR::DAR] = guest_va; mmu_exception_handler(Except_Type::EXC_DSI, 0); } phys_addr = bat_res.phys; + flags = TLBFlags::PTE_SET_C; // prevent PTE.C updates for BAT + flags |= TLBFlags::TLBE_FROM_BAT; // tell the world we come from + if (bat_res.prot == 2) { + flags |= TLBFlags::PAGE_WRITABLE; + } } else { // page address translation - phys_addr = page_address_translate(guest_va, false, - !!(ppc_state.msr & 0x4000), is_write); + PATResult pat_res = page_address_translate(guest_va, false, + !!(ppc_state.msr & 0x4000), is_write); + phys_addr = pat_res.phys; + flags = TLBFlags::TLBE_FROM_PAT; // tell the world we come from + if (pat_res.prot <= 2 || pat_res.prot == 6) { + flags |= TLBFlags::PAGE_WRITABLE; + } + if (is_write || pat_res.pte_c_status) { + // C-bit of the PTE is already set so the TLB logic + // doesn't need to update it anymore + flags |= TLBFlags::PTE_SET_C; + } } - } else { + } else { // data translation disabled phys_addr = guest_va; + flags = TLBFlags::PTE_SET_C; // no PTE.C updates in real addressing mode + flags |= TLBFlags::PAGE_WRITABLE; // assume physical pages are writable } // look up host virtual address @@ -848,11 +856,11 @@ static TLBEntry* tlb2_refill(uint32_t guest_va, int is_write) // refill the secondary TLB tlb_entry = tlb2_target_entry(tag); tlb_entry->tag = tag; - if (reg_desc->type & RT_MMIO) { - tlb_entry->flags = 2; // MMIO region + if (reg_desc->type & RT_MMIO) { // MMIO region + tlb_entry->flags = flags | TLBFlags::PAGE_IO; tlb_entry->reg_desc = reg_desc; - } else { - tlb_entry->flags = 1; // memory region backed by host memory + } else { // memory region backed by host 
memory + tlb_entry->flags = flags | TLBFlags::PAGE_MEM; tlb_entry->host_va_offset = (int64_t)reg_desc->mem_ptr - guest_va + (phys_addr - reg_desc->start); } @@ -865,7 +873,7 @@ static TLBEntry* tlb2_refill(uint32_t guest_va, int is_write) } } -void flush_tlb_entry(uint32_t ea) +void tlb_flush_entry(uint32_t ea) { TLBEntry *tlb_entry, *tlb1, *tlb2; @@ -878,12 +886,12 @@ void flush_tlb_entry(uint32_t ea) tlb2 = &mode1_tlb2[0]; break; case 1: - tlb1 = &mode1_tlb1[0]; - tlb2 = &mode1_tlb2[0]; + tlb1 = &mode2_tlb1[0]; + tlb2 = &mode2_tlb2[0]; break; case 2: - tlb1 = &mode1_tlb1[0]; - tlb2 = &mode1_tlb2[0]; + tlb1 = &mode3_tlb1[0]; + tlb2 = &mode3_tlb2[0]; break; } @@ -905,6 +913,53 @@ void flush_tlb_entry(uint32_t ea) } } +void tlb_flush_entries(TLBFlags type) +{ + int i; + + // Flush BAT entries from the primary TLBs + for (i = 0; i < TLB_SIZE; i++) { + if (mode2_tlb1[i].flags & type) { + mode2_tlb1[i].tag = TLB_INVALID_TAG; + } + + if (mode3_tlb1[i].flags & type) { + mode3_tlb1[i].tag = TLB_INVALID_TAG; + } + } + + // Flush BAT entries from the secondary TLBs + for (i = 0; i < TLB_SIZE * TLB2_WAYS; i++) { + if (mode2_tlb2[i].flags & type) { + mode2_tlb2[i].tag = TLB_INVALID_TAG; + } + + if (mode3_tlb2[i].flags & type) { + mode3_tlb2[i].tag = TLB_INVALID_TAG; + } + } +} + +void tlb_flush_bat_entries() +{ + if (!gTLBFlushBatEntries) + return; + + tlb_flush_entries(TLBE_FROM_BAT); + + gTLBFlushBatEntries = false; +} + +void tlb_flush_pat_entries() +{ + if (!gTLBFlushPatEntries) + return; + + tlb_flush_entries(TLBE_FROM_PAT); + + gTLBFlushPatEntries = false; +} + static inline uint64_t tlb_translate_addr(uint32_t guest_va) { TLBEntry *tlb1_entry, *tlb2_entry; @@ -951,10 +1006,10 @@ static inline uint64_t tlb_translate_addr(uint32_t guest_va) tlb2_entry = tlb2_refill(guest_va, 0); } - if (tlb2_entry->flags & 1) { // is it a real memory region? + if (tlb2_entry->flags & TLBFlags::PAGE_MEM) { // is it a real memory region? // refill the primary TLB tlb1_entry->tag = tag; - tlb1_entry->flags = 1; + tlb1_entry->flags = tlb2_entry->flags; tlb1_entry->host_va_offset = tlb2_entry->host_va_offset; MemAccessType = true; MemAddr = tlb1_entry->host_va_offset + guest_va; @@ -1056,7 +1111,10 @@ static inline TLBEntry * lookup_secondary_tlb(uint32_t guest_va, uint32_t tag) { return tlb_entry; } +// Forward declarations. static uint32_t read_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t size); +static void write_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t value, + uint32_t size); template inline T mmu_read_vmem(uint32_t guest_va) { @@ -1078,10 +1136,10 @@ inline T mmu_read_vmem(uint32_t guest_va) { tlb2_entry = tlb2_refill(guest_va, 0); } - if (tlb2_entry->flags & 1) { // is it a real memory region? + if (tlb2_entry->flags & TLBFlags::PAGE_MEM) { // is it a real memory region? 
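Note that the flush helpers above are deliberately not invoked at the point of the BAT/segment register write; ibat_update/dbat_update and mmu_pat_ctx_changed only set a guard flag and queue the flush, which then runs at the next context-synchronizing instruction such as isync (see do_ctx_sync/add_ctx_sync_action later in this series). A small standalone sketch of that register-once, drain-later pattern, assuming a single-threaded interpreter loop; all names are illustrative:

    #include <cstdio>
    #include <functional>
    #include <vector>

    static std::vector<std::function<void()>> pending_sync_actions;
    static bool flush_queued = false;

    // Called from a BAT/SR/SDR1 update: queue the flush only once.
    static void queue_bat_flush() {
        if (!flush_queued) {
            flush_queued = true;
            pending_sync_actions.push_back([] {
                printf("flushing BAT-derived TLB entries\n");
                flush_queued = false;   // mirrors gTLBFlushBatEntries being cleared
            });
        }
    }

    // Called from a context-synchronizing instruction such as isync.
    static void drain_sync_actions() {
        while (!pending_sync_actions.empty()) {
            pending_sync_actions.back()();
            pending_sync_actions.pop_back();
        }
    }
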
// refill the primary TLB tlb1_entry->tag = tag; - tlb1_entry->flags = 1; + tlb1_entry->flags = tlb2_entry->flags; tlb1_entry->host_va_offset = tlb2_entry->host_va_offset; host_va = (uint8_t *)(tlb1_entry->host_va_offset + guest_va); } else { // otherwise, it's an access to a memory-mapped device @@ -1117,6 +1175,110 @@ template uint16_t mmu_read_vmem(uint32_t guest_va); template uint32_t mmu_read_vmem(uint32_t guest_va); template uint64_t mmu_read_vmem(uint32_t guest_va); +template +inline void mmu_write_vmem(uint32_t guest_va, T value) { + TLBEntry *tlb1_entry, *tlb2_entry; + uint8_t *host_va; + + const uint32_t tag = guest_va & ~0xFFFUL; + + // look up guest virtual address in the primary TLB + tlb1_entry = &pCurTLB1[(guest_va >> PAGE_SIZE_BITS) & tlb_size_mask]; + if (tlb1_entry->tag == tag) { // primary TLB hit -> fast path + if (!(tlb1_entry->flags & TLBFlags::PAGE_WRITABLE)) { + ppc_state.spr[SPR::DSISR] = 0x08000000 | (1 << 25); + ppc_state.spr[SPR::DAR] = guest_va; + mmu_exception_handler(Except_Type::EXC_DSI, 0); + } + if (!(tlb1_entry->flags & TLBFlags::PTE_SET_C)) { + // perform full page address translation to update PTE.C bit + PATResult pat_res = page_address_translate(guest_va, false, + !!(ppc_state.msr & 0x4000), true); + tlb1_entry->flags |= TLBFlags::PTE_SET_C; + + // don't forget to update the secondary TLB as well + tlb2_entry = lookup_secondary_tlb(guest_va, tag); + if (tlb2_entry != nullptr) { + tlb2_entry->flags |= TLBFlags::PTE_SET_C; + } + } + host_va = (uint8_t *)(tlb1_entry->host_va_offset + guest_va); + MemAccessType = true; + MemAddr = (uint64_t)host_va; + } else { + // primary TLB miss -> look up address in the secondary TLB + tlb2_entry = lookup_secondary_tlb(guest_va, tag); + if (tlb2_entry == nullptr) { + // secondary TLB miss -> + // perform full address translation and refill the secondary TLB + tlb2_entry = tlb2_refill(guest_va, 1); + } + + if (!(tlb2_entry->flags & TLBFlags::PAGE_WRITABLE)) { + LOG_F(WARNING, "DSI Exception in mmu_write_vmem! PC=0x%08X", ppc_state.pc); + //return; + ppc_state.spr[SPR::DSISR] = 0x08000000 | (1 << 25); + ppc_state.spr[SPR::DAR] = guest_va; + mmu_exception_handler(Except_Type::EXC_DSI, 0); + } + + if (!(tlb2_entry->flags & TLBFlags::PTE_SET_C)) { + // perform full page address translation to update PTE.C bit + PATResult pat_res = page_address_translate(guest_va, false, + !!(ppc_state.msr & 0x4000), true); + tlb2_entry->flags |= TLBFlags::PTE_SET_C; + } + + if (tlb2_entry->flags & TLBFlags::PAGE_MEM) { // is it a real memory region? 
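The speed of the primary-TLB hit path above comes from storing a precomputed host_va_offset per entry: at refill time the offset folds together the host base pointer, the region start and the translated physical address, so a later hit needs only a single addition. A minimal sketch of that arithmetic with made-up addresses, not taken from the emulator's real memory map:

    #include <cstdint>

    int main() {
        static uint8_t host_ram[0x4000];    // pretend backing store for a guest RAM region
        uint32_t region_start = 0x00000000; // guest physical start of that region
        uint32_t guest_va  = 0x00003A20;    // virtual address being accessed
        uint32_t phys_addr = 0x00003A20;    // result of BAT/PAT translation (identity here)

        // computed once, at TLB refill time (mirrors tlb2_refill)
        int64_t host_va_offset = (int64_t)host_ram - guest_va + (phys_addr - region_start);

        // on every later primary-TLB hit, one addition recovers the host pointer
        uint8_t *host_va = (uint8_t *)(host_va_offset + guest_va);

        return (host_va == &host_ram[phys_addr - region_start]) ? 0 : 1;
    }
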
+ // refill the primary TLB + tlb1_entry->tag = tag; + tlb1_entry->flags = tlb2_entry->flags; + tlb1_entry->host_va_offset = tlb2_entry->host_va_offset; + host_va = (uint8_t *)(tlb1_entry->host_va_offset + guest_va); + //MemAccessType = true; + //MemAddr = (uint64_t)host_va; + } else { // otherwise, it's an access to a memory-mapped device + tlb2_entry->reg_desc->devobj->write(tlb2_entry->reg_desc->start, + guest_va - tlb2_entry->reg_desc->start, value, sizeof(T)); + //MemAccessType = false; + //Device = tlb2_entry->reg_desc->devobj; + //DevOffset = guest_va - tlb2_entry->reg_desc->start; + return; + } + } + + // handle unaligned memory accesses + if (sizeof(T) > 1 && (guest_va & (sizeof(T) - 1))) { + write_unaligned(guest_va, host_va, value, sizeof(T)); + return; + } + +#if 1 + // handle aligned memory accesses + switch(sizeof(T)) { + case 1: + *host_va = value; + break; + case 2: + WRITE_WORD_BE_A(host_va, value); + break; + case 4: + WRITE_DWORD_BE_A(host_va, value); + break; + case 8: + WRITE_QWORD_BE_A(host_va, value); + break; + } +#endif +} + +// explicitely instantiate all required mmu_write_vmem variants +// to avoid linking errors +template void mmu_write_vmem(uint32_t guest_va, uint8_t value); +template void mmu_write_vmem(uint32_t guest_va, uint16_t value); +template void mmu_write_vmem(uint32_t guest_va, uint32_t value); +template void mmu_write_vmem(uint32_t guest_va, uint64_t value); + static uint32_t read_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t size) { uint32_t result = 0; @@ -1142,6 +1304,99 @@ static uint32_t read_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t siz return result; } +static void write_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t value, + uint32_t size) +{ + // is it a misaligned cross-page write? + if (((guest_va & 0xFFF) + size) > 0x1000) { + Unaligned_crosspage = true; + + // Break such a memory access into multiple, bytewise accesses. + // Because such accesses suffer a performance penalty, they will be + // presumably very rare so don't waste time optimizing the code below. + + uint32_t shift = (size - 1) * 8; + + for (int i = 0; i < size; shift -= 8, guest_va++, i++) { + mmu_write_vmem(guest_va, (value >> shift) & 0xFF); + } + } else { + Unaligned_crosspage = false; +#if 1 + switch(size) { + case 2: + WRITE_WORD_BE_U(host_va, value); + break; + case 4: + WRITE_DWORD_BE_U(host_va, value); + break; + case 8: // FIXME: should we raise alignment exception here? 
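For the cross-page branch above, write_unaligned falls back to byte-sized recursive mmu_write_vmem calls so that every byte goes through its own translation. A rough standalone illustration of that split for a 32-bit store that starts two bytes before a 4 KiB page boundary; the byte sink is a stand-in for mmu_write_vmem<uint8_t>, not the emulator's API:

    #include <cstdint>
    #include <cstdio>

    // stand-in for mmu_write_vmem<uint8_t>(); each call would be translated separately
    static void store_byte(uint32_t va, uint8_t b) {
        printf("byte 0x%02X -> VA 0x%08X\n", b, va);
    }

    int main() {
        uint32_t guest_va = 0x00000FFE;   // last two bytes of page 0, spills into page 1
        uint32_t value    = 0xDEADBEEF;
        uint32_t size     = 4;

        uint32_t shift = (size - 1) * 8;
        for (uint32_t i = 0; i < size; shift -= 8, guest_va++, i++)
            store_byte(guest_va, (value >> shift) & 0xFF);   // DE AD BE EF in big-endian order
        return 0;
    }
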
+ WRITE_QWORD_BE_U(host_va, value); + break; + } +#endif + } +} + +void mem_write_byte(uint32_t addr, uint8_t value) { + mmu_write_vmem(addr, value); + + /* data address translation if enabled */ + if (ppc_state.msr & 0x10) { + addr = ppc_mmu_addr_translate(addr, 1); + } + + write_phys_mem(&last_write_area, addr, value); +} + +void mem_write_word(uint32_t addr, uint16_t value) { + mmu_write_vmem(addr, value); + + if (addr & 1) { + mem_write_unaligned(addr, value, 2); + return; + } + + /* data address translation if enabled */ + if (ppc_state.msr & 0x10) { + addr = ppc_mmu_addr_translate(addr, 1); + } + + write_phys_mem(&last_write_area, addr, value); +} + +void mem_write_dword(uint32_t addr, uint32_t value) { + mmu_write_vmem(addr, value); + + if (addr & 3) { + mem_write_unaligned(addr, value, 4); + return; + } + + /* data address translation if enabled */ + if (ppc_state.msr & 0x10) { + addr = ppc_mmu_addr_translate(addr, 1); + } + + write_phys_mem(&last_write_area, addr, value); +} + +void mem_write_qword(uint32_t addr, uint64_t value) { + mmu_write_vmem(addr, value); + + if (addr & 7) { + LOG_F(ERROR, "SOS! Attempt to write unaligned QWORD to 0x%08X\n", addr); + exit(-1); // FIXME! + } + + /* data address translation if enabled */ + if (ppc_state.msr & 0x10) { + addr = ppc_mmu_addr_translate(addr, 1); + } + + write_phys_mem(&last_write_area, addr, value); +} + /** Grab a value from memory into a register */ uint8_t mem_grab_byte(uint32_t addr) { tlb_translate_addr(addr); diff --git a/cpu/ppc/ppcmmu.h b/cpu/ppc/ppcmmu.h index c05cc89..80ae209 100644 --- a/cpu/ppc/ppcmmu.h +++ b/cpu/ppc/ppcmmu.h @@ -27,6 +27,7 @@ along with this program. If not, see . #include #include #include +#include "devices/memctrlbase.h" /* Uncomment this to exhaustive MMU integrity checks. */ //#define MMU_INTEGRITY_CHECKS @@ -53,6 +54,36 @@ typedef struct BATResult { uint32_t phys; } BATResult; +/** Result of the page address translation. 
*/ +typedef struct PATResult { + uint32_t phys; + uint8_t prot; + uint8_t pte_c_status; // status of the C bit of the PTE +} PATResult; + +#define PAGE_SIZE_BITS 12 +#define TLB_SIZE 4096 +#define TLB2_WAYS 4 +#define TLB_INVALID_TAG 0xFFFFFFFF + +typedef struct TLBEntry { + uint32_t tag; + uint16_t flags; + uint16_t lru_bits; + union { + int64_t host_va_offset; + AddressMapEntry* reg_desc; + }; +} TLBEntry; + +enum TLBFlags : uint16_t { + PAGE_MEM = 1 << 0, // memory page backed by host memory + PAGE_IO = 1 << 1, // memory mapped I/O page + TLBE_FROM_BAT = 1 << 2, // TLB entry has been translated with BAT + TLBE_FROM_PAT = 1 << 3, // TLB entry has been translated with PAT + PAGE_WRITABLE = 1 << 4, // page is writable + PTE_SET_C = 1 << 5, // tells if C bit of the PTE needs to be updated +}; extern void ibat_update(uint32_t bat_reg); extern void dbat_update(uint32_t bat_reg); @@ -60,7 +91,10 @@ extern void dbat_update(uint32_t bat_reg); extern uint8_t* mmu_get_dma_mem(uint32_t addr, uint32_t size); extern void mmu_change_mode(void); -extern void flush_tlb_entry(uint32_t ea); +extern void mmu_pat_ctx_changed(); +extern void tlb_flush_entry(uint32_t ea); +extern void tlb_flush_bat_entries(); +extern void tlb_flush_pat_entries(); extern void ppc_set_cur_instruction(const uint8_t* ptr); extern void mem_write_byte(uint32_t addr, uint8_t value); @@ -76,5 +110,7 @@ extern uint8_t* quickinstruction_translate(uint32_t address_grab); template extern T mmu_read_vmem(uint32_t guest_va); +template +extern void mmu_write_vmem(uint32_t guest_va, T value); #endif // PPCMEMORY_H diff --git a/cpu/ppc/ppcopcodes.cpp b/cpu/ppc/ppcopcodes.cpp index 1bf96c0..71e8572 100644 --- a/cpu/ppc/ppcopcodes.cpp +++ b/cpu/ppc/ppcopcodes.cpp @@ -26,12 +26,14 @@ along with this program. If not, see . #include #include #include +#include #include #include #include #include #include #include +#include uint32_t crf_d; uint32_t crf_s; @@ -188,6 +190,21 @@ inline void ppc_setsoov(uint32_t a, uint32_t b, uint32_t d) { } } +typedef std::function CtxSyncCallback; +std::vector gCtxSyncCallbacks; + +// perform context synchronization by executing registered actions if any +void do_ctx_sync() { + while (!gCtxSyncCallbacks.empty()) { + gCtxSyncCallbacks.back()(); + gCtxSyncCallbacks.pop_back(); + } +} + +void add_ctx_sync_action(const CtxSyncCallback &cb) { + gCtxSyncCallbacks.push_back(cb); +} + /** The core functionality of this PPC emulation is within all of these void functions. This is where the opcode tables in the ppcemumain.h come into play - reducing the number of @@ -768,6 +785,7 @@ void dppc_interpreter::ppc_mtsr() { reg_s = (ppc_cur_instruction >> 21) & 31; grab_sr = (ppc_cur_instruction >> 16) & 15; ppc_state.sr[grab_sr] = ppc_state.gpr[reg_s]; + mmu_pat_ctx_changed(); } } @@ -779,6 +797,7 @@ void dppc_interpreter::ppc_mtsrin() { ppc_grab_regssb(); grab_sr = ppc_result_b >> 28; ppc_state.sr[grab_sr] = ppc_result_d; + mmu_pat_ctx_changed(); } } @@ -853,6 +872,11 @@ void dppc_interpreter::ppc_mtspr() { ppc_state.spr[ref_spr] = ppc_state.gpr[reg_s]; } + if (ref_spr == SPR::SDR1) { + LOG_F(INFO, "SDR1 changed to 0x%08X", ppc_state.spr[SPR::SDR1]); + mmu_pat_ctx_changed(); + } + switch (ref_spr) { // Mirror the TBRs in the SPR range to the user-mode TBRs. 
case 284: @@ -1320,7 +1344,7 @@ void dppc_interpreter::ppc_eieio() { } void dppc_interpreter::ppc_isync() { - /* placeholder */ + do_ctx_sync(); } void dppc_interpreter::ppc_sync() { @@ -1362,10 +1386,10 @@ void dppc_interpreter::ppc_dcbz() { ppc_effective_address &= 0xFFFFFFE0; // align EA on a 32-byte boundary - mem_write_qword(ppc_effective_address, 0); - mem_write_qword((ppc_effective_address + 8), 0); - mem_write_qword((ppc_effective_address + 16), 0); - mem_write_qword((ppc_effective_address + 24), 0); + //mem_write_qword(ppc_effective_address, 0); + //mem_write_qword((ppc_effective_address + 8), 0); + //mem_write_qword((ppc_effective_address + 16), 0); + //mem_write_qword((ppc_effective_address + 24), 0); } @@ -1378,7 +1402,8 @@ void dppc_interpreter::ppc_stb() { ppc_grab_regssa(); ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); ppc_effective_address += reg_a ? ppc_result_a : 0; - mem_write_byte(ppc_effective_address, ppc_result_d); + mmu_write_vmem(ppc_effective_address, ppc_result_d); + //mem_write_byte(ppc_effective_address, ppc_result_d); } void dppc_interpreter::ppc_stbx() { @@ -1387,7 +1412,8 @@ void dppc_interpreter::ppc_stbx() { #endif ppc_grab_regssab(); ppc_effective_address = reg_a ? (ppc_result_a + ppc_result_b) : ppc_result_b; - mem_write_byte(ppc_effective_address, ppc_result_d); + mmu_write_vmem(ppc_effective_address, ppc_result_d); + //mem_write_byte(ppc_effective_address, ppc_result_d); } void dppc_interpreter::ppc_stbu() { @@ -1398,7 +1424,8 @@ void dppc_interpreter::ppc_stbu() { if (reg_a != 0) { ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); ppc_effective_address += ppc_result_a; - mem_write_byte(ppc_effective_address, ppc_result_d); + mmu_write_vmem(ppc_effective_address, ppc_result_d); + //mem_write_byte(ppc_effective_address, ppc_result_d); ppc_state.gpr[reg_a] = ppc_effective_address; } else { ppc_exception_handler(Except_Type::EXC_PROGRAM, Exc_Cause::ILLEGAL_OP); @@ -1412,7 +1439,8 @@ void dppc_interpreter::ppc_stbux() { ppc_grab_regssab(); if (reg_a != 0) { ppc_effective_address = ppc_result_a + ppc_result_b; - mem_write_byte(ppc_effective_address, ppc_result_d); + mmu_write_vmem(ppc_effective_address, ppc_result_d); + //mem_write_byte(ppc_effective_address, ppc_result_d); ppc_state.gpr[reg_a] = ppc_effective_address; } else { ppc_exception_handler(Except_Type::EXC_PROGRAM, Exc_Cause::ILLEGAL_OP); @@ -1426,7 +1454,8 @@ void dppc_interpreter::ppc_sth() { ppc_grab_regssa(); ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); ppc_effective_address += (reg_a > 0) ? 
ppc_result_a : 0; - mem_write_word(ppc_effective_address, ppc_result_d); + mmu_write_vmem(ppc_effective_address, ppc_result_d); + //mem_write_word(ppc_effective_address, ppc_result_d); } void dppc_interpreter::ppc_sthu() { @@ -1437,7 +1466,8 @@ void dppc_interpreter::ppc_sthu() { if (reg_a != 0) { ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); ppc_effective_address += ppc_result_a; - mem_write_word(ppc_effective_address, ppc_result_d); + mmu_write_vmem(ppc_effective_address, ppc_result_d); + //mem_write_word(ppc_effective_address, ppc_result_d); ppc_state.gpr[reg_a] = ppc_effective_address; } else { ppc_exception_handler(Except_Type::EXC_PROGRAM, Exc_Cause::ILLEGAL_OP); @@ -1451,7 +1481,8 @@ void dppc_interpreter::ppc_sthux() { ppc_grab_regssab(); if (reg_a != 0) { ppc_effective_address = ppc_result_a + ppc_result_b; - mem_write_word(ppc_effective_address, ppc_result_d); + mmu_write_vmem(ppc_effective_address, ppc_result_d); + //mem_write_word(ppc_effective_address, ppc_result_d); ppc_state.gpr[reg_a] = ppc_effective_address; } else { ppc_exception_handler(Except_Type::EXC_PROGRAM, Exc_Cause::ILLEGAL_OP); @@ -1464,7 +1495,8 @@ void dppc_interpreter::ppc_sthx() { #endif ppc_grab_regssab(); ppc_effective_address = reg_a ? (ppc_result_a + ppc_result_b) : ppc_result_b; - mem_write_word(ppc_effective_address, ppc_result_d); + mmu_write_vmem(ppc_effective_address, ppc_result_d); + //mem_write_word(ppc_effective_address, ppc_result_d); } void dppc_interpreter::ppc_sthbrx() { @@ -1474,8 +1506,10 @@ void dppc_interpreter::ppc_sthbrx() { ppc_grab_regssab(); ppc_effective_address = (reg_a == 0) ? ppc_result_b : (ppc_result_a + ppc_result_b); ppc_result_d = (uint32_t)(BYTESWAP_16((uint16_t)ppc_result_d)); - mem_write_word(ppc_effective_address, ppc_result_d); + mmu_write_vmem(ppc_effective_address, ppc_result_d); + //mem_write_word(ppc_effective_address, ppc_result_d); } + void dppc_interpreter::ppc_stw() { #ifdef CPU_PROFILING num_int_stores++; @@ -1483,7 +1517,8 @@ void dppc_interpreter::ppc_stw() { ppc_grab_regssa(); ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); ppc_effective_address += reg_a ? ppc_result_a : 0; - mem_write_dword(ppc_effective_address, ppc_result_d); + mmu_write_vmem(ppc_effective_address, ppc_result_d); + //mem_write_dword(ppc_effective_address, ppc_result_d); } void dppc_interpreter::ppc_stwx() { @@ -1492,7 +1527,8 @@ void dppc_interpreter::ppc_stwx() { #endif ppc_grab_regssab(); ppc_effective_address = reg_a ? (ppc_result_a + ppc_result_b) : ppc_result_b; - mem_write_dword(ppc_effective_address, ppc_result_d); + mmu_write_vmem(ppc_effective_address, ppc_result_d); + //mem_write_dword(ppc_effective_address, ppc_result_d); } void dppc_interpreter::ppc_stwcx() { @@ -1506,7 +1542,8 @@ void dppc_interpreter::ppc_stwcx() { ppc_grab_regssab(); ppc_effective_address = (reg_a == 0) ? ppc_result_b : (ppc_result_a + ppc_result_b); if (ppc_state.reserve) { - mem_write_dword(ppc_effective_address, ppc_result_d); + mmu_write_vmem(ppc_effective_address, ppc_result_d); + //mem_write_dword(ppc_effective_address, ppc_result_d); ppc_state.cr |= (ppc_state.spr[SPR::XER] & 0x80000000) ? 
0x30000000 : 0x20000000; ppc_state.reserve = false; } else { @@ -1523,7 +1560,8 @@ void dppc_interpreter::ppc_stwu() { if (reg_a != 0) { ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF)); ppc_effective_address += ppc_result_a; - mem_write_dword(ppc_effective_address, ppc_result_d); + mmu_write_vmem(ppc_effective_address, ppc_result_d); + //mem_write_dword(ppc_effective_address, ppc_result_d); ppc_state.gpr[reg_a] = ppc_effective_address; } else { ppc_exception_handler(Except_Type::EXC_PROGRAM, Exc_Cause::ILLEGAL_OP); @@ -1537,7 +1575,8 @@ void dppc_interpreter::ppc_stwux() { ppc_grab_regssab(); if (reg_a != 0) { ppc_effective_address = ppc_result_a + ppc_result_b; - mem_write_dword(ppc_effective_address, ppc_result_d); + mmu_write_vmem(ppc_effective_address, ppc_result_d); + //mem_write_dword(ppc_effective_address, ppc_result_d); ppc_state.gpr[reg_a] = ppc_effective_address; } else { ppc_exception_handler(Except_Type::EXC_PROGRAM, Exc_Cause::ILLEGAL_OP); @@ -1551,7 +1590,8 @@ void dppc_interpreter::ppc_stwbrx() { ppc_grab_regssab(); ppc_effective_address = reg_a ? (ppc_result_a + ppc_result_b) : ppc_result_b; ppc_result_d = BYTESWAP_32(ppc_result_d); - mem_write_dword(ppc_effective_address, ppc_result_d); + mmu_write_vmem(ppc_effective_address, ppc_result_d); + //mem_write_dword(ppc_effective_address, ppc_result_d); } void dppc_interpreter::ppc_stmw() { @@ -1568,7 +1608,8 @@ void dppc_interpreter::ppc_stmw() { } for (; reg_s <= 31; reg_s++) { - mem_write_dword(ppc_effective_address, ppc_state.gpr[reg_s]); + mmu_write_vmem(ppc_effective_address, ppc_state.gpr[reg_s]); + //mem_write_dword(ppc_effective_address, ppc_state.gpr[reg_s]); ppc_effective_address += 4; } } @@ -1886,20 +1927,26 @@ void dppc_interpreter::ppc_lswi() { while (grab_inb > 0) { switch (grab_inb) { case 1: - stringed_word = mem_grab_byte(ppc_effective_address) << 24; + stringed_word = mmu_read_vmem(ppc_effective_address) << 24; + //stringed_word = mem_grab_byte(ppc_effective_address) << 24; ppc_state.gpr[reg_d] = stringed_word; grab_inb = 0; break; case 2: - stringed_word = mem_grab_byte(ppc_effective_address) << 24; - stringed_word += mem_grab_byte(ppc_effective_address + 1) << 16; + stringed_word = mmu_read_vmem(ppc_effective_address) << 24; + //stringed_word = mem_grab_byte(ppc_effective_address) << 24; + stringed_word += mmu_read_vmem(ppc_effective_address + 1) << 16; + //stringed_word += mem_grab_byte(ppc_effective_address + 1) << 16; ppc_state.gpr[reg_d] = stringed_word; grab_inb = 0; break; case 3: - stringed_word = mem_grab_byte(ppc_effective_address) << 24; - stringed_word += mem_grab_byte(ppc_effective_address + 1) << 16; - stringed_word += mem_grab_byte(ppc_effective_address + 2) << 8; + stringed_word = mmu_read_vmem(ppc_effective_address) << 24; + //stringed_word = mem_grab_byte(ppc_effective_address) << 24; + stringed_word += mmu_read_vmem(ppc_effective_address + 1) << 16; + //stringed_word += mem_grab_byte(ppc_effective_address + 1) << 16; + stringed_word += mmu_read_vmem(ppc_effective_address + 2) << 8; + //stringed_word += mem_grab_byte(ppc_effective_address + 2) << 8; ppc_state.gpr[reg_d] = stringed_word; grab_inb = 0; break; @@ -1934,20 +1981,26 @@ void dppc_interpreter::ppc_lswx() { while (grab_inb > 0) { switch (grab_inb) { case 1: - stringed_word = mem_grab_byte(ppc_effective_address) << 24; + stringed_word = mmu_read_vmem(ppc_effective_address) << 24; + //stringed_word = mem_grab_byte(ppc_effective_address) << 24; ppc_state.gpr[reg_d] = stringed_word; grab_inb = 0; break; case 
2: - stringed_word = mem_grab_byte(ppc_effective_address) << 24; - stringed_word += mem_grab_byte(ppc_effective_address + 1) << 16; + stringed_word = mmu_read_vmem(ppc_effective_address) << 24; + //stringed_word = mem_grab_byte(ppc_effective_address) << 24; + stringed_word += mmu_read_vmem(ppc_effective_address + 1) << 16; + //stringed_word += mem_grab_byte(ppc_effective_address + 1) << 16; ppc_state.gpr[reg_d] = stringed_word; grab_inb = 0; break; case 3: - stringed_word = mem_grab_byte(ppc_effective_address) << 24; - stringed_word += mem_grab_byte(ppc_effective_address + 1) << 16; - stringed_word += mem_grab_byte(ppc_effective_address + 2) << 8; + stringed_word = mmu_read_vmem(ppc_effective_address) << 24; + //stringed_word = mem_grab_byte(ppc_effective_address) << 24; + stringed_word += mmu_read_vmem(ppc_effective_address + 1) << 16; + //stringed_word += mem_grab_byte(ppc_effective_address + 1) << 16; + stringed_word += mmu_read_vmem(ppc_effective_address + 2) << 8; + //stringed_word += mem_grab_byte(ppc_effective_address + 2) << 8; ppc_state.gpr[reg_d] = stringed_word; grab_inb = 0; break; @@ -1973,22 +2026,29 @@ void dppc_interpreter::ppc_stswi() { while (grab_inb > 0) { switch (grab_inb) { case 1: - mem_write_byte(ppc_effective_address, (ppc_result_d >> 24)); + mmu_write_vmem(ppc_effective_address, (ppc_result_d >> 24)); + //mem_write_byte(ppc_effective_address, (ppc_result_d >> 24)); grab_inb = 0; break; case 2: - mem_write_byte(ppc_effective_address, ((ppc_result_d >> 24) & 0xFF)); - mem_write_byte((ppc_effective_address + 1), ((ppc_result_d >> 16) & 0xFF)); + mmu_write_vmem(ppc_effective_address, ((ppc_result_d >> 24) & 0xFF)); + //mem_write_byte(ppc_effective_address, ((ppc_result_d >> 24) & 0xFF)); + mmu_write_vmem((ppc_effective_address + 1), ((ppc_result_d >> 16) & 0xFF)); + //mem_write_byte((ppc_effective_address + 1), ((ppc_result_d >> 16) & 0xFF)); grab_inb = 0; break; case 3: - mem_write_byte(ppc_effective_address, ((ppc_result_d >> 24) & 0xFF)); - mem_write_byte((ppc_effective_address + 1), ((ppc_result_d >> 16) & 0xFF)); - mem_write_byte((ppc_effective_address + 2), ((ppc_result_d >> 8) & 0xFF)); + mmu_write_vmem(ppc_effective_address, ((ppc_result_d >> 24) & 0xFF)); + //mem_write_byte(ppc_effective_address, ((ppc_result_d >> 24) & 0xFF)); + mmu_write_vmem((ppc_effective_address + 1), ((ppc_result_d >> 16) & 0xFF)); + //mem_write_byte((ppc_effective_address + 1), ((ppc_result_d >> 16) & 0xFF)); + mmu_write_vmem((ppc_effective_address + 2), ((ppc_result_d >> 8) & 0xFF)); + //mem_write_byte((ppc_effective_address + 2), ((ppc_result_d >> 8) & 0xFF)); grab_inb = 0; break; default: - mem_write_dword(ppc_effective_address, ppc_result_d); + mmu_write_vmem(ppc_effective_address, ppc_result_d); + //mem_write_dword(ppc_effective_address, ppc_result_d); reg_s++; ppc_effective_address += 4; grab_inb -= 4; @@ -2007,22 +2067,29 @@ void dppc_interpreter::ppc_stswx() { while (grab_inb > 0) { switch (grab_inb) { case 1: - mem_write_byte(ppc_effective_address, (ppc_result_d >> 24)); + mmu_write_vmem(ppc_effective_address, (ppc_result_d >> 24)); + //mem_write_byte(ppc_effective_address, (ppc_result_d >> 24)); grab_inb = 0; break; case 2: - mem_write_byte(ppc_effective_address, ((ppc_result_d >> 24) & 0xFF)); - mem_write_byte((ppc_effective_address + 1), ((ppc_result_d >> 16) & 0xFF)); + mmu_write_vmem(ppc_effective_address, ((ppc_result_d >> 24) & 0xFF)); + //mem_write_byte(ppc_effective_address, ((ppc_result_d >> 24) & 0xFF)); + mmu_write_vmem((ppc_effective_address + 1), ((ppc_result_d 
>> 16) & 0xFF)); + //mem_write_byte((ppc_effective_address + 1), ((ppc_result_d >> 16) & 0xFF)); grab_inb = 0; break; case 3: - mem_write_byte(ppc_effective_address, ((ppc_result_d >> 24) & 0xFF)); - mem_write_byte((ppc_effective_address + 1), ((ppc_result_d >> 16) & 0xFF)); - mem_write_byte((ppc_effective_address + 2), ((ppc_result_d >> 8) & 0xFF)); + mmu_write_vmem(ppc_effective_address, ((ppc_result_d >> 24) & 0xFF)); + //mem_write_byte(ppc_effective_address, ((ppc_result_d >> 24) & 0xFF)); + mmu_write_vmem((ppc_effective_address + 1), ((ppc_result_d >> 16) & 0xFF)); + //mem_write_byte((ppc_effective_address + 1), ((ppc_result_d >> 16) & 0xFF)); + mmu_write_vmem((ppc_effective_address + 2), ((ppc_result_d >> 8) & 0xFF)); + //mem_write_byte((ppc_effective_address + 2), ((ppc_result_d >> 8) & 0xFF)); grab_inb = 0; break; default: - mem_write_dword(ppc_effective_address, ppc_result_d); + mmu_write_vmem(ppc_effective_address, ppc_result_d); + //mem_write_dword(ppc_effective_address, ppc_result_d); reg_s++; ppc_effective_address += 4; grab_inb -= 4; @@ -2037,7 +2104,7 @@ void dppc_interpreter::ppc_tlbie() { num_supervisor_instrs++; #endif - flush_tlb_entry(ppc_state.gpr[(ppc_cur_instruction >> 11) & 31]); + tlb_flush_entry(ppc_state.gpr[(ppc_cur_instruction >> 11) & 31]); } void dppc_interpreter::ppc_tlbia() { diff --git a/main.cpp b/main.cpp index e6169ad..c3a3a2c 100644 --- a/main.cpp +++ b/main.cpp @@ -118,8 +118,10 @@ int main(int argc, char** argv) { loguru::init(argc, argv); loguru::add_file("dingusppc.log", loguru::Append, 0); } else { - loguru::g_stderr_verbosity = 0; + loguru::g_preamble_uptime = false; + loguru::g_stderr_verbosity = loguru::Verbosity_INFO; loguru::init(argc, argv); + loguru::add_file("exceptions.log", loguru::Truncate, -7); } if (*machine_opt) { From a8f400287a67f850fa90f6bcd9a03ce1170ff930 Mon Sep 17 00:00:00 2001 From: Maxim Poliakovski Date: Thu, 19 Aug 2021 11:28:18 +0200 Subject: [PATCH 08/14] Add TLB profiling. --- cpu/ppc/ppcmmu.cpp | 117 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 107 insertions(+), 10 deletions(-) diff --git a/cpu/ppc/ppcmmu.cpp b/cpu/ppc/ppcmmu.cpp index 12d656f..395de91 100644 --- a/cpu/ppc/ppcmmu.cpp +++ b/cpu/ppc/ppcmmu.cpp @@ -46,7 +46,8 @@ void (*mmu_exception_handler)(Except_Type exception_type, uint32_t srr1_bits); PPC_BAT_entry ibat_array[4] = {{0}}; PPC_BAT_entry dbat_array[4] = {{0}}; -//#define MMU_PROFILING // enable MMU profiling +#define MMU_PROFILING // uncomment this to enable MMU profiling +#define TLB_PROFILING // uncomment this to enable SoftTLB profiling /* MMU profiling */ #ifdef MMU_PROFILING @@ -135,6 +136,51 @@ public: }; #endif +/* SoftTLB profiling. 
*/ +#ifdef TLB_PROFILING + +/* global variables for lightweight SoftTLB profiling */ +uint64_t num_primary_tlb_hits = 0; // number of hits in the primary TLB +uint64_t num_secondary_tlb_hits = 0; // number of hits in the secondary TLB +uint64_t num_tlb_refills = 0; // number of TLB refills +uint64_t num_entry_replacements = 0; // number of entry replacements + +#include "utils/profiler.h" +#include + +class TLBProfile : public BaseProfile { +public: + TLBProfile() : BaseProfile("PPC:MMU:TLB") {}; + + void populate_variables(std::vector& vars) { + vars.clear(); + + vars.push_back({.name = "Number of hits in the primary TLB", + .format = ProfileVarFmt::DEC, + .value = num_primary_tlb_hits}); + + vars.push_back({.name = "Number of hits in the secondary TLB", + .format = ProfileVarFmt::DEC, + .value = num_secondary_tlb_hits}); + + vars.push_back({.name = "Number of TLB refills", + .format = ProfileVarFmt::DEC, + .value = num_tlb_refills}); + + vars.push_back({.name = "Number of replaced TLB entries", + .format = ProfileVarFmt::DEC, + .value = num_entry_replacements}); + }; + + void reset() { + num_primary_tlb_hits = 0; + num_secondary_tlb_hits = 0; + num_tlb_refills = 0; + num_entry_replacements = 0; + }; +}; +#endif + /** Temporary TLB test variables. */ bool MemAccessType; // true - memory, false - I/O bool Unaligned_crosspage = false; @@ -767,6 +813,9 @@ static TLBEntry* tlb2_target_entry(uint32_t gp_va) tlb_entry[3].lru_bits = 0x3; return &tlb_entry[3]; } else { // no invalid blocks, replace an existing one according with the hLRU policy +#ifdef TLB_PROFILING + num_entry_replacements++; +#endif if (tlb_entry[0].lru_bits == 0) { // update LRU bits tlb_entry[0].lru_bits = 0x3; @@ -946,7 +995,7 @@ void tlb_flush_bat_entries() return; tlb_flush_entries(TLBE_FROM_BAT); - + gTLBFlushBatEntries = false; } @@ -954,9 +1003,9 @@ void tlb_flush_pat_entries() { if (!gTLBFlushPatEntries) return; - + tlb_flush_entries(TLBE_FROM_PAT); - + gTLBFlushPatEntries = false; } @@ -1126,15 +1175,26 @@ inline T mmu_read_vmem(uint32_t guest_va) { // look up guest virtual address in the primary TLB tlb1_entry = &pCurTLB1[(guest_va >> PAGE_SIZE_BITS) & tlb_size_mask]; if (tlb1_entry->tag == tag) { // primary TLB hit -> fast path +#ifdef TLB_PROFILING + num_primary_tlb_hits++; +#endif host_va = (uint8_t *)(tlb1_entry->host_va_offset + guest_va); } else { // primary TLB miss -> look up address in the secondary TLB tlb2_entry = lookup_secondary_tlb(guest_va, tag); if (tlb2_entry == nullptr) { +#ifdef TLB_PROFILING + num_tlb_refills++; +#endif // secondary TLB miss -> // perform full address translation and refill the secondary TLB tlb2_entry = tlb2_refill(guest_va, 0); } +#ifdef TLB_PROFILING + else { + num_secondary_tlb_hits++; + } +#endif if (tlb2_entry->flags & TLBFlags::PAGE_MEM) { // is it a real memory region? 
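The counters bumped in the hit, miss and refill paths above lend themselves to a simple post-processing step once a run has finished. A sketch of turning the raw counts into rates; this is reader-side arithmetic for interpreting the profile output, not something the profiler itself emits:

    #include <cstdint>
    #include <cstdio>

    static void print_tlb_hit_rates(uint64_t primary_hits, uint64_t secondary_hits,
                                    uint64_t refills) {
        uint64_t total = primary_hits + secondary_hits + refills;
        if (!total)
            return;
        printf("primary TLB hit rate:   %.2f%%\n", 100.0 * primary_hits / total);
        printf("secondary TLB hit rate: %.2f%%\n", 100.0 * secondary_hits / total);
        printf("refill rate:            %.2f%%\n", 100.0 * refills / total);
    }
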
// refill the primary TLB @@ -1143,6 +1203,9 @@ inline T mmu_read_vmem(uint32_t guest_va) { tlb1_entry->host_va_offset = tlb2_entry->host_va_offset; host_va = (uint8_t *)(tlb1_entry->host_va_offset + guest_va); } else { // otherwise, it's an access to a memory-mapped device +#ifdef MMU_PROFILING + iomem_reads_total++; +#endif return ( tlb2_entry->reg_desc->devobj->read(tlb2_entry->reg_desc->start, guest_va - tlb2_entry->reg_desc->start, sizeof(T)) @@ -1150,6 +1213,10 @@ inline T mmu_read_vmem(uint32_t guest_va) { } } +#ifdef MMU_PROFILING + dmem_reads_total++; +#endif + // handle unaligned memory accesses if (sizeof(T) > 1 && (guest_va & (sizeof(T) - 1))) { return read_unaligned(guest_va, host_va, sizeof(T)); @@ -1185,6 +1252,9 @@ inline void mmu_write_vmem(uint32_t guest_va, T value) { // look up guest virtual address in the primary TLB tlb1_entry = &pCurTLB1[(guest_va >> PAGE_SIZE_BITS) & tlb_size_mask]; if (tlb1_entry->tag == tag) { // primary TLB hit -> fast path +#ifdef TLB_PROFILING + num_primary_tlb_hits++; +#endif if (!(tlb1_entry->flags & TLBFlags::PAGE_WRITABLE)) { ppc_state.spr[SPR::DSISR] = 0x08000000 | (1 << 25); ppc_state.spr[SPR::DAR] = guest_va; @@ -1209,10 +1279,18 @@ inline void mmu_write_vmem(uint32_t guest_va, T value) { // primary TLB miss -> look up address in the secondary TLB tlb2_entry = lookup_secondary_tlb(guest_va, tag); if (tlb2_entry == nullptr) { +#ifdef TLB_PROFILING + num_tlb_refills++; +#endif // secondary TLB miss -> // perform full address translation and refill the secondary TLB tlb2_entry = tlb2_refill(guest_va, 1); } +#ifdef TLB_PROFILING + else { + num_secondary_tlb_hits++; + } +#endif if (!(tlb2_entry->flags & TLBFlags::PAGE_WRITABLE)) { LOG_F(WARNING, "DSI Exception in mmu_write_vmem! PC=0x%08X", ppc_state.pc); @@ -1238,6 +1316,9 @@ inline void mmu_write_vmem(uint32_t guest_va, T value) { //MemAccessType = true; //MemAddr = (uint64_t)host_va; } else { // otherwise, it's an access to a memory-mapped device +#ifdef MMU_PROFILING + iomem_writes_total++; +#endif tlb2_entry->reg_desc->devobj->write(tlb2_entry->reg_desc->start, guest_va - tlb2_entry->reg_desc->start, value, sizeof(T)); //MemAccessType = false; @@ -1247,6 +1328,10 @@ inline void mmu_write_vmem(uint32_t guest_va, T value) { } } +#ifdef MMU_PROFILING + dmem_writes_total++; +#endif + // handle unaligned memory accesses if (sizeof(T) > 1 && (guest_va & (sizeof(T) - 1))) { write_unaligned(guest_va, host_va, value, sizeof(T)); @@ -1285,6 +1370,9 @@ static uint32_t read_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t siz // is it a misaligned cross-page read? if (((guest_va & 0xFFF) + size) > 0x1000) { +#ifdef MMU_PROFILING + unaligned_crossp_r++; +#endif // Break such a memory access into multiple, bytewise accesses. // Because such accesses suffer a performance penalty, they will be // presumably very rare so don't waste time optimizing the code below. @@ -1292,6 +1380,9 @@ static uint32_t read_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t siz result = (result << 8) | mmu_read_vmem(guest_va); } } else { +#ifdef MMU_PROFILING + unaligned_reads++; +#endif switch(size) { case 2: return READ_WORD_BE_U(host_va); @@ -1309,8 +1400,9 @@ static void write_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t value, { // is it a misaligned cross-page write? if (((guest_va & 0xFFF) + size) > 0x1000) { - Unaligned_crosspage = true; - +#ifdef MMU_PROFILING + unaligned_crossp_w++; +#endif // Break such a memory access into multiple, bytewise accesses. 
// Because such accesses suffer a performance penalty, they will be // presumably very rare so don't waste time optimizing the code below. @@ -1321,8 +1413,9 @@ static void write_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t value, mmu_write_vmem(guest_va, (value >> shift) & 0xFF); } } else { - Unaligned_crosspage = false; -#if 1 +#ifdef MMU_PROFILING + unaligned_writes++; +#endif switch(size) { case 2: WRITE_WORD_BE_U(host_va, value); @@ -1334,7 +1427,6 @@ static void write_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t value, WRITE_QWORD_BE_U(host_va, value); break; } -#endif } } @@ -1580,7 +1672,12 @@ void ppc_mmu_init() { mmu_change_mode(); #ifdef MMU_PROFILING - gProfilerObj->register_profile("PPC_MMU", + gProfilerObj->register_profile("PPC:MMU", std::unique_ptr(new MMUProfile())); #endif + +#ifdef TLB_PROFILING + gProfilerObj->register_profile("PPC:MMU:TLB", + std::unique_ptr(new TLBProfile())); +#endif } From c5f45c6f9af9f45244f62f5e2564b54dee558360 Mon Sep 17 00:00:00 2001 From: Maxim Poliakovski Date: Thu, 19 Aug 2021 11:29:44 +0200 Subject: [PATCH 09/14] SIGINT invokes the built-in debugger. --- main.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/main.cpp b/main.cpp index c3a3a2c..915a894 100644 --- a/main.cpp +++ b/main.cpp @@ -29,6 +29,7 @@ along with this program. If not, see . #include "utils/profiler.h" #include "ppcemu.h" #include +#include #include #include #include @@ -40,6 +41,15 @@ along with this program. If not, see . using namespace std; +void sigint_handler(int signum) { + enter_debugger(); + + LOG_F(INFO, "Shutting down..."); + + delete gMachineObj.release(); + exit(0); +} + static string appDescription = string( "\nDingusPPC - Prototype 5bf5 (8/23/2020) " "\nWritten by divingkatae and maximumspatium " @@ -118,10 +128,8 @@ int main(int argc, char** argv) { loguru::init(argc, argv); loguru::add_file("dingusppc.log", loguru::Append, 0); } else { - loguru::g_preamble_uptime = false; loguru::g_stderr_verbosity = loguru::Verbosity_INFO; loguru::init(argc, argv); - loguru::add_file("exceptions.log", loguru::Truncate, -7); } if (*machine_opt) { @@ -163,6 +171,9 @@ int main(int argc, char** argv) { goto bail; } + // redirect SIGINT to our own handler + signal(SIGINT, sigint_handler); + #ifdef SDL if (SDL_Init(SDL_INIT_AUDIO)){ LOG_F(ERROR, "SDL_Init error: %s", SDL_GetError()); From 2a79c9a63c4ffad5c45cc9568323f65ce4622eae Mon Sep 17 00:00:00 2001 From: Maxim Poliakovski Date: Thu, 19 Aug 2021 13:31:13 +0200 Subject: [PATCH 10/14] ppcmmu.c: restructure and clean up. --- cpu/ppc/ppcmmu.cpp | 94 +++++++----------------------------------- cpu/ppc/ppcmmu.h | 10 ++--- cpu/ppc/ppcopcodes.cpp | 1 - 3 files changed, 18 insertions(+), 87 deletions(-) diff --git a/cpu/ppc/ppcmmu.cpp b/cpu/ppc/ppcmmu.cpp index 395de91..6763602 100644 --- a/cpu/ppc/ppcmmu.cpp +++ b/cpu/ppc/ppcmmu.cpp @@ -19,7 +19,7 @@ You should have received a copy of the GNU General Public License along with this program. If not, see . */ -/** @file PowerPC Memory management unit emulation. */ +/** @file PowerPC Memory Management Unit emulation. */ /* TODO: - implement TLB @@ -181,13 +181,6 @@ public: }; #endif -/** Temporary TLB test variables. */ -bool MemAccessType; // true - memory, false - I/O -bool Unaligned_crosspage = false; -uint64_t MemAddr = 0; -MMIODevice *Device = 0; -uint32_t DevOffset = 0; - /** remember recently used physical memory regions for quicker translation. 
*/ AddressMapEntry last_read_area = {0xFFFFFFFF, 0xFFFFFFFF}; @@ -215,16 +208,6 @@ static inline T read_phys_mem(AddressMapEntry *mru_rgn, uint32_t addr) dmem_reads_total++; #endif - if (!MemAccessType) { - LOG_F(ERROR, "TLB real memory access expected!"); - } - - if ((mru_rgn->mem_ptr + (addr - mru_rgn->start)) != (uint8_t *)MemAddr) { - LOG_F(ERROR, "TLB address mismatch! Expected: 0x%llx, got: 0x%llx", - (uint64_t)(mru_rgn->mem_ptr + (addr - mru_rgn->start)), - (uint64_t)MemAddr); - } - switch(sizeof(T)) { case 1: return *(mru_rgn->mem_ptr + (addr - mru_rgn->start)); @@ -252,14 +235,6 @@ static inline T read_phys_mem(AddressMapEntry *mru_rgn, uint32_t addr) #ifdef MMU_PROFILING iomem_reads_total++; #endif - if (MemAccessType) { - LOG_F(ERROR, "TLB I/O memory access expected!"); - } - - if (mru_rgn->devobj != Device || (addr - mru_rgn->start) != DevOffset) { - LOG_F(ERROR, "TLB MMIO access mismatch! Expected: 0x%X, got: 0x%X", - addr - mru_rgn->start, DevOffset); - } return (mru_rgn->devobj->read(mru_rgn->start, addr - mru_rgn->start, sizeof(T))); @@ -287,20 +262,6 @@ static inline void write_phys_mem(AddressMapEntry *mru_rgn, uint32_t addr, T val dmem_writes_total++; #endif -#if 1 - if (!MemAccessType) { - LOG_F(ERROR, "TLB real memory access expected!"); - } - - if (!is_aligned && Unaligned_crosspage) { - LOG_F(WARNING, "Unaligned cross-page access ignored!"); - } else if ((mru_rgn->mem_ptr + (addr - mru_rgn->start)) != (uint8_t *)MemAddr) { - LOG_F(ERROR, "TLB address mismatch! Expected: 0x%llx, got: 0x%llx", - (uint64_t)(mru_rgn->mem_ptr + (addr - mru_rgn->start)), - (uint64_t)MemAddr); - } -#endif - switch(sizeof(T)) { case 1: *(mru_rgn->mem_ptr + (addr - mru_rgn->start)) = value; @@ -333,17 +294,6 @@ static inline void write_phys_mem(AddressMapEntry *mru_rgn, uint32_t addr, T val iomem_writes_total++; #endif -#if 1 - if (MemAccessType) { - LOG_F(ERROR, "TLB I/O memory access expected!"); - } - - if (mru_rgn->devobj != Device || (addr - mru_rgn->start) != DevOffset) { - LOG_F(ERROR, "TLB MMIO access mismatch! Expected: 0x%X, got: 0x%X", - addr - mru_rgn->start, DevOffset); - } -#endif - mru_rgn->devobj->write(mru_rgn->start, addr - mru_rgn->start, value, sizeof(T)); } else { @@ -375,6 +325,10 @@ void ppc_set_cur_instruction(const uint8_t* ptr) { bool gTLBFlushBatEntries = false; bool gTLBFlushPatEntries = false; +// Forward declarations. +void tlb_flush_bat_entries(); +void tlb_flush_pat_entries(); + void ibat_update(uint32_t bat_reg) { int upper_reg_num; uint32_t bl, hi_mask; @@ -527,7 +481,7 @@ static bool search_pteg( return false; } -static PATResult page_address_translate(uint32_t la, bool is_instr_fetch, +static PATResult page_address_translation(uint32_t la, bool is_instr_fetch, unsigned msr_pr, int is_write) { uint32_t sr_val, page_index, pteg_hash1, vsid, pte_word2; @@ -598,7 +552,7 @@ static PATResult page_address_translate(uint32_t la, bool is_instr_fetch, } /** PowerPC-style MMU instruction address translation. 
*/ -static uint32_t ppc_mmu_instr_translate(uint32_t la) { +static uint32_t mmu_instr_translation(uint32_t la) { uint32_t pa; /* translated physical address */ bool bat_hit = false; @@ -631,7 +585,7 @@ static uint32_t ppc_mmu_instr_translate(uint32_t la) { /* page address translation */ if (!bat_hit) { - PATResult pat_res = page_address_translate(la, true, msr_pr, 0); + PATResult pat_res = page_address_translation(la, true, msr_pr, 0); pa = pat_res.phys; #ifdef MMU_PROFILING @@ -678,7 +632,7 @@ static uint32_t ppc_mmu_addr_translate(uint32_t la, int is_write) { /* page address translation */ if (!bat_hit) { - PATResult pat_res = page_address_translate(la, false, msr_pr, is_write); + PATResult pat_res = page_address_translation(la, false, msr_pr, is_write); pa = pat_res.phys; #ifdef MMU_PROFILING @@ -812,7 +766,7 @@ static TLBEntry* tlb2_target_entry(uint32_t gp_va) tlb_entry[2].lru_bits = 0x2; tlb_entry[3].lru_bits = 0x3; return &tlb_entry[3]; - } else { // no invalid blocks, replace an existing one according with the hLRU policy + } else { // no free entries, replace an existing one according with the hLRU policy #ifdef TLB_PROFILING num_entry_replacements++; #endif @@ -880,7 +834,7 @@ static TLBEntry* tlb2_refill(uint32_t guest_va, int is_write) } } else { // page address translation - PATResult pat_res = page_address_translate(guest_va, false, + PATResult pat_res = page_address_translation(guest_va, false, !!(ppc_state.msr & 0x4000), is_write); phys_addr = pat_res.phys; flags = TLBFlags::TLBE_FROM_PAT; // tell the world we come from @@ -1018,8 +972,6 @@ static inline uint64_t tlb_translate_addr(uint32_t guest_va) // look up address in the primary TLB tlb1_entry = &pCurTLB1[(guest_va >> PAGE_SIZE_BITS) & tlb_size_mask]; if (tlb1_entry->tag == tag) { // primary TLB hit -> fast path - MemAccessType = true; - MemAddr = tlb1_entry->host_va_offset + guest_va; return tlb1_entry->host_va_offset + guest_va; } else { // primary TLB miss -> look up address in the secondary TLB tlb2_entry = &pCurTLB2[((guest_va >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS]; @@ -1060,13 +1012,8 @@ static inline uint64_t tlb_translate_addr(uint32_t guest_va) tlb1_entry->tag = tag; tlb1_entry->flags = tlb2_entry->flags; tlb1_entry->host_va_offset = tlb2_entry->host_va_offset; - MemAccessType = true; - MemAddr = tlb1_entry->host_va_offset + guest_va; return tlb1_entry->host_va_offset + guest_va; } else { // an attempt to access a memory-mapped device - MemAccessType = false; - Device = tlb2_entry->reg_desc->devobj; - DevOffset = guest_va - tlb2_entry->reg_desc->start; return guest_va - tlb2_entry->reg_desc->start; } } @@ -1236,7 +1183,6 @@ inline T mmu_read_vmem(uint32_t guest_va) { } // explicitely instantiate all required mmu_read_vmem variants -// to avoid linking errors template uint8_t mmu_read_vmem(uint32_t guest_va); template uint16_t mmu_read_vmem(uint32_t guest_va); template uint32_t mmu_read_vmem(uint32_t guest_va); @@ -1262,7 +1208,7 @@ inline void mmu_write_vmem(uint32_t guest_va, T value) { } if (!(tlb1_entry->flags & TLBFlags::PTE_SET_C)) { // perform full page address translation to update PTE.C bit - PATResult pat_res = page_address_translate(guest_va, false, + PATResult pat_res = page_address_translation(guest_va, false, !!(ppc_state.msr & 0x4000), true); tlb1_entry->flags |= TLBFlags::PTE_SET_C; @@ -1273,8 +1219,6 @@ inline void mmu_write_vmem(uint32_t guest_va, T value) { } } host_va = (uint8_t *)(tlb1_entry->host_va_offset + guest_va); - MemAccessType = true; - MemAddr = (uint64_t)host_va; } else 
{ // primary TLB miss -> look up address in the secondary TLB tlb2_entry = lookup_secondary_tlb(guest_va, tag); @@ -1293,8 +1237,6 @@ inline void mmu_write_vmem(uint32_t guest_va, T value) { #endif if (!(tlb2_entry->flags & TLBFlags::PAGE_WRITABLE)) { - LOG_F(WARNING, "DSI Exception in mmu_write_vmem! PC=0x%08X", ppc_state.pc); - //return; ppc_state.spr[SPR::DSISR] = 0x08000000 | (1 << 25); ppc_state.spr[SPR::DAR] = guest_va; mmu_exception_handler(Except_Type::EXC_DSI, 0); @@ -1302,7 +1244,7 @@ inline void mmu_write_vmem(uint32_t guest_va, T value) { if (!(tlb2_entry->flags & TLBFlags::PTE_SET_C)) { // perform full page address translation to update PTE.C bit - PATResult pat_res = page_address_translate(guest_va, false, + PATResult pat_res = page_address_translation(guest_va, false, !!(ppc_state.msr & 0x4000), true); tlb2_entry->flags |= TLBFlags::PTE_SET_C; } @@ -1313,17 +1255,12 @@ inline void mmu_write_vmem(uint32_t guest_va, T value) { tlb1_entry->flags = tlb2_entry->flags; tlb1_entry->host_va_offset = tlb2_entry->host_va_offset; host_va = (uint8_t *)(tlb1_entry->host_va_offset + guest_va); - //MemAccessType = true; - //MemAddr = (uint64_t)host_va; } else { // otherwise, it's an access to a memory-mapped device #ifdef MMU_PROFILING iomem_writes_total++; #endif tlb2_entry->reg_desc->devobj->write(tlb2_entry->reg_desc->start, guest_va - tlb2_entry->reg_desc->start, value, sizeof(T)); - //MemAccessType = false; - //Device = tlb2_entry->reg_desc->devobj; - //DevOffset = guest_va - tlb2_entry->reg_desc->start; return; } } @@ -1338,7 +1275,6 @@ inline void mmu_write_vmem(uint32_t guest_va, T value) { return; } -#if 1 // handle aligned memory accesses switch(sizeof(T)) { case 1: @@ -1354,11 +1290,9 @@ inline void mmu_write_vmem(uint32_t guest_va, T value) { WRITE_QWORD_BE_A(host_va, value); break; } -#endif } // explicitely instantiate all required mmu_write_vmem variants -// to avoid linking errors template void mmu_write_vmem(uint32_t guest_va, uint8_t value); template void mmu_write_vmem(uint32_t guest_va, uint16_t value); template void mmu_write_vmem(uint32_t guest_va, uint32_t value); @@ -1556,7 +1490,7 @@ uint8_t* quickinstruction_translate(uint32_t addr) { /* perform instruction address translation if enabled */ if (ppc_state.msr & 0x20) { - addr = ppc_mmu_instr_translate(addr); + addr = mmu_instr_translation(addr); } if (addr >= last_exec_area.start && addr <= last_exec_area.end) { diff --git a/cpu/ppc/ppcmmu.h b/cpu/ppc/ppcmmu.h index 80ae209..6fcb8f2 100644 --- a/cpu/ppc/ppcmmu.h +++ b/cpu/ppc/ppcmmu.h @@ -19,10 +19,10 @@ You should have received a copy of the GNU General Public License along with this program. If not, see . */ -// The opcodes for the processor - ppcopcodes.cpp +/** @file PowerPC Memory Management Unit definitions. 
*/ -#ifndef PPCMEMORY_H -#define PPCMEMORY_H +#ifndef PPCMMU_H +#define PPCMMU_H #include #include @@ -93,8 +93,6 @@ extern uint8_t* mmu_get_dma_mem(uint32_t addr, uint32_t size); extern void mmu_change_mode(void); extern void mmu_pat_ctx_changed(); extern void tlb_flush_entry(uint32_t ea); -extern void tlb_flush_bat_entries(); -extern void tlb_flush_pat_entries(); extern void ppc_set_cur_instruction(const uint8_t* ptr); extern void mem_write_byte(uint32_t addr, uint8_t value); @@ -113,4 +111,4 @@ extern T mmu_read_vmem(uint32_t guest_va); template extern void mmu_write_vmem(uint32_t guest_va, T value); -#endif // PPCMEMORY_H +#endif // PPCMMU_H diff --git a/cpu/ppc/ppcopcodes.cpp b/cpu/ppc/ppcopcodes.cpp index 71e8572..5283b99 100644 --- a/cpu/ppc/ppcopcodes.cpp +++ b/cpu/ppc/ppcopcodes.cpp @@ -873,7 +873,6 @@ void dppc_interpreter::ppc_mtspr() { } if (ref_spr == SPR::SDR1) { - LOG_F(INFO, "SDR1 changed to 0x%08X", ppc_state.spr[SPR::SDR1]); mmu_pat_ctx_changed(); } From 501f24f0d3a54f07265bd3b9fb855e6f9068e8f9 Mon Sep 17 00:00:00 2001 From: Maxim Poliakovski Date: Sat, 25 Sep 2021 10:16:48 +0200 Subject: [PATCH 11/14] ppcmmu: implement SoftTLB for instructions. --- cpu/ppc/ppcexec.cpp | 26 +-- cpu/ppc/ppcmmu.cpp | 375 ++++++++++++++++++++++++++++++++++---------- cpu/ppc/ppcmmu.h | 12 +- 3 files changed, 320 insertions(+), 93 deletions(-) diff --git a/cpu/ppc/ppcexec.cpp b/cpu/ppc/ppcexec.cpp index 124f16f..141dda0 100644 --- a/cpu/ppc/ppcexec.cpp +++ b/cpu/ppc/ppcexec.cpp @@ -309,7 +309,8 @@ void ppc_exec() timebase_counter += ((ppc_state.pc - glob_bb_start_la) >> 2) + 1; #endif glob_bb_start_la = bb_start_la = ppc_next_instruction_address; - pc_real = quickinstruction_translate(bb_start_la); + //pc_real = mmu_translate_imem(bb_start_la); + pc_real = mmu_translate_imem(bb_start_la); page_start = bb_start_la & 0xFFFFF000; ppc_state.pc = bb_start_la; bb_kind = BB_end_kind::BB_NONE; @@ -317,7 +318,8 @@ void ppc_exec() } /* initial MMU translation for the current code page. */ - pc_real = quickinstruction_translate(bb_start_la); + //pc_real = quickinstruction_translate(bb_start_la); + pc_real = mmu_translate_imem(bb_start_la); /* set current code page limits */ page_start = bb_start_la & 0xFFFFF000; @@ -335,7 +337,7 @@ again: glob_bb_start_la = bb_start_la = ppc_next_instruction_address; if ((ppc_next_instruction_address & 0xFFFFF000) != page_start) { page_start = bb_start_la & 0xFFFFF000; - pc_real = quickinstruction_translate(bb_start_la); + pc_real = mmu_translate_imem(bb_start_la); } else { pc_real += (int)bb_start_la - (int)ppc_state.pc; ppc_set_cur_instruction(pc_real); @@ -368,7 +370,8 @@ void ppc_exec_single() return; } - quickinstruction_translate(ppc_state.pc); + //quickinstruction_translate(ppc_state.pc); + mmu_translate_imem(ppc_state.pc); ppc_main_opcode(); if (bb_kind != BB_end_kind::BB_NONE) { ppc_state.pc = ppc_next_instruction_address; @@ -403,7 +406,7 @@ void ppc_exec_until(volatile uint32_t goal_addr) timebase_counter += ((ppc_state.pc - glob_bb_start_la) >> 2) + 1; #endif glob_bb_start_la = bb_start_la = ppc_next_instruction_address; - pc_real = quickinstruction_translate(bb_start_la); + pc_real = mmu_translate_imem(bb_start_la); page_start = bb_start_la & 0xFFFFF000; ppc_state.pc = bb_start_la; bb_kind = BB_end_kind::BB_NONE; @@ -411,7 +414,8 @@ void ppc_exec_until(volatile uint32_t goal_addr) } /* initial MMU translation for the current code page. 
*/ - pc_real = quickinstruction_translate(bb_start_la); + //pc_real = quickinstruction_translate(bb_start_la); + pc_real = mmu_translate_imem(bb_start_la); /* set current code page limits */ page_start = bb_start_la & 0xFFFFF000; @@ -429,7 +433,7 @@ again: glob_bb_start_la = bb_start_la = ppc_next_instruction_address; if ((ppc_next_instruction_address & 0xFFFFF000) != page_start) { page_start = bb_start_la & 0xFFFFF000; - pc_real = quickinstruction_translate(bb_start_la); + pc_real = mmu_translate_imem(bb_start_la); } else { pc_real += (int)bb_start_la - (int)ppc_state.pc; ppc_set_cur_instruction(pc_real); @@ -463,7 +467,8 @@ void ppc_exec_dbg(volatile uint32_t start_addr, volatile uint32_t size) timebase_counter += ((ppc_state.pc - glob_bb_start_la) >> 2) + 1; #endif glob_bb_start_la = bb_start_la = ppc_next_instruction_address; - pc_real = quickinstruction_translate(bb_start_la); + //pc_real = quickinstruction_translate(bb_start_la); + pc_real = mmu_translate_imem(bb_start_la); page_start = bb_start_la & 0xFFFFF000; ppc_state.pc = bb_start_la; bb_kind = BB_end_kind::BB_NONE; @@ -472,7 +477,7 @@ void ppc_exec_dbg(volatile uint32_t start_addr, volatile uint32_t size) } /* initial MMU translation for the current code page. */ - pc_real = quickinstruction_translate(bb_start_la); + pc_real = mmu_translate_imem(bb_start_la); /* set current code page limits */ page_start = bb_start_la & 0xFFFFF000; @@ -490,7 +495,8 @@ again: glob_bb_start_la = bb_start_la = ppc_next_instruction_address; if ((ppc_next_instruction_address & 0xFFFFF000) != page_start) { page_start = bb_start_la & 0xFFFFF000; - pc_real = quickinstruction_translate(bb_start_la); + //pc_real = quickinstruction_translate(bb_start_la); + pc_real = mmu_translate_imem(bb_start_la); } else { pc_real += (int)bb_start_la - (int)ppc_state.pc; ppc_set_cur_instruction(pc_real); diff --git a/cpu/ppc/ppcmmu.cpp b/cpu/ppc/ppcmmu.cpp index 6763602..bf88fe7 100644 --- a/cpu/ppc/ppcmmu.cpp +++ b/cpu/ppc/ppcmmu.cpp @@ -46,8 +46,8 @@ void (*mmu_exception_handler)(Except_Type exception_type, uint32_t srr1_bits); PPC_BAT_entry ibat_array[4] = {{0}}; PPC_BAT_entry dbat_array[4] = {{0}}; -#define MMU_PROFILING // uncomment this to enable MMU profiling -#define TLB_PROFILING // uncomment this to enable SoftTLB profiling +//#define MMU_PROFILING // uncomment this to enable MMU profiling +//#define TLB_PROFILING // uncomment this to enable SoftTLB profiling /* MMU profiling */ #ifdef MMU_PROFILING @@ -140,10 +140,13 @@ public: #ifdef TLB_PROFILING /* global variables for lightweight SoftTLB profiling */ -uint64_t num_primary_tlb_hits = 0; // number of hits in the primary TLB -uint64_t num_secondary_tlb_hits = 0; // number of hits in the secondary TLB -uint64_t num_tlb_refills = 0; // number of TLB refills -uint64_t num_entry_replacements = 0; // number of entry replacements +uint64_t num_primary_itlb_hits = 0; // number of hits in the primary ITLB +uint64_t num_secondary_itlb_hits = 0; // number of hits in the secondary ITLB +uint64_t num_itlb_refills = 0; // number of ITLB refills +uint64_t num_primary_dtlb_hits = 0; // number of hits in the primary DTLB +uint64_t num_secondary_dtlb_hits = 0; // number of hits in the secondary DTLB +uint64_t num_dtlb_refills = 0; // number of DTLB refills +uint64_t num_entry_replacements = 0; // number of entry replacements #include "utils/profiler.h" #include @@ -155,17 +158,29 @@ public: void populate_variables(std::vector& vars) { vars.clear(); - vars.push_back({.name = "Number of hits in the primary TLB", + 
vars.push_back({.name = "Number of hits in the primary ITLB", .format = ProfileVarFmt::DEC, - .value = num_primary_tlb_hits}); + .value = num_primary_itlb_hits}); - vars.push_back({.name = "Number of hits in the secondary TLB", + vars.push_back({.name = "Number of hits in the secondary ITLB", .format = ProfileVarFmt::DEC, - .value = num_secondary_tlb_hits}); + .value = num_secondary_itlb_hits}); - vars.push_back({.name = "Number of TLB refills", + vars.push_back({.name = "Number of ITLB refills", .format = ProfileVarFmt::DEC, - .value = num_tlb_refills}); + .value = num_itlb_refills}); + + vars.push_back({.name = "Number of hits in the primary DTLB", + .format = ProfileVarFmt::DEC, + .value = num_primary_dtlb_hits}); + + vars.push_back({.name = "Number of hits in the secondary DTLB", + .format = ProfileVarFmt::DEC, + .value = num_secondary_dtlb_hits}); + + vars.push_back({.name = "Number of DTLB refills", + .format = ProfileVarFmt::DEC, + .value = num_dtlb_refills}); vars.push_back({.name = "Number of replaced TLB entries", .format = ProfileVarFmt::DEC, @@ -173,9 +188,9 @@ public: }; void reset() { - num_primary_tlb_hits = 0; - num_secondary_tlb_hits = 0; - num_tlb_refills = 0; + num_primary_dtlb_hits = 0; + num_secondary_dtlb_hits = 0; + num_dtlb_refills = 0; num_entry_replacements = 0; }; }; @@ -346,6 +361,11 @@ void ibat_update(uint32_t bat_reg) { bat_entry->hi_mask = hi_mask; bat_entry->phys_hi = ppc_state.spr[upper_reg_num + 1] & hi_mask; bat_entry->bepi = ppc_state.spr[upper_reg_num] & hi_mask; + + if (!gTLBFlushBatEntries) { + gTLBFlushBatEntries = true; + add_ctx_sync_action(&tlb_flush_bat_entries); + } } } @@ -393,7 +413,7 @@ static BATResult ppc_block_address_translation(uint32_t la) bool bat_hit = false; unsigned msr_pr = !!(ppc_state.msr & 0x4000); - bat_array = (type == BATType::Instruction) ? ibat_array : dbat_array; + bat_array = (type == BATType::IBAT) ? 
ibat_array : dbat_array; // Format: %XY // X - supervisor access bit, Y - problem/user access bit @@ -687,18 +707,30 @@ static void mem_write_unaligned(uint32_t addr, uint32_t value, uint32_t size) { } } -// primary TLB for all MMU modes -static std::array mode1_tlb1; -static std::array mode2_tlb1; -static std::array mode3_tlb1; +// primary ITLB for all MMU modes +static std::array itlb1_mode1; +static std::array itlb1_mode2; +static std::array itlb1_mode3; -// secondary TLB for all MMU modes -static std::array mode1_tlb2; -static std::array mode2_tlb2; -static std::array mode3_tlb2; +// secondary ITLB for all MMU modes +static std::array itlb2_mode1; +static std::array itlb2_mode2; +static std::array itlb2_mode3; -TLBEntry *pCurTLB1; // current primary TLB -TLBEntry *pCurTLB2; // current secondary TLB +// primary DTLB for all MMU modes +static std::array dtlb1_mode1; +static std::array dtlb1_mode2; +static std::array dtlb1_mode3; + +// secondary DTLB for all MMU modes +static std::array dtlb2_mode1; +static std::array dtlb2_mode2; +static std::array dtlb2_mode3; + +TLBEntry *pCurITLB1; // current primary ITLB +TLBEntry *pCurITLB2; // current secondary ITLB +TLBEntry *pCurDTLB1; // current primary DTLB +TLBEntry *pCurDTLB2; // current secondary DTLB uint32_t tlb_size_mask = TLB_SIZE - 1; @@ -706,28 +738,53 @@ uint32_t tlb_size_mask = TLB_SIZE - 1; uint64_t UnmappedVal = -1ULL; TLBEntry UnmappedMem = {TLB_INVALID_TAG, 0, 0, 0}; -uint8_t CurMMUMode = {0xFF}; // current MMU mode +uint8_t CurITLBMode = {0xFF}; // current ITLB mode +uint8_t CurDTLBMode = {0xFF}; // current DTLB mode void mmu_change_mode() { - uint8_t mmu_mode = ((ppc_state.msr >> 3) & 0x2) | ((ppc_state.msr >> 14) & 1); + uint8_t mmu_mode; - if (CurMMUMode != mmu_mode) { + // switch ITLB tables first + mmu_mode = ((ppc_state.msr >> 4) & 0x2) | ((ppc_state.msr >> 14) & 1); + + if (CurITLBMode != mmu_mode) { switch(mmu_mode) { case 0: // real address mode - pCurTLB1 = &mode1_tlb1[0]; - pCurTLB2 = &mode1_tlb2[0]; + pCurITLB1 = &dtlb1_mode1[0]; + pCurITLB2 = &dtlb2_mode1[0]; break; - case 2: // supervisor mode with data translation enabled - pCurTLB1 = &mode2_tlb1[0]; - pCurTLB2 = &mode2_tlb2[0]; + case 2: // supervisor mode with instruction translation enabled + pCurITLB1 = &dtlb1_mode2[0]; + pCurITLB2 = &dtlb2_mode2[0]; break; - case 3: // user mode with data translation enabled - pCurTLB1 = &mode3_tlb1[0]; - pCurTLB2 = &mode3_tlb2[0]; + case 3: // user mode with instruction translation enabled + pCurITLB1 = &dtlb1_mode3[0]; + pCurITLB2 = &dtlb2_mode3[0]; break; } - CurMMUMode = mmu_mode; + CurITLBMode = mmu_mode; + } + + // then switch DTLB tables + mmu_mode = ((ppc_state.msr >> 3) & 0x2) | ((ppc_state.msr >> 14) & 1); + + if (CurDTLBMode != mmu_mode) { + switch(mmu_mode) { + case 0: // real address mode + pCurDTLB1 = &dtlb1_mode1[0]; + pCurDTLB2 = &dtlb2_mode1[0]; + break; + case 2: // supervisor mode with data translation enabled + pCurDTLB1 = &dtlb1_mode2[0]; + pCurDTLB2 = &dtlb2_mode2[0]; + break; + case 3: // user mode with data translation enabled + pCurDTLB1 = &dtlb1_mode3[0]; + pCurDTLB2 = &dtlb2_mode3[0]; + break; + } + CurDTLBMode = mmu_mode; } } @@ -735,7 +792,7 @@ static TLBEntry* tlb2_target_entry(uint32_t gp_va) { TLBEntry *tlb_entry; - tlb_entry = &pCurTLB2[((gp_va >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS]; + tlb_entry = &pCurDTLB2[((gp_va >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS]; // select the target from invalid blocks first if (tlb_entry[0].tag == TLB_INVALID_TAG) { @@ -802,7 +859,56 @@ static 
TLBEntry* tlb2_target_entry(uint32_t gp_va) } } -static TLBEntry* tlb2_refill(uint32_t guest_va, int is_write) +static TLBEntry* itlb2_refill(uint32_t guest_va) +{ + uint32_t phys_addr; + TLBEntry *tlb_entry; + uint16_t flags = 0; + + /* instruction address translation if enabled */ + if (ppc_state.msr & 0x20) { + // attempt block address translation first + BATResult bat_res = ppc_block_address_translation(guest_va); + if (bat_res.hit) { + // check block protection + // only PP = 0 (no access) causes ISI exception + if (!bat_res.prot) { + mmu_exception_handler(Except_Type::EXC_ISI, 0x08000000); + } + phys_addr = bat_res.phys; + flags |= TLBFlags::TLBE_FROM_BAT; // tell the world we come from + } else { + // page address translation + PATResult pat_res = page_address_translation(guest_va, true, + !!(ppc_state.msr & 0x4000), 0); + phys_addr = pat_res.phys; + flags = TLBFlags::TLBE_FROM_PAT; // tell the world we come from + } + } else { // instruction translation disabled + phys_addr = guest_va; + } + + // look up host virtual address + AddressMapEntry* reg_desc = mem_ctrl_instance->find_range(phys_addr); + if (reg_desc) { + if (reg_desc->type & RT_MMIO) { + ABORT_F("Instruction fetch from MMIO region at 0x%08X!\n", phys_addr); + } + // refill the secondary TLB + const uint32_t tag = guest_va & ~0xFFFUL; + tlb_entry = tlb2_target_entry(tag); + tlb_entry->tag = tag; + tlb_entry->flags = flags | TLBFlags::PAGE_MEM; + tlb_entry->host_va_offset = (int64_t)reg_desc->mem_ptr - guest_va + + (phys_addr - reg_desc->start); + } else { + ABORT_F("Instruction fetch from unmapped memory at 0x%08X!\n", phys_addr); + } + + return tlb_entry; +} + +static TLBEntry* dtlb2_refill(uint32_t guest_va, int is_write) { uint32_t phys_addr; uint16_t flags = 0; @@ -813,7 +919,7 @@ static TLBEntry* tlb2_refill(uint32_t guest_va, int is_write) /* data address translation if enabled */ if (ppc_state.msr & 0x10) { // attempt block address translation first - BATResult bat_res = ppc_block_address_translation(guest_va); + BATResult bat_res = ppc_block_address_translation(guest_va); if (bat_res.hit) { // check block protection if (!bat_res.prot || ((bat_res.prot & 1) && is_write)) { @@ -882,19 +988,31 @@ void tlb_flush_entry(uint32_t ea) const uint32_t tag = ea & ~0xFFFUL; - for (int m = 0; m < 3; m++) { + for (int m = 0; m < 6; m++) { switch (m) { case 0: - tlb1 = &mode1_tlb1[0]; - tlb2 = &mode1_tlb2[0]; + tlb1 = &itlb1_mode1[0]; + tlb2 = &itlb2_mode1[0]; break; case 1: - tlb1 = &mode2_tlb1[0]; - tlb2 = &mode2_tlb2[0]; + tlb1 = &itlb1_mode2[0]; + tlb2 = &itlb2_mode2[0]; break; case 2: - tlb1 = &mode3_tlb1[0]; - tlb2 = &mode3_tlb2[0]; + tlb1 = &itlb1_mode3[0]; + tlb2 = &itlb2_mode3[0]; + break; + case 3: + tlb1 = &dtlb1_mode1[0]; + tlb2 = &dtlb2_mode1[0]; + break; + case 4: + tlb1 = &dtlb1_mode2[0]; + tlb2 = &dtlb2_mode2[0]; + break; + case 5: + tlb1 = &dtlb1_mode3[0]; + tlb2 = &dtlb2_mode3[0]; break; } @@ -922,23 +1040,23 @@ void tlb_flush_entries(TLBFlags type) // Flush BAT entries from the primary TLBs for (i = 0; i < TLB_SIZE; i++) { - if (mode2_tlb1[i].flags & type) { - mode2_tlb1[i].tag = TLB_INVALID_TAG; + if (dtlb1_mode2[i].flags & type) { + dtlb1_mode2[i].tag = TLB_INVALID_TAG; } - if (mode3_tlb1[i].flags & type) { - mode3_tlb1[i].tag = TLB_INVALID_TAG; + if (dtlb1_mode3[i].flags & type) { + dtlb1_mode3[i].tag = TLB_INVALID_TAG; } } // Flush BAT entries from the secondary TLBs for (i = 0; i < TLB_SIZE * TLB2_WAYS; i++) { - if (mode2_tlb2[i].flags & type) { - mode2_tlb2[i].tag = TLB_INVALID_TAG; + if 
(dtlb2_mode2[i].flags & type) { + dtlb2_mode2[i].tag = TLB_INVALID_TAG; } - if (mode3_tlb2[i].flags & type) { - mode3_tlb2[i].tag = TLB_INVALID_TAG; + if (dtlb2_mode3[i].flags & type) { + dtlb2_mode3[i].tag = TLB_INVALID_TAG; } } } @@ -970,11 +1088,11 @@ static inline uint64_t tlb_translate_addr(uint32_t guest_va) const uint32_t tag = guest_va & ~0xFFFUL; // look up address in the primary TLB - tlb1_entry = &pCurTLB1[(guest_va >> PAGE_SIZE_BITS) & tlb_size_mask]; + tlb1_entry = &pCurDTLB1[(guest_va >> PAGE_SIZE_BITS) & tlb_size_mask]; if (tlb1_entry->tag == tag) { // primary TLB hit -> fast path return tlb1_entry->host_va_offset + guest_va; } else { // primary TLB miss -> look up address in the secondary TLB - tlb2_entry = &pCurTLB2[((guest_va >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS]; + tlb2_entry = &pCurDTLB2[((guest_va >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS]; if (tlb2_entry->tag == tag) { // update LRU bits tlb2_entry[0].lru_bits = 0x3; @@ -1004,7 +1122,7 @@ static inline uint64_t tlb_translate_addr(uint32_t guest_va) tlb2_entry[3].lru_bits = 0x3; } else { // secondary TLB miss -> // perform full address translation and refill the secondary TLB - tlb2_entry = tlb2_refill(guest_va, 0); + tlb2_entry = dtlb2_refill(guest_va, 0); } if (tlb2_entry->flags & TLBFlags::PAGE_MEM) { // is it a real memory region? @@ -1070,10 +1188,16 @@ static uint32_t mem_grab_unaligned(uint32_t addr, uint32_t size) { return ret; } -static inline TLBEntry * lookup_secondary_tlb(uint32_t guest_va, uint32_t tag) { +template +static inline TLBEntry* lookup_secondary_tlb(uint32_t guest_va, uint32_t tag) { TLBEntry *tlb_entry; - tlb_entry = &pCurTLB2[((guest_va >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS]; + if (tlb_type == TLBType::ITLB) { + tlb_entry = &pCurITLB2[((guest_va >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS]; + } else { + tlb_entry = &pCurDTLB2[((guest_va >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS]; + } + if (tlb_entry->tag == tag) { // update LRU bits tlb_entry[0].lru_bits = 0x3; @@ -1120,26 +1244,26 @@ inline T mmu_read_vmem(uint32_t guest_va) { const uint32_t tag = guest_va & ~0xFFFUL; // look up guest virtual address in the primary TLB - tlb1_entry = &pCurTLB1[(guest_va >> PAGE_SIZE_BITS) & tlb_size_mask]; + tlb1_entry = &pCurDTLB1[(guest_va >> PAGE_SIZE_BITS) & tlb_size_mask]; if (tlb1_entry->tag == tag) { // primary TLB hit -> fast path #ifdef TLB_PROFILING - num_primary_tlb_hits++; + num_primary_dtlb_hits++; #endif host_va = (uint8_t *)(tlb1_entry->host_va_offset + guest_va); } else { // primary TLB miss -> look up address in the secondary TLB - tlb2_entry = lookup_secondary_tlb(guest_va, tag); + tlb2_entry = lookup_secondary_tlb(guest_va, tag); if (tlb2_entry == nullptr) { #ifdef TLB_PROFILING - num_tlb_refills++; + num_dtlb_refills++; #endif // secondary TLB miss -> // perform full address translation and refill the secondary TLB - tlb2_entry = tlb2_refill(guest_va, 0); + tlb2_entry = dtlb2_refill(guest_va, 0); } #ifdef TLB_PROFILING else { - num_secondary_tlb_hits++; + num_secondary_dtlb_hits++; } #endif @@ -1196,10 +1320,10 @@ inline void mmu_write_vmem(uint32_t guest_va, T value) { const uint32_t tag = guest_va & ~0xFFFUL; // look up guest virtual address in the primary TLB - tlb1_entry = &pCurTLB1[(guest_va >> PAGE_SIZE_BITS) & tlb_size_mask]; + tlb1_entry = &pCurDTLB1[(guest_va >> PAGE_SIZE_BITS) & tlb_size_mask]; if (tlb1_entry->tag == tag) { // primary TLB hit -> fast path #ifdef TLB_PROFILING - num_primary_tlb_hits++; + num_primary_dtlb_hits++; #endif if 
(!(tlb1_entry->flags & TLBFlags::PAGE_WRITABLE)) { ppc_state.spr[SPR::DSISR] = 0x08000000 | (1 << 25); @@ -1213,7 +1337,7 @@ inline void mmu_write_vmem(uint32_t guest_va, T value) { tlb1_entry->flags |= TLBFlags::PTE_SET_C; // don't forget to update the secondary TLB as well - tlb2_entry = lookup_secondary_tlb(guest_va, tag); + tlb2_entry = lookup_secondary_tlb(guest_va, tag); if (tlb2_entry != nullptr) { tlb2_entry->flags |= TLBFlags::PTE_SET_C; } @@ -1221,18 +1345,18 @@ inline void mmu_write_vmem(uint32_t guest_va, T value) { host_va = (uint8_t *)(tlb1_entry->host_va_offset + guest_va); } else { // primary TLB miss -> look up address in the secondary TLB - tlb2_entry = lookup_secondary_tlb(guest_va, tag); + tlb2_entry = lookup_secondary_tlb(guest_va, tag); if (tlb2_entry == nullptr) { #ifdef TLB_PROFILING - num_tlb_refills++; + num_dtlb_refills++; #endif // secondary TLB miss -> // perform full address translation and refill the secondary TLB - tlb2_entry = tlb2_refill(guest_va, 1); + tlb2_entry = dtlb2_refill(guest_va, 1); } #ifdef TLB_PROFILING else { - num_secondary_tlb_hits++; + num_secondary_dtlb_hits++; } #endif @@ -1481,6 +1605,52 @@ uint64_t mem_grab_qword(uint32_t addr) { return read_phys_mem(&last_read_area, addr); } +uint8_t *mmu_translate_imem(uint32_t vaddr) +{ + TLBEntry *tlb1_entry, *tlb2_entry; + uint8_t *host_va; + +#ifdef MMU_PROFILING + exec_reads_total++; +#endif + + const uint32_t tag = vaddr & ~0xFFFUL; + + // look up guest virtual address in the primary ITLB + tlb1_entry = &pCurITLB1[(vaddr >> PAGE_SIZE_BITS) & tlb_size_mask]; + if (tlb1_entry->tag == tag) { // primary ITLB hit -> fast path +#ifdef TLB_PROFILING + num_primary_itlb_hits++; +#endif + host_va = (uint8_t *)(tlb1_entry->host_va_offset + vaddr); + } else { + // primary ITLB miss -> look up address in the secondary ITLB + tlb2_entry = lookup_secondary_tlb(vaddr, tag); + if (tlb2_entry == nullptr) { +#ifdef TLB_PROFILING + num_itlb_refills++; +#endif + // secondary ITLB miss -> + // perform full address translation and refill the secondary ITLB + tlb2_entry = itlb2_refill(vaddr); + } +#ifdef TLB_PROFILING + else { + num_secondary_itlb_hits++; + } +#endif + // refill the primary ITLB + tlb1_entry->tag = tag; + tlb1_entry->flags = tlb2_entry->flags; + tlb1_entry->host_va_offset = tlb2_entry->host_va_offset; + host_va = (uint8_t *)(tlb1_entry->host_va_offset + vaddr); + } + + ppc_set_cur_instruction(host_va); + + return host_va; +} + uint8_t* quickinstruction_translate(uint32_t addr) { uint8_t* real_addr; @@ -1560,43 +1730,86 @@ uint64_t mem_read_dbg(uint32_t virt_addr, uint32_t size) { void ppc_mmu_init() { mmu_exception_handler = ppc_exception_handler; - // invalidate all TLB entries - for(auto &tlb_el : mode1_tlb1) { + // invalidate all IDTLB entries + for (auto &tlb_el : itlb1_mode1) { tlb_el.tag = TLB_INVALID_TAG; tlb_el.flags = 0; tlb_el.lru_bits = 0; tlb_el.host_va_offset = 0; } - for(auto &tlb_el : mode2_tlb1) { + for (auto &tlb_el : itlb1_mode2) { tlb_el.tag = TLB_INVALID_TAG; tlb_el.flags = 0; tlb_el.lru_bits = 0; tlb_el.host_va_offset = 0; } - for(auto &tlb_el : mode3_tlb1) { + for (auto &tlb_el : itlb1_mode3) { tlb_el.tag = TLB_INVALID_TAG; tlb_el.flags = 0; tlb_el.lru_bits = 0; tlb_el.host_va_offset = 0; } - for(auto &tlb_el : mode1_tlb2) { + for (auto &tlb_el : itlb2_mode1) { tlb_el.tag = TLB_INVALID_TAG; tlb_el.flags = 0; tlb_el.lru_bits = 0; tlb_el.host_va_offset = 0; } - for(auto &tlb_el : mode2_tlb2) { + for (auto &tlb_el : itlb2_mode2) { tlb_el.tag = TLB_INVALID_TAG; tlb_el.flags = 0; 
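// The twelve invalidation loops around this point all perform the same
// per-entry reset. A minimal sketch of a helper that could fold them together
// (illustrative only, not from this patch; it assumes the TLBEntry and
// TLB_INVALID_TAG definitions introduced earlier in this series):
#if 0
template <typename TLBArray>
static void tlb_invalidate_all(TLBArray& tlb) {
    for (auto& tlb_el : tlb) {
        tlb_el.tag            = TLB_INVALID_TAG;
        tlb_el.flags          = 0;
        tlb_el.lru_bits       = 0;
        tlb_el.host_va_offset = 0;
    }
}

// usage: tlb_invalidate_all(itlb1_mode1); tlb_invalidate_all(dtlb2_mode3); ...
#endif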
tlb_el.lru_bits = 0; tlb_el.host_va_offset = 0; } - for(auto &tlb_el : mode3_tlb2) { + for (auto &tlb_el : itlb2_mode3) { + tlb_el.tag = TLB_INVALID_TAG; + tlb_el.flags = 0; + tlb_el.lru_bits = 0; + tlb_el.host_va_offset = 0; + } + + // invalidate all DTLB entries + for (auto &tlb_el : dtlb1_mode1) { + tlb_el.tag = TLB_INVALID_TAG; + tlb_el.flags = 0; + tlb_el.lru_bits = 0; + tlb_el.host_va_offset = 0; + } + + for (auto &tlb_el : dtlb1_mode2) { + tlb_el.tag = TLB_INVALID_TAG; + tlb_el.flags = 0; + tlb_el.lru_bits = 0; + tlb_el.host_va_offset = 0; + } + + for (auto &tlb_el : dtlb1_mode3) { + tlb_el.tag = TLB_INVALID_TAG; + tlb_el.flags = 0; + tlb_el.lru_bits = 0; + tlb_el.host_va_offset = 0; + } + + for (auto &tlb_el : dtlb2_mode1) { + tlb_el.tag = TLB_INVALID_TAG; + tlb_el.flags = 0; + tlb_el.lru_bits = 0; + tlb_el.host_va_offset = 0; + } + + for (auto &tlb_el : dtlb2_mode2) { + tlb_el.tag = TLB_INVALID_TAG; + tlb_el.flags = 0; + tlb_el.lru_bits = 0; + tlb_el.host_va_offset = 0; + } + + for (auto &tlb_el : dtlb2_mode3) { tlb_el.tag = TLB_INVALID_TAG; tlb_el.flags = 0; tlb_el.lru_bits = 0; diff --git a/cpu/ppc/ppcmmu.h b/cpu/ppc/ppcmmu.h index 6fcb8f2..b9dc5fa 100644 --- a/cpu/ppc/ppcmmu.h +++ b/cpu/ppc/ppcmmu.h @@ -43,8 +43,14 @@ typedef struct PPC_BAT_entry { /** Block address translation types. */ enum BATType : int { - Instruction, - Data + IBAT, + DBAT +}; + +/** TLB types. */ +enum TLBType : int { + ITLB, + DTLB }; /** Result of the block address translation. */ @@ -106,6 +112,8 @@ extern uint64_t mem_grab_qword(uint32_t addr); extern uint64_t mem_read_dbg(uint32_t virt_addr, uint32_t size); extern uint8_t* quickinstruction_translate(uint32_t address_grab); +uint8_t *mmu_translate_imem(uint32_t vaddr); + template extern T mmu_read_vmem(uint32_t guest_va); template From 5b54cd69ef048edbd231399e43e0697b5c0b46de Mon Sep 17 00:00:00 2001 From: Maxim Poliakovski Date: Sat, 25 Sep 2021 10:26:28 +0200 Subject: [PATCH 12/14] ppcmmu: better fatal error handling with ABORT_F. --- cpu/ppc/ppcmmu.cpp | 28 +++++++++------------------- main.cpp | 16 ++++++++++++++++ 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/cpu/ppc/ppcmmu.cpp b/cpu/ppc/ppcmmu.cpp index bf88fe7..03d23a9 100644 --- a/cpu/ppc/ppcmmu.cpp +++ b/cpu/ppc/ppcmmu.cpp @@ -327,8 +327,7 @@ uint8_t* mmu_get_dma_mem(uint32_t addr, uint32_t size) { last_dma_area.mem_ptr = entry->mem_ptr; return last_dma_area.mem_ptr + (addr - last_dma_area.start); } else { - LOG_F(ERROR, "SOS: DMA access to unmapped memory %08X!\n", addr); - exit(-1); // FIXME: ugly error handling, must be the proper exception! + ABORT_F("SOS: DMA access to unmapped memory %08X!\n", addr); } } } @@ -458,8 +457,7 @@ static inline uint8_t* calc_pteg_addr(uint32_t hash) { last_ptab_area.mem_ptr = entry->mem_ptr; return last_ptab_area.mem_ptr + (pteg_addr - last_ptab_area.start); } else { - LOG_F(ERROR, "SOS: no page table region was found at %08X!\n", pteg_addr); - exit(-1); // FIXME: ugly error handling, must be the proper exception! 
+ ABORT_F("SOS: no page table region was found at %08X!\n", pteg_addr); } } } @@ -479,8 +477,7 @@ static bool search_pteg( if (pte_check == READ_DWORD_BE_A(pteg_addr)) { if (match_found) { if ((READ_DWORD_BE_A(pteg_addr) & 0xFFFFF07B) != pte_word2_check) { - LOG_F(ERROR, "Multiple PTEs with different RPN/WIMG/PP found!\n"); - exit(-1); + ABORT_F("Multiple PTEs with different RPN/WIMG/PP found!\n"); } } else { /* isolate RPN, WIMG and PP fields */ @@ -510,8 +507,7 @@ static PATResult page_address_translation(uint32_t la, bool is_instr_fetch, sr_val = ppc_state.sr[(la >> 28) & 0x0F]; if (sr_val & 0x80000000) { - LOG_F(ERROR, "Direct-store segments not supported, LA=%0xX\n", la); - exit(-1); // FIXME: ugly error handling, must be the proper exception! + ABORT_F("Direct-store segments not supported, LA=%0xX\n", la); } /* instruction fetch from a no-execute segment will cause ISI exception */ @@ -923,11 +919,8 @@ static TLBEntry* dtlb2_refill(uint32_t guest_va, int is_write) if (bat_res.hit) { // check block protection if (!bat_res.prot || ((bat_res.prot & 1) && is_write)) { - LOG_F(WARNING, "BAT DSI exception in TLB2 refill!"); - LOG_F(WARNING, "Attempt to write to read-only region, LA=0x%08X, PC=0x%08X!", guest_va, ppc_state.pc); - //UnmappedMem.tag = tag; - //UnmappedMem.host_va_offset = (int64_t)(&UnmappedVal) - guest_va; - //return &UnmappedMem; + LOG_F(9, "BAT DSI exception in TLB2 refill!"); + LOG_F(9, "Attempt to write to read-only region, LA=0x%08X, PC=0x%08X!", guest_va, ppc_state.pc); ppc_state.spr[SPR::DSISR] = 0x08000000 | (is_write << 25); ppc_state.spr[SPR::DAR] = guest_va; mmu_exception_handler(Except_Type::EXC_DSI, 0); @@ -1535,8 +1528,7 @@ void mem_write_qword(uint32_t addr, uint64_t value) { mmu_write_vmem(addr, value); if (addr & 7) { - LOG_F(ERROR, "SOS! Attempt to write unaligned QWORD to 0x%08X\n", addr); - exit(-1); // FIXME! + ABORT_F("SOS! Attempt to write unaligned QWORD to 0x%08X\n", addr); } /* data address translation if enabled */ @@ -1593,8 +1585,7 @@ uint64_t mem_grab_qword(uint32_t addr) { tlb_translate_addr(addr); if (addr & 7) { - LOG_F(ERROR, "SOS! Attempt to read unaligned QWORD at 0x%08X\n", addr); - exit(-1); // FIXME! + ABORT_F("SOS! Attempt to read unaligned QWORD at 0x%08X\n", addr); } /* data address translation if enabled */ @@ -1675,8 +1666,7 @@ uint8_t* quickinstruction_translate(uint32_t addr) { real_addr = last_exec_area.mem_ptr + (addr - last_exec_area.start); ppc_set_cur_instruction(real_addr); } else { - LOG_F(WARNING, "attempt to execute code at %08X!\n", addr); - exit(-1); // FIXME: ugly error handling, must be the proper exception! 
+ ABORT_F("Attempt to execute code at %08X!\n", addr); } } diff --git a/main.cpp b/main.cpp index 915a894..2b601ca 100644 --- a/main.cpp +++ b/main.cpp @@ -50,6 +50,12 @@ void sigint_handler(int signum) { exit(0); } +void sigabrt_handler(int signum) { + LOG_F(INFO, "Shutting down..."); + + delete gMachineObj.release(); +} + static string appDescription = string( "\nDingusPPC - Prototype 5bf5 (8/23/2020) " "\nWritten by divingkatae and maximumspatium " @@ -171,9 +177,19 @@ int main(int argc, char** argv) { goto bail; } + // graceful handling of fatal errors + loguru::set_fatal_handler([](const loguru::Message& message) { + enter_debugger(); + + abort(); + }); + // redirect SIGINT to our own handler signal(SIGINT, sigint_handler); + // redirect SIGABRT to our own handler + signal(SIGABRT, sigabrt_handler); + #ifdef SDL if (SDL_Init(SDL_INIT_AUDIO)){ LOG_F(ERROR, "SDL_Init error: %s", SDL_GetError()); From 212cd58f4015c6e5fa60d18b909d586587bace7e Mon Sep 17 00:00:00 2001 From: Maxim Poliakovski Date: Sat, 25 Sep 2021 15:57:35 +0200 Subject: [PATCH 13/14] ppcmmu: refactor and clean up. --- cpu/ppc/ppcmmu.cpp | 2191 +++++++++++++++++++++-------------------- cpu/ppc/ppcmmu.h | 19 +- debugger/debugger.cpp | 2 +- 3 files changed, 1117 insertions(+), 1095 deletions(-) diff --git a/cpu/ppc/ppcmmu.cpp b/cpu/ppc/ppcmmu.cpp index 03d23a9..46bfcc7 100644 --- a/cpu/ppc/ppcmmu.cpp +++ b/cpu/ppc/ppcmmu.cpp @@ -22,9 +22,7 @@ along with this program. If not, see . /** @file PowerPC Memory Management Unit emulation. */ /* TODO: - - implement TLB - implement 601-style BATs - - add proper error and exception handling */ #include "ppcmmu.h" @@ -49,7 +47,6 @@ PPC_BAT_entry dbat_array[4] = {{0}}; //#define MMU_PROFILING // uncomment this to enable MMU profiling //#define TLB_PROFILING // uncomment this to enable SoftTLB profiling -/* MMU profiling */ #ifdef MMU_PROFILING /* global variables for lightweight MMU profiling */ @@ -65,6 +62,1103 @@ uint64_t unaligned_writes = 0; // counts unaligned writes uint64_t unaligned_crossp_r = 0; // counts unaligned crosspage reads uint64_t unaligned_crossp_w = 0; // counts unaligned crosspage writes +#endif // MMU_PROFILING + +#ifdef TLB_PROFILING + +/* global variables for lightweight SoftTLB profiling */ +uint64_t num_primary_itlb_hits = 0; // number of hits in the primary ITLB +uint64_t num_secondary_itlb_hits = 0; // number of hits in the secondary ITLB +uint64_t num_itlb_refills = 0; // number of ITLB refills +uint64_t num_primary_dtlb_hits = 0; // number of hits in the primary DTLB +uint64_t num_secondary_dtlb_hits = 0; // number of hits in the secondary DTLB +uint64_t num_dtlb_refills = 0; // number of DTLB refills +uint64_t num_entry_replacements = 0; // number of entry replacements + +#endif // TLB_PROFILING + +/** remember recently used physical memory regions for quicker translation. */ +AddressMapEntry last_read_area = {0xFFFFFFFF, 0xFFFFFFFF}; +AddressMapEntry last_write_area = {0xFFFFFFFF, 0xFFFFFFFF}; +AddressMapEntry last_exec_area = {0xFFFFFFFF, 0xFFFFFFFF}; +AddressMapEntry last_ptab_area = {0xFFFFFFFF, 0xFFFFFFFF}; +AddressMapEntry last_dma_area = {0xFFFFFFFF, 0xFFFFFFFF}; + +void ppc_set_cur_instruction(const uint8_t* ptr) { + ppc_cur_instruction = READ_DWORD_BE_A(ptr); +} + +/** PowerPC-style block address translation. 
*/ +template +static BATResult ppc_block_address_translation(uint32_t la) +{ + uint32_t pa; // translated physical address + uint8_t prot; // protection bits for the translated address + PPC_BAT_entry *bat_array; + + bool bat_hit = false; + unsigned msr_pr = !!(ppc_state.msr & 0x4000); + + bat_array = (type == BATType::IBAT) ? ibat_array : dbat_array; + + // Format: %XY + // X - supervisor access bit, Y - problem/user access bit + // Those bits are mutually exclusive + unsigned access_bits = ((msr_pr ^ 1) << 1) | msr_pr; + + for (int bat_index = 0; bat_index < 4; bat_index++) { + PPC_BAT_entry* bat_entry = &bat_array[bat_index]; + + if ((bat_entry->access & access_bits) && ((la & bat_entry->hi_mask) == bat_entry->bepi)) { + bat_hit = true; + +#ifdef MMU_PROFILING + bat_transl_total++; +#endif + // logical to physical translation + pa = bat_entry->phys_hi | (la & ~bat_entry->hi_mask); + prot = bat_entry->prot; + break; + } + } + + return BATResult{bat_hit, prot, pa}; +} + +static inline uint8_t* calc_pteg_addr(uint32_t hash) +{ + uint32_t sdr1_val, pteg_addr; + + sdr1_val = ppc_state.spr[SPR::SDR1]; + + pteg_addr = sdr1_val & 0xFE000000; + pteg_addr |= (sdr1_val & 0x01FF0000) | (((sdr1_val & 0x1FF) << 16) & ((hash & 0x7FC00) << 6)); + pteg_addr |= (hash & 0x3FF) << 6; + + if (pteg_addr >= last_ptab_area.start && pteg_addr <= last_ptab_area.end) { + return last_ptab_area.mem_ptr + (pteg_addr - last_ptab_area.start); + } else { + AddressMapEntry* entry = mem_ctrl_instance->find_range(pteg_addr); + if (entry && entry->type & (RT_ROM | RT_RAM)) { + last_ptab_area.start = entry->start; + last_ptab_area.end = entry->end; + last_ptab_area.mem_ptr = entry->mem_ptr; + return last_ptab_area.mem_ptr + (pteg_addr - last_ptab_area.start); + } else { + ABORT_F("SOS: no page table region was found at %08X!\n", pteg_addr); + } + } +} + +static bool search_pteg(uint8_t* pteg_addr, uint8_t** ret_pte_addr, uint32_t vsid, + uint16_t page_index, uint8_t pteg_num) +{ + /* construct PTE matching word */ + uint32_t pte_check = 0x80000000 | (vsid << 7) | (pteg_num << 6) | (page_index >> 10); + +#ifdef MMU_INTEGRITY_CHECKS + /* PTEG integrity check that ensures that all matching PTEs have + identical RPN, WIMG and PP bits (PPC PEM 32-bit 7.6.2, rule 5). 
*/ + uint32_t pte_word2_check; + bool match_found = false; + + for (int i = 0; i < 8; i++, pteg_addr += 8) { + if (pte_check == READ_DWORD_BE_A(pteg_addr)) { + if (match_found) { + if ((READ_DWORD_BE_A(pteg_addr) & 0xFFFFF07B) != pte_word2_check) { + ABORT_F("Multiple PTEs with different RPN/WIMG/PP found!\n"); + } + } else { + /* isolate RPN, WIMG and PP fields */ + pte_word2_check = READ_DWORD_BE_A(pteg_addr) & 0xFFFFF07B; + *ret_pte_addr = pteg_addr; + } + } + } +#else + for (int i = 0; i < 8; i++, pteg_addr += 8) { + if (pte_check == READ_DWORD_BE_A(pteg_addr)) { + *ret_pte_addr = pteg_addr; + return true; + } + } +#endif + + return false; +} + +static PATResult page_address_translation(uint32_t la, bool is_instr_fetch, + unsigned msr_pr, int is_write) +{ + uint32_t sr_val, page_index, pteg_hash1, vsid, pte_word2; + unsigned key, pp; + uint8_t* pte_addr; + + sr_val = ppc_state.sr[(la >> 28) & 0x0F]; + if (sr_val & 0x80000000) { + ABORT_F("Direct-store segments not supported, LA=%0xX\n", la); + } + + /* instruction fetch from a no-execute segment will cause ISI exception */ + if ((sr_val & 0x10000000) && is_instr_fetch) { + mmu_exception_handler(Except_Type::EXC_ISI, 0x10000000); + } + + page_index = (la >> 12) & 0xFFFF; + pteg_hash1 = (sr_val & 0x7FFFF) ^ page_index; + vsid = sr_val & 0x0FFFFFF; + + if (!search_pteg(calc_pteg_addr(pteg_hash1), &pte_addr, vsid, page_index, 0)) { + if (!search_pteg(calc_pteg_addr(~pteg_hash1), &pte_addr, vsid, page_index, 1)) { + if (is_instr_fetch) { + mmu_exception_handler(Except_Type::EXC_ISI, 0x40000000); + } else { + ppc_state.spr[SPR::DSISR] = 0x40000000 | (is_write << 25); + ppc_state.spr[SPR::DAR] = la; + mmu_exception_handler(Except_Type::EXC_DSI, 0); + } + } + } + + pte_word2 = READ_DWORD_BE_A(pte_addr + 4); + + key = (((sr_val >> 29) & 1) & msr_pr) | (((sr_val >> 30) & 1) & (msr_pr ^ 1)); + + /* check page access */ + pp = pte_word2 & 3; + + // the following scenarios cause DSI/ISI exception: + // any access with key = 1 and PP = %00 + // write access with key = 1 and PP = %01 + // write access with PP = %11 + if ((key && (!pp || (pp == 1 && is_write))) || (pp == 3 && is_write)) { + if (is_instr_fetch) { + mmu_exception_handler(Except_Type::EXC_ISI, 0x08000000); + } else { + ppc_state.spr[SPR::DSISR] = 0x08000000 | (is_write << 25); + ppc_state.spr[SPR::DAR] = la; + mmu_exception_handler(Except_Type::EXC_DSI, 0); + } + } + + /* update R and C bits */ + /* For simplicity, R is set on each access, C is set only for writes */ + pte_addr[6] |= 0x01; + if (is_write) { + pte_addr[7] |= 0x80; + } + + /* return physical address, access protection and C status */ + return PATResult{ + ((pte_word2 & 0xFFFFF000) | (la & 0x00000FFF)), + static_cast((key << 2) | pp), + static_cast(pte_word2 & 0x80) + }; +} + +/** PowerPC-style MMU instruction address translation. 
*/ +static uint32_t mmu_instr_translation(uint32_t la) +{ + uint32_t pa; /* translated physical address */ + + bool bat_hit = false; + unsigned msr_pr = !!(ppc_state.msr & 0x4000); + + // Format: %XY + // X - supervisor access bit, Y - problem/user access bit + // Those bits are mutually exclusive + unsigned access_bits = ((msr_pr ^ 1) << 1) | msr_pr; + + for (int bat_index = 0; bat_index < 4; bat_index++) { + PPC_BAT_entry* bat_entry = &ibat_array[bat_index]; + + if ((bat_entry->access & access_bits) && ((la & bat_entry->hi_mask) == bat_entry->bepi)) { + bat_hit = true; + +#ifdef MMU_PROFILING + bat_transl_total++; +#endif + + if (!bat_entry->prot) { + mmu_exception_handler(Except_Type::EXC_ISI, 0x08000000); + } + + // logical to physical translation + pa = bat_entry->phys_hi | (la & ~bat_entry->hi_mask); + break; + } + } + + /* page address translation */ + if (!bat_hit) { + PATResult pat_res = page_address_translation(la, true, msr_pr, 0); + pa = pat_res.phys; + +#ifdef MMU_PROFILING + ptab_transl_total++; +#endif + } + + return pa; +} + +/** PowerPC-style MMU data address translation. */ +static uint32_t ppc_mmu_addr_translate(uint32_t la, int is_write) +{ + uint32_t pa; /* translated physical address */ + + bool bat_hit = false; + unsigned msr_pr = !!(ppc_state.msr & 0x4000); + + // Format: %XY + // X - supervisor access bit, Y - problem/user access bit + // Those bits are mutually exclusive + unsigned access_bits = ((msr_pr ^ 1) << 1) | msr_pr; + + for (int bat_index = 0; bat_index < 4; bat_index++) { + PPC_BAT_entry* bat_entry = &dbat_array[bat_index]; + + if ((bat_entry->access & access_bits) && ((la & bat_entry->hi_mask) == bat_entry->bepi)) { + bat_hit = true; + +#ifdef MMU_PROFILING + bat_transl_total++; +#endif + + if (!bat_entry->prot || ((bat_entry->prot & 1) && is_write)) { + ppc_state.spr[SPR::DSISR] = 0x08000000 | (is_write << 25); + ppc_state.spr[SPR::DAR] = la; + mmu_exception_handler(Except_Type::EXC_DSI, 0); + } + + // logical to physical translation + pa = bat_entry->phys_hi | (la & ~bat_entry->hi_mask); + break; + } + } + + /* page address translation */ + if (!bat_hit) { + PATResult pat_res = page_address_translation(la, false, msr_pr, is_write); + pa = pat_res.phys; + +#ifdef MMU_PROFILING + ptab_transl_total++; +#endif + } + + return pa; +} + +uint8_t* mmu_get_dma_mem(uint32_t addr, uint32_t size) +{ + if (addr >= last_dma_area.start && (addr + size) <= last_dma_area.end) { + return last_dma_area.mem_ptr + (addr - last_dma_area.start); + } else { + AddressMapEntry* entry = mem_ctrl_instance->find_range(addr); + if (entry && entry->type & (RT_ROM | RT_RAM)) { + last_dma_area.start = entry->start; + last_dma_area.end = entry->end; + last_dma_area.mem_ptr = entry->mem_ptr; + return last_dma_area.mem_ptr + (addr - last_dma_area.start); + } else { + ABORT_F("SOS: DMA access to unmapped memory %08X!\n", addr); + } + } +} + +// primary ITLB for all MMU modes +static std::array itlb1_mode1; +static std::array itlb1_mode2; +static std::array itlb1_mode3; + +// secondary ITLB for all MMU modes +static std::array itlb2_mode1; +static std::array itlb2_mode2; +static std::array itlb2_mode3; + +// primary DTLB for all MMU modes +static std::array dtlb1_mode1; +static std::array dtlb1_mode2; +static std::array dtlb1_mode3; + +// secondary DTLB for all MMU modes +static std::array dtlb2_mode1; +static std::array dtlb2_mode2; +static std::array dtlb2_mode3; + +TLBEntry *pCurITLB1; // current primary ITLB +TLBEntry *pCurITLB2; // current secondary ITLB +TLBEntry *pCurDTLB1; // 
current primary DTLB +TLBEntry *pCurDTLB2; // current secondary DTLB + +uint32_t tlb_size_mask = TLB_SIZE - 1; + +// fake TLB entry for handling of unmapped memory accesses +uint64_t UnmappedVal = -1ULL; +TLBEntry UnmappedMem = {TLB_INVALID_TAG, 0, 0, 0}; + +uint8_t CurITLBMode = {0xFF}; // current ITLB mode +uint8_t CurDTLBMode = {0xFF}; // current DTLB mode + +void mmu_change_mode() +{ + uint8_t mmu_mode; + + // switch ITLB tables first + mmu_mode = ((ppc_state.msr >> 4) & 0x2) | ((ppc_state.msr >> 14) & 1); + + if (CurITLBMode != mmu_mode) { + switch(mmu_mode) { + case 0: // real address mode + pCurITLB1 = &dtlb1_mode1[0]; + pCurITLB2 = &dtlb2_mode1[0]; + break; + case 2: // supervisor mode with instruction translation enabled + pCurITLB1 = &dtlb1_mode2[0]; + pCurITLB2 = &dtlb2_mode2[0]; + break; + case 3: // user mode with instruction translation enabled + pCurITLB1 = &dtlb1_mode3[0]; + pCurITLB2 = &dtlb2_mode3[0]; + break; + } + CurITLBMode = mmu_mode; + } + + // then switch DTLB tables + mmu_mode = ((ppc_state.msr >> 3) & 0x2) | ((ppc_state.msr >> 14) & 1); + + if (CurDTLBMode != mmu_mode) { + switch(mmu_mode) { + case 0: // real address mode + pCurDTLB1 = &dtlb1_mode1[0]; + pCurDTLB2 = &dtlb2_mode1[0]; + break; + case 2: // supervisor mode with data translation enabled + pCurDTLB1 = &dtlb1_mode2[0]; + pCurDTLB2 = &dtlb2_mode2[0]; + break; + case 3: // user mode with data translation enabled + pCurDTLB1 = &dtlb1_mode3[0]; + pCurDTLB2 = &dtlb2_mode3[0]; + break; + } + CurDTLBMode = mmu_mode; + } +} + +template +static TLBEntry* tlb2_target_entry(uint32_t gp_va) +{ + TLBEntry *tlb_entry; + + if (tlb_type == TLBType::ITLB) { + tlb_entry = &pCurITLB2[((gp_va >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS]; + } else { + tlb_entry = &pCurDTLB2[((gp_va >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS]; + } + + // select the target from invalid blocks first + if (tlb_entry[0].tag == TLB_INVALID_TAG) { + // update LRU bits + tlb_entry[0].lru_bits = 0x3; + tlb_entry[1].lru_bits = 0x2; + tlb_entry[2].lru_bits &= 0x1; + tlb_entry[3].lru_bits &= 0x1; + return tlb_entry; + } else if (tlb_entry[1].tag == TLB_INVALID_TAG) { + // update LRU bits + tlb_entry[0].lru_bits = 0x2; + tlb_entry[1].lru_bits = 0x3; + tlb_entry[2].lru_bits &= 0x1; + tlb_entry[3].lru_bits &= 0x1; + return &tlb_entry[1]; + } else if (tlb_entry[2].tag == TLB_INVALID_TAG) { + // update LRU bits + tlb_entry[0].lru_bits &= 0x1; + tlb_entry[1].lru_bits &= 0x1; + tlb_entry[2].lru_bits = 0x3; + tlb_entry[3].lru_bits = 0x2; + return &tlb_entry[2]; + } else if (tlb_entry[3].tag == TLB_INVALID_TAG) { + // update LRU bits + tlb_entry[0].lru_bits &= 0x1; + tlb_entry[1].lru_bits &= 0x1; + tlb_entry[2].lru_bits = 0x2; + tlb_entry[3].lru_bits = 0x3; + return &tlb_entry[3]; + } else { // no free entries, replace an existing one according with the hLRU policy +#ifdef TLB_PROFILING + num_entry_replacements++; +#endif + if (tlb_entry[0].lru_bits == 0) { + // update LRU bits + tlb_entry[0].lru_bits = 0x3; + tlb_entry[1].lru_bits = 0x2; + tlb_entry[2].lru_bits &= 0x1; + tlb_entry[3].lru_bits &= 0x1; + return tlb_entry; + } else if (tlb_entry[1].lru_bits == 0) { + // update LRU bits + tlb_entry[0].lru_bits = 0x2; + tlb_entry[1].lru_bits = 0x3; + tlb_entry[2].lru_bits &= 0x1; + tlb_entry[3].lru_bits &= 0x1; + return &tlb_entry[1]; + } else if (tlb_entry[2].lru_bits == 0) { + // update LRU bits + tlb_entry[0].lru_bits &= 0x1; + tlb_entry[1].lru_bits &= 0x1; + tlb_entry[2].lru_bits = 0x3; + tlb_entry[3].lru_bits = 0x2; + return &tlb_entry[2]; + } else { 
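// The four-way LRU update below is spelled out eight times in this function
// and repeated in lookup_secondary_tlb(). The scheme: the touched way gets
// lru_bits = %11, its neighbour in the same pair gets %10, and both ways of
// the other pair have their upper bit cleared, so a way whose bits decay to
// %00 becomes the next replacement candidate. A hypothetical helper capturing
// the pattern (illustrative only, not from this patch):
#if 0
static inline void tlb2_touch_way(TLBEntry* set, int way) {
    // 'set' points to the first of the four ways of one secondary-TLB set
    if (way < 2) {
        set[0].lru_bits  = (way == 0) ? 0x3 : 0x2;
        set[1].lru_bits  = (way == 0) ? 0x2 : 0x3;
        set[2].lru_bits &= 0x1;
        set[3].lru_bits &= 0x1;
    } else {
        set[0].lru_bits &= 0x1;
        set[1].lru_bits &= 0x1;
        set[2].lru_bits  = (way == 2) ? 0x3 : 0x2;
        set[3].lru_bits  = (way == 2) ? 0x2 : 0x3;
    }
}
#endif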
+ // update LRU bits + tlb_entry[0].lru_bits &= 0x1; + tlb_entry[1].lru_bits &= 0x1; + tlb_entry[2].lru_bits = 0x2; + tlb_entry[3].lru_bits = 0x3; + return &tlb_entry[3]; + } + } +} + +static TLBEntry* itlb2_refill(uint32_t guest_va) +{ + uint32_t phys_addr; + TLBEntry *tlb_entry; + uint16_t flags = 0; + + /* instruction address translation if enabled */ + if (ppc_state.msr & 0x20) { + // attempt block address translation first + BATResult bat_res = ppc_block_address_translation(guest_va); + if (bat_res.hit) { + // check block protection + // only PP = 0 (no access) causes ISI exception + if (!bat_res.prot) { + mmu_exception_handler(Except_Type::EXC_ISI, 0x08000000); + } + phys_addr = bat_res.phys; + flags |= TLBFlags::TLBE_FROM_BAT; // tell the world we come from + } else { + // page address translation + PATResult pat_res = page_address_translation(guest_va, true, + !!(ppc_state.msr & 0x4000), 0); + phys_addr = pat_res.phys; + flags = TLBFlags::TLBE_FROM_PAT; // tell the world we come from + } + } else { // instruction translation disabled + phys_addr = guest_va; + } + + // look up host virtual address + AddressMapEntry* reg_desc = mem_ctrl_instance->find_range(phys_addr); + if (reg_desc) { + if (reg_desc->type & RT_MMIO) { + ABORT_F("Instruction fetch from MMIO region at 0x%08X!\n", phys_addr); + } + // refill the secondary TLB + const uint32_t tag = guest_va & ~0xFFFUL; + tlb_entry = tlb2_target_entry(tag); + tlb_entry->tag = tag; + tlb_entry->flags = flags | TLBFlags::PAGE_MEM; + tlb_entry->host_va_offset = (int64_t)reg_desc->mem_ptr - guest_va + + (phys_addr - reg_desc->start); + } else { + ABORT_F("Instruction fetch from unmapped memory at 0x%08X!\n", phys_addr); + } + + return tlb_entry; +} + +static TLBEntry* dtlb2_refill(uint32_t guest_va, int is_write) +{ + uint32_t phys_addr; + uint16_t flags = 0; + TLBEntry *tlb_entry; + + const uint32_t tag = guest_va & ~0xFFFUL; + + /* data address translation if enabled */ + if (ppc_state.msr & 0x10) { + // attempt block address translation first + BATResult bat_res = ppc_block_address_translation(guest_va); + if (bat_res.hit) { + // check block protection + if (!bat_res.prot || ((bat_res.prot & 1) && is_write)) { + LOG_F(9, "BAT DSI exception in TLB2 refill!"); + LOG_F(9, "Attempt to write to read-only region, LA=0x%08X, PC=0x%08X!", guest_va, ppc_state.pc); + ppc_state.spr[SPR::DSISR] = 0x08000000 | (is_write << 25); + ppc_state.spr[SPR::DAR] = guest_va; + mmu_exception_handler(Except_Type::EXC_DSI, 0); + } + phys_addr = bat_res.phys; + flags = TLBFlags::PTE_SET_C; // prevent PTE.C updates for BAT + flags |= TLBFlags::TLBE_FROM_BAT; // tell the world we come from + if (bat_res.prot == 2) { + flags |= TLBFlags::PAGE_WRITABLE; + } + } else { + // page address translation + PATResult pat_res = page_address_translation(guest_va, false, + !!(ppc_state.msr & 0x4000), is_write); + phys_addr = pat_res.phys; + flags = TLBFlags::TLBE_FROM_PAT; // tell the world we come from + if (pat_res.prot <= 2 || pat_res.prot == 6) { + flags |= TLBFlags::PAGE_WRITABLE; + } + if (is_write || pat_res.pte_c_status) { + // C-bit of the PTE is already set so the TLB logic + // doesn't need to update it anymore + flags |= TLBFlags::PTE_SET_C; + } + } + } else { // data translation disabled + phys_addr = guest_va; + flags = TLBFlags::PTE_SET_C; // no PTE.C updates in real addressing mode + flags |= TLBFlags::PAGE_WRITABLE; // assume physical pages are writable + } + + // look up host virtual address + AddressMapEntry* reg_desc = 
mem_ctrl_instance->find_range(phys_addr); + if (reg_desc) { + // refill the secondary TLB + tlb_entry = tlb2_target_entry(tag); + tlb_entry->tag = tag; + if (reg_desc->type & RT_MMIO) { // MMIO region + tlb_entry->flags = flags | TLBFlags::PAGE_IO; + tlb_entry->reg_desc = reg_desc; + } else { // memory region backed by host memory + tlb_entry->flags = flags | TLBFlags::PAGE_MEM; + tlb_entry->host_va_offset = (int64_t)reg_desc->mem_ptr - guest_va + + (phys_addr - reg_desc->start); + } + return tlb_entry; + } else { + LOG_F(ERROR, "Read from unmapped memory at 0x%08X!\n", phys_addr); + UnmappedMem.tag = tag; + UnmappedMem.host_va_offset = (int64_t)(&UnmappedVal) - guest_va; + return &UnmappedMem; + } +} + +template +static inline TLBEntry* lookup_secondary_tlb(uint32_t guest_va, uint32_t tag) { + TLBEntry *tlb_entry; + + if (tlb_type == TLBType::ITLB) { + tlb_entry = &pCurITLB2[((guest_va >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS]; + } else { + tlb_entry = &pCurDTLB2[((guest_va >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS]; + } + + if (tlb_entry->tag == tag) { + // update LRU bits + tlb_entry[0].lru_bits = 0x3; + tlb_entry[1].lru_bits = 0x2; + tlb_entry[2].lru_bits &= 0x1; + tlb_entry[3].lru_bits &= 0x1; + } else if (tlb_entry[1].tag == tag) { + tlb_entry = &tlb_entry[1]; + // update LRU bits + tlb_entry[0].lru_bits = 0x2; + tlb_entry[1].lru_bits = 0x3; + tlb_entry[2].lru_bits &= 0x1; + tlb_entry[3].lru_bits &= 0x1; + } else if (tlb_entry[2].tag == tag) { + tlb_entry = &tlb_entry[2]; + // update LRU bits + tlb_entry[0].lru_bits &= 0x1; + tlb_entry[1].lru_bits &= 0x1; + tlb_entry[2].lru_bits = 0x3; + tlb_entry[3].lru_bits = 0x2; + } else if (tlb_entry[3].tag == tag) { + tlb_entry = &tlb_entry[3]; + // update LRU bits + tlb_entry[0].lru_bits &= 0x1; + tlb_entry[1].lru_bits &= 0x1; + tlb_entry[2].lru_bits = 0x2; + tlb_entry[3].lru_bits = 0x3; + } else { + return nullptr; + } + return tlb_entry; +} + +uint8_t *mmu_translate_imem(uint32_t vaddr) +{ + TLBEntry *tlb1_entry, *tlb2_entry; + uint8_t *host_va; + +#ifdef MMU_PROFILING + exec_reads_total++; +#endif + + const uint32_t tag = vaddr & ~0xFFFUL; + + // look up guest virtual address in the primary ITLB + tlb1_entry = &pCurITLB1[(vaddr >> PAGE_SIZE_BITS) & tlb_size_mask]; + if (tlb1_entry->tag == tag) { // primary ITLB hit -> fast path +#ifdef TLB_PROFILING + num_primary_itlb_hits++; +#endif + host_va = (uint8_t *)(tlb1_entry->host_va_offset + vaddr); + } else { + // primary ITLB miss -> look up address in the secondary ITLB + tlb2_entry = lookup_secondary_tlb(vaddr, tag); + if (tlb2_entry == nullptr) { +#ifdef TLB_PROFILING + num_itlb_refills++; +#endif + // secondary ITLB miss -> + // perform full address translation and refill the secondary ITLB + tlb2_entry = itlb2_refill(vaddr); + } +#ifdef TLB_PROFILING + else { + num_secondary_itlb_hits++; + } +#endif + // refill the primary ITLB + tlb1_entry->tag = tag; + tlb1_entry->flags = tlb2_entry->flags; + tlb1_entry->host_va_offset = tlb2_entry->host_va_offset; + host_va = (uint8_t *)(tlb1_entry->host_va_offset + vaddr); + } + + ppc_set_cur_instruction(host_va); + + return host_va; +} + +// Forward declarations. 
+static uint32_t read_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t size); +static void write_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t value, + uint32_t size); + +template +inline T mmu_read_vmem(uint32_t guest_va) +{ + TLBEntry *tlb1_entry, *tlb2_entry; + uint8_t *host_va; + + const uint32_t tag = guest_va & ~0xFFFUL; + + // look up guest virtual address in the primary TLB + tlb1_entry = &pCurDTLB1[(guest_va >> PAGE_SIZE_BITS) & tlb_size_mask]; + if (tlb1_entry->tag == tag) { // primary TLB hit -> fast path +#ifdef TLB_PROFILING + num_primary_dtlb_hits++; +#endif + host_va = (uint8_t *)(tlb1_entry->host_va_offset + guest_va); + } else { + // primary TLB miss -> look up address in the secondary TLB + tlb2_entry = lookup_secondary_tlb(guest_va, tag); + if (tlb2_entry == nullptr) { +#ifdef TLB_PROFILING + num_dtlb_refills++; +#endif + // secondary TLB miss -> + // perform full address translation and refill the secondary TLB + tlb2_entry = dtlb2_refill(guest_va, 0); + } +#ifdef TLB_PROFILING + else { + num_secondary_dtlb_hits++; + } +#endif + + if (tlb2_entry->flags & TLBFlags::PAGE_MEM) { // is it a real memory region? + // refill the primary TLB + tlb1_entry->tag = tag; + tlb1_entry->flags = tlb2_entry->flags; + tlb1_entry->host_va_offset = tlb2_entry->host_va_offset; + host_va = (uint8_t *)(tlb1_entry->host_va_offset + guest_va); + } else { // otherwise, it's an access to a memory-mapped device +#ifdef MMU_PROFILING + iomem_reads_total++; +#endif + return ( + tlb2_entry->reg_desc->devobj->read(tlb2_entry->reg_desc->start, + guest_va - tlb2_entry->reg_desc->start, + sizeof(T)) + ); + } + } + +#ifdef MMU_PROFILING + dmem_reads_total++; +#endif + + // handle unaligned memory accesses + if (sizeof(T) > 1 && (guest_va & (sizeof(T) - 1))) { + return read_unaligned(guest_va, host_va, sizeof(T)); + } + + // handle aligned memory accesses + switch(sizeof(T)) { + case 1: + return *host_va; + case 2: + return READ_WORD_BE_A(host_va); + case 4: + return READ_DWORD_BE_A(host_va); + case 8: + return READ_QWORD_BE_A(host_va); + } +} + +// explicitely instantiate all required mmu_read_vmem variants +template uint8_t mmu_read_vmem(uint32_t guest_va); +template uint16_t mmu_read_vmem(uint32_t guest_va); +template uint32_t mmu_read_vmem(uint32_t guest_va); +template uint64_t mmu_read_vmem(uint32_t guest_va); + +template +inline void mmu_write_vmem(uint32_t guest_va, T value) +{ + TLBEntry *tlb1_entry, *tlb2_entry; + uint8_t *host_va; + + const uint32_t tag = guest_va & ~0xFFFUL; + + // look up guest virtual address in the primary TLB + tlb1_entry = &pCurDTLB1[(guest_va >> PAGE_SIZE_BITS) & tlb_size_mask]; + if (tlb1_entry->tag == tag) { // primary TLB hit -> fast path +#ifdef TLB_PROFILING + num_primary_dtlb_hits++; +#endif + if (!(tlb1_entry->flags & TLBFlags::PAGE_WRITABLE)) { + ppc_state.spr[SPR::DSISR] = 0x08000000 | (1 << 25); + ppc_state.spr[SPR::DAR] = guest_va; + mmu_exception_handler(Except_Type::EXC_DSI, 0); + } + if (!(tlb1_entry->flags & TLBFlags::PTE_SET_C)) { + // perform full page address translation to update PTE.C bit + PATResult pat_res = page_address_translation(guest_va, false, + !!(ppc_state.msr & 0x4000), true); + tlb1_entry->flags |= TLBFlags::PTE_SET_C; + + // don't forget to update the secondary TLB as well + tlb2_entry = lookup_secondary_tlb(guest_va, tag); + if (tlb2_entry != nullptr) { + tlb2_entry->flags |= TLBFlags::PTE_SET_C; + } + } + host_va = (uint8_t *)(tlb1_entry->host_va_offset + guest_va); + } else { + // primary TLB miss -> look up address in 
the secondary TLB + tlb2_entry = lookup_secondary_tlb(guest_va, tag); + if (tlb2_entry == nullptr) { +#ifdef TLB_PROFILING + num_dtlb_refills++; +#endif + // secondary TLB miss -> + // perform full address translation and refill the secondary TLB + tlb2_entry = dtlb2_refill(guest_va, 1); + } +#ifdef TLB_PROFILING + else { + num_secondary_dtlb_hits++; + } +#endif + + if (!(tlb2_entry->flags & TLBFlags::PAGE_WRITABLE)) { + ppc_state.spr[SPR::DSISR] = 0x08000000 | (1 << 25); + ppc_state.spr[SPR::DAR] = guest_va; + mmu_exception_handler(Except_Type::EXC_DSI, 0); + } + + if (!(tlb2_entry->flags & TLBFlags::PTE_SET_C)) { + // perform full page address translation to update PTE.C bit + PATResult pat_res = page_address_translation(guest_va, false, + !!(ppc_state.msr & 0x4000), true); + tlb2_entry->flags |= TLBFlags::PTE_SET_C; + } + + if (tlb2_entry->flags & TLBFlags::PAGE_MEM) { // is it a real memory region? + // refill the primary TLB + tlb1_entry->tag = tag; + tlb1_entry->flags = tlb2_entry->flags; + tlb1_entry->host_va_offset = tlb2_entry->host_va_offset; + host_va = (uint8_t *)(tlb1_entry->host_va_offset + guest_va); + } else { // otherwise, it's an access to a memory-mapped device +#ifdef MMU_PROFILING + iomem_writes_total++; +#endif + tlb2_entry->reg_desc->devobj->write(tlb2_entry->reg_desc->start, + guest_va - tlb2_entry->reg_desc->start, + value, sizeof(T)); + return; + } + } + +#ifdef MMU_PROFILING + dmem_writes_total++; +#endif + + // handle unaligned memory accesses + if (sizeof(T) > 1 && (guest_va & (sizeof(T) - 1))) { + write_unaligned(guest_va, host_va, value, sizeof(T)); + return; + } + + // handle aligned memory accesses + switch(sizeof(T)) { + case 1: + *host_va = value; + break; + case 2: + WRITE_WORD_BE_A(host_va, value); + break; + case 4: + WRITE_DWORD_BE_A(host_va, value); + break; + case 8: + WRITE_QWORD_BE_A(host_va, value); + break; + } +} + +// explicitely instantiate all required mmu_write_vmem variants +template void mmu_write_vmem(uint32_t guest_va, uint8_t value); +template void mmu_write_vmem(uint32_t guest_va, uint16_t value); +template void mmu_write_vmem(uint32_t guest_va, uint32_t value); +template void mmu_write_vmem(uint32_t guest_va, uint64_t value); + +static uint32_t read_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t size) +{ + uint32_t result = 0; + + // is it a misaligned cross-page read? + if (((guest_va & 0xFFF) + size) > 0x1000) { +#ifdef MMU_PROFILING + unaligned_crossp_r++; +#endif + // Break such a memory access into multiple, bytewise accesses. + // Because such accesses suffer a performance penalty, they will be + // presumably very rare so don't waste time optimizing the code below. + for (int i = 0; i < size; guest_va++, i++) { + result = (result << 8) | mmu_read_vmem(guest_va); + } + } else { +#ifdef MMU_PROFILING + unaligned_reads++; +#endif + switch(size) { + case 2: + return READ_WORD_BE_U(host_va); + case 4: + return READ_DWORD_BE_U(host_va); + case 8: // FIXME: should we raise alignment exception here? + return READ_QWORD_BE_U(host_va); + } + } + return result; +} + +static void write_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t value, + uint32_t size) +{ + // is it a misaligned cross-page write? + if (((guest_va & 0xFFF) + size) > 0x1000) { +#ifdef MMU_PROFILING + unaligned_crossp_w++; +#endif + // Break such a memory access into multiple, bytewise accesses. + // Because such accesses suffer a performance penalty, they will be + // presumably very rare so don't waste time optimizing the code below. 
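// Worked example (illustrative, not from this patch): for a 4-byte store of
// 0xAABBCCDD at guest_va = 0x0FFE (two bytes below a page boundary), the
// loop below issues four one-byte writes, most significant byte first:
//   shift = 24 -> 0xAA @ 0x0FFE, shift = 16 -> 0xBB @ 0x0FFF,
//   shift =  8 -> 0xCC @ 0x1000, shift =  0 -> 0xDD @ 0x1001,
// so each byte is translated on its own page by mmu_write_vmem<uint8_t>().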
+ + uint32_t shift = (size - 1) * 8; + + for (int i = 0; i < size; shift -= 8, guest_va++, i++) { + mmu_write_vmem(guest_va, (value >> shift) & 0xFF); + } + } else { +#ifdef MMU_PROFILING + unaligned_writes++; +#endif + switch(size) { + case 2: + WRITE_WORD_BE_U(host_va, value); + break; + case 4: + WRITE_DWORD_BE_U(host_va, value); + break; + case 8: // FIXME: should we raise alignment exception here? + WRITE_QWORD_BE_U(host_va, value); + break; + } + } +} + +void tlb_flush_entry(uint32_t ea) +{ + TLBEntry *tlb_entry, *tlb1, *tlb2; + + const uint32_t tag = ea & ~0xFFFUL; + + for (int m = 0; m < 6; m++) { + switch (m) { + case 0: + tlb1 = &itlb1_mode1[0]; + tlb2 = &itlb2_mode1[0]; + break; + case 1: + tlb1 = &itlb1_mode2[0]; + tlb2 = &itlb2_mode2[0]; + break; + case 2: + tlb1 = &itlb1_mode3[0]; + tlb2 = &itlb2_mode3[0]; + break; + case 3: + tlb1 = &dtlb1_mode1[0]; + tlb2 = &dtlb2_mode1[0]; + break; + case 4: + tlb1 = &dtlb1_mode2[0]; + tlb2 = &dtlb2_mode2[0]; + break; + case 5: + tlb1 = &dtlb1_mode3[0]; + tlb2 = &dtlb2_mode3[0]; + break; + } + + // flush primary TLB + tlb_entry = &tlb1[(ea >> PAGE_SIZE_BITS) & tlb_size_mask]; + if (tlb_entry->tag == tag) { + tlb_entry->tag = TLB_INVALID_TAG; + //LOG_F(INFO, "Invalidated primary TLB entry at 0x%X", ea); + } + + // flush secondary TLB + tlb_entry = &tlb2[((ea >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS]; + for (int i = 0; i < TLB2_WAYS; i++) { + if (tlb_entry[i].tag == tag) { + tlb_entry[i].tag = TLB_INVALID_TAG; + //LOG_F(INFO, "Invalidated secondary TLB entry at 0x%X", ea); + } + } + } +} + +void tlb_flush_entries(TLBFlags type) +{ + int i; + + // Flush BAT entries from the primary TLBs + for (i = 0; i < TLB_SIZE; i++) { + if (dtlb1_mode2[i].flags & type) { + dtlb1_mode2[i].tag = TLB_INVALID_TAG; + } + + if (dtlb1_mode3[i].flags & type) { + dtlb1_mode3[i].tag = TLB_INVALID_TAG; + } + } + + // Flush BAT entries from the secondary TLBs + for (i = 0; i < TLB_SIZE * TLB2_WAYS; i++) { + if (dtlb2_mode2[i].flags & type) { + dtlb2_mode2[i].tag = TLB_INVALID_TAG; + } + + if (dtlb2_mode3[i].flags & type) { + dtlb2_mode3[i].tag = TLB_INVALID_TAG; + } + } +} + +bool gTLBFlushBatEntries = false; +bool gTLBFlushPatEntries = false; + +void tlb_flush_bat_entries() +{ + if (!gTLBFlushBatEntries) + return; + + tlb_flush_entries(TLBE_FROM_BAT); + + gTLBFlushBatEntries = false; +} + +void tlb_flush_pat_entries() +{ + if (!gTLBFlushPatEntries) + return; + + tlb_flush_entries(TLBE_FROM_PAT); + + gTLBFlushPatEntries = false; +} + +void ibat_update(uint32_t bat_reg) +{ + int upper_reg_num; + uint32_t bl, hi_mask; + PPC_BAT_entry* bat_entry; + + upper_reg_num = bat_reg & 0xFFFFFFFE; + + if (ppc_state.spr[upper_reg_num] & 3) { // is that BAT pair valid? + bat_entry = &ibat_array[(bat_reg - 528) >> 1]; + bl = (ppc_state.spr[upper_reg_num] >> 2) & 0x7FF; + hi_mask = ~((bl << 17) | 0x1FFFF); + + bat_entry->access = ppc_state.spr[upper_reg_num] & 3; + bat_entry->prot = ppc_state.spr[upper_reg_num + 1] & 3; + bat_entry->hi_mask = hi_mask; + bat_entry->phys_hi = ppc_state.spr[upper_reg_num + 1] & hi_mask; + bat_entry->bepi = ppc_state.spr[upper_reg_num] & hi_mask; + + if (!gTLBFlushBatEntries) { + gTLBFlushBatEntries = true; + add_ctx_sync_action(&tlb_flush_bat_entries); + } + } +} + +void dbat_update(uint32_t bat_reg) +{ + int upper_reg_num; + uint32_t bl, hi_mask; + PPC_BAT_entry* bat_entry; + + upper_reg_num = bat_reg & 0xFFFFFFFE; + + if (ppc_state.spr[upper_reg_num] & 3) { // is that BAT pair valid? 
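// Worked example (illustrative, not from this patch): the BL field widens
// hi_mask to select the block size. For BL = 0 (128 KB block),
// hi_mask = ~((0 << 17) | 0x1FFFF) = 0xFFFE0000, so the top 15 bits of the
// effective address are compared against BEPI; for BL = 0x7FF (256 MB block),
// hi_mask = ~((0x7FF << 17) | 0x1FFFF) = 0xF0000000, so only the top 4 bits
// are compared.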
+ bat_entry = &dbat_array[(bat_reg - 536) >> 1]; + bl = (ppc_state.spr[upper_reg_num] >> 2) & 0x7FF; + hi_mask = ~((bl << 17) | 0x1FFFF); + + bat_entry->access = ppc_state.spr[upper_reg_num] & 3; + bat_entry->prot = ppc_state.spr[upper_reg_num + 1] & 3; + bat_entry->hi_mask = hi_mask; + bat_entry->phys_hi = ppc_state.spr[upper_reg_num + 1] & hi_mask; + bat_entry->bepi = ppc_state.spr[upper_reg_num] & hi_mask; + + if (!gTLBFlushBatEntries) { + gTLBFlushBatEntries = true; + add_ctx_sync_action(&tlb_flush_bat_entries); + } + } +} + +void mmu_pat_ctx_changed() +{ + if (!gTLBFlushPatEntries) { + gTLBFlushPatEntries = true; + add_ctx_sync_action(&tlb_flush_pat_entries); + } +} + +/* MMU profiling. */ +#ifdef MMU_PROFILING + #include "utils/profiler.h" #include @@ -139,15 +1233,6 @@ public: /* SoftTLB profiling. */ #ifdef TLB_PROFILING -/* global variables for lightweight SoftTLB profiling */ -uint64_t num_primary_itlb_hits = 0; // number of hits in the primary ITLB -uint64_t num_secondary_itlb_hits = 0; // number of hits in the secondary ITLB -uint64_t num_itlb_refills = 0; // number of ITLB refills -uint64_t num_primary_dtlb_hits = 0; // number of hits in the primary DTLB -uint64_t num_secondary_dtlb_hits = 0; // number of hits in the secondary DTLB -uint64_t num_dtlb_refills = 0; // number of DTLB refills -uint64_t num_entry_replacements = 0; // number of entry replacements - #include "utils/profiler.h" #include @@ -196,15 +1281,8 @@ public: }; #endif - -/** remember recently used physical memory regions for quicker translation. */ -AddressMapEntry last_read_area = {0xFFFFFFFF, 0xFFFFFFFF}; -AddressMapEntry last_write_area = {0xFFFFFFFF, 0xFFFFFFFF}; -AddressMapEntry last_exec_area = {0xFFFFFFFF, 0xFFFFFFFF}; -AddressMapEntry last_ptab_area = {0xFFFFFFFF, 0xFFFFFFFF}; -AddressMapEntry last_dma_area = {0xFFFFFFFF, 0xFFFFFFFF}; - - +//=================== Old and slow code. Kept for reference ================= +#if 0 template static inline T read_phys_mem(AddressMapEntry *mru_rgn, uint32_t addr) { @@ -316,349 +1394,6 @@ static inline void write_phys_mem(AddressMapEntry *mru_rgn, uint32_t addr, T val } } -uint8_t* mmu_get_dma_mem(uint32_t addr, uint32_t size) { - if (addr >= last_dma_area.start && (addr + size) <= last_dma_area.end) { - return last_dma_area.mem_ptr + (addr - last_dma_area.start); - } else { - AddressMapEntry* entry = mem_ctrl_instance->find_range(addr); - if (entry && entry->type & (RT_ROM | RT_RAM)) { - last_dma_area.start = entry->start; - last_dma_area.end = entry->end; - last_dma_area.mem_ptr = entry->mem_ptr; - return last_dma_area.mem_ptr + (addr - last_dma_area.start); - } else { - ABORT_F("SOS: DMA access to unmapped memory %08X!\n", addr); - } - } -} - -void ppc_set_cur_instruction(const uint8_t* ptr) { - ppc_cur_instruction = READ_DWORD_BE_A(ptr); -} - -bool gTLBFlushBatEntries = false; -bool gTLBFlushPatEntries = false; - -// Forward declarations. -void tlb_flush_bat_entries(); -void tlb_flush_pat_entries(); - -void ibat_update(uint32_t bat_reg) { - int upper_reg_num; - uint32_t bl, hi_mask; - PPC_BAT_entry* bat_entry; - - upper_reg_num = bat_reg & 0xFFFFFFFE; - - if (ppc_state.spr[upper_reg_num] & 3) { // is that BAT pair valid? 
- bat_entry = &ibat_array[(bat_reg - 528) >> 1]; - bl = (ppc_state.spr[upper_reg_num] >> 2) & 0x7FF; - hi_mask = ~((bl << 17) | 0x1FFFF); - - bat_entry->access = ppc_state.spr[upper_reg_num] & 3; - bat_entry->prot = ppc_state.spr[upper_reg_num + 1] & 3; - bat_entry->hi_mask = hi_mask; - bat_entry->phys_hi = ppc_state.spr[upper_reg_num + 1] & hi_mask; - bat_entry->bepi = ppc_state.spr[upper_reg_num] & hi_mask; - - if (!gTLBFlushBatEntries) { - gTLBFlushBatEntries = true; - add_ctx_sync_action(&tlb_flush_bat_entries); - } - } -} - -void dbat_update(uint32_t bat_reg) { - int upper_reg_num; - uint32_t bl, hi_mask; - PPC_BAT_entry* bat_entry; - - upper_reg_num = bat_reg & 0xFFFFFFFE; - - if (ppc_state.spr[upper_reg_num] & 3) { // is that BAT pair valid? - bat_entry = &dbat_array[(bat_reg - 536) >> 1]; - bl = (ppc_state.spr[upper_reg_num] >> 2) & 0x7FF; - hi_mask = ~((bl << 17) | 0x1FFFF); - - bat_entry->access = ppc_state.spr[upper_reg_num] & 3; - bat_entry->prot = ppc_state.spr[upper_reg_num + 1] & 3; - bat_entry->hi_mask = hi_mask; - bat_entry->phys_hi = ppc_state.spr[upper_reg_num + 1] & hi_mask; - bat_entry->bepi = ppc_state.spr[upper_reg_num] & hi_mask; - - if (!gTLBFlushBatEntries) { - gTLBFlushBatEntries = true; - add_ctx_sync_action(&tlb_flush_bat_entries); - } - } -} - -void mmu_pat_ctx_changed() -{ - if (!gTLBFlushPatEntries) { - gTLBFlushPatEntries = true; - add_ctx_sync_action(&tlb_flush_pat_entries); - } -} - -/** PowerPC-style block address translation. */ -template -static BATResult ppc_block_address_translation(uint32_t la) -{ - uint32_t pa; // translated physical address - uint8_t prot; // protection bits for the translated address - PPC_BAT_entry *bat_array; - - bool bat_hit = false; - unsigned msr_pr = !!(ppc_state.msr & 0x4000); - - bat_array = (type == BATType::IBAT) ? 
ibat_array : dbat_array; - - // Format: %XY - // X - supervisor access bit, Y - problem/user access bit - // Those bits are mutually exclusive - unsigned access_bits = ((msr_pr ^ 1) << 1) | msr_pr; - - for (int bat_index = 0; bat_index < 4; bat_index++) { - PPC_BAT_entry* bat_entry = &bat_array[bat_index]; - - if ((bat_entry->access & access_bits) && ((la & bat_entry->hi_mask) == bat_entry->bepi)) { - bat_hit = true; - -#ifdef MMU_PROFILING - bat_transl_total++; -#endif - // logical to physical translation - pa = bat_entry->phys_hi | (la & ~bat_entry->hi_mask); - prot = bat_entry->prot; - break; - } - } - - return BATResult{bat_hit, prot, pa}; -} - -static inline uint8_t* calc_pteg_addr(uint32_t hash) { - uint32_t sdr1_val, pteg_addr; - - sdr1_val = ppc_state.spr[SPR::SDR1]; - - pteg_addr = sdr1_val & 0xFE000000; - pteg_addr |= (sdr1_val & 0x01FF0000) | (((sdr1_val & 0x1FF) << 16) & ((hash & 0x7FC00) << 6)); - pteg_addr |= (hash & 0x3FF) << 6; - - if (pteg_addr >= last_ptab_area.start && pteg_addr <= last_ptab_area.end) { - return last_ptab_area.mem_ptr + (pteg_addr - last_ptab_area.start); - } else { - AddressMapEntry* entry = mem_ctrl_instance->find_range(pteg_addr); - if (entry && entry->type & (RT_ROM | RT_RAM)) { - last_ptab_area.start = entry->start; - last_ptab_area.end = entry->end; - last_ptab_area.mem_ptr = entry->mem_ptr; - return last_ptab_area.mem_ptr + (pteg_addr - last_ptab_area.start); - } else { - ABORT_F("SOS: no page table region was found at %08X!\n", pteg_addr); - } - } -} - -static bool search_pteg( - uint8_t* pteg_addr, uint8_t** ret_pte_addr, uint32_t vsid, uint16_t page_index, uint8_t pteg_num) { - /* construct PTE matching word */ - uint32_t pte_check = 0x80000000 | (vsid << 7) | (pteg_num << 6) | (page_index >> 10); - -#ifdef MMU_INTEGRITY_CHECKS - /* PTEG integrity check that ensures that all matching PTEs have - identical RPN, WIMG and PP bits (PPC PEM 32-bit 7.6.2, rule 5). 
*/ - uint32_t pte_word2_check; - bool match_found = false; - - for (int i = 0; i < 8; i++, pteg_addr += 8) { - if (pte_check == READ_DWORD_BE_A(pteg_addr)) { - if (match_found) { - if ((READ_DWORD_BE_A(pteg_addr) & 0xFFFFF07B) != pte_word2_check) { - ABORT_F("Multiple PTEs with different RPN/WIMG/PP found!\n"); - } - } else { - /* isolate RPN, WIMG and PP fields */ - pte_word2_check = READ_DWORD_BE_A(pteg_addr) & 0xFFFFF07B; - *ret_pte_addr = pteg_addr; - } - } - } -#else - for (int i = 0; i < 8; i++, pteg_addr += 8) { - if (pte_check == READ_DWORD_BE_A(pteg_addr)) { - *ret_pte_addr = pteg_addr; - return true; - } - } -#endif - - return false; -} - -static PATResult page_address_translation(uint32_t la, bool is_instr_fetch, - unsigned msr_pr, int is_write) -{ - uint32_t sr_val, page_index, pteg_hash1, vsid, pte_word2; - unsigned key, pp; - uint8_t* pte_addr; - - sr_val = ppc_state.sr[(la >> 28) & 0x0F]; - if (sr_val & 0x80000000) { - ABORT_F("Direct-store segments not supported, LA=%0xX\n", la); - } - - /* instruction fetch from a no-execute segment will cause ISI exception */ - if ((sr_val & 0x10000000) && is_instr_fetch) { - mmu_exception_handler(Except_Type::EXC_ISI, 0x10000000); - } - - page_index = (la >> 12) & 0xFFFF; - pteg_hash1 = (sr_val & 0x7FFFF) ^ page_index; - vsid = sr_val & 0x0FFFFFF; - - if (!search_pteg(calc_pteg_addr(pteg_hash1), &pte_addr, vsid, page_index, 0)) { - if (!search_pteg(calc_pteg_addr(~pteg_hash1), &pte_addr, vsid, page_index, 1)) { - if (is_instr_fetch) { - mmu_exception_handler(Except_Type::EXC_ISI, 0x40000000); - } else { - ppc_state.spr[SPR::DSISR] = 0x40000000 | (is_write << 25); - ppc_state.spr[SPR::DAR] = la; - mmu_exception_handler(Except_Type::EXC_DSI, 0); - } - } - } - - pte_word2 = READ_DWORD_BE_A(pte_addr + 4); - - key = (((sr_val >> 29) & 1) & msr_pr) | (((sr_val >> 30) & 1) & (msr_pr ^ 1)); - - /* check page access */ - pp = pte_word2 & 3; - - // the following scenarios cause DSI/ISI exception: - // any access with key = 1 and PP = %00 - // write access with key = 1 and PP = %01 - // write access with PP = %11 - if ((key && (!pp || (pp == 1 && is_write))) || (pp == 3 && is_write)) { - if (is_instr_fetch) { - mmu_exception_handler(Except_Type::EXC_ISI, 0x08000000); - } else { - ppc_state.spr[SPR::DSISR] = 0x08000000 | (is_write << 25); - ppc_state.spr[SPR::DAR] = la; - mmu_exception_handler(Except_Type::EXC_DSI, 0); - } - } - - /* update R and C bits */ - /* For simplicity, R is set on each access, C is set only for writes */ - pte_addr[6] |= 0x01; - if (is_write) { - pte_addr[7] |= 0x80; - } - - /* return physical address, access protection and C status */ - return PATResult{ - ((pte_word2 & 0xFFFFF000) | (la & 0x00000FFF)), - static_cast((key << 2) | pp), - static_cast(pte_word2 & 0x80) - }; -} - -/** PowerPC-style MMU instruction address translation. 
*/ -static uint32_t mmu_instr_translation(uint32_t la) { - uint32_t pa; /* translated physical address */ - - bool bat_hit = false; - unsigned msr_pr = !!(ppc_state.msr & 0x4000); - - // Format: %XY - // X - supervisor access bit, Y - problem/user access bit - // Those bits are mutually exclusive - unsigned access_bits = ((msr_pr ^ 1) << 1) | msr_pr; - - for (int bat_index = 0; bat_index < 4; bat_index++) { - PPC_BAT_entry* bat_entry = &ibat_array[bat_index]; - - if ((bat_entry->access & access_bits) && ((la & bat_entry->hi_mask) == bat_entry->bepi)) { - bat_hit = true; - -#ifdef MMU_PROFILING - bat_transl_total++; -#endif - - if (!bat_entry->prot) { - mmu_exception_handler(Except_Type::EXC_ISI, 0x08000000); - } - - // logical to physical translation - pa = bat_entry->phys_hi | (la & ~bat_entry->hi_mask); - break; - } - } - - /* page address translation */ - if (!bat_hit) { - PATResult pat_res = page_address_translation(la, true, msr_pr, 0); - pa = pat_res.phys; - -#ifdef MMU_PROFILING - ptab_transl_total++; -#endif - } - - return pa; -} - -/** PowerPC-style MMU data address translation. */ -static uint32_t ppc_mmu_addr_translate(uint32_t la, int is_write) { - uint32_t pa; /* translated physical address */ - - bool bat_hit = false; - unsigned msr_pr = !!(ppc_state.msr & 0x4000); - - // Format: %XY - // X - supervisor access bit, Y - problem/user access bit - // Those bits are mutually exclusive - unsigned access_bits = ((msr_pr ^ 1) << 1) | msr_pr; - - for (int bat_index = 0; bat_index < 4; bat_index++) { - PPC_BAT_entry* bat_entry = &dbat_array[bat_index]; - - if ((bat_entry->access & access_bits) && ((la & bat_entry->hi_mask) == bat_entry->bepi)) { - bat_hit = true; - -#ifdef MMU_PROFILING - bat_transl_total++; -#endif - - if (!bat_entry->prot || ((bat_entry->prot & 1) && is_write)) { - ppc_state.spr[SPR::DSISR] = 0x08000000 | (is_write << 25); - ppc_state.spr[SPR::DAR] = la; - mmu_exception_handler(Except_Type::EXC_DSI, 0); - } - - // logical to physical translation - pa = bat_entry->phys_hi | (la & ~bat_entry->hi_mask); - break; - } - } - - /* page address translation */ - if (!bat_hit) { - PATResult pat_res = page_address_translation(la, false, msr_pr, is_write); - pa = pat_res.phys; - -#ifdef MMU_PROFILING - ptab_transl_total++; -#endif - } - - return pa; -} - static void mem_write_unaligned(uint32_t addr, uint32_t value, uint32_t size) { #ifdef MMU_DEBUG LOG_F(WARNING, "Attempt to write unaligned %d bytes to 0x%08X\n", size, addr); @@ -703,377 +1438,6 @@ static void mem_write_unaligned(uint32_t addr, uint32_t value, uint32_t size) { } } -// primary ITLB for all MMU modes -static std::array itlb1_mode1; -static std::array itlb1_mode2; -static std::array itlb1_mode3; - -// secondary ITLB for all MMU modes -static std::array itlb2_mode1; -static std::array itlb2_mode2; -static std::array itlb2_mode3; - -// primary DTLB for all MMU modes -static std::array dtlb1_mode1; -static std::array dtlb1_mode2; -static std::array dtlb1_mode3; - -// secondary DTLB for all MMU modes -static std::array dtlb2_mode1; -static std::array dtlb2_mode2; -static std::array dtlb2_mode3; - -TLBEntry *pCurITLB1; // current primary ITLB -TLBEntry *pCurITLB2; // current secondary ITLB -TLBEntry *pCurDTLB1; // current primary DTLB -TLBEntry *pCurDTLB2; // current secondary DTLB - -uint32_t tlb_size_mask = TLB_SIZE - 1; - -// fake TLB entry for handling of unmapped memory accesses -uint64_t UnmappedVal = -1ULL; -TLBEntry UnmappedMem = {TLB_INVALID_TAG, 0, 0, 0}; - -uint8_t CurITLBMode = {0xFF}; // current ITLB mode 
-uint8_t CurDTLBMode = {0xFF}; // current DTLB mode - -void mmu_change_mode() -{ - uint8_t mmu_mode; - - // switch ITLB tables first - mmu_mode = ((ppc_state.msr >> 4) & 0x2) | ((ppc_state.msr >> 14) & 1); - - if (CurITLBMode != mmu_mode) { - switch(mmu_mode) { - case 0: // real address mode - pCurITLB1 = &dtlb1_mode1[0]; - pCurITLB2 = &dtlb2_mode1[0]; - break; - case 2: // supervisor mode with instruction translation enabled - pCurITLB1 = &dtlb1_mode2[0]; - pCurITLB2 = &dtlb2_mode2[0]; - break; - case 3: // user mode with instruction translation enabled - pCurITLB1 = &dtlb1_mode3[0]; - pCurITLB2 = &dtlb2_mode3[0]; - break; - } - CurITLBMode = mmu_mode; - } - - // then switch DTLB tables - mmu_mode = ((ppc_state.msr >> 3) & 0x2) | ((ppc_state.msr >> 14) & 1); - - if (CurDTLBMode != mmu_mode) { - switch(mmu_mode) { - case 0: // real address mode - pCurDTLB1 = &dtlb1_mode1[0]; - pCurDTLB2 = &dtlb2_mode1[0]; - break; - case 2: // supervisor mode with data translation enabled - pCurDTLB1 = &dtlb1_mode2[0]; - pCurDTLB2 = &dtlb2_mode2[0]; - break; - case 3: // user mode with data translation enabled - pCurDTLB1 = &dtlb1_mode3[0]; - pCurDTLB2 = &dtlb2_mode3[0]; - break; - } - CurDTLBMode = mmu_mode; - } -} - -static TLBEntry* tlb2_target_entry(uint32_t gp_va) -{ - TLBEntry *tlb_entry; - - tlb_entry = &pCurDTLB2[((gp_va >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS]; - - // select the target from invalid blocks first - if (tlb_entry[0].tag == TLB_INVALID_TAG) { - // update LRU bits - tlb_entry[0].lru_bits = 0x3; - tlb_entry[1].lru_bits = 0x2; - tlb_entry[2].lru_bits &= 0x1; - tlb_entry[3].lru_bits &= 0x1; - return tlb_entry; - } else if (tlb_entry[1].tag == TLB_INVALID_TAG) { - // update LRU bits - tlb_entry[0].lru_bits = 0x2; - tlb_entry[1].lru_bits = 0x3; - tlb_entry[2].lru_bits &= 0x1; - tlb_entry[3].lru_bits &= 0x1; - return &tlb_entry[1]; - } else if (tlb_entry[2].tag == TLB_INVALID_TAG) { - // update LRU bits - tlb_entry[0].lru_bits &= 0x1; - tlb_entry[1].lru_bits &= 0x1; - tlb_entry[2].lru_bits = 0x3; - tlb_entry[3].lru_bits = 0x2; - return &tlb_entry[2]; - } else if (tlb_entry[3].tag == TLB_INVALID_TAG) { - // update LRU bits - tlb_entry[0].lru_bits &= 0x1; - tlb_entry[1].lru_bits &= 0x1; - tlb_entry[2].lru_bits = 0x2; - tlb_entry[3].lru_bits = 0x3; - return &tlb_entry[3]; - } else { // no free entries, replace an existing one according with the hLRU policy -#ifdef TLB_PROFILING - num_entry_replacements++; -#endif - if (tlb_entry[0].lru_bits == 0) { - // update LRU bits - tlb_entry[0].lru_bits = 0x3; - tlb_entry[1].lru_bits = 0x2; - tlb_entry[2].lru_bits &= 0x1; - tlb_entry[3].lru_bits &= 0x1; - return tlb_entry; - } else if (tlb_entry[1].lru_bits == 0) { - // update LRU bits - tlb_entry[0].lru_bits = 0x2; - tlb_entry[1].lru_bits = 0x3; - tlb_entry[2].lru_bits &= 0x1; - tlb_entry[3].lru_bits &= 0x1; - return &tlb_entry[1]; - } else if (tlb_entry[2].lru_bits == 0) { - // update LRU bits - tlb_entry[0].lru_bits &= 0x1; - tlb_entry[1].lru_bits &= 0x1; - tlb_entry[2].lru_bits = 0x3; - tlb_entry[3].lru_bits = 0x2; - return &tlb_entry[2]; - } else { - // update LRU bits - tlb_entry[0].lru_bits &= 0x1; - tlb_entry[1].lru_bits &= 0x1; - tlb_entry[2].lru_bits = 0x2; - tlb_entry[3].lru_bits = 0x3; - return &tlb_entry[3]; - } - } -} - -static TLBEntry* itlb2_refill(uint32_t guest_va) -{ - uint32_t phys_addr; - TLBEntry *tlb_entry; - uint16_t flags = 0; - - /* instruction address translation if enabled */ - if (ppc_state.msr & 0x20) { - // attempt block address translation first - BATResult bat_res 
= ppc_block_address_translation(guest_va); - if (bat_res.hit) { - // check block protection - // only PP = 0 (no access) causes ISI exception - if (!bat_res.prot) { - mmu_exception_handler(Except_Type::EXC_ISI, 0x08000000); - } - phys_addr = bat_res.phys; - flags |= TLBFlags::TLBE_FROM_BAT; // tell the world we come from - } else { - // page address translation - PATResult pat_res = page_address_translation(guest_va, true, - !!(ppc_state.msr & 0x4000), 0); - phys_addr = pat_res.phys; - flags = TLBFlags::TLBE_FROM_PAT; // tell the world we come from - } - } else { // instruction translation disabled - phys_addr = guest_va; - } - - // look up host virtual address - AddressMapEntry* reg_desc = mem_ctrl_instance->find_range(phys_addr); - if (reg_desc) { - if (reg_desc->type & RT_MMIO) { - ABORT_F("Instruction fetch from MMIO region at 0x%08X!\n", phys_addr); - } - // refill the secondary TLB - const uint32_t tag = guest_va & ~0xFFFUL; - tlb_entry = tlb2_target_entry(tag); - tlb_entry->tag = tag; - tlb_entry->flags = flags | TLBFlags::PAGE_MEM; - tlb_entry->host_va_offset = (int64_t)reg_desc->mem_ptr - guest_va + - (phys_addr - reg_desc->start); - } else { - ABORT_F("Instruction fetch from unmapped memory at 0x%08X!\n", phys_addr); - } - - return tlb_entry; -} - -static TLBEntry* dtlb2_refill(uint32_t guest_va, int is_write) -{ - uint32_t phys_addr; - uint16_t flags = 0; - TLBEntry *tlb_entry; - - const uint32_t tag = guest_va & ~0xFFFUL; - - /* data address translation if enabled */ - if (ppc_state.msr & 0x10) { - // attempt block address translation first - BATResult bat_res = ppc_block_address_translation(guest_va); - if (bat_res.hit) { - // check block protection - if (!bat_res.prot || ((bat_res.prot & 1) && is_write)) { - LOG_F(9, "BAT DSI exception in TLB2 refill!"); - LOG_F(9, "Attempt to write to read-only region, LA=0x%08X, PC=0x%08X!", guest_va, ppc_state.pc); - ppc_state.spr[SPR::DSISR] = 0x08000000 | (is_write << 25); - ppc_state.spr[SPR::DAR] = guest_va; - mmu_exception_handler(Except_Type::EXC_DSI, 0); - } - phys_addr = bat_res.phys; - flags = TLBFlags::PTE_SET_C; // prevent PTE.C updates for BAT - flags |= TLBFlags::TLBE_FROM_BAT; // tell the world we come from - if (bat_res.prot == 2) { - flags |= TLBFlags::PAGE_WRITABLE; - } - } else { - // page address translation - PATResult pat_res = page_address_translation(guest_va, false, - !!(ppc_state.msr & 0x4000), is_write); - phys_addr = pat_res.phys; - flags = TLBFlags::TLBE_FROM_PAT; // tell the world we come from - if (pat_res.prot <= 2 || pat_res.prot == 6) { - flags |= TLBFlags::PAGE_WRITABLE; - } - if (is_write || pat_res.pte_c_status) { - // C-bit of the PTE is already set so the TLB logic - // doesn't need to update it anymore - flags |= TLBFlags::PTE_SET_C; - } - } - } else { // data translation disabled - phys_addr = guest_va; - flags = TLBFlags::PTE_SET_C; // no PTE.C updates in real addressing mode - flags |= TLBFlags::PAGE_WRITABLE; // assume physical pages are writable - } - - // look up host virtual address - AddressMapEntry* reg_desc = mem_ctrl_instance->find_range(phys_addr); - if (reg_desc) { - // refill the secondary TLB - tlb_entry = tlb2_target_entry(tag); - tlb_entry->tag = tag; - if (reg_desc->type & RT_MMIO) { // MMIO region - tlb_entry->flags = flags | TLBFlags::PAGE_IO; - tlb_entry->reg_desc = reg_desc; - } else { // memory region backed by host memory - tlb_entry->flags = flags | TLBFlags::PAGE_MEM; - tlb_entry->host_va_offset = (int64_t)reg_desc->mem_ptr - guest_va + - (phys_addr - reg_desc->start); - } - 
return tlb_entry; - } else { - LOG_F(ERROR, "Read from unmapped memory at 0x%08X!\n", phys_addr); - UnmappedMem.tag = tag; - UnmappedMem.host_va_offset = (int64_t)(&UnmappedVal) - guest_va; - return &UnmappedMem; - } -} - -void tlb_flush_entry(uint32_t ea) -{ - TLBEntry *tlb_entry, *tlb1, *tlb2; - - const uint32_t tag = ea & ~0xFFFUL; - - for (int m = 0; m < 6; m++) { - switch (m) { - case 0: - tlb1 = &itlb1_mode1[0]; - tlb2 = &itlb2_mode1[0]; - break; - case 1: - tlb1 = &itlb1_mode2[0]; - tlb2 = &itlb2_mode2[0]; - break; - case 2: - tlb1 = &itlb1_mode3[0]; - tlb2 = &itlb2_mode3[0]; - break; - case 3: - tlb1 = &dtlb1_mode1[0]; - tlb2 = &dtlb2_mode1[0]; - break; - case 4: - tlb1 = &dtlb1_mode2[0]; - tlb2 = &dtlb2_mode2[0]; - break; - case 5: - tlb1 = &dtlb1_mode3[0]; - tlb2 = &dtlb2_mode3[0]; - break; - } - - // flush primary TLB - tlb_entry = &tlb1[(ea >> PAGE_SIZE_BITS) & tlb_size_mask]; - if (tlb_entry->tag == tag) { - tlb_entry->tag = TLB_INVALID_TAG; - //LOG_F(INFO, "Invalidated primary TLB entry at 0x%X", ea); - } - - // flush secondary TLB - tlb_entry = &tlb2[((ea >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS]; - for (int i = 0; i < TLB2_WAYS; i++) { - if (tlb_entry[i].tag == tag) { - tlb_entry[i].tag = TLB_INVALID_TAG; - //LOG_F(INFO, "Invalidated secondary TLB entry at 0x%X", ea); - } - } - } -} - -void tlb_flush_entries(TLBFlags type) -{ - int i; - - // Flush BAT entries from the primary TLBs - for (i = 0; i < TLB_SIZE; i++) { - if (dtlb1_mode2[i].flags & type) { - dtlb1_mode2[i].tag = TLB_INVALID_TAG; - } - - if (dtlb1_mode3[i].flags & type) { - dtlb1_mode3[i].tag = TLB_INVALID_TAG; - } - } - - // Flush BAT entries from the secondary TLBs - for (i = 0; i < TLB_SIZE * TLB2_WAYS; i++) { - if (dtlb2_mode2[i].flags & type) { - dtlb2_mode2[i].tag = TLB_INVALID_TAG; - } - - if (dtlb2_mode3[i].flags & type) { - dtlb2_mode3[i].tag = TLB_INVALID_TAG; - } - } -} - -void tlb_flush_bat_entries() -{ - if (!gTLBFlushBatEntries) - return; - - tlb_flush_entries(TLBE_FROM_BAT); - - gTLBFlushBatEntries = false; -} - -void tlb_flush_pat_entries() -{ - if (!gTLBFlushPatEntries) - return; - - tlb_flush_entries(TLBE_FROM_PAT); - - gTLBFlushPatEntries = false; -} - static inline uint64_t tlb_translate_addr(uint32_t guest_va) { TLBEntry *tlb1_entry, *tlb2_entry; @@ -1181,306 +1545,6 @@ static uint32_t mem_grab_unaligned(uint32_t addr, uint32_t size) { return ret; } -template -static inline TLBEntry* lookup_secondary_tlb(uint32_t guest_va, uint32_t tag) { - TLBEntry *tlb_entry; - - if (tlb_type == TLBType::ITLB) { - tlb_entry = &pCurITLB2[((guest_va >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS]; - } else { - tlb_entry = &pCurDTLB2[((guest_va >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS]; - } - - if (tlb_entry->tag == tag) { - // update LRU bits - tlb_entry[0].lru_bits = 0x3; - tlb_entry[1].lru_bits = 0x2; - tlb_entry[2].lru_bits &= 0x1; - tlb_entry[3].lru_bits &= 0x1; - } else if (tlb_entry[1].tag == tag) { - tlb_entry = &tlb_entry[1]; - // update LRU bits - tlb_entry[0].lru_bits = 0x2; - tlb_entry[1].lru_bits = 0x3; - tlb_entry[2].lru_bits &= 0x1; - tlb_entry[3].lru_bits &= 0x1; - } else if (tlb_entry[2].tag == tag) { - tlb_entry = &tlb_entry[2]; - // update LRU bits - tlb_entry[0].lru_bits &= 0x1; - tlb_entry[1].lru_bits &= 0x1; - tlb_entry[2].lru_bits = 0x3; - tlb_entry[3].lru_bits = 0x2; - } else if (tlb_entry[3].tag == tag) { - tlb_entry = &tlb_entry[3]; - // update LRU bits - tlb_entry[0].lru_bits &= 0x1; - tlb_entry[1].lru_bits &= 0x1; - tlb_entry[2].lru_bits = 0x2; - tlb_entry[3].lru_bits 
= 0x3; - } else { - return nullptr; - } - return tlb_entry; -} - -// Forward declarations. -static uint32_t read_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t size); -static void write_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t value, - uint32_t size); - -template -inline T mmu_read_vmem(uint32_t guest_va) { - TLBEntry *tlb1_entry, *tlb2_entry; - uint8_t *host_va; - - const uint32_t tag = guest_va & ~0xFFFUL; - - // look up guest virtual address in the primary TLB - tlb1_entry = &pCurDTLB1[(guest_va >> PAGE_SIZE_BITS) & tlb_size_mask]; - if (tlb1_entry->tag == tag) { // primary TLB hit -> fast path -#ifdef TLB_PROFILING - num_primary_dtlb_hits++; -#endif - host_va = (uint8_t *)(tlb1_entry->host_va_offset + guest_va); - } else { - // primary TLB miss -> look up address in the secondary TLB - tlb2_entry = lookup_secondary_tlb(guest_va, tag); - if (tlb2_entry == nullptr) { -#ifdef TLB_PROFILING - num_dtlb_refills++; -#endif - // secondary TLB miss -> - // perform full address translation and refill the secondary TLB - tlb2_entry = dtlb2_refill(guest_va, 0); - } -#ifdef TLB_PROFILING - else { - num_secondary_dtlb_hits++; - } -#endif - - if (tlb2_entry->flags & TLBFlags::PAGE_MEM) { // is it a real memory region? - // refill the primary TLB - tlb1_entry->tag = tag; - tlb1_entry->flags = tlb2_entry->flags; - tlb1_entry->host_va_offset = tlb2_entry->host_va_offset; - host_va = (uint8_t *)(tlb1_entry->host_va_offset + guest_va); - } else { // otherwise, it's an access to a memory-mapped device -#ifdef MMU_PROFILING - iomem_reads_total++; -#endif - return ( - tlb2_entry->reg_desc->devobj->read(tlb2_entry->reg_desc->start, - guest_va - tlb2_entry->reg_desc->start, sizeof(T)) - ); - } - } - -#ifdef MMU_PROFILING - dmem_reads_total++; -#endif - - // handle unaligned memory accesses - if (sizeof(T) > 1 && (guest_va & (sizeof(T) - 1))) { - return read_unaligned(guest_va, host_va, sizeof(T)); - } - - // handle aligned memory accesses - switch(sizeof(T)) { - case 1: - return *host_va; - case 2: - return READ_WORD_BE_A(host_va); - case 4: - return READ_DWORD_BE_A(host_va); - case 8: - return READ_QWORD_BE_A(host_va); - } -} - -// explicitely instantiate all required mmu_read_vmem variants -template uint8_t mmu_read_vmem(uint32_t guest_va); -template uint16_t mmu_read_vmem(uint32_t guest_va); -template uint32_t mmu_read_vmem(uint32_t guest_va); -template uint64_t mmu_read_vmem(uint32_t guest_va); - -template -inline void mmu_write_vmem(uint32_t guest_va, T value) { - TLBEntry *tlb1_entry, *tlb2_entry; - uint8_t *host_va; - - const uint32_t tag = guest_va & ~0xFFFUL; - - // look up guest virtual address in the primary TLB - tlb1_entry = &pCurDTLB1[(guest_va >> PAGE_SIZE_BITS) & tlb_size_mask]; - if (tlb1_entry->tag == tag) { // primary TLB hit -> fast path -#ifdef TLB_PROFILING - num_primary_dtlb_hits++; -#endif - if (!(tlb1_entry->flags & TLBFlags::PAGE_WRITABLE)) { - ppc_state.spr[SPR::DSISR] = 0x08000000 | (1 << 25); - ppc_state.spr[SPR::DAR] = guest_va; - mmu_exception_handler(Except_Type::EXC_DSI, 0); - } - if (!(tlb1_entry->flags & TLBFlags::PTE_SET_C)) { - // perform full page address translation to update PTE.C bit - PATResult pat_res = page_address_translation(guest_va, false, - !!(ppc_state.msr & 0x4000), true); - tlb1_entry->flags |= TLBFlags::PTE_SET_C; - - // don't forget to update the secondary TLB as well - tlb2_entry = lookup_secondary_tlb(guest_va, tag); - if (tlb2_entry != nullptr) { - tlb2_entry->flags |= TLBFlags::PTE_SET_C; - } - } - host_va = (uint8_t 
*)(tlb1_entry->host_va_offset + guest_va); - } else { - // primary TLB miss -> look up address in the secondary TLB - tlb2_entry = lookup_secondary_tlb(guest_va, tag); - if (tlb2_entry == nullptr) { -#ifdef TLB_PROFILING - num_dtlb_refills++; -#endif - // secondary TLB miss -> - // perform full address translation and refill the secondary TLB - tlb2_entry = dtlb2_refill(guest_va, 1); - } -#ifdef TLB_PROFILING - else { - num_secondary_dtlb_hits++; - } -#endif - - if (!(tlb2_entry->flags & TLBFlags::PAGE_WRITABLE)) { - ppc_state.spr[SPR::DSISR] = 0x08000000 | (1 << 25); - ppc_state.spr[SPR::DAR] = guest_va; - mmu_exception_handler(Except_Type::EXC_DSI, 0); - } - - if (!(tlb2_entry->flags & TLBFlags::PTE_SET_C)) { - // perform full page address translation to update PTE.C bit - PATResult pat_res = page_address_translation(guest_va, false, - !!(ppc_state.msr & 0x4000), true); - tlb2_entry->flags |= TLBFlags::PTE_SET_C; - } - - if (tlb2_entry->flags & TLBFlags::PAGE_MEM) { // is it a real memory region? - // refill the primary TLB - tlb1_entry->tag = tag; - tlb1_entry->flags = tlb2_entry->flags; - tlb1_entry->host_va_offset = tlb2_entry->host_va_offset; - host_va = (uint8_t *)(tlb1_entry->host_va_offset + guest_va); - } else { // otherwise, it's an access to a memory-mapped device -#ifdef MMU_PROFILING - iomem_writes_total++; -#endif - tlb2_entry->reg_desc->devobj->write(tlb2_entry->reg_desc->start, - guest_va - tlb2_entry->reg_desc->start, value, sizeof(T)); - return; - } - } - -#ifdef MMU_PROFILING - dmem_writes_total++; -#endif - - // handle unaligned memory accesses - if (sizeof(T) > 1 && (guest_va & (sizeof(T) - 1))) { - write_unaligned(guest_va, host_va, value, sizeof(T)); - return; - } - - // handle aligned memory accesses - switch(sizeof(T)) { - case 1: - *host_va = value; - break; - case 2: - WRITE_WORD_BE_A(host_va, value); - break; - case 4: - WRITE_DWORD_BE_A(host_va, value); - break; - case 8: - WRITE_QWORD_BE_A(host_va, value); - break; - } -} - -// explicitely instantiate all required mmu_write_vmem variants -template void mmu_write_vmem(uint32_t guest_va, uint8_t value); -template void mmu_write_vmem(uint32_t guest_va, uint16_t value); -template void mmu_write_vmem(uint32_t guest_va, uint32_t value); -template void mmu_write_vmem(uint32_t guest_va, uint64_t value); - -static uint32_t read_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t size) -{ - uint32_t result = 0; - - // is it a misaligned cross-page read? - if (((guest_va & 0xFFF) + size) > 0x1000) { -#ifdef MMU_PROFILING - unaligned_crossp_r++; -#endif - // Break such a memory access into multiple, bytewise accesses. - // Because such accesses suffer a performance penalty, they will be - // presumably very rare so don't waste time optimizing the code below. - for (int i = 0; i < size; guest_va++, i++) { - result = (result << 8) | mmu_read_vmem(guest_va); - } - } else { -#ifdef MMU_PROFILING - unaligned_reads++; -#endif - switch(size) { - case 2: - return READ_WORD_BE_U(host_va); - case 4: - return READ_DWORD_BE_U(host_va); - case 8: // FIXME: should we raise alignment exception here? - return READ_QWORD_BE_U(host_va); - } - } - return result; -} - -static void write_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t value, - uint32_t size) -{ - // is it a misaligned cross-page write? - if (((guest_va & 0xFFF) + size) > 0x1000) { -#ifdef MMU_PROFILING - unaligned_crossp_w++; -#endif - // Break such a memory access into multiple, bytewise accesses. 
- // Because such accesses suffer a performance penalty, they will be - // presumably very rare so don't waste time optimizing the code below. - - uint32_t shift = (size - 1) * 8; - - for (int i = 0; i < size; shift -= 8, guest_va++, i++) { - mmu_write_vmem(guest_va, (value >> shift) & 0xFF); - } - } else { -#ifdef MMU_PROFILING - unaligned_writes++; -#endif - switch(size) { - case 2: - WRITE_WORD_BE_U(host_va, value); - break; - case 4: - WRITE_DWORD_BE_U(host_va, value); - break; - case 8: // FIXME: should we raise alignment exception here? - WRITE_QWORD_BE_U(host_va, value); - break; - } - } -} - void mem_write_byte(uint32_t addr, uint8_t value) { mmu_write_vmem(addr, value); @@ -1596,52 +1660,6 @@ uint64_t mem_grab_qword(uint32_t addr) { return read_phys_mem(&last_read_area, addr); } -uint8_t *mmu_translate_imem(uint32_t vaddr) -{ - TLBEntry *tlb1_entry, *tlb2_entry; - uint8_t *host_va; - -#ifdef MMU_PROFILING - exec_reads_total++; -#endif - - const uint32_t tag = vaddr & ~0xFFFUL; - - // look up guest virtual address in the primary ITLB - tlb1_entry = &pCurITLB1[(vaddr >> PAGE_SIZE_BITS) & tlb_size_mask]; - if (tlb1_entry->tag == tag) { // primary ITLB hit -> fast path -#ifdef TLB_PROFILING - num_primary_itlb_hits++; -#endif - host_va = (uint8_t *)(tlb1_entry->host_va_offset + vaddr); - } else { - // primary ITLB miss -> look up address in the secondary ITLB - tlb2_entry = lookup_secondary_tlb(vaddr, tag); - if (tlb2_entry == nullptr) { -#ifdef TLB_PROFILING - num_itlb_refills++; -#endif - // secondary ITLB miss -> - // perform full address translation and refill the secondary ITLB - tlb2_entry = itlb2_refill(vaddr); - } -#ifdef TLB_PROFILING - else { - num_secondary_itlb_hits++; - } -#endif - // refill the primary ITLB - tlb1_entry->tag = tag; - tlb1_entry->flags = tlb2_entry->flags; - tlb1_entry->host_va_offset = tlb2_entry->host_va_offset; - host_va = (uint8_t *)(tlb1_entry->host_va_offset + vaddr); - } - - ppc_set_cur_instruction(host_va); - - return host_va; -} - uint8_t* quickinstruction_translate(uint32_t addr) { uint8_t* real_addr; @@ -1672,6 +1690,7 @@ uint8_t* quickinstruction_translate(uint32_t addr) { return real_addr; } +#endif uint64_t mem_read_dbg(uint32_t virt_addr, uint32_t size) { uint32_t save_dsisr, save_dar; @@ -1685,19 +1704,19 @@ uint64_t mem_read_dbg(uint32_t virt_addr, uint32_t size) { try { switch (size) { case 1: - ret_val = mem_grab_byte(virt_addr); + ret_val = mmu_read_vmem(virt_addr); break; case 2: - ret_val = mem_grab_word(virt_addr); + ret_val = mmu_read_vmem(virt_addr); break; case 4: - ret_val = mem_grab_dword(virt_addr); + ret_val = mmu_read_vmem(virt_addr); break; case 8: - ret_val = mem_grab_qword(virt_addr); + ret_val = mmu_read_vmem(virt_addr); break; default: - ret_val = mem_grab_byte(virt_addr); + ret_val = mmu_read_vmem(virt_addr); } } catch (std::invalid_argument& exc) { /* restore MMU-related CPU state */ diff --git a/cpu/ppc/ppcmmu.h b/cpu/ppc/ppcmmu.h index b9dc5fa..649e927 100644 --- a/cpu/ppc/ppcmmu.h +++ b/cpu/ppc/ppcmmu.h @@ -101,6 +101,16 @@ extern void mmu_pat_ctx_changed(); extern void tlb_flush_entry(uint32_t ea); extern void ppc_set_cur_instruction(const uint8_t* ptr); +extern uint64_t mem_read_dbg(uint32_t virt_addr, uint32_t size); +uint8_t *mmu_translate_imem(uint32_t vaddr); + +template +extern T mmu_read_vmem(uint32_t guest_va); +template +extern void mmu_write_vmem(uint32_t guest_va, T value); + +//====================== Deprecated calls ========================= +#if 0 extern void mem_write_byte(uint32_t addr, uint8_t 
value);
 extern void mem_write_word(uint32_t addr, uint16_t value);
 extern void mem_write_dword(uint32_t addr, uint32_t value);
@@ -109,14 +119,7 @@ extern uint8_t mem_grab_byte(uint32_t addr);
 extern uint16_t mem_grab_word(uint32_t addr);
 extern uint32_t mem_grab_dword(uint32_t addr);
 extern uint64_t mem_grab_qword(uint32_t addr);
-extern uint64_t mem_read_dbg(uint32_t virt_addr, uint32_t size);
 extern uint8_t* quickinstruction_translate(uint32_t address_grab);
-
-uint8_t *mmu_translate_imem(uint32_t vaddr);
-
-template <class T>
-extern T mmu_read_vmem(uint32_t guest_va);
-template <class T>
-extern void mmu_write_vmem(uint32_t guest_va, T value);
+#endif
 
 #endif // PPCMMU_H
diff --git a/debugger/debugger.cpp b/debugger/debugger.cpp
index 983d824..3af759b 100644
--- a/debugger/debugger.cpp
+++ b/debugger/debugger.cpp
@@ -151,7 +151,7 @@ void exec_single_68k()
 
     /* calculate address of the current opcode table entry as follows:
        get_word(68k_PC) * entry_size + table_base */
-    cur_instr_tab_entry = mem_grab_word(cur_68k_pc) * 8 + emu_table_virt;
+    cur_instr_tab_entry = mmu_read_vmem<uint16_t>(cur_68k_pc) * 8 + emu_table_virt;
 
     /* grab the PPC PC too */
     reg = "PC";

From 40d0aa70da6e95aba8a6871d50c4c988cb5dfc5c Mon Sep 17 00:00:00 2001
From: Maxim Poliakovski
Date: Tue, 3 Aug 2021 16:03:03 +0200
Subject: [PATCH 14/14] zdocs: improve MMU emulation documentation.

---
 zdocs/cpu/powerpc/mmu.md    |  23 ++++++++
 zdocs/cpu/powerpc/mmuemu.md | 107 ++++++++++++++++++++++++++++++++++++
 2 files changed, 130 insertions(+)
 create mode 100644 zdocs/cpu/powerpc/mmu.md
 create mode 100644 zdocs/cpu/powerpc/mmuemu.md

diff --git a/zdocs/cpu/powerpc/mmu.md b/zdocs/cpu/powerpc/mmu.md
new file mode 100644
index 0000000..399ac36
--- /dev/null
+++ b/zdocs/cpu/powerpc/mmu.md
@@ -0,0 +1,23 @@
+## Disabling BAT translation
+
+BAT translation can be disabled by invalidating the BAT registers. This is somewhat CPU specific:
+the MPC601 implements its own BAT register format that differs from the PowerPC specification.
+
+The MPC601-specific lower BAT registers have a "V" (valid) bit. If it is cleared, the corresponding
+BAT pair is invalid and won't be used for address translation. To invalidate BATs on the MPC601,
+it's enough to write zero to the lower BAT registers. That's exactly what the PowerMac 6100 ROM does:
+```
+li r0, 0
+mtspr ibat0l, r0
+mtspr ibat1l, r0
+mtspr ibat2l, r0
+```
+
+PowerPC CPUs starting with the 603 use the BAT register format described in the PowerPC specification.
+The upper BAT registers contain two bits: Vs (supervisor state valid bit) and Vp (problem/user state valid bit).
+The PowerPC Architecture, First Edition (1993) gives the following validity formula:
+
+```BAT_entry_valid = (Vs & ~MSR_PR) | (Vp & MSR_PR)```
+
+If neither Vs nor Vp is set, the corresponding BAT pair isn't valid and doesn't participate in address translation.
+To invalidate BATs on non-601 CPUs, it's sufficient to set the upper BAT registers to 0x00000000.
diff --git a/zdocs/cpu/powerpc/mmuemu.md b/zdocs/cpu/powerpc/mmuemu.md
new file mode 100644
index 0000000..4e6e988
--- /dev/null
+++ b/zdocs/cpu/powerpc/mmuemu.md
@@ -0,0 +1,107 @@
+# PowerPC Memory Management Unit Emulation
+
+Emulation of a [memory management unit](https://en.wikipedia.org/wiki/Memory_management_unit)
+(MMU) in a full system emulator is considered a hard task. The biggest challenge is to do it fast.
+
+In this article, I'm going to present a solution for a reasonably fast emulation
+of the PowerPC MMU.
+
+This article is based on ideas presented in the paper "Optimizing Memory Emulation
+in Full System Emulators" by Xin Tong and Motohiro Kawahito (IBM Research Laboratory).
+
+## PowerPC MMU operation
+
+The operation of the PowerPC MMU can be described using the following pseudocode:
+
+```
+VA is the virtual address of some memory to be accessed
+PA is the physical address of that memory as translated by the MMU
+AT is the access type we want to perform
+
+if address translation is enabled:
+    PA = block_address_translation(VA)
+    if not PA:
+        PA = page_address_translation(VA)
+else:
+    PA = VA
+
+if access_permitted(PA, AT):
+    perform_phys_access(PA)
+else:
+    generate_mmu_exception(VA, PA, AT)
+```
+
+A hardware MMU usually performs several of these operations in parallel, so the final
+address translation and memory access take only a few CPU cycles.
+The slowest part is the page address translation because it requires accessing
+system memory, which is usually orders of magnitude slower than the CPU.
+To mitigate this, a PowerPC CPU includes some very fast on-chip memory used for
+building various [caches](https://en.wikipedia.org/wiki/CPU_cache), such as the
+instruction/data caches and the
+[translation lookaside buffers (TLB)](https://en.wikipedia.org/wiki/Translation_lookaside_buffer).
+
+## PowerPC MMU emulation
+
+### Issues
+
+An attempt to mimic the HW MMU operation in software will likely perform poorly.
+That's because modern hardware can perform several tasks in parallel, whereas
+software has to do almost everything serially. Thus, accessing some memory
+with address translation enabled can take up to 300 host instructions! Considering
+that roughly every 10th instruction is a load and every 15th instruction is a store,
+it will be nearly impossible to achieve a performance comparable to that of the
+original system.
+
+Off-loading some operations to the MMU of the host CPU to speed up emulation
+isn't feasible because Apple's computers often have hardware that is accessed like
+ordinary memory. Thus, an emulator needs to distinguish accesses to real memory
+(ROM or RAM) from accesses to memory-mapped peripheral devices. The only way to
+do that is to maintain special software descriptors for each virtual memory region
+and consult them on each memory access.
+
+### Solution
+
+My solution for a reasonably fast MMU emulation employs a software TLB. It's
+used for all memory accesses, even when address translation is disabled.
+
+The first stage of the SoftTLB uses a
+[direct-mapped cache](https://en.wikipedia.org/wiki/Cache_placement_policies#Direct-mapped_cache)
+called the **primary TLB** in my implementation. This kind of cache was chosen because it
+is the fastest one - one lookup requires up to 15 host instructions. Unfortunately,
+it is not enough to cover all memory accesses due to a high number of
+collisions, i.e. when several distinct memory pages are mapped to the same cache
+location.
+
+That's why the so-called **secondary TLB** was introduced. The secondary TLB is a
+4-way set-associative cache. A lookup in this cache is slower than a lookup in the
+primary TLB, but it's still much faster than performing a full page table walk
+requiring hundreds of host instructions.
+
+All translations for memory-mapped I/O go into the secondary TLB because accesses
+to such devices tend to be slower than real memory accesses on the real HW anyway.
+Moreover, they usually bypass CPU caches (cache-inhibited accesses). But there
+are exceptions to this rule, for example, video memory.
+
+When no translation for a virtual address is found in either cache, a full address
+translation including the full page table walk is performed. This path is the
+slowest one. Fortunately, the probability that this path will be taken appears to be
+very low.
+
+The complete algorithm looks like this:
+```
+VA is the virtual address of some memory to be accessed
+PA is the physical address of that memory as translated by the MMU
+AT is the access type we want to perform
+
+PA = lookup_primary_tlb(VA)
+if VA in the primary TLB:
+    perform_memory_access(PA, AT)
+else:
+    PA = lookup_secondary_tlb(VA)
+    if VA not in the secondary TLB:
+        PA = perform_full_address_translation(VA)
+        refill_secondary_tlb(VA, PA)
+    if is_real_memory(PA):
+        refill_primary_tlb(VA, PA)
+    perform_memory_access(PA, AT)
+```
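+
+To make the fast path concrete, below is a condensed C++ sketch of the data-read
+path. It is illustrative only: it follows the `mmu_read_vmem()` implementation from
+this patch series and reuses the helpers it introduces (`pCurDTLB1`,
+`lookup_secondary_tlb`, `dtlb2_refill`, `READ_DWORD_BE_A`), but it leaves out
+protection checks, MMIO dispatch and unaligned accesses.
+
+```
+// Condensed sketch of a 32-bit guest read (not the actual implementation).
+uint32_t softtlb_read32(uint32_t guest_va)
+{
+    const uint32_t tag = guest_va & ~0xFFFUL;
+
+    // 1) direct-mapped primary TLB: one index, one tag compare
+    TLBEntry* tlb1_entry = &pCurDTLB1[(guest_va >> PAGE_SIZE_BITS) & tlb_size_mask];
+    if (tlb1_entry->tag != tag) {
+        // 2) 4-way set-associative secondary TLB
+        TLBEntry* tlb2_entry = lookup_secondary_tlb<TLBType::DTLB>(guest_va, tag);
+        if (tlb2_entry == nullptr) {
+            // 3) slowest path: full BAT/page table translation, then refill
+            tlb2_entry = dtlb2_refill(guest_va, 0 /* is_write */);
+        }
+        // promote the translation into the primary TLB (real memory pages only)
+        tlb1_entry->tag            = tag;
+        tlb1_entry->flags          = tlb2_entry->flags;
+        tlb1_entry->host_va_offset = tlb2_entry->host_va_offset;
+    }
+
+    // host_va_offset turns the guest virtual address into a host pointer
+    uint8_t* host_va = (uint8_t*)(tlb1_entry->host_va_offset + guest_va);
+    return READ_DWORD_BE_A(host_va); // big-endian read helper
+}
+```
+
+Storing a host/guest address offset in each TLB entry, rather than a physical
+address, is what makes a primary hit so cheap: a single addition converts the
+guest virtual address into a host pointer that can be dereferenced directly.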