Make emulated memory loads use SoftTLB.

This commit is contained in:
Maxim Poliakovski 2021-06-20 22:33:03 +02:00
parent 592d32017e
commit 4da95a66d7
4 changed files with 179 additions and 27 deletions

View File

@ -776,7 +776,8 @@ void dppc_interpreter::ppc_lfd() {
ppc_grab_regsfpdia();
ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF));
ppc_effective_address += (reg_a) ? val_reg_a : 0;
ppc_result64_d = mem_grab_qword(ppc_effective_address);
//ppc_result64_d = mem_grab_qword(ppc_effective_address);
ppc_result64_d = mmu_read_vmem<uint64_t>(ppc_effective_address);
ppc_store_dfpresult_int(reg_d);
}
@ -785,7 +786,8 @@ void dppc_interpreter::ppc_lfdu() {
if (reg_a != 0) {
ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF));
ppc_effective_address += val_reg_a;
ppc_result64_d = mem_grab_qword(ppc_effective_address);
//ppc_result64_d = mem_grab_qword(ppc_effective_address);
ppc_result64_d = mmu_read_vmem<uint64_t>(ppc_effective_address);
ppc_store_dfpresult_int(reg_d);
ppc_state.gpr[reg_a] = ppc_effective_address;
} else {
@ -796,7 +798,8 @@ void dppc_interpreter::ppc_lfdu() {
// Indexed 64-bit load into a floating-point register: reads a 64-bit value
// from the effective address and hands it to ppc_store_dfpresult_int(reg_d).
// NOTE(review): this listing is a rendered diff, so both the previous
// mem_grab_qword() load and its mmu_read_vmem<uint64_t>() replacement are
// shown; presumably only the latter remains in the committed file - confirm.
void dppc_interpreter::ppc_lfdx() {
ppc_grab_regsfpdiab();
// EA = rB when the rA field is 0, otherwise rA + rB
ppc_effective_address = (reg_a) ? val_reg_a + val_reg_b : val_reg_b;
ppc_result64_d = mem_grab_qword(ppc_effective_address);
//ppc_result64_d = mem_grab_qword(ppc_effective_address);
// SoftTLB-based read of the 64-bit operand
ppc_result64_d = mmu_read_vmem<uint64_t>(ppc_effective_address);
ppc_store_dfpresult_int(reg_d);
}
@ -804,7 +807,8 @@ void dppc_interpreter::ppc_lfdux() {
ppc_grab_regsfpdiab();
if (reg_a) {
ppc_effective_address = val_reg_a + val_reg_b;
ppc_result64_d = mem_grab_qword(ppc_effective_address);
//ppc_result64_d = mem_grab_qword(ppc_effective_address);
ppc_result64_d = mmu_read_vmem<uint64_t>(ppc_effective_address);
ppc_store_dfpresult_int(reg_d);
ppc_state.gpr[reg_a] = ppc_effective_address;
} else {

View File

@ -1019,6 +1019,129 @@ static uint32_t mem_grab_unaligned(uint32_t addr, uint32_t size) {
return ret;
}
/**
 * Look up a guest virtual address in the set-associative secondary TLB.
 *
 * The set is selected by the page number of guest_va; each of its four ways
 * (the code below touches ways 0..3, i.e. it assumes TLB2_WAYS == 4) is
 * compared against tag.
 *
 * On a hit the LRU bits of the *whole set* are refreshed: the hit way gets
 * 0x3, the other way of the same pair gets 0x2 and bit 1 is cleared in both
 * ways of the other pair.
 *
 * All LRU updates are indexed from the base of the set. Indexing them from
 * the hit entry instead would shift every update by 1..3 ways, corrupting
 * the LRU state and writing past the end of the set.
 *
 * @param guest_va guest virtual address being accessed
 * @param tag      tag to match against the entries of the selected set
 * @return pointer to the matching entry or nullptr if no way matches
 */
static inline TLBEntry * lookup_secondary_tlb(uint32_t guest_va, uint32_t tag) {
    // first way of the set selected by the page number
    TLBEntry * const set =
        &pCurTLB2[((guest_va >> PAGE_SIZE_BITS) & tlb_size_mask) * TLB2_WAYS];

    if (set[0].tag == tag) {
        // update LRU bits
        set[0].lru_bits  = 0x3;
        set[1].lru_bits  = 0x2;
        set[2].lru_bits &= 0x1;
        set[3].lru_bits &= 0x1;
        return &set[0];
    }

    if (set[1].tag == tag) {
        // update LRU bits
        set[0].lru_bits  = 0x2;
        set[1].lru_bits  = 0x3;
        set[2].lru_bits &= 0x1;
        set[3].lru_bits &= 0x1;
        return &set[1];
    }

    if (set[2].tag == tag) {
        // update LRU bits
        set[0].lru_bits &= 0x1;
        set[1].lru_bits &= 0x1;
        set[2].lru_bits  = 0x3;
        set[3].lru_bits  = 0x2;
        return &set[2];
    }

    if (set[3].tag == tag) {
        // update LRU bits
        set[0].lru_bits &= 0x1;
        set[1].lru_bits &= 0x1;
        set[2].lru_bits  = 0x2;
        set[3].lru_bits  = 0x3;
        return &set[3];
    }

    // no way of this set holds the requested tag
    return nullptr;
}
// Returns uint64_t so that unaligned 8-byte reads are not truncated.
static uint64_t read_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t size);

/**
 * Read a value of type T from the guest virtual address guest_va.
 *
 * Lookup order: primary TLB -> secondary TLB -> tlb2_refill(). Entries whose
 * flags have bit 0 set are read through a host pointer (and are copied into
 * the primary TLB); all other entries are forwarded to the read() method of
 * the device object attached to the entry.
 *
 * Reads through a host pointer use the READ_*_BE_A macros when guest_va is
 * naturally aligned for T and read_unaligned() otherwise.
 *
 * @tparam T unsigned integer type of 1, 2, 4 or 8 bytes
 * @param guest_va guest virtual address to read from
 * @return the value read, converted to T
 */
template <class T>
inline T mmu_read_vmem(uint32_t guest_va) {
    static_assert(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8,
                  "mmu_read_vmem supports 1, 2, 4 and 8 byte accesses only");

    TLBEntry *tlb1_entry, *tlb2_entry;
    uint8_t *host_va;

    const uint32_t tag = guest_va & ~0xFFFUL;

    // look up guest virtual address in the primary TLB
    tlb1_entry = &pCurTLB1[(guest_va >> PAGE_SIZE_BITS) & tlb_size_mask];
    if (tlb1_entry->tag == tag) { // primary TLB hit -> fast path
        host_va = (uint8_t *)(tlb1_entry->host_va_offset + guest_va);
    } else {
        // primary TLB miss -> look up address in the secondary TLB
        tlb2_entry = lookup_secondary_tlb(guest_va, tag);
        if (tlb2_entry == nullptr) {
            // secondary TLB miss ->
            // perform full address translation and refill the secondary TLB
            tlb2_entry = tlb2_refill(guest_va, 0);
        }

        if (tlb2_entry->flags & 1) { // is it a real memory region?
            // refill the primary TLB
            tlb1_entry->tag = tag;
            tlb1_entry->flags = 1;
            tlb1_entry->host_va_offset = tlb2_entry->host_va_offset;
            host_va = (uint8_t *)(tlb1_entry->host_va_offset + guest_va);
        } else { // otherwise, it's an access to a memory-mapped device
            // NOTE(review): the offset is computed from the guest *virtual*
            // address; confirm reg_desc->start is in the same address space.
            return (
                tlb2_entry->reg_desc->devobj->read(tlb2_entry->reg_desc->start,
                    guest_va - tlb2_entry->reg_desc->start, sizeof(T))
            );
        }
    }

    // handle unaligned memory accesses
    if (sizeof(T) > 1 && (guest_va & (sizeof(T) - 1))) {
        return static_cast<T>(read_unaligned(guest_va, host_va, sizeof(T)));
    }

    // handle aligned memory accesses
    switch(sizeof(T)) {
    case 1:
        return *host_va;
    case 2:
        return READ_WORD_BE_A(host_va);
    case 4:
        return READ_DWORD_BE_A(host_va);
    default: // sizeof(T) == 8, guaranteed by the static_assert above
        return READ_QWORD_BE_A(host_va);
    }
}

// explicitly instantiate all required mmu_read_vmem variants
// to avoid linking errors
template uint8_t  mmu_read_vmem<uint8_t>(uint32_t guest_va);
template uint16_t mmu_read_vmem<uint16_t>(uint32_t guest_va);
template uint32_t mmu_read_vmem<uint32_t>(uint32_t guest_va);
template uint64_t mmu_read_vmem<uint64_t>(uint32_t guest_va);

/**
 * Perform an unaligned big-endian read of size bytes.
 *
 * @param guest_va guest virtual address of the first byte
 * @param host_va  host pointer corresponding to guest_va; only used when
 *                 the access does not cross a 4 KB page boundary
 * @param size     access size in bytes (2, 4 or 8)
 * @return the value read; 0 for an unsupported size that stays in one page
 */
static uint64_t read_unaligned(uint32_t guest_va, uint8_t *host_va, uint32_t size)
{
    // 64-bit accumulator: an 8-byte read must keep all of its bytes
    uint64_t result = 0;

    // is it a misaligned cross-page read?
    if (((guest_va & 0xFFF) + size) > 0x1000) {
        // Break such a memory access into multiple, bytewise accesses.
        // Each byte is translated separately because the next page is not
        // necessarily contiguous in host memory.
        // Because such accesses suffer a performance penalty, they will be
        // presumably very rare so don't waste time optimizing the code below.
        for (uint32_t i = 0; i < size; guest_va++, i++) {
            result = (result << 8) | mmu_read_vmem<uint8_t>(guest_va);
        }
    } else {
        switch(size) {
        case 2:
            return READ_WORD_BE_U(host_va);
        case 4:
            return READ_DWORD_BE_U(host_va);
        case 8: // FIXME: should we raise alignment exception here?
            return READ_QWORD_BE_U(host_va);
        }
    }

    return result;
}
/** Grab a value from memory into a register */
uint8_t mem_grab_byte(uint32_t addr) {
tlb_translate_addr(addr);

View File

@ -74,4 +74,7 @@ extern uint64_t mem_grab_qword(uint32_t addr);
extern uint64_t mem_read_dbg(uint32_t virt_addr, uint32_t size);
extern uint8_t* quickinstruction_translate(uint32_t address_grab);
// Reads a value of type T from the guest virtual address guest_va.
// Declaration only: the interpreter code in this change calls it with
// uint8_t, uint16_t, uint32_t and uint64_t, which are the variants that
// are explicitly instantiated next to the definition.
template <class T>
extern inline T mmu_read_vmem(uint32_t guest_va);
#endif // PPCMEMORY_H

View File

@ -1580,7 +1580,8 @@ void dppc_interpreter::ppc_lbz() {
ppc_grab_regsda();
ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF));
ppc_effective_address += (reg_a > 0) ? ppc_result_a : 0;
ppc_result_d = mem_grab_byte(ppc_effective_address);
//ppc_result_d = mem_grab_byte(ppc_effective_address);
ppc_result_d = mmu_read_vmem<uint8_t>(ppc_effective_address);
ppc_store_result_regd();
}
@ -1592,7 +1593,8 @@ void dppc_interpreter::ppc_lbzu() {
ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF));
if ((reg_a != reg_d) || reg_a != 0) {
ppc_effective_address += ppc_result_a;
ppc_result_d = mem_grab_byte(ppc_effective_address);
//ppc_result_d = mem_grab_byte(ppc_effective_address);
ppc_result_d = mmu_read_vmem<uint8_t>(ppc_effective_address);
ppc_result_a = ppc_effective_address;
ppc_store_result_regd();
ppc_store_result_rega();
@ -1607,7 +1609,8 @@ void dppc_interpreter::ppc_lbzx() {
#endif
ppc_grab_regsdab();
ppc_effective_address = reg_a ? (ppc_result_a + ppc_result_b) : ppc_result_b;
ppc_result_d = mem_grab_byte(ppc_effective_address);
//ppc_result_d = mem_grab_byte(ppc_effective_address);
ppc_result_d = mmu_read_vmem<uint8_t>(ppc_effective_address);
ppc_store_result_regd();
}
@ -1618,7 +1621,8 @@ void dppc_interpreter::ppc_lbzux() {
ppc_grab_regsdab();
if ((reg_a != reg_d) || reg_a != 0) {
ppc_effective_address = ppc_result_a + ppc_result_b;
ppc_result_d = mem_grab_byte(ppc_effective_address);
//ppc_result_d = mem_grab_byte(ppc_effective_address);
ppc_result_d = mmu_read_vmem<uint8_t>(ppc_effective_address);
ppc_result_a = ppc_effective_address;
ppc_store_result_regd();
ppc_store_result_rega();
@ -1635,7 +1639,8 @@ void dppc_interpreter::ppc_lhz() {
ppc_grab_regsda();
ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF));
ppc_effective_address += reg_a ? ppc_result_a : 0;
ppc_result_d = mem_grab_word(ppc_effective_address);
//ppc_result_d = mem_grab_word(ppc_effective_address);
ppc_result_d = mmu_read_vmem<uint16_t>(ppc_effective_address);
ppc_store_result_regd();
}
@ -1647,7 +1652,8 @@ void dppc_interpreter::ppc_lhzu() {
if ((reg_a != reg_d) || reg_a != 0) {
ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF));
ppc_effective_address += ppc_result_a;
ppc_result_d = mem_grab_word(ppc_effective_address);
//ppc_result_d = mem_grab_word(ppc_effective_address);
ppc_result_d = mmu_read_vmem<uint16_t>(ppc_effective_address);
ppc_result_a = ppc_effective_address;
ppc_store_result_regd();
ppc_store_result_rega();
@ -1662,7 +1668,8 @@ void dppc_interpreter::ppc_lhzx() {
#endif
ppc_grab_regsdab();
ppc_effective_address = reg_a ? (ppc_result_a + ppc_result_b) : ppc_result_b;
ppc_result_d = mem_grab_word(ppc_effective_address);
//ppc_result_d = mem_grab_word(ppc_effective_address);
ppc_result_d = mmu_read_vmem<uint16_t>(ppc_effective_address);
ppc_store_result_regd();
}
@ -1673,8 +1680,9 @@ void dppc_interpreter::ppc_lhzux() {
ppc_grab_regsdab();
if ((reg_a != reg_d) || reg_a != 0) {
ppc_effective_address = ppc_result_a + ppc_result_b;
ppc_result_d = mem_grab_word(ppc_effective_address);
ppc_result_a = ppc_effective_address;
//ppc_result_d = mem_grab_word(ppc_effective_address);
ppc_result_d = mmu_read_vmem<uint16_t>(ppc_effective_address);
ppc_result_a = ppc_effective_address;
ppc_store_result_regd();
ppc_store_result_rega();
} else {
@ -1689,7 +1697,8 @@ void dppc_interpreter::ppc_lha() {
ppc_grab_regsda();
ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF));
ppc_effective_address += (reg_a > 0) ? ppc_result_a : 0;
uint16_t val = mem_grab_word(ppc_effective_address);
//uint16_t val = mem_grab_word(ppc_effective_address);
uint16_t val = mmu_read_vmem<uint16_t>(ppc_effective_address);
if (val & 0x8000) {
ppc_result_d = 0xFFFF0000UL | (uint32_t)val;
} else {
@ -1706,7 +1715,8 @@ void dppc_interpreter::ppc_lhau() {
if ((reg_a != reg_d) || reg_a != 0) {
ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF));
ppc_effective_address += ppc_result_a;
uint16_t val = mem_grab_word(ppc_effective_address);
//uint16_t val = mem_grab_word(ppc_effective_address);
uint16_t val = mmu_read_vmem<uint16_t>(ppc_effective_address);
if (val & 0x8000) {
ppc_result_d = 0xFFFF0000UL | (uint32_t)val;
} else {
@ -1726,7 +1736,8 @@ void dppc_interpreter::ppc_lhaux() {
#endif
ppc_grab_regsdab();
ppc_effective_address = reg_a ? (ppc_result_a + ppc_result_b) : ppc_result_b;
uint16_t val = mem_grab_word(ppc_effective_address);
//uint16_t val = mem_grab_word(ppc_effective_address);
uint16_t val = mmu_read_vmem<uint16_t>(ppc_effective_address);
if (val & 0x8000) {
ppc_result_d = 0xFFFF0000UL | (uint32_t)val;
} else {
@ -1743,7 +1754,8 @@ void dppc_interpreter::ppc_lhax() {
#endif
ppc_grab_regsdab();
ppc_effective_address = reg_a ? (ppc_result_a + ppc_result_b) : ppc_result_b;
uint16_t val = mem_grab_word(ppc_effective_address);
//uint16_t val = mem_grab_word(ppc_effective_address);
uint16_t val = mmu_read_vmem<uint16_t>(ppc_effective_address);
if (val & 0x8000) {
ppc_result_d = 0xFFFF0000UL | (uint32_t)val;
} else {
@ -1758,7 +1770,8 @@ void dppc_interpreter::ppc_lhbrx() {
#endif
ppc_grab_regsdab();
ppc_effective_address = reg_a ? (ppc_result_a + ppc_result_b) : ppc_result_b;
ppc_result_d = (uint32_t)(BYTESWAP_16(mem_grab_word(ppc_effective_address)));
//ppc_result_d = (uint32_t)(BYTESWAP_16(mem_grab_word(ppc_effective_address)));
ppc_result_d = (uint32_t)(BYTESWAP_16(mmu_read_vmem<uint16_t>(ppc_effective_address)));
ppc_store_result_regd();
}
@ -1769,7 +1782,8 @@ void dppc_interpreter::ppc_lwz() {
ppc_grab_regsda();
ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF));
ppc_effective_address += (reg_a > 0) ? ppc_result_a : 0;
ppc_result_d = mem_grab_dword(ppc_effective_address);
//ppc_result_d = mem_grab_dword(ppc_effective_address);
ppc_result_d = mmu_read_vmem<uint32_t>(ppc_effective_address);
ppc_store_result_regd();
}
@ -1779,7 +1793,8 @@ void dppc_interpreter::ppc_lwbrx() {
#endif
ppc_grab_regsdab();
ppc_effective_address = reg_a ? (ppc_result_a + ppc_result_b) : ppc_result_b;
ppc_result_d = BYTESWAP_32(mem_grab_dword(ppc_effective_address));
//ppc_result_d = BYTESWAP_32(mem_grab_dword(ppc_effective_address));
ppc_result_d = BYTESWAP_32(mmu_read_vmem<uint32_t>(ppc_effective_address));
ppc_store_result_regd();
}
@ -1791,7 +1806,8 @@ void dppc_interpreter::ppc_lwzu() {
ppc_effective_address = (int32_t)((int16_t)(ppc_cur_instruction & 0xFFFF));
if ((reg_a != reg_d) || reg_a != 0) {
ppc_effective_address += ppc_result_a;
ppc_result_d = mem_grab_dword(ppc_effective_address);
//ppc_result_d = mem_grab_dword(ppc_effective_address);
ppc_result_d = mmu_read_vmem<uint32_t>(ppc_effective_address);
ppc_store_result_regd();
ppc_result_a = ppc_effective_address;
ppc_store_result_rega();
@ -1806,7 +1822,8 @@ void dppc_interpreter::ppc_lwzx() {
#endif
ppc_grab_regsdab();
ppc_effective_address = reg_a ? (ppc_result_a + ppc_result_b) : ppc_result_b;
ppc_result_d = mem_grab_dword(ppc_effective_address);
//ppc_result_d = mem_grab_dword(ppc_effective_address);
ppc_result_d = mmu_read_vmem<uint32_t>(ppc_effective_address);
ppc_store_result_regd();
}
@ -1820,7 +1837,8 @@ void dppc_interpreter::ppc_lwzux() {
} else {
ppc_exception_handler(Except_Type::EXC_PROGRAM, Exc_Cause::ILLEGAL_OP);
}
ppc_result_d = mem_grab_dword(ppc_effective_address);
//ppc_result_d = mem_grab_dword(ppc_effective_address);
ppc_result_d = mmu_read_vmem<uint32_t>(ppc_effective_address);
ppc_result_a = ppc_effective_address;
ppc_store_result_regd();
ppc_store_result_rega();
@ -1834,7 +1852,8 @@ void dppc_interpreter::ppc_lwarx() {
ppc_grab_regsdab();
ppc_effective_address = (reg_a == 0) ? ppc_result_b : (ppc_result_a + ppc_result_b);
ppc_state.reserve = true;
ppc_result_d = mem_grab_dword(ppc_effective_address);
//ppc_result_d = mem_grab_dword(ppc_effective_address);
ppc_result_d = mmu_read_vmem<uint32_t>(ppc_effective_address);
ppc_store_result_regd();
}
@ -1847,7 +1866,8 @@ void dppc_interpreter::ppc_lmw() {
ppc_effective_address += (reg_a > 0) ? ppc_result_a : 0;
// How many words to load in memory - using a do-while for this
do {
ppc_state.gpr[reg_d] = mem_grab_dword(ppc_effective_address);
//ppc_state.gpr[reg_d] = mem_grab_dword(ppc_effective_address);
ppc_state.gpr[reg_d] = mmu_read_vmem<uint32_t>(ppc_effective_address);
ppc_effective_address += 4;
reg_d++;
} while (reg_d < 32);
@ -1884,7 +1904,8 @@ void dppc_interpreter::ppc_lswi() {
grab_inb = 0;
break;
default:
ppc_state.gpr[reg_d] = mem_grab_word(ppc_effective_address);
//ppc_state.gpr[reg_d] = mem_grab_word(ppc_effective_address);
ppc_state.gpr[reg_d] = mmu_read_vmem<uint16_t>(ppc_effective_address);
reg_d++;
ppc_effective_address += 4;
grab_inb -= 4;
@ -1931,7 +1952,8 @@ void dppc_interpreter::ppc_lswx() {
grab_inb = 0;
break;
default:
ppc_state.gpr[reg_d] = mem_grab_word(ppc_effective_address);
//ppc_state.gpr[reg_d] = mem_grab_word(ppc_effective_address);
ppc_state.gpr[reg_d] = mmu_read_vmem<uint16_t>(ppc_effective_address);
reg_d++;
ppc_effective_address += 4;
grab_inb -= 4;