From 9b49603c72ec22f125185472458ff731bcf969f3 Mon Sep 17 00:00:00 2001 From: Mihai Parparita Date: Sun, 28 Jul 2024 13:24:39 -0700 Subject: [PATCH 1/2] memctrl: keep address_map sorted by address We do a linear scan in find_range (which is called on all TLB misses) to find the entries. The largest and most frequently hit entry is the system memory (which starts at 0). By ensuring that it's the first entry in the list, we end up only doing one iteration through the loop. --- devices/memctrl/memctrlbase.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/devices/memctrl/memctrlbase.cpp b/devices/memctrl/memctrlbase.cpp index 677dc8d..1951f62 100644 --- a/devices/memctrl/memctrlbase.cpp +++ b/devices/memctrl/memctrlbase.cpp @@ -168,7 +168,18 @@ bool MemCtrlBase::add_mem_region(uint32_t start_addr, uint32_t size, entry->devobj = nullptr; entry->mem_ptr = reg_content; - this->address_map.push_back(entry); + // Keep address_map sorted, that way the RAM region (which starts at 0 and + // is most often requested) will be found by find_range on the first + // iteration. + this->address_map.insert( + std::upper_bound( + this->address_map.begin(), + this->address_map.end(), + entry, + [](const auto& lhs, const auto& rhs) { + return lhs->start < rhs->start; + }), + entry); LOG_F(INFO, "Added mem region 0x%X..0x%X (%s%s%s%s) -> 0x%X", start_addr, end, entry->type & RT_ROM ? "ROM," : "", From 31d7b056afd8f0814ea48e95a36b0abbfd6f2b0e Mon Sep 17 00:00:00 2001 From: Mihai Parparita Date: Sun, 28 Jul 2024 13:22:43 -0700 Subject: [PATCH 2/2] ppcopcodes: avoid TLB flushes for noop SR changes They happen surprisingly often, and flushing the TLB is expensive because we need to walk over all entries. Takes booting 10.2 on a Beige G3 from binary start to "Welcome to Macintosh" from 58s to 38s on my machine. --- cpu/ppc/ppcopcodes.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/cpu/ppc/ppcopcodes.cpp b/cpu/ppc/ppcopcodes.cpp index 27c08a5..d9c1dce 100644 --- a/cpu/ppc/ppcopcodes.cpp +++ b/cpu/ppc/ppcopcodes.cpp @@ -728,8 +728,10 @@ void dppc_interpreter::ppc_mtsr() { } int reg_s = (ppc_cur_instruction >> 21) & 0x1F; uint32_t grab_sr = (ppc_cur_instruction >> 16) & 0x0F; - ppc_state.sr[grab_sr] = ppc_state.gpr[reg_s]; - mmu_pat_ctx_changed(); + if (ppc_state.sr[grab_sr] != ppc_state.gpr[reg_s]) { + ppc_state.sr[grab_sr] = ppc_state.gpr[reg_s]; + mmu_pat_ctx_changed(); + } } void dppc_interpreter::ppc_mtsrin() { @@ -741,8 +743,10 @@ void dppc_interpreter::ppc_mtsrin() { } ppc_grab_regssb(ppc_cur_instruction); uint32_t grab_sr = ppc_result_b >> 28; - ppc_state.sr[grab_sr] = ppc_result_d; - mmu_pat_ctx_changed(); + if (ppc_state.sr[grab_sr] != ppc_result_d) { + ppc_state.sr[grab_sr] = ppc_result_d; + mmu_pat_ctx_changed(); + } } void dppc_interpreter::ppc_mfsr() { @@ -969,8 +973,10 @@ void dppc_interpreter::ppc_mtspr() { ppc_state.spr[ref_spr] = val & 0xe000ff7f; break; case SPR::SDR1: - ppc_state.spr[ref_spr] = val; - mmu_pat_ctx_changed(); // adapt to SDR1 changes + if (ppc_state.spr[ref_spr] != val) { + ppc_state.spr[ref_spr] = val; + mmu_pat_ctx_changed(); // adapt to SDR1 changes + } break; case SPR::RTCL_S: calc_rtcl_value();