Merge pull request #1369 from TomHarte/UnmaskedSubAddresses

Mildly reduce ARM/Archimedes hot-path costs.
This commit is contained in:
Thomas Harte 2024-04-29 22:58:22 -04:00 committed by GitHub
commit a882faa7f6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 79 additions and 63 deletions

View File

@ -58,7 +58,9 @@ struct Executor {
/// @returns @c true if @c condition implies an appropriate perform call should be made for this instruction,
/// @c false otherwise.
bool should_schedule(Condition condition) {
return registers_.test(condition);
// This short-circuit of registers_.test provides the necessary compiler clue that
// Condition::AL is not only [[likely]] but [[exceedingly likely]].
return condition == Condition::AL ? true : registers_.test(condition);
}
template <bool allow_register, bool set_carry, typename FieldsT>

View File

@ -421,6 +421,7 @@ class ConcreteMachine:
/// Responds to a change of processor status: refreshes the cached user-mode flag,
/// refills the pipeline from the executor's current program counter and then
/// calls update_interrupts().
/// NOTE(review): presumably invoked by the executor after it writes status — confirm against the caller.
void did_set_status() {
// This might have been a change of mode, so...
// Cache whether the processor is now in user mode; advance_pipeline passes this
// flag to bus reads rather than re-querying the mode on every fetch.
trans_ = executor_.registers().mode() == InstructionSet::ARM::Mode::User;
fill_pipeline(executor_.pc());
update_interrupts();
}
@ -540,6 +541,7 @@ class ConcreteMachine:
static constexpr auto arm_model = InstructionSet::ARM::Model::ARMv2;
using Executor = InstructionSet::ARM::Executor<arm_model, MemoryController<ConcreteMachine, ConcreteMachine>, ConcreteMachine>;
Executor executor_;
bool trans_ = false;
void fill_pipeline(uint32_t pc) {
if(pipeline_.interrupt_next()) return;
@ -549,7 +551,7 @@ class ConcreteMachine:
uint32_t advance_pipeline(uint32_t pc) {
uint32_t instruction = 0; // Value should never be used; this avoids a spurious GCC warning.
const bool did_read = executor_.bus.read(pc, instruction, executor_.registers().mode() == InstructionSet::ARM::Mode::User);
const bool did_read = executor_.bus.read(pc, instruction, trans_);
return pipeline_.exchange(
did_read ? instruction : Pipeline::SWI,
did_read ? Pipeline::SWISubversion::None : Pipeline::SWISubversion::DataAbort);

View File

@ -35,9 +35,9 @@ template <typename InterruptObserverT, typename ClockRateObserverT>
struct MemoryController {
MemoryController(InterruptObserverT &observer, ClockRateObserverT &clock_rate_observer) :
ioc_(observer, clock_rate_observer, ram_.data()) {
read_zones_[0] = Zone::HighROM; // Temporarily put high ROM at address 0.
// TODO: could I just copy it in? Or, at least,
// could I detect at ROM loading time whether I can?
read_zones_[0] = ReadZone::HighROM; // Temporarily put high ROM at address 0.
// TODO: could I just copy it in? Or, at least,
// could I detect at ROM loading time whether I can?
}
int interrupt_mask() const {
@ -72,7 +72,20 @@ struct MemoryController {
template <typename IntT>
bool write(uint32_t address, IntT source, InstructionSet::ARM::Mode, bool trans) {
switch(write_zones_[(address >> 21) & 31]) {
case Zone::DMAAndMEMC: {
case WriteZone::LogicallyMappedRAM: {
const auto item = logical_ram<IntT, false>(address, trans);
if(item < reinterpret_cast<IntT *>(ram_.data())) {
return false;
}
*item = source;
} break;
case WriteZone::PhysicallyMappedRAM:
if(trans) return false;
physical_ram<IntT>(address) = source;
break;
case WriteZone::DMAAndMEMC: {
if(trans) return false;
const auto buffer_address = [](uint32_t source) -> uint32_t {
@ -133,40 +146,23 @@ struct MemoryController {
}
} break;
case Zone::LogicallyMappedRAM: {
const auto item = logical_ram<IntT, false>(address, trans);
if(!item) {
return false;
}
*item = source;
} break;
case Zone::IOControllers:
case WriteZone::IOControllers:
if(trans) return false;
ioc_.template write<IntT>(address, source);
break;
case Zone::VideoController:
case WriteZone::VideoController:
if(trans) return false;
// TODO: handle byte writes correctly.
ioc_.video().write(source);
break;
case Zone::PhysicallyMappedRAM:
if(trans) return false;
physical_ram<IntT>(address) = source;
break;
case Zone::AddressTranslator:
case WriteZone::AddressTranslator:
if(trans) return false;
// printf("Translator write at %08x; replaces %08x\n", address, pages_[address & 0x7f]);
pages_[address & 0x7f] = address;
map_dirty_ = true;
break;
default:
printf("TODO: write of %08x to %08x [%lu]\n", source, address, sizeof(IntT));
break;
}
return true;
@ -174,39 +170,35 @@ struct MemoryController {
template <typename IntT>
bool read(uint32_t address, IntT &source, bool trans) {
switch (read_zones_[(address >> 21) & 31]) {
case Zone::PhysicallyMappedRAM:
if(trans) return false;
source = physical_ram<IntT>(address);
break;
case Zone::LogicallyMappedRAM: {
switch(read_zones_[(address >> 21) & 31]) {
case ReadZone::LogicallyMappedRAM: {
const auto item = logical_ram<IntT, true>(address, trans);
if(!item) {
if(item < reinterpret_cast<IntT *>(ram_.data())) {
return false;
}
source = *item;
} break;
case Zone::LowROM:
case ReadZone::HighROM:
// Real test is: require A24=A25=0, then A25=1.
read_zones_[0] = ReadZone::LogicallyMappedRAM;
source = high_rom<IntT>(address);
break;
case ReadZone::PhysicallyMappedRAM:
if(trans) return false;
source = physical_ram<IntT>(address);
break;
case ReadZone::LowROM:
// logger.error().append("TODO: Low ROM read from %08x", address);
source = IntT(~0);
break;
case Zone::HighROM:
// Real test is: require A24=A25=0, then A25=1.
read_zones_[0] = Zone::LogicallyMappedRAM;
source = high_rom<IntT>(address);
break;
case Zone::IOControllers:
case ReadZone::IOControllers:
if(trans) return false;
ioc_.template read<IntT>(address, source);
break;
default:
logger.error().append("TODO: read from %08x", address);
break;
}
return true;
@ -244,40 +236,61 @@ struct MemoryController {
private:
Log::Logger<Log::Source::ARMIOC> logger;
enum class Zone {
/// Classifies how a read within one 2MB-aligned region of the address space is serviced;
/// read_zones_ holds one entry per region, indexed by bits 21-25 of the address.
enum class ReadZone {
// Below 0x200'0000: resolved through logical_ram, i.e. via the current page mapping.
LogicallyMappedRAM,
// 0x200'0000 to 0x2ff'ffff: direct access via physical_ram; refused when trans is set.
PhysicallyMappedRAM,
// 0x300'0000 to 0x33f'ffff: forwarded to ioc_; refused when trans is set.
IOControllers,
// 0x340'0000 to 0x37f'ffff: not yet implemented; reads currently yield all ones.
LowROM,
// 0x380'0000 upwards: served by high_rom. The constructor also installs this zone in
// region 0, where it remains until the first read that lands in a HighROM zone.
HighROM,
};
/// Classifies how a write within one 2MB-aligned region of the address space is serviced;
/// write_zones_ holds one entry per region, indexed by bits 21-25 of the address.
/// Every zone other than LogicallyMappedRAM rejects the write outright when trans is set.
enum class WriteZone {
// Below 0x200'0000: stored through logical_ram; the write fails if no RAM is mapped there.
LogicallyMappedRAM,
// 0x200'0000 to 0x2ff'ffff: stored directly via physical_ram.
PhysicallyMappedRAM,
// 0x300'0000 to 0x33f'ffff: forwarded to ioc_.
IOControllers,
// 0x340'0000 to 0x35f'ffff: forwarded to ioc_.video().
VideoController,
// 0x360'0000 to 0x37f'ffff.
DMAAndMEMC,
// 0x380'0000 upwards: records the written address in pages_ and marks the mapping dirty.
AddressTranslator,
};
static std::array<Zone, 0x20> zones(bool is_read) {
std::array<Zone, 0x20> zones{};
/// Selects ReadZone when @c is_read is true and WriteZone otherwise, allowing zones()
/// to populate either table from a single template.
template <bool is_read>
using Zone = std::conditional_t<is_read, ReadZone, WriteZone>;
template <bool is_read>
static std::array<Zone<is_read>, 0x20> zones() {
std::array<Zone<is_read>, 0x20> zones{};
for(size_t c = 0; c < zones.size(); c++) {
const auto address = c << 21;
if(address < 0x200'0000) {
zones[c] = Zone::LogicallyMappedRAM;
zones[c] = Zone<is_read>::LogicallyMappedRAM;
} else if(address < 0x300'0000) {
zones[c] = Zone::PhysicallyMappedRAM;
zones[c] = Zone<is_read>::PhysicallyMappedRAM;
} else if(address < 0x340'0000) {
zones[c] = Zone::IOControllers;
zones[c] = Zone<is_read>::IOControllers;
} else if(address < 0x360'0000) {
zones[c] = is_read ? Zone::LowROM : Zone::VideoController;
if constexpr (is_read) {
zones[c] = Zone<is_read>::LowROM;
} else {
zones[c] = Zone<is_read>::VideoController;
}
} else if(address < 0x380'0000) {
zones[c] = is_read ? Zone::LowROM : Zone::DMAAndMEMC;
if constexpr (is_read) {
zones[c] = Zone<is_read>::LowROM;
} else {
zones[c] = Zone<is_read>::DMAAndMEMC;
}
} else {
zones[c] = is_read ? Zone::HighROM : Zone::AddressTranslator;
if constexpr (is_read) {
zones[c] = Zone<is_read>::HighROM;
} else {
zones[c] = Zone<is_read>::AddressTranslator;
}
}
}
return zones;
}
bool has_moved_rom_ = false;
std::array<uint8_t, 4*1024*1024> ram_{};
std::array<uint8_t, 2*1024*1024> rom_;
std::array<uint8_t, 4*1024*1024> ram_{};
InputOutputController<InterruptObserverT, ClockRateObserverT> ioc_;
template <typename IntT>
@ -293,8 +306,8 @@ struct MemoryController {
return *reinterpret_cast<IntT *>(&rom_[address & (rom_.size() - 1)]);
}
std::array<Zone, 0x20> read_zones_ = zones(true);
const std::array<Zone, 0x20> write_zones_ = zones(false);
std::array<ReadZone, 0x20> read_zones_ = zones<true>();
const std::array<WriteZone, 0x20> write_zones_ = zones<false>();
// Control register values.
bool os_mode_ = false;
@ -353,6 +366,7 @@ struct MemoryController {
bool map_dirty_ = true;
/// @returns A pointer to somewhere in @c ram_ if RAM is mapped to this area, or a pointer to somewhere lower than @c ram_.data() otherwise.
template <typename IntT, bool is_read>
IntT *logical_ram(uint32_t address, bool trans) {
// Possibly TODO: this recompute-if-dirty flag is supposed to ameliorate for an expensive
@ -366,10 +380,6 @@ struct MemoryController {
const size_t page = address >> page_address_shift_;
const auto &map = mapping<is_read>(trans, os_mode_);
if(!map[page]) {
return nullptr;
}
address &= page_adddress_mask_;
return reinterpret_cast<IntT *>(&map[page][address]);
}
@ -389,7 +399,9 @@ struct MemoryController {
void update_mapping() {
// Clear all logical mappings.
for(auto &map: mapping_) {
std::fill(map.begin(), map.end(), nullptr);
// Seed all pointers to an address sufficiently far lower than the beginning of RAM as to mark
// the entire page as unmapped no matter what offset is added.
std::fill(map.begin(), map.end(), ram_.data() - 32768);
}
// For each physical page, project it into logical space