1
0
mirror of https://github.com/TomHarte/CLK.git synced 2024-06-09 17:29:36 +00:00

Merge pull request #1369 from TomHarte/UnmaskedSubAddresses

Mildly reduce ARM/Archimedes hot-path costs.
This commit is contained in:
Thomas Harte 2024-04-29 22:58:22 -04:00 committed by GitHub
commit a882faa7f6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 79 additions and 63 deletions

View File

@ -58,7 +58,9 @@ struct Executor {
/// @returns @c true if @c condition implies an appropriate perform call should be made for this instruction, /// @returns @c true if @c condition implies an appropriate perform call should be made for this instruction,
/// @c false otherwise. /// @c false otherwise.
bool should_schedule(Condition condition) { bool should_schedule(Condition condition) {
return registers_.test(condition); // This short-circuit of registers_.test provides the necessary compiler clue that
// Condition::AL is not only [[likely]] but [[exceedingly likely]].
return condition == Condition::AL ? true : registers_.test(condition);
} }
template <bool allow_register, bool set_carry, typename FieldsT> template <bool allow_register, bool set_carry, typename FieldsT>

View File

@ -421,6 +421,7 @@ class ConcreteMachine:
void did_set_status() { void did_set_status() {
// This might have been a change of mode, so... // This might have been a change of mode, so...
trans_ = executor_.registers().mode() == InstructionSet::ARM::Mode::User;
fill_pipeline(executor_.pc()); fill_pipeline(executor_.pc());
update_interrupts(); update_interrupts();
} }
@ -540,6 +541,7 @@ class ConcreteMachine:
static constexpr auto arm_model = InstructionSet::ARM::Model::ARMv2; static constexpr auto arm_model = InstructionSet::ARM::Model::ARMv2;
using Executor = InstructionSet::ARM::Executor<arm_model, MemoryController<ConcreteMachine, ConcreteMachine>, ConcreteMachine>; using Executor = InstructionSet::ARM::Executor<arm_model, MemoryController<ConcreteMachine, ConcreteMachine>, ConcreteMachine>;
Executor executor_; Executor executor_;
bool trans_ = false;
void fill_pipeline(uint32_t pc) { void fill_pipeline(uint32_t pc) {
if(pipeline_.interrupt_next()) return; if(pipeline_.interrupt_next()) return;
@ -549,7 +551,7 @@ class ConcreteMachine:
uint32_t advance_pipeline(uint32_t pc) { uint32_t advance_pipeline(uint32_t pc) {
uint32_t instruction = 0; // Value should never be used; this avoids a spurious GCC warning. uint32_t instruction = 0; // Value should never be used; this avoids a spurious GCC warning.
const bool did_read = executor_.bus.read(pc, instruction, executor_.registers().mode() == InstructionSet::ARM::Mode::User); const bool did_read = executor_.bus.read(pc, instruction, trans_);
return pipeline_.exchange( return pipeline_.exchange(
did_read ? instruction : Pipeline::SWI, did_read ? instruction : Pipeline::SWI,
did_read ? Pipeline::SWISubversion::None : Pipeline::SWISubversion::DataAbort); did_read ? Pipeline::SWISubversion::None : Pipeline::SWISubversion::DataAbort);

View File

@ -35,9 +35,9 @@ template <typename InterruptObserverT, typename ClockRateObserverT>
struct MemoryController { struct MemoryController {
MemoryController(InterruptObserverT &observer, ClockRateObserverT &clock_rate_observer) : MemoryController(InterruptObserverT &observer, ClockRateObserverT &clock_rate_observer) :
ioc_(observer, clock_rate_observer, ram_.data()) { ioc_(observer, clock_rate_observer, ram_.data()) {
read_zones_[0] = Zone::HighROM; // Temporarily put high ROM at address 0. read_zones_[0] = ReadZone::HighROM; // Temporarily put high ROM at address 0.
// TODO: could I just copy it in? Or, at least, // TODO: could I just copy it in? Or, at least,
// could I detect at ROM loading time whether I can? // could I detect at ROM loading time whether I can?
} }
int interrupt_mask() const { int interrupt_mask() const {
@ -72,7 +72,20 @@ struct MemoryController {
template <typename IntT> template <typename IntT>
bool write(uint32_t address, IntT source, InstructionSet::ARM::Mode, bool trans) { bool write(uint32_t address, IntT source, InstructionSet::ARM::Mode, bool trans) {
switch(write_zones_[(address >> 21) & 31]) { switch(write_zones_[(address >> 21) & 31]) {
case Zone::DMAAndMEMC: { case WriteZone::LogicallyMappedRAM: {
const auto item = logical_ram<IntT, false>(address, trans);
if(item < reinterpret_cast<IntT *>(ram_.data())) {
return false;
}
*item = source;
} break;
case WriteZone::PhysicallyMappedRAM:
if(trans) return false;
physical_ram<IntT>(address) = source;
break;
case WriteZone::DMAAndMEMC: {
if(trans) return false; if(trans) return false;
const auto buffer_address = [](uint32_t source) -> uint32_t { const auto buffer_address = [](uint32_t source) -> uint32_t {
@ -133,40 +146,23 @@ struct MemoryController {
} }
} break; } break;
case Zone::LogicallyMappedRAM: { case WriteZone::IOControllers:
const auto item = logical_ram<IntT, false>(address, trans);
if(!item) {
return false;
}
*item = source;
} break;
case Zone::IOControllers:
if(trans) return false; if(trans) return false;
ioc_.template write<IntT>(address, source); ioc_.template write<IntT>(address, source);
break; break;
case Zone::VideoController: case WriteZone::VideoController:
if(trans) return false; if(trans) return false;
// TODO: handle byte writes correctly. // TODO: handle byte writes correctly.
ioc_.video().write(source); ioc_.video().write(source);
break; break;
case Zone::PhysicallyMappedRAM: case WriteZone::AddressTranslator:
if(trans) return false;
physical_ram<IntT>(address) = source;
break;
case Zone::AddressTranslator:
if(trans) return false; if(trans) return false;
// printf("Translator write at %08x; replaces %08x\n", address, pages_[address & 0x7f]); // printf("Translator write at %08x; replaces %08x\n", address, pages_[address & 0x7f]);
pages_[address & 0x7f] = address; pages_[address & 0x7f] = address;
map_dirty_ = true; map_dirty_ = true;
break; break;
default:
printf("TODO: write of %08x to %08x [%lu]\n", source, address, sizeof(IntT));
break;
} }
return true; return true;
@ -174,39 +170,35 @@ struct MemoryController {
template <typename IntT> template <typename IntT>
bool read(uint32_t address, IntT &source, bool trans) { bool read(uint32_t address, IntT &source, bool trans) {
switch (read_zones_[(address >> 21) & 31]) { switch(read_zones_[(address >> 21) & 31]) {
case Zone::PhysicallyMappedRAM: case ReadZone::LogicallyMappedRAM: {
if(trans) return false;
source = physical_ram<IntT>(address);
break;
case Zone::LogicallyMappedRAM: {
const auto item = logical_ram<IntT, true>(address, trans); const auto item = logical_ram<IntT, true>(address, trans);
if(!item) { if(item < reinterpret_cast<IntT *>(ram_.data())) {
return false; return false;
} }
source = *item; source = *item;
} break; } break;
case Zone::LowROM: case ReadZone::HighROM:
// Real test is: require A24=A25=0, then A25=1.
read_zones_[0] = ReadZone::LogicallyMappedRAM;
source = high_rom<IntT>(address);
break;
case ReadZone::PhysicallyMappedRAM:
if(trans) return false;
source = physical_ram<IntT>(address);
break;
case ReadZone::LowROM:
// logger.error().append("TODO: Low ROM read from %08x", address); // logger.error().append("TODO: Low ROM read from %08x", address);
source = IntT(~0); source = IntT(~0);
break; break;
case Zone::HighROM: case ReadZone::IOControllers:
// Real test is: require A24=A25=0, then A25=1.
read_zones_[0] = Zone::LogicallyMappedRAM;
source = high_rom<IntT>(address);
break;
case Zone::IOControllers:
if(trans) return false; if(trans) return false;
ioc_.template read<IntT>(address, source); ioc_.template read<IntT>(address, source);
break; break;
default:
logger.error().append("TODO: read from %08x", address);
break;
} }
return true; return true;
@ -244,40 +236,61 @@ struct MemoryController {
private: private:
Log::Logger<Log::Source::ARMIOC> logger; Log::Logger<Log::Source::ARMIOC> logger;
enum class Zone { enum class ReadZone {
LogicallyMappedRAM, LogicallyMappedRAM,
PhysicallyMappedRAM, PhysicallyMappedRAM,
IOControllers, IOControllers,
LowROM, LowROM,
HighROM, HighROM,
};
enum class WriteZone {
LogicallyMappedRAM,
PhysicallyMappedRAM,
IOControllers,
VideoController, VideoController,
DMAAndMEMC, DMAAndMEMC,
AddressTranslator, AddressTranslator,
}; };
static std::array<Zone, 0x20> zones(bool is_read) { template <bool is_read>
std::array<Zone, 0x20> zones{}; using Zone = std::conditional_t<is_read, ReadZone, WriteZone>;
template <bool is_read>
static std::array<Zone<is_read>, 0x20> zones() {
std::array<Zone<is_read>, 0x20> zones{};
for(size_t c = 0; c < zones.size(); c++) { for(size_t c = 0; c < zones.size(); c++) {
const auto address = c << 21; const auto address = c << 21;
if(address < 0x200'0000) { if(address < 0x200'0000) {
zones[c] = Zone::LogicallyMappedRAM; zones[c] = Zone<is_read>::LogicallyMappedRAM;
} else if(address < 0x300'0000) { } else if(address < 0x300'0000) {
zones[c] = Zone::PhysicallyMappedRAM; zones[c] = Zone<is_read>::PhysicallyMappedRAM;
} else if(address < 0x340'0000) { } else if(address < 0x340'0000) {
zones[c] = Zone::IOControllers; zones[c] = Zone<is_read>::IOControllers;
} else if(address < 0x360'0000) { } else if(address < 0x360'0000) {
zones[c] = is_read ? Zone::LowROM : Zone::VideoController; if constexpr (is_read) {
zones[c] = Zone<is_read>::LowROM;
} else {
zones[c] = Zone<is_read>::VideoController;
}
} else if(address < 0x380'0000) { } else if(address < 0x380'0000) {
zones[c] = is_read ? Zone::LowROM : Zone::DMAAndMEMC; if constexpr (is_read) {
zones[c] = Zone<is_read>::LowROM;
} else {
zones[c] = Zone<is_read>::DMAAndMEMC;
}
} else { } else {
zones[c] = is_read ? Zone::HighROM : Zone::AddressTranslator; if constexpr (is_read) {
zones[c] = Zone<is_read>::HighROM;
} else {
zones[c] = Zone<is_read>::AddressTranslator;
}
} }
} }
return zones; return zones;
} }
bool has_moved_rom_ = false; bool has_moved_rom_ = false;
std::array<uint8_t, 4*1024*1024> ram_{};
std::array<uint8_t, 2*1024*1024> rom_; std::array<uint8_t, 2*1024*1024> rom_;
std::array<uint8_t, 4*1024*1024> ram_{};
InputOutputController<InterruptObserverT, ClockRateObserverT> ioc_; InputOutputController<InterruptObserverT, ClockRateObserverT> ioc_;
template <typename IntT> template <typename IntT>
@ -293,8 +306,8 @@ struct MemoryController {
return *reinterpret_cast<IntT *>(&rom_[address & (rom_.size() - 1)]); return *reinterpret_cast<IntT *>(&rom_[address & (rom_.size() - 1)]);
} }
std::array<Zone, 0x20> read_zones_ = zones(true); std::array<ReadZone, 0x20> read_zones_ = zones<true>();
const std::array<Zone, 0x20> write_zones_ = zones(false); const std::array<WriteZone, 0x20> write_zones_ = zones<false>();
// Control register values. // Control register values.
bool os_mode_ = false; bool os_mode_ = false;
@ -353,6 +366,7 @@ struct MemoryController {
bool map_dirty_ = true; bool map_dirty_ = true;
/// @returns A pointer to somewhere in @c ram_ if RAM is mapped to this area, or a pointer to somewhere lower than @c ram_.data() otherwise.
template <typename IntT, bool is_read> template <typename IntT, bool is_read>
IntT *logical_ram(uint32_t address, bool trans) { IntT *logical_ram(uint32_t address, bool trans) {
// Possibly TODO: this recompute-if-dirty flag is supposed to ameliorate for an expensive // Possibly TODO: this recompute-if-dirty flag is supposed to ameliorate for an expensive
@ -366,10 +380,6 @@ struct MemoryController {
const size_t page = address >> page_address_shift_; const size_t page = address >> page_address_shift_;
const auto &map = mapping<is_read>(trans, os_mode_); const auto &map = mapping<is_read>(trans, os_mode_);
if(!map[page]) {
return nullptr;
}
address &= page_adddress_mask_; address &= page_adddress_mask_;
return reinterpret_cast<IntT *>(&map[page][address]); return reinterpret_cast<IntT *>(&map[page][address]);
} }
@ -389,7 +399,9 @@ struct MemoryController {
void update_mapping() { void update_mapping() {
// Clear all logical mappings. // Clear all logical mappings.
for(auto &map: mapping_) { for(auto &map: mapping_) {
std::fill(map.begin(), map.end(), nullptr); // Seed all pointers to an address far enough below the beginning of RAM to mark
// the entire page as unmapped no matter what offset is added.
std::fill(map.begin(), map.end(), ram_.data() - 32768);
} }
// For each physical page, project it into logical space // For each physical page, project it into logical space