From 2c816db45e8fe81fd02c385cdd6290a897a68c7f Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Tue, 1 Mar 2022 09:36:37 -0500 Subject: [PATCH] Refactor: (i) to expose effective address calculation; and (ii) to include address size in Instruction. --- InstructionSets/x86/DataPointerResolver.hpp | 352 +++++++++++--------- InstructionSets/x86/Instruction.hpp | 22 +- 2 files changed, 213 insertions(+), 161 deletions(-) diff --git a/InstructionSets/x86/DataPointerResolver.hpp b/InstructionSets/x86/DataPointerResolver.hpp index 4ad3ea766..5abbacead 100644 --- a/InstructionSets/x86/DataPointerResolver.hpp +++ b/InstructionSets/x86/DataPointerResolver.hpp @@ -39,6 +39,63 @@ enum class Register: uint8_t { None }; +template constexpr Register register_for_source(Source source) { + static_assert(sizeof(DataT) == 4 || sizeof(DataT) == 2 || sizeof(DataT) == 1); + + if constexpr (sizeof(DataT) == 4) { + switch(source) { + case Source::eAX: return Register::EAX; + case Source::eCX: return Register::ECX; + case Source::eDX: return Register::EDX; + case Source::eBX: return Register::EBX; + case Source::eSPorAH: return Register::ESP; + case Source::eBPorCH: return Register::EBP; + case Source::eSIorDH: return Register::ESI; + case Source::eDIorBH: return Register::EDI; + + default: break; + } + } + + if constexpr (sizeof(DataT) == 2) { + switch(source) { + case Source::eAX: return Register::AX; + case Source::eCX: return Register::CX; + case Source::eDX: return Register::DX; + case Source::eBX: return Register::BX; + case Source::eSPorAH: return Register::SP; + case Source::eBPorCH: return Register::BP; + case Source::eSIorDH: return Register::SI; + case Source::eDIorBH: return Register::DI; + case Source::ES: return Register::ES; + case Source::CS: return Register::CS; + case Source::SS: return Register::SS; + case Source::DS: return Register::DS; + case Source::FS: return Register::FS; + case Source::GS: return Register::GS; + + default: break; + } + } + + if constexpr 
(sizeof(DataT) == 1) { + switch(source) { + case Source::eAX: return Register::AL; + case Source::eCX: return Register::CL; + case Source::eDX: return Register::DL; + case Source::eBX: return Register::BL; + case Source::eSPorAH: return Register::AH; + case Source::eBPorCH: return Register::CH; + case Source::eSIorDH: return Register::DH; + case Source::eDIorBH: return Register::BH; + + default: break; + } + } + + return Register::None; +} + /// Reads from or writes to the source or target identified by a DataPointer, relying upon two user-supplied classes: /// /// * a register bank; and @@ -51,26 +108,28 @@ enum class Register: uint8_t { /// `template void write(Source segment, uint32_t address, DataT value)`. template class DataPointerResolver { public: + public: + /// Reads the data pointed to by @c pointer, referencing @c instruction, @c memory and @c registers as necessary. template static DataT read( RegistersT ®isters, MemoryT &memory, const Instruction &instruction, - DataPointer pointer, - typename Instruction::AddressT memory_mask = ~0) { - DataT result; - access(registers, memory, instruction, pointer, memory_mask, result); - return result; - } + DataPointer pointer); + /// Writes @c value to the data pointed to by @c pointer, referencing @c instruction, @c memory and @c registers as necessary. template static void write( RegistersT ®isters, MemoryT &memory, const Instruction &instruction, DataPointer pointer, - DataT value, - typename Instruction::AddressT memory_mask = ~0) { - access(registers, memory, instruction, pointer, memory_mask, value); - } + DataT value); + + /// Computes the effective address of @c pointer including any displacement applied by @c instruction. + /// @c pointer must be of type Source::Indirect. 
+ static uint32_t effective_address( + RegistersT ®isters, + const Instruction &instruction, + DataPointer pointer); private: template static void access( @@ -78,150 +137,139 @@ template class DataPointerR MemoryT &memory, const Instruction &instruction, DataPointer pointer, - typename Instruction::AddressT memory_mask, - DataT &value) { - assert(memory_mask == 0xffff'ffff || memory_mask == 0xffff); - const Source source = pointer.source(); - -#define read_or_write(v, x, is_for_indirection) \ - case Source::x: \ - if constexpr (!is_for_indirection && is_write) { \ - registers.template write(Source::x)>(v); \ - } else { \ - v = registers.template read(Source::x)>(); \ - } \ - break; - -#define ALLREGS(v) f(v, eAX); f(v, eCX); f(v, eDX); f(v, eBX); \ - f(v, eSPorAH); f(v, eBPorCH); f(v, eSIorDH); f(v, eDIorBH); \ - f(v, ES); f(v, CS); f(v, SS); f(v, DS); f(v, FS); f(v, GS); - - switch(source) { - default: - if constexpr (!is_write) { - value = 0; - } - return; - -#define f(x, y) read_or_write(x, y, false) - ALLREGS(value); -#undef f - - case Source::DirectAddress: - if constexpr(is_write) { - memory.template write(instruction.data_segment(), instruction.displacement(), value); - } else { - value = memory.template read(instruction.data_segment(), instruction.displacement()); - } - break; - case Source::Immediate: - value = DataT(instruction.operand()); - break; - - case Source::Indirect: { - using AddressT = typename Instruction::AddressT; - AddressT base = 0, index = 0; - -#define f(x, y) read_or_write(x, y, true) - switch(pointer.base()) { - default: break; - ALLREGS(base); - } - - switch(pointer.index()) { - default: break; - ALLREGS(index); - } -#undef f - - // Always compute address as 32-bit. - // TODO: verify application of memory_mask here. - // The point of memory_mask is that 32-bit x86 offers the memory size modifier, - // permitting 16-bit addresses to be generated in 32-bit mode and vice versa. 
- // To figure out is at what point in the calculation the 16-bit constraint is - // applied when active. - uint32_t address = index; - if constexpr (model >= Model::i80386) { - address <<= pointer.scale(); - } else { - assert(!pointer.scale()); - } - - address = (address & memory_mask) + (base & memory_mask) + instruction.displacement(); - - if constexpr (is_write) { - value = memory.template read( - instruction.data_segment(), - address - ); - } else { - memory.template write( - instruction.data_segment(), - address, - value - ); - } - } - } -#undef ALLREGS - } - - template constexpr static Register register_for_source(Source source) { - if constexpr (sizeof(DataT) == 4) { - switch(source) { - case Source::eAX: return Register::EAX; - case Source::eCX: return Register::ECX; - case Source::eDX: return Register::EDX; - case Source::eBX: return Register::EBX; - case Source::eSPorAH: return Register::ESP; - case Source::eBPorCH: return Register::EBP; - case Source::eSIorDH: return Register::ESI; - case Source::eDIorBH: return Register::EDI; - - default: break; - } - } - - if constexpr (sizeof(DataT) == 2) { - switch(source) { - case Source::eAX: return Register::AX; - case Source::eCX: return Register::CX; - case Source::eDX: return Register::DX; - case Source::eBX: return Register::BX; - case Source::eSPorAH: return Register::SP; - case Source::eBPorCH: return Register::BP; - case Source::eSIorDH: return Register::SI; - case Source::eDIorBH: return Register::DI; - case Source::ES: return Register::ES; - case Source::CS: return Register::CS; - case Source::SS: return Register::SS; - case Source::DS: return Register::DS; - case Source::FS: return Register::FS; - case Source::GS: return Register::GS; - - default: break; - } - } - - if constexpr (sizeof(DataT) == 1) { - switch(source) { - case Source::eAX: return Register::AL; - case Source::eCX: return Register::CL; - case Source::eDX: return Register::DL; - case Source::eBX: return Register::BL; - case 
Source::eSPorAH: return Register::AH; - case Source::eBPorCH: return Register::CH; - case Source::eSIorDH: return Register::DH; - case Source::eDIorBH: return Register::BH; - - default: break; - } - } - - return Register::None; - } + DataT &value); }; + +// +// Implementation begins here. +// + +template +template DataT DataPointerResolver::read( + RegistersT ®isters, + MemoryT &memory, + const Instruction &instruction, + DataPointer pointer) { + DataT result; + access(registers, memory, instruction, pointer, result); + return result; + } + +template +template void DataPointerResolver::write( + RegistersT ®isters, + MemoryT &memory, + const Instruction &instruction, + DataPointer pointer, + DataT value) { + access(registers, memory, instruction, pointer, value); + } + +#define rw(v, r, is_write) \ + case Source::r: { \ + if constexpr (is_write) { \ + registers.template write(Source::r)>(v); \ + } else { \ + v = registers.template read(Source::r)>(); \ + } \ + } break; + +#define ALLREGS(v, i) rw(v, eAX, i); rw(v, eCX, i); \ + rw(v, eDX, i); rw(v, eBX, i); \ + rw(v, eSPorAH, i); rw(v, eBPorCH, i); \ + rw(v, eSIorDH, i); rw(v, eDIorBH, i); \ + rw(v, ES, i); rw(v, CS, i); \ + rw(v, SS, i); rw(v, DS, i); \ + rw(v, FS, i); rw(v, GS, i); + +template +uint32_t DataPointerResolver::effective_address( + RegistersT ®isters, + const Instruction &instruction, + DataPointer pointer) { + using AddressT = typename Instruction::AddressT; + AddressT base = 0, index = 0; + + switch(pointer.base()) { + default: break; + ALLREGS(base, false); + } + + switch(pointer.index()) { + default: break; + ALLREGS(index, false); + } + + // Always compute address as 32-bit. + // TODO: verify application of memory_mask around here. + // The point of memory_mask is that 32-bit x86 offers the memory size modifier, + // permitting 16-bit addresses to be generated in 32-bit mode and vice versa. + // To figure out is at what point in the calculation the 16-bit constraint is + // applied when active. 
+ uint32_t address = index; + if constexpr (model >= Model::i80386) { + address <<= pointer.scale(); + } else { + assert(!pointer.scale()); + } + + constexpr uint32_t memory_masks[] = {0x0000'ffff, 0xffff'ffff}; + const uint32_t memory_mask = memory_masks[instruction.address_size_is_32()]; + address = (address & memory_mask) + (base & memory_mask) + instruction.displacement(); + return address; + } + +template +template void DataPointerResolver::access( + RegistersT ®isters, + MemoryT &memory, + const Instruction &instruction, + DataPointer pointer, + DataT &value) { + const Source source = pointer.source(); + + switch(source) { + default: + if constexpr (!is_write) { + value = 0; + } + return; + + ALLREGS(value, is_write); + + case Source::DirectAddress: + if constexpr(is_write) { + memory.template write(instruction.data_segment(), instruction.displacement(), value); + } else { + value = memory.template read(instruction.data_segment(), instruction.displacement()); + } + break; + case Source::Immediate: + value = DataT(instruction.operand()); + break; + + case Source::Indirect: { + const auto address = effective_address(registers, instruction, pointer); + + if constexpr (is_write) { + value = memory.template read( + instruction.data_segment(), + address + ); + } else { + memory.template write( + instruction.data_segment(), + address, + value + ); + } + } + } + } +#undef ALLREGS +#undef read_or_write + } } diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp index 6a6268614..80eda82cd 100644 --- a/InstructionSets/x86/Instruction.hpp +++ b/InstructionSets/x86/Instruction.hpp @@ -522,14 +522,18 @@ template class Instruction { 8 bits operation; 4 bits original instruction size; 2 bits data size; - 3 bits extension flags. + 1 bit memory size; + 2 bits extension flags. 
Extensions (16 or 32 bit, depending on templated size): - 1) reptition + segment override + lock + memory size toggle (= 7 bits); + 1) repetition + segment override + lock + original instruction size (= 10 bits); 2) displacement; 3) immediate operand. - Presence or absence of extensions is dictated by the extention flags. + Presence or absence of extensions is dictated by: + * instruction size = 0 => the repetition, etc. extension (including the real extension size); and + * the extension flags for displacement and/or immediate. + Therefore an instruction's footprint is: * 4–8 bytes (16-bit processors); * 4–12 bytes (32-bit processors). @@ -537,9 +541,9 @@ template class Instruction { I'll then implement a collection suited to packing these things based on their packing_size(), and later iterating them. - To verify: do the 8086 and 80286 limit instructions to 15 bytes as later members - of the family do? If not then consider original instruction size = 0 to imply an - extension of one word prior to the other extensions. + To verify: the 8086 allows unlimited-length instructions (which I'll probably handle by + generating length-15 NOPs and not resetting parser state), the 80386 limits them to + 15 bytes, but what do the processors in between do? 
*/ private: @@ -570,7 +574,7 @@ template class Instruction { DataPointer source() const { return DataPointer(Source(sources_ & 0x3f), sib_); } DataPointer destination() const { return DataPointer(Source((sources_ >> 6) & 0x3f), sib_); } bool lock() const { return sources_ & 0x8000; } - bool address_size() const { return address_size_; } + bool address_size_is_32() const { return address_size_; } Source data_segment() const { const auto segment_override = Source((sources_ >> 12) & 7); if(segment_override != Source::None) return segment_override; @@ -586,8 +590,8 @@ template class Instruction { uint16_t segment() const { return uint16_t(operand_); } uint16_t offset() const { return uint16_t(displacement_); } - DisplacementT displacement() const { return displacement_; } - ImmediateT operand() const { return operand_; } + DisplacementT displacement() const { return displacement_; } + ImmediateT operand() const { return operand_; } Instruction() noexcept {} Instruction(