From 0d7a7dc7c96f541646792ae03c1c8f73b6e4e4b0 Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Sun, 27 Feb 2022 11:25:02 -0500 Subject: [PATCH] Introduce `DataPointerResolver`, to codify the meaning of `DataPointer` and validate that enough information is present. --- InstructionSets/x86/DataPointerResolver.hpp | 200 ++++++++++++++++++ InstructionSets/x86/Decoder.hpp | 8 +- InstructionSets/x86/Instruction.hpp | 23 +- InstructionSets/x86/Model.hpp | 27 +++ .../Clock Signal.xcodeproj/project.pbxproj | 4 + .../Mac/Clock SignalTests/x86DecoderTests.mm | 1 + 6 files changed, 248 insertions(+), 15 deletions(-) create mode 100644 InstructionSets/x86/DataPointerResolver.hpp create mode 100644 InstructionSets/x86/Model.hpp diff --git a/InstructionSets/x86/DataPointerResolver.hpp b/InstructionSets/x86/DataPointerResolver.hpp new file mode 100644 index 000000000..8b5080173 --- /dev/null +++ b/InstructionSets/x86/DataPointerResolver.hpp @@ -0,0 +1,200 @@ +// +// DataPointerResolver.hpp +// Clock Signal +// +// Created by Thomas Harte on 24/02/2022. +// Copyright © 2022 Thomas Harte. All rights reserved. +// + +#ifndef DataPointerResolver_hpp +#define DataPointerResolver_hpp + +#include "Instruction.hpp" +#include "Model.hpp" + +namespace InstructionSet { +namespace x86 { + +/// Unlike source, describes only registers, and breaks +/// them down by conventional name — so AL, AH, AX and EAX are all +/// listed separately and uniquely, rather than being eAX+size or +/// eSPorAH with a size of 1. +enum class Register: uint8_t { + AL, AH, AX, EAX, + CL, CH, CX, ECX, + DL, DH, DX, EDX, + BL, BH, BX, EBX, + SP, ESP, + BP, EBP, + SI, ESI, + DI, EDI, + ES, + CS, + SS, + DS, + FS, + GS, + None +}; + +/// Reads from or writes to the source or target identified by a DataPointer, relying upon two user-supplied classes: +/// +/// * a register bank; and +/// * a memory pool. +/// +/// The register bank should implement `template DataT read()` and `template void write(DataT)`. 
+/// Those functions will be called only with registers and data types that are appropriate to the @c model. +/// +/// The memory pool should implement `template DataT read(Source segment, uint32_t address)` and +/// `template void write(Source segment, uint32_t address, DataT value)`. +template class DataPointerResolver { + public: + template DataT read( + RegistersT &registers, + MemoryT &memory, + const Instruction &instruction, + DataPointer pointer, + decltype(RegistersT::eSP) memory_mask = ~0) { + DataT result; + access(registers, memory, instruction, pointer, memory_mask, result); + return result; + } + + template void write( + RegistersT &registers, + MemoryT &memory, + const Instruction &instruction, + DataPointer pointer, + DataT value, + decltype(RegistersT::eSP) memory_mask = ~0) { + access(registers, memory, instruction, pointer, memory_mask, value); + } + + private: + template DataT access( + RegistersT &registers, + MemoryT &memory, + const Instruction &instruction, + DataPointer pointer, + decltype(RegistersT::eSP) memory_mask, + DataT &value) { + const Source source = pointer.source(); + +#define read_or_write(v, x, allow_write) \ + case Source::x: \ + if constexpr(allow_write && is_write) {\ + registers.template write(Source::x)>(v); \ + } else { \ + value = registers.template read(Source::x)>(); \ + } + +#define ALLREGS(v) f(v, eAX); f(v, eCX); f(v, eDX); f(v, eBX); \ + f(v, eSPorAH); f(v, eBPorCH); f(v, eSIorDH); f(v, eDIorBH); \ + f(v, ES); f(v, CS); f(v, SS); f(v, DS); f(v, FS); f(v, GS); + + switch(source) { + default: return DataT(0); +#define f(x, y) read_or_write(x, y, true) + ALLREGS(value); +#undef f + + case Source::DirectAddress: + if constexpr(is_write) { + memory.template write(instruction.data_segment(), instruction.displacement(), value); + } else { + value = memory.template read(instruction.data_segment(), instruction.displacement()); + } + break; + case Source::Immediate: + value = DataT(instruction.operand()); + break; + + case 
Source::Indirect: { + uint32_t base = 0, index = 0; + +#define f(x, y) read_or_write(x, y, false) + switch(pointer.base()) { + default: break; + ALLREGS(base); + } + + switch(pointer.index()) { + default: break; + ALLREGS(index); + } +#undef f + + if constexpr (model >= Model::i80386) { + index <<= pointer.scale(); + } + + // TODO: verify application of memory_mask here. + value = memory.template get( + instruction.data_segment(), + (base & memory_mask) + (index & memory_mask) + ); + } + } +#undef ALLREGS + } + + template constexpr static Register register_for_source(Source source) { + if constexpr (sizeof(DataT) == 4) { + switch(source) { + case Source::eAX: return Register::EAX; + case Source::eCX: return Register::ECX; + case Source::eDX: return Register::EDX; + case Source::eBX: return Register::EBX; + case Source::eSPorAH: return Register::ESP; + case Source::eBPorCH: return Register::EBP; + case Source::eSIorDH: return Register::ESI; + case Source::eDIorBH: return Register::EDI; + + default: break; + } + } + + if constexpr (sizeof(DataT) == 2) { + switch(source) { + case Source::eAX: return Register::AX; + case Source::eCX: return Register::CX; + case Source::eDX: return Register::DX; + case Source::eBX: return Register::BX; + case Source::eSPorAH: return Register::SP; + case Source::eBPorCH: return Register::BP; + case Source::eSIorDH: return Register::SI; + case Source::eDIorBH: return Register::DI; + case Source::ES: return Register::ES; + case Source::CS: return Register::CS; + case Source::SS: return Register::SS; + case Source::DS: return Register::DS; + case Source::FS: return Register::FS; + case Source::GS: return Register::GS; + + default: break; + } + } + + if constexpr (sizeof(DataT) == 1) { + switch(source) { + case Source::eAX: return Register::AL; + case Source::eCX: return Register::CL; + case Source::eDX: return Register::DL; + case Source::eBX: return Register::BL; + case Source::eSPorAH: return Register::AH; + case Source::eBPorCH: return 
Register::CH; + case Source::eSIorDH: return Register::DH; + case Source::eDIorBH: return Register::BH; + + default: break; + } + } + + return Register::None; + } +}; + +} +} + +#endif /* DataPointerResolver_hpp */ diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp index 6c52e698b..30d6bd92e 100644 --- a/InstructionSets/x86/Decoder.hpp +++ b/InstructionSets/x86/Decoder.hpp @@ -10,6 +10,7 @@ #define InstructionSets_x86_Decoder_hpp #include "Instruction.hpp" +#include "Model.hpp" #include #include @@ -17,13 +18,6 @@ namespace InstructionSet { namespace x86 { -enum class Model { - i8086, - i80186, - i80286, - i80386, -}; - /*! Implements Intel x86 instruction decoding. diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp index 0f27d2356..4baf9e763 100644 --- a/InstructionSets/x86/Instruction.hpp +++ b/InstructionSets/x86/Instruction.hpp @@ -357,6 +357,12 @@ enum class Source: uint8_t { // Selectors. ES, CS, SS, DS, FS, GS, + /// @c None can be treated as a source that produces 0 when encountered; + /// it is semantically valid to receive it with that meaning in some contexts — + /// e.g. to indicate no index in indirect addressing. + /// It's listed here in order to allow an [optional] segment override to fit into three bits. + None, + /// The address included within this instruction should be used as the source. DirectAddress, // TODO: is this better eliminated in favour of an indirect @@ -365,11 +371,6 @@ enum class Source: uint8_t { /// The immediate value included within this instruction should be used as the source. Immediate, - /// @c None can be treated as a source that produces 0 when encountered; - /// it is semantically valid to receive it with that meaning in some contexts — - /// e.g. to indicate no index in indirect addressing. - None, - /// The ScaleIndexBase associated with this source should be used. 
Indirect = 0b11000, // Elsewhere, as an implementation detail, the low three bits of an indirect source @@ -554,16 +555,22 @@ template class Instruction { /// this allows a denser packing of instructions into containers. size_t packing_size() const { return sizeof(*this); /* TODO */ } - DataPointer source() const { return DataPointer(Source(sources_ & 0x3f), sib_); } + DataPointer source() const { return DataPointer(Source(sources_ & 0x3f), sib_); } DataPointer destination() const { return DataPointer(Source((sources_ >> 6) & 0x3f), sib_); } bool lock() const { return sources_ & 0x8000; } bool address_size() const { return address_size_; } - Source segment_override() const { return Source((sources_ >> 12) & 7); } + Source data_segment() const { + const auto segment_override = Source((sources_ >> 12) & 7); + if(segment_override != Source::None) return segment_override; + + // TODO: default source should be SS for anything touching the stack. + return Source::DS; + } Repetition repetition() const { return Repetition(repetition_size_ & 3); } Size operation_size() const { return Size(repetition_size_ >> 2); } - // TODO: confirm whether far call for some reason makes thse 32-bit in protected mode. + // TODO: confirm whether far call for some reason makes these 32-bit in protected mode. uint16_t segment() const { return uint16_t(operand_); } uint16_t offset() const { return uint16_t(displacement_); } diff --git a/InstructionSets/x86/Model.hpp b/InstructionSets/x86/Model.hpp new file mode 100644 index 000000000..8f8189556 --- /dev/null +++ b/InstructionSets/x86/Model.hpp @@ -0,0 +1,27 @@ +// +// Model.hpp +// Clock Signal +// +// Created by Thomas Harte on 27/02/2022. +// Copyright © 2022 Thomas Harte. All rights reserved. 
+// + +#ifndef Model_h +#define Model_h + +namespace InstructionSet { +namespace x86 { + +enum class Model { + i8086, + i80186, + i80286, + i80386, +}; + +#define is_32bit(model) (model >= Model::i80386) + +} +} + +#endif /* Model_h */ diff --git a/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj b/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj index 99ca08e07..5d4d81c10 100644 --- a/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj +++ b/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj @@ -2074,6 +2074,8 @@ 4BE3231520532AA7006EF799 /* Target.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Target.hpp; sourceTree = ""; }; 4BE3231620532BED006EF799 /* Target.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Target.hpp; sourceTree = ""; }; 4BE34437238389E10058E78F /* AtariSTVideoTests.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = AtariSTVideoTests.mm; sourceTree = ""; }; + 4BE3C69327C793EF000EAD28 /* DataPointerResolver.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = DataPointerResolver.hpp; sourceTree = ""; }; + 4BE3C69527CBC540000EAD28 /* Model.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Model.hpp; sourceTree = ""; }; 4BE76CF822641ED300ACD6FA /* QLTests.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = QLTests.mm; sourceTree = ""; }; 4BE845201F2FF7F100A5EA22 /* CRTC6845.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = CRTC6845.hpp; sourceTree = ""; }; 4BE8EB5425C0E9D40040BC40 /* Disassembler.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Disassembler.hpp; sourceTree = ""; }; @@ -4731,6 +4733,8 @@ 4BEDA3B925B25563000C2DBD /* Decoder.cpp */, 4BEDA3B825B25563000C2DBD /* Decoder.hpp */, 4BEDA3DB25B2588F000C2DBD /* Instruction.hpp */, + 
4BE3C69327C793EF000EAD28 /* DataPointerResolver.hpp */, + 4BE3C69527CBC540000EAD28 /* Model.hpp */, ); path = x86; sourceTree = ""; diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm index 26c01569e..fe3354026 100644 --- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm +++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm @@ -12,6 +12,7 @@ #include #include #include "../../../InstructionSets/x86/Decoder.hpp" +#include "../../../InstructionSets/x86/DataPointerResolver.hpp" namespace {