From a954f236429abc6a22b16bf72a54e44e80242dbb Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Tue, 8 Mar 2022 14:39:49 -0500 Subject: [PATCH] Attempt 32-bit modregrm + SIB parsing. --- InstructionSets/x86/Decoder.cpp | 68 ++++++++++++------- InstructionSets/x86/Decoder.hpp | 1 + .../Mac/Clock SignalTests/x86DecoderTests.mm | 18 +++-- 3 files changed, 55 insertions(+), 32 deletions(-) diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp index a316786d5..432ec8507 100644 --- a/InstructionSets/x86/Decoder.cpp +++ b/InstructionSets/x86/Decoder.cpp @@ -591,13 +591,12 @@ std::pair::InstructionT> Decoder::decode(con const uint8_t mod = *source >> 6; // i.e. mode. const uint8_t reg = (*source >> 3) & 7; // i.e. register. const uint8_t rm = *source & 7; // i.e. register/memory. + bool expects_sib = false; ++source; ++consumed_; Source memreg; - // TODO: the below currently has no way to segue into fetching a SIB. - // TODO: can I just eliminate these lookup tables given the deliberate ordering within Source? constexpr Source reg_table[8] = { Source::eAX, Source::eCX, Source::eDX, Source::eBX, @@ -606,13 +605,39 @@ std::pair::InstructionT> Decoder::decode(con constexpr Source seg_table[6] = { Source::ES, Source::CS, Source::SS, Source::DS, Source::FS, Source::GS }; - switch(mod) { - default: { - const DataSize sizes[] = {DataSize::Byte, data_size_}; - displacement_size_ = sizes[mod == 2]; + + // Mode 3 is the same regardless of 16/32-bit mode. So deal with that up front. + if(mod == 3) { + // Other operand is just a register. + memreg = reg_table[rm]; + + // LES, LDS, etc accept a memory argument only, not a register. + if( + operation_ == Operation::LES || + operation_ == Operation::LDS || + operation_ == Operation::LGS || + operation_ == Operation::LSS || + operation_ == Operation::LFS) { + undefined(); } - [[fallthrough]]; - case 0: { + } else { + const DataSize sizes[] = { + DataSize::None, + DataSize::Byte, + address_size_ == AddressSize::b16 ? DataSize::Word : DataSize::DWord + }; + displacement_size_ = sizes[mod]; + memreg = Source::Indirect; + + if(allow_sib_) { + // 32-bit decoding: the range of potential indirections is expanded, + // and may segue into obtaining a SIB. + sib_ = ScaleIndexBase(0, Source::None, reg_table[rm]); + expects_sib = rm == 4; // Indirect via eSP isn't directly supported; it's the + // escape indicator for reading a SIB. + } else { + // Classic 16-bit decoding: mode picks a displacement size, + // and a few fixed index+base pairs are defined. constexpr ScaleIndexBase rm_table[8] = { ScaleIndexBase(0, Source::eBX, Source::eSI), ScaleIndexBase(0, Source::eBX, Source::eDI), @@ -624,24 +649,8 @@ std::pair::InstructionT> Decoder::decode(con ScaleIndexBase(0, Source::None, Source::eBX), }; - memreg = Source::Indirect; sib_ = rm_table[rm]; - } break; - - // Other operand is just a register. - case 3: - memreg = reg_table[rm]; - - // LES, LDS, etc accept a memory argument only, not a register. - if( - operation_ == Operation::LES || - operation_ == Operation::LDS || - operation_ == Operation::LGS || - operation_ == Operation::LSS || - operation_ == Operation::LFS) { - undefined(); - } - break; + } } switch(modregrm_format_) { @@ -826,7 +835,11 @@ std::pair::InstructionT> Decoder::decode(con default: assert(false); } - phase_ = (displacement_size_ != DataSize::None || operand_size_ != DataSize::None) ? Phase::DisplacementOrOperand : Phase::ReadyToPost; + if(expects_sib && (source_ == Source::Indirect | destination_ == Source::Indirect)) { + phase_ = Phase::ScaleIndexBase; + } else { + phase_ = (displacement_size_ != DataSize::None || operand_size_ != DataSize::None) ? Phase::DisplacementOrOperand : Phase::ReadyToPost; + } } #undef undefined @@ -837,6 +850,8 @@ std::pair::InstructionT> Decoder::decode(con sib_ = *source; ++source; ++consumed_; + + phase_ = (displacement_size_ != DataSize::None || operand_size_ != DataSize::None) ? Phase::DisplacementOrOperand : Phase::ReadyToPost; } // MARK: - Displacement and operand. @@ -916,6 +931,7 @@ template void Decoder::set_32bit_protected_mode(bool enable return; } + allow_sib_ = enabled; if(enabled) { default_address_size_ = address_size_ = AddressSize::b32; default_data_size_ = data_size_ = DataSize::DWord; diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp index 6c837df2a..7c411423e 100644 --- a/InstructionSets/x86/Decoder.hpp +++ b/InstructionSets/x86/Decoder.hpp @@ -196,6 +196,7 @@ template class Decoder { DataSize default_data_size_ = DataSize::Word; AddressSize address_size_ = AddressSize::b16; DataSize data_size_ = DataSize::Word; + bool allow_sib_ = false; /// Resets size capture and all fields with default values. void reset_parsing() { diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm index 1ef7c754f..206070194 100644 --- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm +++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm @@ -309,15 +309,21 @@ std::vector::InstructionT> decode(c - (void)testLDSLESEtc { auto run_test = [](bool is_32, DataSize size) { const auto instructions = decode({ - 0xc5, 0x33, // lds (%bp, %di), %si - 0xc4, 0x17, // les (%bx), %dx - 0x0f, 0xb2, 0x17, // lss edx, (edi) + 0xc5, 0x33, // 16-bit: lds si, (bp, di); 32-bit: lds esi, (ebx) + 0xc4, 0x17, // 16-bit: les dx, (bx); 32-bit: les edx, (edi) + 0x0f, 0xb2, 0x17, // 16-bit: lss dx, (bx); 32-bit: lss edx, (edi) }, is_32); XCTAssertEqual(instructions.size(), 3); - test(instructions[0], size, Operation::LDS, ScaleIndexBase(Source::eBP, Source::eDI), Source::eSI); - test(instructions[1], size, Operation::LES, ScaleIndexBase(Source::eBX), Source::eDX); - test(instructions[2], size, Operation::LSS, ScaleIndexBase(Source::eBX), Source::eDX); + if(is_32) { + test(instructions[0], size, Operation::LDS, ScaleIndexBase(Source::eBX), Source::eSI); + test(instructions[1], size, Operation::LES, ScaleIndexBase(Source::eDI), Source::eDX); + test(instructions[2], size, Operation::LSS, ScaleIndexBase(Source::eDI), Source::eDX); + } else { + test(instructions[0], size, Operation::LDS, ScaleIndexBase(Source::eBP, Source::eDI), Source::eSI); + test(instructions[1], size, Operation::LES, ScaleIndexBase(Source::eBX), Source::eDX); + test(instructions[2], size, Operation::LSS, ScaleIndexBase(Source::eBX), Source::eDX); + } }; run_test(false, DataSize::Word);