Attempt 32-bit modregrm + SIB parsing.

2025-07-12 02:24:07 +00:00 · 2022-03-08 14:39:49 -05:00
parent 41a104cc10
commit a954f23642
3 changed files with 55 additions and 32 deletions
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@ -591,13 +591,12 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 		const uint8_t mod = *source >> 6;		// i.e. mode.
 		const uint8_t reg = (*source >> 3) & 7;	// i.e. register.
 		const uint8_t rm = *source & 7;			// i.e. register/memory.
 		bool expects_sib = false;
 		++source;
 		++consumed_;
 		Source memreg;
 		// TODO: the below currently has no way to segue into fetching a SIB.
 		// TODO: can I just eliminate these lookup tables given the deliberate ordering within Source?
 		constexpr Source reg_table[8] = {
 			Source::eAX,		Source::eCX,		Source::eDX,		Source::eBX,
@ -606,30 +605,10 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 		constexpr Source seg_table[6] = {
 			Source::ES,	Source::CS,	Source::SS,	Source::DS,	Source::FS,	Source::GS
 		};
 		switch(mod) {
 			default: {
 				const DataSize sizes[] = {DataSize::Byte, data_size_};
 				displacement_size_ = sizes[mod == 2];
 			}
 				[[fallthrough]];
 			case 0: {
 				constexpr ScaleIndexBase rm_table[8] = {
 					ScaleIndexBase(0, Source::eBX, Source::eSI),
 					ScaleIndexBase(0, Source::eBX, Source::eDI),
 					ScaleIndexBase(0, Source::eBP, Source::eSI),
 					ScaleIndexBase(0, Source::eBP, Source::eDI),
 					ScaleIndexBase(0, Source::None, Source::eSI),
 					ScaleIndexBase(0, Source::None, Source::eDI),
 					ScaleIndexBase(0, Source::None, Source::eBP),
 					ScaleIndexBase(0, Source::None, Source::eBX),
 				};
 				memreg = Source::Indirect;
 				sib_ = rm_table[rm];
 			} break;
 		// Mode 3 is decoded identically in 16- and 32-bit addressing modes, so deal with it up front.
 		if(mod == 3) {
 			// Other operand is just a register.
 			case 3:
 			memreg = reg_table[rm];
 			// LES, LDS, etc. accept only a memory argument, not a register.
@ -641,7 +620,37 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				operation_ == Operation::LFS) {
 				undefined();
 			}
-			break;
+		} else {
 			const DataSize sizes[] = {
 				DataSize::None,
 				DataSize::Byte,
 				address_size_ == AddressSize::b16 ? DataSize::Word : DataSize::DWord
 			};
 			displacement_size_ = sizes[mod];
 			memreg = Source::Indirect;
 			if(allow_sib_) {
 				// 32-bit decoding: the range of potential indirections is expanded,
 				// and may segue into obtaining a SIB.
 				sib_ = ScaleIndexBase(0, Source::None, reg_table[rm]);
 				expects_sib = rm == 4;	// Indirect via eSP isn't directly supported; it's the
 										// escape indicator for reading a SIB.
 			} else {
 				// Classic 16-bit decoding: mode picks a displacement size,
 				// and a few fixed index+base pairs are defined.
 				constexpr ScaleIndexBase rm_table[8] = {
 					ScaleIndexBase(0, Source::eBX, Source::eSI),
 					ScaleIndexBase(0, Source::eBX, Source::eDI),
 					ScaleIndexBase(0, Source::eBP, Source::eSI),
 					ScaleIndexBase(0, Source::eBP, Source::eDI),
 					ScaleIndexBase(0, Source::None, Source::eSI),
 					ScaleIndexBase(0, Source::None, Source::eDI),
 					ScaleIndexBase(0, Source::None, Source::eBP),
 					ScaleIndexBase(0, Source::None, Source::eBX),
 				};
 				sib_ = rm_table[rm];
 			}
 		}
 		switch(modregrm_format_) {
@ -826,8 +835,12 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			default: assert(false);
 		}
 		if(expects_sib && (source_ == Source::Indirect | destination_ == Source::Indirect)) {
 			phase_ = Phase::ScaleIndexBase;
 		} else {
 			phase_ = (displacement_size_ != DataSize::None || operand_size_ != DataSize::None) ? Phase::DisplacementOrOperand : Phase::ReadyToPost;
 		}
 	}
 #undef undefined
@ -837,6 +850,8 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 		sib_ = *source;
 		++source;
 		++consumed_;
 		phase_ = (displacement_size_ != DataSize::None || operand_size_ != DataSize::None) ? Phase::DisplacementOrOperand : Phase::ReadyToPost;
 	}
 	// MARK: - Displacement and operand.
@ -916,6 +931,7 @@ template <Model model> void Decoder<model>::set_32bit_protected_mode(bool enable
 		return;
 	}
 	allow_sib_ = enabled;
 	if(enabled) {
 		default_address_size_ = address_size_ = AddressSize::b32;
 		default_data_size_ = data_size_ = DataSize::DWord;
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@ -196,6 +196,7 @@ template <Model model> class Decoder {
 		DataSize default_data_size_ = DataSize::Word;
 		AddressSize address_size_ = AddressSize::b16;
 		DataSize data_size_ = DataSize::Word;
 		bool allow_sib_ = false;
 		/// Resets size capture and all fields with default values.
 		void reset_parsing() {
--- a/SignalTests/x86DecoderTests.mm
+++ b/SignalTests/x86DecoderTests.mm
@ -309,15 +309,21 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 - (void)testLDSLESEtc {
 	auto run_test = [](bool is_32, DataSize size) {
 		const auto instructions = decode<Model::i80386>({
-			0xc5, 0x33,			// lds (%bp, %di), %si
+			0xc5, 0x33,			// 16-bit: lds si, (bp, di);	32-bit: lds esi, (ebx)
-			0xc4, 0x17,			// les (%bx), %dx
+			0xc4, 0x17,			// 16-bit: les dx, (bx);		32-bit: les edx, (edi)
-			0x0f, 0xb2, 0x17,	// lss edx, (edi)
+			0x0f, 0xb2, 0x17,	// 16-bit: lss dx, (bx);		32-bit: lss edx, (edi)
 		}, is_32);
 		XCTAssertEqual(instructions.size(), 3);
 		if(is_32) {
 			test(instructions[0], size, Operation::LDS, ScaleIndexBase(Source::eBX), Source::eSI);
 			test(instructions[1], size, Operation::LES, ScaleIndexBase(Source::eDI), Source::eDX);
 			test(instructions[2], size, Operation::LSS, ScaleIndexBase(Source::eDI), Source::eDX);
 		} else {
 			test(instructions[0], size, Operation::LDS, ScaleIndexBase(Source::eBP, Source::eDI), Source::eSI);
 			test(instructions[1], size, Operation::LES, ScaleIndexBase(Source::eBX), Source::eDX);
 			test(instructions[2], size, Operation::LSS, ScaleIndexBase(Source::eBX), Source::eDX);
 		}
 	};
 	run_test(false, DataSize::Word);