1
0
mirror of https://github.com/TomHarte/CLK.git synced 2025-01-11 08:30:55 +00:00

Attempt 32-bit modregrm + SIB parsing.

This commit is contained in:
Thomas Harte 2022-03-08 14:39:49 -05:00
parent 41a104cc10
commit a954f23642
3 changed files with 55 additions and 32 deletions

View File

@ -591,13 +591,12 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
const uint8_t mod = *source >> 6; // i.e. mode.
const uint8_t reg = (*source >> 3) & 7; // i.e. register.
const uint8_t rm = *source & 7; // i.e. register/memory.
bool expects_sib = false;
++source;
++consumed_;
Source memreg;
// TODO: the below currently has no way to segue into fetching a SIB.
// TODO: can I just eliminate these lookup tables given the deliberate ordering within Source?
constexpr Source reg_table[8] = {
Source::eAX, Source::eCX, Source::eDX, Source::eBX,
@ -606,13 +605,39 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
constexpr Source seg_table[6] = {
Source::ES, Source::CS, Source::SS, Source::DS, Source::FS, Source::GS
};
switch(mod) {
default: {
const DataSize sizes[] = {DataSize::Byte, data_size_};
displacement_size_ = sizes[mod == 2];
// Mode 3 is the same regardless of 16/32-bit mode. So deal with that up front.
if(mod == 3) {
// Other operand is just a register.
memreg = reg_table[rm];
// LES, LDS, etc accept a memory argument only, not a register.
if(
operation_ == Operation::LES ||
operation_ == Operation::LDS ||
operation_ == Operation::LGS ||
operation_ == Operation::LSS ||
operation_ == Operation::LFS) {
undefined();
}
[[fallthrough]];
case 0: {
} else {
const DataSize sizes[] = {
DataSize::None,
DataSize::Byte,
address_size_ == AddressSize::b16 ? DataSize::Word : DataSize::DWord
};
displacement_size_ = sizes[mod];
memreg = Source::Indirect;
if(allow_sib_) {
// 32-bit decoding: the range of potential indirections is expanded,
// and may segue into obtaining a SIB.
sib_ = ScaleIndexBase(0, Source::None, reg_table[rm]);
expects_sib = rm == 4; // Indirect via eSP isn't directly supported; it's the
// escape indicator for reading a SIB.
} else {
// Classic 16-bit decoding: mode picks a displacement size,
// and a few fixed index+base pairs are defined.
constexpr ScaleIndexBase rm_table[8] = {
ScaleIndexBase(0, Source::eBX, Source::eSI),
ScaleIndexBase(0, Source::eBX, Source::eDI),
@ -624,24 +649,8 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
ScaleIndexBase(0, Source::None, Source::eBX),
};
memreg = Source::Indirect;
sib_ = rm_table[rm];
} break;
// Other operand is just a register.
case 3:
memreg = reg_table[rm];
// LES, LDS, etc accept a memory argument only, not a register.
if(
operation_ == Operation::LES ||
operation_ == Operation::LDS ||
operation_ == Operation::LGS ||
operation_ == Operation::LSS ||
operation_ == Operation::LFS) {
undefined();
}
break;
}
}
switch(modregrm_format_) {
@ -826,7 +835,11 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
default: assert(false);
}
phase_ = (displacement_size_ != DataSize::None || operand_size_ != DataSize::None) ? Phase::DisplacementOrOperand : Phase::ReadyToPost;
if(expects_sib && (source_ == Source::Indirect | destination_ == Source::Indirect)) {
phase_ = Phase::ScaleIndexBase;
} else {
phase_ = (displacement_size_ != DataSize::None || operand_size_ != DataSize::None) ? Phase::DisplacementOrOperand : Phase::ReadyToPost;
}
}
#undef undefined
@ -837,6 +850,8 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
sib_ = *source;
++source;
++consumed_;
phase_ = (displacement_size_ != DataSize::None || operand_size_ != DataSize::None) ? Phase::DisplacementOrOperand : Phase::ReadyToPost;
}
// MARK: - Displacement and operand.
@ -916,6 +931,7 @@ template <Model model> void Decoder<model>::set_32bit_protected_mode(bool enable
return;
}
allow_sib_ = enabled;
if(enabled) {
default_address_size_ = address_size_ = AddressSize::b32;
default_data_size_ = data_size_ = DataSize::DWord;

View File

@ -196,6 +196,7 @@ template <Model model> class Decoder {
DataSize default_data_size_ = DataSize::Word;
AddressSize address_size_ = AddressSize::b16;
DataSize data_size_ = DataSize::Word;
bool allow_sib_ = false;
/// Resets size capture and all fields with default values.
void reset_parsing() {

View File

@ -309,15 +309,21 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
- (void)testLDSLESEtc {
auto run_test = [](bool is_32, DataSize size) {
const auto instructions = decode<Model::i80386>({
0xc5, 0x33, // lds (%bp, %di), %si
0xc4, 0x17, // les (%bx), %dx
0x0f, 0xb2, 0x17, // lss edx, (edi)
0xc5, 0x33, // 16-bit: lds si, (bp, di); 32-bit: lds esi, (ebx)
0xc4, 0x17, // 16-bit: les dx, (bx); 32-bit: les edx, (edi)
0x0f, 0xb2, 0x17, // 16-bit: lss dx, (bx); 32-bit: lss edx, (edi)
}, is_32);
XCTAssertEqual(instructions.size(), 3);
test(instructions[0], size, Operation::LDS, ScaleIndexBase(Source::eBP, Source::eDI), Source::eSI);
test(instructions[1], size, Operation::LES, ScaleIndexBase(Source::eBX), Source::eDX);
test(instructions[2], size, Operation::LSS, ScaleIndexBase(Source::eBX), Source::eDX);
if(is_32) {
test(instructions[0], size, Operation::LDS, ScaleIndexBase(Source::eBX), Source::eSI);
test(instructions[1], size, Operation::LES, ScaleIndexBase(Source::eDI), Source::eDX);
test(instructions[2], size, Operation::LSS, ScaleIndexBase(Source::eDI), Source::eDX);
} else {
test(instructions[0], size, Operation::LDS, ScaleIndexBase(Source::eBP, Source::eDI), Source::eSI);
test(instructions[1], size, Operation::LES, ScaleIndexBase(Source::eBX), Source::eDX);
test(instructions[2], size, Operation::LSS, ScaleIndexBase(Source::eBX), Source::eDX);
}
};
run_test(false, DataSize::Word);