1
0
mirror of https://github.com/TomHarte/CLK.git synced 2025-01-29 04:33:04 +00:00

Merge pull request #1168 from TomHarte/8088Tests

Verify and correct 8086 instruction decoding.
This commit is contained in:
Thomas Harte 2023-10-05 17:13:22 -04:00 committed by GitHub
commit 451c687441
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 1193 additions and 172 deletions

View File

@ -31,11 +31,16 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
// MARK: - Prefixes (if present) and the opcode.
/// Sets the operation and verifies that the current repetition, if any, is compatible, discarding it otherwise.
#define SetOperation(op) \
operation_ = op; \
repetition_ = supports(op, repetition_) ? repetition_ : Repetition::None
/// Helper macro for those that follow.
#define SetOpSrcDestSize(op, src, dest, size) \
operation_ = Operation::op; \
source_ = Source::src; \
destination_ = Source::dest; \
SetOperation(Operation::op); \
source_ = Source::src; \
destination_ = Source::dest; \
operation_size_ = size
/// Covers anything which is complete as soon as the opcode is encountered.
@ -53,19 +58,19 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
/// Handles instructions of the form Ax, jjkk where the latter is implicitly an address.
#define RegAddr(op, dest, op_size, addr_size) \
SetOpSrcDestSize(op, DirectAddress, dest, op_size); \
operand_size_ = addr_size; \
displacement_size_ = addr_size; \
phase_ = Phase::DisplacementOrOperand
/// Handles instructions of the form jjkk, Ax where the former is implicitly an address.
#define AddrReg(op, source, op_size, addr_size) \
SetOpSrcDestSize(op, source, DirectAddress, op_size); \
operand_size_ = addr_size; \
displacement_size_ = addr_size; \
destination_ = Source::DirectAddress; \
phase_ = Phase::DisplacementOrOperand
/// Covers both `mem/reg, reg` and `reg, mem/reg`.
#define MemRegReg(op, format, size) \
operation_ = Operation::op; \
SetOperation(Operation::op); \
phase_ = Phase::ModRegRM; \
modregrm_format_ = ModRegRMFormat::format; \
operand_size_ = DataSize::None; \
@ -73,27 +78,28 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
/// Handles JO, JNO, JB, etc — anything with only a displacement.
#define Displacement(op, size) \
operation_ = Operation::op; \
SetOperation(Operation::op); \
phase_ = Phase::DisplacementOrOperand; \
displacement_size_ = size
/// Handles PUSH [immediate], etc — anything with only an immediate operand.
#define Immediate(op, size) \
operation_ = Operation::op; \
SetOperation(Operation::op); \
source_ = Source::Immediate; \
phase_ = Phase::DisplacementOrOperand; \
operand_size_ = size
/// Handles far CALL and far JMP — fixed four or six byte operand operations.
#define Far(op) \
operation_ = Operation::op; \
SetOperation(Operation::op); \
phase_ = Phase::DisplacementOrOperand; \
operand_size_ = DataSize::Word; \
destination_ = Source::Immediate; \
displacement_size_ = data_size(default_address_size_)
/// Handles ENTER — a fixed three-byte operation.
#define Displacement16Operand8(op) \
operation_ = Operation::op; \
SetOperation(Operation::op); \
phase_ = Phase::DisplacementOrOperand; \
displacement_size_ = DataSize::Word; \
operand_size_ = DataSize::Byte
@ -133,7 +139,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
PartialBlock(0x00, ADD); break;
case 0x06: Complete(PUSH, ES, None, data_size_); break;
case 0x07: Complete(POP, None, ES, data_size_); break;
case 0x07: Complete(POP, ES, None, data_size_); break;
PartialBlock(0x08, OR); break;
case 0x0e: Complete(PUSH, CS, None, data_size_); break;
@ -141,17 +147,20 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
// The 286 onwards have a further set of instructions
// prefixed with $0f.
case 0x0f:
RequiresMin(i80286);
phase_ = Phase::InstructionPageF;
if constexpr (model < Model::i80286) {
Complete(POP, CS, None, data_size_);
} else {
phase_ = Phase::InstructionPageF;
}
break;
PartialBlock(0x10, ADC); break;
case 0x16: Complete(PUSH, SS, None, DataSize::Word); break;
case 0x17: Complete(POP, None, SS, DataSize::Word); break;
case 0x17: Complete(POP, SS, None, DataSize::Word); break;
PartialBlock(0x18, SBB); break;
case 0x1e: Complete(PUSH, DS, None, DataSize::Word); break;
case 0x1f: Complete(POP, None, DS, DataSize::Word); break;
case 0x1f: Complete(POP, DS, None, DataSize::Word); break;
PartialBlock(0x20, AND); break;
case 0x26: segment_override_ = Source::ES; break;
@ -189,80 +198,132 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
#undef RegisterBlock
case 0x60:
RequiresMin(i80186);
Complete(PUSHA, None, None, data_size_);
if constexpr (model < Model::i80186) {
Displacement(JO, DataSize::Byte);
} else {
Complete(PUSHA, None, None, data_size_);
}
break;
case 0x61:
RequiresMin(i80186);
Complete(POPA, None, None, data_size_);
if constexpr (model < Model::i80186) {
Displacement(JNO, DataSize::Byte);
} else {
Complete(POPA, None, None, data_size_);
}
break;
case 0x62:
RequiresMin(i80186);
MemRegReg(BOUND, Reg_MemReg, data_size_);
if constexpr (model < Model::i80186) {
Displacement(JB, DataSize::Byte);
} else {
MemRegReg(BOUND, Reg_MemReg, data_size_);
}
break;
case 0x63:
RequiresMin(i80286);
MemRegReg(ARPL, MemReg_Reg, DataSize::Word);
if constexpr (model < Model::i80286) {
Displacement(JNB, DataSize::Byte);
} else {
MemRegReg(ARPL, MemReg_Reg, DataSize::Word);
}
break;
case 0x64:
RequiresMin(i80386);
segment_override_ = Source::FS;
if constexpr (model < Model::i80386) {
Displacement(JZ, DataSize::Byte);
} else {
RequiresMin(i80386);
segment_override_ = Source::FS;
}
break;
case 0x65:
if constexpr (model < Model::i80286) {
Displacement(JNZ, DataSize::Byte);
break;
}
RequiresMin(i80386);
segment_override_ = Source::GS;
break;
case 0x66:
if constexpr (model < Model::i80286) {
Displacement(JBE, DataSize::Byte);
break;
}
RequiresMin(i80386);
data_size_ = DataSize(int(default_data_size_) ^ int(DataSize::Word) ^ int(DataSize::DWord));
break;
case 0x67:
if constexpr (model < Model::i80286) {
Displacement(JNBE, DataSize::Byte);
break;
}
RequiresMin(i80386);
address_size_ = AddressSize(int(default_address_size_) ^ int(AddressSize::b16) ^ int(AddressSize::b32));
break;
case 0x68:
RequiresMin(i80286);
Immediate(PUSH, data_size_);
operation_size_ = data_size_;
if constexpr (model < Model::i80286) {
Displacement(JS, DataSize::Byte);
} else {
Immediate(PUSH, data_size_);
operation_size_ = data_size_;
}
break;
case 0x69:
RequiresMin(i80286);
MemRegReg(IMUL_3, Reg_MemReg, data_size_);
operand_size_ = data_size_;
if constexpr (model < Model::i80286) {
Displacement(JNS, DataSize::Byte);
} else {
MemRegReg(IMUL_3, Reg_MemReg, data_size_);
operand_size_ = data_size_;
}
break;
case 0x6a:
RequiresMin(i80286);
Immediate(PUSH, DataSize::Byte);
if constexpr (model < Model::i80286) {
Displacement(JP, DataSize::Byte);
} else {
Immediate(PUSH, DataSize::Byte);
}
break;
case 0x6b:
RequiresMin(i80286);
MemRegReg(IMUL_3, Reg_MemReg, data_size_);
operand_size_ = DataSize::Byte;
sign_extend_ = true;
if constexpr (model < Model::i80286) {
Displacement(JNP, DataSize::Byte);
} else {
MemRegReg(IMUL_3, Reg_MemReg, data_size_);
operand_size_ = DataSize::Byte;
sign_extend_operand_ = true;
}
break;
case 0x6c: // INSB
RequiresMin(i80186);
Complete(INS, None, None, DataSize::Byte);
if constexpr (model < Model::i80186) {
Displacement(JL, DataSize::Byte);
} else {
Complete(INS, None, None, DataSize::Byte);
}
break;
case 0x6d: // INSW/INSD
RequiresMin(i80186);
Complete(INS, None, None, data_size_);
if constexpr (model < Model::i80186) {
Displacement(JNL, DataSize::Byte);
} else {
Complete(INS, None, None, data_size_);
}
break;
case 0x6e: // OUTSB
RequiresMin(i80186);
Complete(OUTS, None, None, DataSize::Byte);
if constexpr (model < Model::i80186) {
Displacement(JLE, DataSize::Byte);
} else {
Complete(OUTS, None, None, DataSize::Byte);
}
break;
case 0x6f: // OUTSW/OUSD
RequiresMin(i80186);
Complete(OUTS, None, None, data_size_);
if constexpr (model < Model::i80186) {
Displacement(JNLE, DataSize::Byte);
} else {
Complete(OUTS, None, None, data_size_);
}
break;
case 0x70: Displacement(JO, DataSize::Byte); break;
case 0x71: Displacement(JNO, DataSize::Byte); break;
case 0x72: Displacement(JB, DataSize::Byte); break;
case 0x73: Displacement(JNB, DataSize::Byte); break;
case 0x74: Displacement(JE, DataSize::Byte); break;
case 0x75: Displacement(JNE, DataSize::Byte); break;
case 0x74: Displacement(JZ, DataSize::Byte); break;
case 0x75: Displacement(JNZ, DataSize::Byte); break;
case 0x76: Displacement(JBE, DataSize::Byte); break;
case 0x77: Displacement(JNBE, DataSize::Byte); break;
case 0x78: Displacement(JS, DataSize::Byte); break;
@ -345,11 +406,23 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
case 0xbe: RegData(MOV, eSI, data_size_); break;
case 0xbf: RegData(MOV, eDI, data_size_); break;
case 0xc0: case 0xc1:
RequiresMin(i80186);
ShiftGroup();
source_ = Source::Immediate;
operand_size_ = DataSize::Byte;
case 0xc0:
if constexpr (model >= Model::i80186) {
ShiftGroup();
source_ = Source::Immediate;
operand_size_ = DataSize::Byte;
} else {
RegData(RETnear, None, data_size_);
}
break;
case 0xc1:
if constexpr (model >= Model::i80186) {
ShiftGroup();
source_ = Source::Immediate;
operand_size_ = data_size_;
} else {
Complete(RETnear, None, None, DataSize::None);
}
break;
case 0xc2: RegData(RETnear, None, data_size_); break;
case 0xc3: Complete(RETnear, None, None, DataSize::None); break;
@ -359,12 +432,18 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
case 0xc7: MemRegReg(MOV, MemRegMOV, data_size_); break;
case 0xc8:
RequiresMin(i80186);
Displacement16Operand8(ENTER);
if constexpr (model >= Model::i80186) {
Displacement16Operand8(ENTER);
} else {
RegData(RETfar, None, data_size_);
}
break;
case 0xc9:
RequiresMin(i80186);
Complete(LEAVE, None, None, DataSize::None);
if constexpr (model >= Model::i80186) {
Complete(LEAVE, None, None, DataSize::None);
} else {
Complete(RETfar, None, None, DataSize::DWord);
}
break;
case 0xca: RegData(RETfar, None, data_size_); break;
@ -382,8 +461,6 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
case 0xd0: case 0xd1:
ShiftGroup();
source_ = Source::Immediate;
operand_ = 1;
break;
case 0xd2: case 0xd3:
ShiftGroup();
@ -391,32 +468,32 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
break;
case 0xd4: RegData(AAM, eAX, DataSize::Byte); break;
case 0xd5: RegData(AAD, eAX, DataSize::Byte); break;
// Unused: 0xd6.
case 0xd6: Complete(SALC, None, None, DataSize::Byte); break;
case 0xd7: Complete(XLAT, None, None, DataSize::Byte); break;
case 0xd8: MemRegReg(ESC, MemReg_Reg, DataSize::None); break;
case 0xd9: MemRegReg(ESC, MemReg_Reg, DataSize::None); break;
case 0xda: MemRegReg(ESC, MemReg_Reg, DataSize::None); break;
case 0xdb: MemRegReg(ESC, MemReg_Reg, DataSize::None); break;
case 0xdc: MemRegReg(ESC, MemReg_Reg, DataSize::None); break;
case 0xdd: MemRegReg(ESC, MemReg_Reg, DataSize::None); break;
case 0xde: MemRegReg(ESC, MemReg_Reg, DataSize::None); break;
case 0xdf: MemRegReg(ESC, MemReg_Reg, DataSize::None); break;
case 0xd8: MemRegReg(ESC, Reg_MemReg, data_size_); break;
case 0xd9: MemRegReg(ESC, Reg_MemReg, data_size_); break;
case 0xda: MemRegReg(ESC, Reg_MemReg, data_size_); break;
case 0xdb: MemRegReg(ESC, Reg_MemReg, data_size_); break;
case 0xdc: MemRegReg(ESC, Reg_MemReg, data_size_); break;
case 0xdd: MemRegReg(ESC, Reg_MemReg, data_size_); break;
case 0xde: MemRegReg(ESC, Reg_MemReg, data_size_); break;
case 0xdf: MemRegReg(ESC, Reg_MemReg, data_size_); break;
case 0xe0: Displacement(LOOPNE, DataSize::Byte); break;
case 0xe1: Displacement(LOOPE, DataSize::Byte); break;
case 0xe2: Displacement(LOOP, DataSize::Byte); break;
case 0xe3: Displacement(JPCX, DataSize::Byte); break;
case 0xe3: Displacement(JCXZ, DataSize::Byte); break;
case 0xe4: RegAddr(IN, eAX, DataSize::Byte, DataSize::Byte); break;
case 0xe5: RegAddr(IN, eAX, data_size_, DataSize::Byte); break;
case 0xe6: AddrReg(OUT, eAX, DataSize::Byte, DataSize::Byte); break;
case 0xe7: AddrReg(OUT, eAX, data_size_, DataSize::Byte); break;
case 0xe8: Displacement(CALLrel, data_size_); break;
case 0xe9: Displacement(JMPrel, data_size_); break;
case 0xea: Far(JMPfar); break;
case 0xeb: Displacement(JMPrel, DataSize::Byte); break;
case 0xe8: Displacement(CALLrel, data_size(address_size_)); break;
case 0xe9: Displacement(JMPrel, data_size(address_size_)); break;
case 0xea: Far(JMPfar); break;
case 0xeb: Displacement(JMPrel, DataSize::Byte); break;
case 0xec: Complete(IN, eDX, eAX, DataSize::Byte); break;
case 0xed: Complete(IN, eDX, eAX, data_size_); break;
@ -497,8 +574,8 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
case 0x71: RequiresMin(i80386); Displacement(JNO, data_size_); break;
case 0x72: RequiresMin(i80386); Displacement(JB, data_size_); break;
case 0x73: RequiresMin(i80386); Displacement(JNB, data_size_); break;
case 0x74: RequiresMin(i80386); Displacement(JE, data_size_); break;
case 0x75: RequiresMin(i80386); Displacement(JNE, data_size_); break;
case 0x74: RequiresMin(i80386); Displacement(JZ, data_size_); break;
case 0x75: RequiresMin(i80386); Displacement(JNZ, data_size_); break;
case 0x76: RequiresMin(i80386); Displacement(JBE, data_size_); break;
case 0x77: RequiresMin(i80386); Displacement(JNBE, data_size_); break;
case 0x78: RequiresMin(i80386); Displacement(JS, data_size_); break;
@ -640,6 +717,10 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
operation_ == Operation::LFS) {
undefined();
}
} else if(rm == 6 && mod == 0) {
// There's no BP direct; BP with ostensibly no offset means 'direct address' mode.
displacement_size_ = data_size(address_size_);
memreg = Source::DirectAddress;
} else {
const DataSize sizes[] = {
DataSize::None,
@ -647,29 +728,33 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
data_size(address_size_)
};
displacement_size_ = sizes[mod];
memreg = Source::Indirect;
if(address_size_ == AddressSize::b32) {
if(is_32bit(model) && address_size_ == AddressSize::b32) {
// 32-bit decoding: the range of potential indirections is expanded,
// and may segue into obtaining a SIB.
sib_ = ScaleIndexBase(0, Source::None, reg_table[rm]);
expects_sib = rm == 4; // Indirect via eSP isn't directly supported; it's the
// escape indicator for reading a SIB.
memreg = Source::Indirect;
} else {
// Classic 16-bit decoding: mode picks a displacement size,
// and a few fixed index+base pairs are defined.
//
// A base of eAX is meaningless, with the source type being the indicator
// that it should be ignored. ScaleIndexBase can't store a base of Source::None.
constexpr ScaleIndexBase rm_table[8] = {
ScaleIndexBase(0, Source::eBX, Source::eSI),
ScaleIndexBase(0, Source::eBX, Source::eDI),
ScaleIndexBase(0, Source::eBP, Source::eSI),
ScaleIndexBase(0, Source::eBP, Source::eDI),
ScaleIndexBase(0, Source::None, Source::eSI),
ScaleIndexBase(0, Source::None, Source::eDI),
ScaleIndexBase(0, Source::eSI, Source::eBX),
ScaleIndexBase(0, Source::eDI, Source::eBX),
ScaleIndexBase(0, Source::eSI, Source::eBP),
ScaleIndexBase(0, Source::eDI, Source::eBP),
ScaleIndexBase(0, Source::eSI, Source::eAX),
ScaleIndexBase(0, Source::eDI, Source::eAX),
ScaleIndexBase(0, Source::None, Source::eBP),
ScaleIndexBase(0, Source::None, Source::eBX),
ScaleIndexBase(0, Source::eBX, Source::eAX),
};
sib_ = rm_table[rm];
memreg = (rm >= 4 && rm != 6) ? Source::IndirectNoBase : Source::Indirect;
}
}
@ -686,58 +771,86 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
} break;
case ModRegRMFormat::MemRegTEST_to_IDIV:
source_ = destination_ = memreg;
source_ = memreg;
switch(reg) {
default: undefined();
default:
// case 1 is treated as another form of TEST on the 8086.
// (and, I guess, the 80186?)
if constexpr (model >= Model::i80286) {
undefined();
}
[[fallthrough]];
case 0: operation_ = Operation::TEST; break;
case 2: operation_ = Operation::NOT; break;
case 3: operation_ = Operation::NEG; break;
case 4: operation_ = Operation::MUL; break;
case 5: operation_ = Operation::IMUL_1; break;
case 6: operation_ = Operation::DIV; break;
case 7: operation_ = Operation::IDIV; break;
case 0:
destination_ = memreg;
source_ = Source::Immediate;
operand_size_ = operation_size_;
SetOperation(Operation::TEST);
break;
case 2: SetOperation(Operation::NOT); break;
case 3: SetOperation(Operation::NEG); break;
case 4: SetOperation(Operation::MUL); break;
case 5: SetOperation(Operation::IMUL_1); break;
case 6: SetOperation(Operation::DIV); break;
case 7: SetOperation(Operation::IDIV); break;
}
break;
case ModRegRMFormat::Seg_MemReg:
case ModRegRMFormat::MemReg_Seg:
case ModRegRMFormat::MemReg_Seg: {
// On the 8086, only two bits of reg are used.
const int masked_reg = model >= Model::i80286 ? reg : reg & 3;
// The 16-bit chips have four segment registers;
// the 80386 onwards has six.
if(!is_32bit(model) && reg > 3) {
undefined();
} else if(reg > 5) {
undefined();
if constexpr (is_32bit(model)) {
if(masked_reg > 5) {
undefined();
}
} else {
if(masked_reg > 3) {
undefined();
}
}
if(modregrm_format_ == ModRegRMFormat::Seg_MemReg) {
source_ = memreg;
destination_ = seg_table[reg];
destination_ = seg_table[masked_reg];
// 80286 and later disallow MOV to CS.
if(model >= Model::i80286 && destination_ == Source::CS) {
undefined();
}
} else {
source_ = seg_table[reg];
source_ = seg_table[masked_reg];
destination_ = memreg;
}
break;
} break;
case ModRegRMFormat::MemRegROL_to_SAR:
destination_ = memreg;
switch(reg) {
default: undefined();
default:
if constexpr (model == Model::i8086) {
if(source_ == Source::eCX) {
SetOperation(Operation::SETMOC);
} else {
SetOperation(Operation::SETMO);
}
} else {
undefined();
}
break;
case 0: operation_ = Operation::ROL; break;
case 1: operation_ = Operation::ROR; break;
case 2: operation_ = Operation::RCL; break;
case 3: operation_ = Operation::RCR; break;
case 4: operation_ = Operation::SAL; break;
case 5: operation_ = Operation::SHR; break;
case 7: operation_ = Operation::SAR; break;
case 0: SetOperation(Operation::ROL); break;
case 1: SetOperation(Operation::ROR); break;
case 2: SetOperation(Operation::RCL); break;
case 3: SetOperation(Operation::RCR); break;
case 4: SetOperation(Operation::SAL); break;
case 5: SetOperation(Operation::SHR); break;
case 7: SetOperation(Operation::SAR); break;
}
break;
@ -747,8 +860,8 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
switch(reg) {
default: undefined();
case 0: operation_ = Operation::INC; break;
case 1: operation_ = Operation::DEC; break;
case 0: SetOperation(Operation::INC); break;
case 1: SetOperation(Operation::DEC); break;
}
break;
@ -756,16 +869,23 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
source_ = destination_ = memreg;
switch(reg) {
default: undefined();
default:
// case 7 is treated as another form of PUSH on the 8086.
// (and, I guess, the 80186?)
if constexpr (model >= Model::i80286) {
undefined();
}
[[fallthrough]];
case 6: SetOperation(Operation::PUSH); break;
case 0: operation_ = Operation::INC; break;
case 1: operation_ = Operation::DEC; break;
case 2: operation_ = Operation::CALLabs; break;
case 3: operation_ = Operation::CALLfar; break;
case 4: operation_ = Operation::JMPabs; break;
case 5: operation_ = Operation::JMPfar; break;
case 6: operation_ = Operation::PUSH; break;
case 0: SetOperation(Operation::INC); break;
case 1: SetOperation(Operation::DEC); break;
case 2: SetOperation(Operation::CALLabs); break;
case 3: SetOperation(Operation::CALLfar); break;
case 4: SetOperation(Operation::JMPabs); break;
case 5: SetOperation(Operation::JMPfar); break;
}
// TODO: CALLfar and JMPfar aren't correct above; find out what is.
break;
case ModRegRMFormat::MemRegSingleOperand:
@ -787,17 +907,17 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
source_ = Source::Immediate;
destination_ = memreg;
operand_size_ = (modregrm_format_ == ModRegRMFormat::MemRegADD_to_CMP_SignExtend) ? DataSize::Byte : operation_size_;
sign_extend_ = true; // Will be effective only if modregrm_format_ == ModRegRMFormat::MemRegADD_to_CMP_SignExtend.
sign_extend_operand_ = true; // Will be effective only if modregrm_format_ == ModRegRMFormat::MemRegADD_to_CMP_SignExtend.
switch(reg) {
default: operation_ = Operation::ADD; break;
case 1: operation_ = Operation::OR; break;
case 2: operation_ = Operation::ADC; break;
case 3: operation_ = Operation::SBB; break;
case 4: operation_ = Operation::AND; break;
case 5: operation_ = Operation::SUB; break;
case 6: operation_ = Operation::XOR; break;
case 7: operation_ = Operation::CMP; break;
default: SetOperation(Operation::ADD); break;
case 1: SetOperation(Operation::OR); break;
case 2: SetOperation(Operation::ADC); break;
case 3: SetOperation(Operation::SBB); break;
case 4: SetOperation(Operation::AND); break;
case 5: SetOperation(Operation::SUB); break;
case 6: SetOperation(Operation::XOR); break;
case 7: SetOperation(Operation::CMP); break;
}
break;
@ -807,12 +927,12 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
switch(reg) {
default: undefined();
case 0: operation_ = Operation::SLDT; break;
case 1: operation_ = Operation::STR; break;
case 2: operation_ = Operation::LLDT; break;
case 3: operation_ = Operation::LTR; break;
case 4: operation_ = Operation::VERR; break;
case 5: operation_ = Operation::VERW; break;
case 0: SetOperation(Operation::SLDT); break;
case 1: SetOperation(Operation::STR); break;
case 2: SetOperation(Operation::LLDT); break;
case 3: SetOperation(Operation::LTR); break;
case 4: SetOperation(Operation::VERR); break;
case 5: SetOperation(Operation::VERW); break;
}
break;
@ -822,12 +942,12 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
switch(reg) {
default: undefined();
case 0: operation_ = Operation::SGDT; break;
case 1: operation_ = Operation::SIDT; break;
case 2: operation_ = Operation::LGDT; break;
case 3: operation_ = Operation::LIDT; break;
case 4: operation_ = Operation::SMSW; break;
case 6: operation_ = Operation::LMSW; break;
case 0: SetOperation(Operation::SGDT); break;
case 1: SetOperation(Operation::SIDT); break;
case 2: SetOperation(Operation::LGDT); break;
case 3: SetOperation(Operation::LIDT); break;
case 4: SetOperation(Operation::SMSW); break;
case 6: SetOperation(Operation::LMSW); break;
}
break;
@ -839,10 +959,10 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
switch(reg) {
default: undefined();
case 4: operation_ = Operation::BT; break;
case 5: operation_ = Operation::BTS; break;
case 6: operation_ = Operation::BTR; break;
case 7: operation_ = Operation::BTC; break;
case 4: SetOperation(Operation::BT); break;
case 5: SetOperation(Operation::BTS); break;
case 6: SetOperation(Operation::BTR); break;
case 7: SetOperation(Operation::BTC); break;
}
break;
@ -857,6 +977,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
}
#undef undefined
#undef SetOperation
// MARK: - ScaleIndexBase
@ -894,16 +1015,28 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
if(bytes_to_consume == outstanding_bytes) {
phase_ = Phase::ReadyToPost;
switch(displacement_size_) {
case DataSize::None: displacement_ = 0; break;
case DataSize::Byte: displacement_ = int8_t(inward_data_); break;
case DataSize::Word: displacement_ = int16_t(inward_data_); break;
case DataSize::DWord: displacement_ = int32_t(inward_data_); break;
// TODO: whether the displacement is signed appears to depend on the opcode.
// Find an appropriate table.
if(!sign_extend_displacement_) {
switch(displacement_size_) {
case DataSize::None: displacement_ = 0; break;
case DataSize::Byte: displacement_ = decltype(displacement_)(uint8_t(inward_data_)); break;
case DataSize::Word: displacement_ = decltype(displacement_)(uint16_t(inward_data_)); break;
case DataSize::DWord: displacement_ = decltype(displacement_)(uint32_t(inward_data_)); break;
}
} else {
switch(displacement_size_) {
case DataSize::None: displacement_ = 0; break;
case DataSize::Byte: displacement_ = int8_t(inward_data_); break;
case DataSize::Word: displacement_ = int16_t(inward_data_); break;
case DataSize::DWord: displacement_ = int32_t(inward_data_); break;
}
}
inward_data_ >>= bit_size(displacement_size_);
// Use inequality of sizes as a test for necessary sign extension.
if(operand_size_ == data_size_ || !sign_extend_) {
if(operand_size_ == data_size_ || !sign_extend_operand_) {
operand_ = decltype(operand_)(inward_data_);
} else {
switch(operand_size_) {

View File

@ -193,8 +193,10 @@ template <Model model> class Decoder {
DataSize operand_size_ = DataSize::None; // i.e. size of in-stream operand, if any.
DataSize operation_size_ = DataSize::None; // i.e. size of data manipulated by the operation.
bool sign_extend_ = false; // If set then sign extend the operand up to the operation size;
bool sign_extend_operand_ = false; // If set then sign extend the operand up to the operation size;
// otherwise it'll be zero-padded.
bool sign_extend_displacement_ = false; // Much as above; 'displacement' is used internally for both
// displacements and offsets, so signage will vary.
// Prefix capture fields.
Repetition repetition_ = Repetition::None;
@ -222,7 +224,8 @@ template <Model model> class Decoder {
sib_ = ScaleIndexBase();
next_inward_data_shift_ = 0;
inward_data_ = 0;
sign_extend_ = false;
sign_extend_operand_ = false;
sign_extend_displacement_ = false;
}
};

View File

@ -0,0 +1,567 @@
//
// Instruction.cpp
// Clock Signal
//
// Created by Thomas Harte on 17/09/2023.
// Copyright © 2023 Thomas Harte. All rights reserved.
//
#include "Instruction.hpp"
#include <cassert>
#include <iomanip>
#include <sstream>
using namespace InstructionSet::x86;
/// @returns @c true if @c operation is one of the conditional jumps, the LOOP family,
/// JCXZ, CALLrel or JMPrel; @c false for every other operation.
bool InstructionSet::x86::has_displacement(Operation operation) {
	switch(operation) {
		// Conditional jumps.
		case Operation::JO:
		case Operation::JNO:
		case Operation::JB:
		case Operation::JNB:
		case Operation::JZ:
		case Operation::JNZ:
		case Operation::JBE:
		case Operation::JNBE:
		case Operation::JS:
		case Operation::JNS:
		case Operation::JP:
		case Operation::JNP:
		case Operation::JL:
		case Operation::JNL:
		case Operation::JLE:
		case Operation::JNLE:

		// Counted loops and the CX test.
		case Operation::LOOPNE:
		case Operation::LOOPE:
		case Operation::LOOP:
		case Operation::JCXZ:

		// Relative call and jump.
		case Operation::CALLrel:
		case Operation::JMPrel:
			return true;

		default:
			break;
	}

	return false;
}
/// @returns The maximum number of operands to display for @c operation: 0, 1 or 2.
/// Any operation not explicitly listed below is reported as having two.
int InstructionSet::x86::max_displayed_operands(Operation operation) {
	switch(operation) {
		//
		// No operands.
		//

		// Pedantically, these have a displacement rather than an operand.
		case Operation::JO:
		case Operation::JNO:
		case Operation::JB:
		case Operation::JNB:
		case Operation::JZ:
		case Operation::JNZ:
		case Operation::JBE:
		case Operation::JNBE:
		case Operation::JS:
		case Operation::JNS:
		case Operation::JP:
		case Operation::JNP:
		case Operation::JL:
		case Operation::JNL:
		case Operation::JLE:
		case Operation::JNLE:
		case Operation::LOOPNE:
		case Operation::LOOPE:
		case Operation::LOOP:
		case Operation::JCXZ:
		case Operation::CALLrel:
		case Operation::JMPrel:

		// Genuine zero-operand instructions.
		case Operation::CMPS:
		case Operation::LODS:
		case Operation::MOVS:
		case Operation::SCAS:
		case Operation::STOS:
		case Operation::CLC:
		case Operation::CLD:
		case Operation::CLI:
		case Operation::STC:
		case Operation::STD:
		case Operation::STI:
		case Operation::CMC:
		case Operation::LAHF:
		case Operation::SAHF:
		case Operation::AAA:
		case Operation::AAS:
		case Operation::DAA:
		case Operation::DAS:
		case Operation::CBW:
		case Operation::CWD:
		case Operation::INTO:
		case Operation::PUSHF:
		case Operation::POPF:
		case Operation::IRET:
		case Operation::NOP:
		case Operation::XLAT:
		case Operation::SALC:
		case Operation::Invalid:
			return 0;

		//
		// A single operand.
		//
		case Operation::INC:
		case Operation::DEC:
		case Operation::POP:
		case Operation::PUSH:
		case Operation::MUL:
		case Operation::IMUL_1:
		case Operation::IDIV:
		case Operation::DIV:
		case Operation::ESC:
		case Operation::AAM:
		case Operation::AAD:
		case Operation::INT:
		case Operation::JMPabs:
		case Operation::JMPfar:
		case Operation::CALLabs:
		case Operation::CALLfar:
		case Operation::NEG:
		case Operation::NOT:
		case Operation::RETnear:
		case Operation::RETfar:
			return 1;

		//
		// Everything else: two operands.
		//
		default:
			break;
	}

	return 2;
}
/// Provides the lowercase mnemonic for @c operation.
///
/// @param operation The operation to name.
/// @param size Selects the b/w/d suffix of the string operations (CMPS, LODS, MOVS, SCAS, STOS);
///		it is not consulted for any other operation.
/// @param model Disambiguates SETMO and SETMOC, which are reported as "setmo"/"setmoc" on the
///		8086 and as "enter"/"bound" on every other model.
/// @returns The mnemonic; asserts and returns an empty string for operations not listed here.
std::string InstructionSet::x86::to_string(Operation operation, DataSize size, Model model) {
	// Appends the size suffix to a string-operation stem. The table is indexed by the integer
	// value of @c size: 0 -> 'b', 1 -> 'w', 2 -> 'd'; index 3 produces a lone "?".
	const auto with_size_suffix = [size](const char *stem) -> std::string {
		constexpr char suffixes[] = {'b', 'w', 'd'};
		const int index = static_cast<int>(size);
		if(index == 3) {
			return "?";
		}
		return std::string(stem) + suffixes[index];
	};

	switch(operation) {
		// ASCII and decimal adjustment; sign extension.
		case Operation::AAA:	return "aaa";
		case Operation::AAD:	return "aad";
		case Operation::AAM:	return "aam";
		case Operation::AAS:	return "aas";
		case Operation::DAA:	return "daa";
		case Operation::DAS:	return "das";
		case Operation::CBW:	return "cbw";
		case Operation::CWD:	return "cwd";

		// Processor control.
		case Operation::ESC:	return "esc";
		case Operation::HLT:	return "hlt";
		case Operation::WAIT:	return "wait";

		// Arithmetic.
		case Operation::ADC:	return "adc";
		case Operation::ADD:	return "add";
		case Operation::SBB:	return "sbb";
		case Operation::SUB:	return "sub";
		case Operation::MUL:	return "mul";
		case Operation::IMUL_1:	return "imul";
		case Operation::DIV:	return "div";
		case Operation::IDIV:	return "idiv";
		case Operation::INC:	return "inc";
		case Operation::DEC:	return "dec";

		// Port I/O.
		case Operation::IN:		return "in";
		case Operation::OUT:	return "out";

		// Conditional jumps.
		case Operation::JO:		return "jo";
		case Operation::JNO:	return "jno";
		case Operation::JB:		return "jb";
		case Operation::JNB:	return "jnb";
		case Operation::JZ:		return "jz";
		case Operation::JNZ:	return "jnz";
		case Operation::JBE:	return "jbe";
		case Operation::JNBE:	return "jnbe";
		case Operation::JS:		return "js";
		case Operation::JNS:	return "jns";
		case Operation::JP:		return "jp";
		case Operation::JNP:	return "jnp";
		case Operation::JL:		return "jl";
		case Operation::JNL:	return "jnl";
		case Operation::JLE:	return "jle";
		case Operation::JNLE:	return "jnle";

		// Calls, returns and unconditional jumps; the absolute and relative forms share a mnemonic.
		case Operation::CALLabs:
		case Operation::CALLrel:
			return "call";
		case Operation::CALLfar:	return "callf";
		case Operation::IRET:		return "iret";
		case Operation::RETfar:		return "retf";
		case Operation::RETnear:	return "retn";
		case Operation::JMPabs:
		case Operation::JMPrel:
			return "jmp";
		case Operation::JMPfar:		return "jmpf";
		case Operation::JCXZ:		return "jcxz";

		// Interrupts.
		case Operation::INT:	return "int";
		case Operation::INTO:	return "into";

		// Flag and pointer loads.
		case Operation::LAHF:	return "lahf";
		case Operation::SAHF:	return "sahf";
		case Operation::LDS:	return "lds";
		case Operation::LES:	return "les";
		case Operation::LEA:	return "lea";

		// String operations, which carry their size in the mnemonic.
		case Operation::CMPS:	return with_size_suffix("cmps");
		case Operation::LODS:	return with_size_suffix("lods");
		case Operation::MOVS:	return with_size_suffix("movs");
		case Operation::SCAS:	return with_size_suffix("scas");
		case Operation::STOS:	return with_size_suffix("stos");

		// Loops.
		case Operation::LOOP:	return "loop";
		case Operation::LOOPE:	return "loope";
		case Operation::LOOPNE:	return "loopne";

		// Moves and logic.
		case Operation::MOV:	return "mov";
		case Operation::NEG:	return "neg";
		case Operation::NOT:	return "not";
		case Operation::AND:	return "and";
		case Operation::OR:		return "or";
		case Operation::XOR:	return "xor";
		case Operation::NOP:	return "nop";

		// Stack.
		case Operation::POP:	return "pop";
		case Operation::POPF:	return "popf";
		case Operation::PUSH:	return "push";
		case Operation::PUSHF:	return "pushf";

		// Rotates and shifts.
		case Operation::RCL:	return "rcl";
		case Operation::RCR:	return "rcr";
		case Operation::ROL:	return "rol";
		case Operation::ROR:	return "ror";
		case Operation::SAL:	return "sal";
		case Operation::SAR:	return "sar";
		case Operation::SHR:	return "shr";

		// Flag manipulation.
		case Operation::CLC:	return "clc";
		case Operation::CLD:	return "cld";
		case Operation::CLI:	return "cli";
		case Operation::STC:	return "stc";
		case Operation::STD:	return "std";
		case Operation::STI:	return "sti";
		case Operation::CMC:	return "cmc";

		// Comparison, exchange and translation.
		case Operation::CMP:	return "cmp";
		case Operation::TEST:	return "test";
		case Operation::XCHG:	return "xchg";
		case Operation::XLAT:	return "xlat";
		case Operation::SALC:	return "salc";

		// These two are named differently depending on the model.
		case Operation::SETMO:
			return (model == Model::i8086) ? "setmo" : "enter";
		case Operation::SETMOC:
			return (model == Model::i8086) ? "setmoc" : "bound";

		case Operation::Invalid:	return "invalid";

		default:
			// No mnemonic has been provided for this operation.
			assert(false);
			return "";
	}
}
/// Reports whether the printed name of @c operation makes an explicit operand-size qualifier unnecessary.
///
/// The memory-operand printer in this file consults this before emitting a 'byte'/'word'/'dword'
/// qualifier; a result of @c true suppresses the qualifier.
///
/// @returns @c true for the operations listed below; @c false for everything else.
bool InstructionSet::x86::mnemonic_implies_data_size(Operation operation) {
	switch(operation) {
		// String operations: the mnemonic itself is printed with a size suffix (e.g. lodsb, movsw).
		case Operation::CMPS:
		case Operation::LODS:
		case Operation::MOVS:
		case Operation::SCAS:
		case Operation::STOS:

		// Also printed without a size qualifier on their memory operand.
		case Operation::JMPrel:
		case Operation::LEA:
			return true;

		default:
			return false;
	}
}
/// Provides the name of @c size as used for operand-size qualifiers: 'byte', 'word' or 'dword'.
///
/// The table is indexed directly by the numeric value of @c size; the fourth entry, '?', is a
/// placeholder for the remaining enumerator (presumably DataSize::None — confirm against the enum).
std::string InstructionSet::x86::to_string(DataSize size) {
	static constexpr const char *names[] = { "byte", "word", "dword", "?" };
	const auto index = static_cast<int>(size);
	return names[index];
}
/// Provides the printable name of @c source when accessed at @c size.
///
/// General-purpose registers are named according to @c size, e.g. Source::eAX yields 'al', 'ax' or
/// 'eax'; the numeric value of @c size selects the byte, word or dword name in that order, and a
/// fourth value yields '?' (presumably DataSize::None — confirm against the enum).
///
/// Segment registers are named independently of @c size. Sources that are not registers produce
/// a descriptive placeholder rather than assembler syntax; anything unrecognised produces '???'.
std::string InstructionSet::x86::to_string(Source source, DataSize size) {
	// Selects whichever of the three supplied names corresponds to the requested size.
	const auto sized = [size](const char *byte, const char *word, const char *dword) -> std::string {
		const char *const names[] = { byte, word, dword, "?" };
		return names[static_cast<int>(size)];
	};

	switch(source) {
		// General-purpose registers. The final four enumerators name either a high-byte register
		// or a pointer/index register, depending on whether the access is byte-sized.
		case Source::eAX:		return sized("al", "ax", "eax");
		case Source::eCX:		return sized("cl", "cx", "ecx");
		case Source::eDX:		return sized("dl", "dx", "edx");
		case Source::eBX:		return sized("bl", "bx", "ebx");
		case Source::eSPorAH:	return sized("ah", "sp", "esp");
		case Source::eBPorCH:	return sized("ch", "bp", "ebp");
		case Source::eSIorDH:	return sized("dh", "si", "esi");
		case Source::eDIorBH:	return sized("bh", "di", "edi");

		// Segment registers.
		case Source::ES:	return "es";
		case Source::CS:	return "cs";
		case Source::SS:	return "ss";
		case Source::DS:	return "ds";
		case Source::FS:	return "fs";	// Previously misspelt as "fd".
		case Source::GS:	return "gs";

		// Non-register sources; these are descriptive placeholders only.
		case Source::None:				return "0";
		case Source::DirectAddress:		return "DirectAccess";
		case Source::Immediate:			return "Immediate";
		case Source::Indirect:			return "Indirect";
		case Source::IndirectNoBase:	return "IndirectNoBase";

		default: return "???";
	}
}
namespace {

/// Formats @c value as upper-case hexadecimal, zero-padded to at least @c digits characters.
///
/// If @c digits is 2 or 4 then @c value is first truncated to 8 or 16 bits respectively;
/// for any other @c digits the full @c int is printed.
///
/// @param with_suffix If @c true, a trailing 'h' is appended.
std::string to_hex(int value, int digits, bool with_suffix = true) {
	std::stringstream stream;

	// std::setw applies only to the next item output, which is the number itself.
	stream << std::setfill('0') << std::uppercase << std::hex << std::setw(digits);

	// The unary plus promotes the truncated value back to an integer so that it is
	// printed as a number rather than as a character.
	if(digits == 2) {
		stream << +uint8_t(value);
	} else if(digits == 4) {
		stream << +uint16_t(value);
	} else {
		stream << value;
	}

	if(with_suffix) {
		stream << 'h';
	}
	return stream.str();
}

}
/// Provides the printable version of @c pointer as an operand of @c instruction.
///
/// * Immediates are printed as hexadecimal, using @c immediate_length as a hint for the digit count.
/// * Memory operands are printed as `[size ]segment:[address]`, where the size qualifier is omitted
///   if the instruction's mnemonic already implies it, and the segment is the instruction's override
///   if any, otherwise the pointer's default segment, otherwise DS.
/// * Anything else is printed as a register name at @c operation_size.
///
/// @param offset_length Hint for the digit count of any offset added to an indirect address.
/// @param immediate_length Hint for the digit count of an immediate operand.
/// @param operation_size Size at which to print; if DataSize::None, the instruction's own size is used.
template <bool is_32bit>
std::string InstructionSet::x86::to_string(
	DataPointer pointer,
	Instruction<is_32bit> instruction,
	int offset_length,
	int immediate_length,
	DataSize operation_size
) {
	using Source = InstructionSet::x86::Source;

	if(operation_size == InstructionSet::x86::DataSize::None) {
		operation_size = instruction.operation_size();
	}

	// Appends `prefix` plus `value` in hex to `target`, picking the digit count from `length`:
	//
	//	* a length of 0 prints nothing at all for a zero value;
	//	* a length of 0 or 2 prints two digits if the value could have been a single byte, i.e. it
	//	  either has an empty top byte or looks like a sign-extended 8-bit quantity;
	//	* everything else prints four digits.
	//
	// Whether sign extension was applied appropriately is left to functionality tests.
	const auto append = [](std::stringstream &target, auto value, int length, const char *prefix) {
		if(!length && !value) {
			return;
		}

		const bool fits_in_byte =
			!(value & 0xff00) ||
			((value & 0xff80) == 0xff80) ||
			((value & 0xff80) == 0x0000);
		const bool allow_short = (length == 0) || (length == 2);

		target << prefix << to_hex(value, (allow_short && fits_in_byte) ? 2 : 4);
	};

	const Source source = pointer.source<false>();

	// Immediates: just the value.
	if(source == Source::Immediate) {
		std::stringstream immediate;
		append(immediate, instruction.operand(), immediate_length, "");
		return immediate.str();
	}

	// Direct registers: to_string handles all of those names correctly.
	const bool is_memory =
		source == Source::DirectAddress ||
		source == Source::Indirect ||
		source == Source::IndirectNoBase;
	if(!is_memory) {
		return InstructionSet::x86::to_string(source, operation_size);
	}

	// Memory accesses: optional size qualifier, then segment, then the bracketed address.
	std::stringstream text;
	if(!InstructionSet::x86::mnemonic_implies_data_size(instruction.operation)) {
		text << InstructionSet::x86::to_string(operation_size) << ' ';
	}

	// Segment priority: explicit override, then the pointer's default, then DS.
	Source segment = instruction.data_segment();
	if(segment == Source::None) {
		segment = pointer.default_segment();
	}
	if(segment == Source::None) {
		segment = Source::DS;
	}
	text << InstructionSet::x86::to_string(segment, InstructionSet::x86::DataSize::None) << ':' << '[';

	if(source == Source::DirectAddress) {
		// A direct address is always printed in full.
		text << to_hex(instruction.offset(), 4);
	} else {
		if(source == Source::Indirect) {
			text << InstructionSet::x86::to_string(pointer.base(), data_size(instruction.address_size()));
			if(pointer.index() != Source::None) {
				text << '+' << InstructionSet::x86::to_string(pointer.index(), data_size(instruction.address_size()));
			}
		} else {
			text << InstructionSet::x86::to_string(pointer.index(), data_size(instruction.address_size()));
		}

		// Register-relative addresses may carry an additional offset.
		append(text, instruction.offset(), offset_length, "+");
	}

	text << ']';
	return text.str();
}
/// Provides the printable version of @c instruction: an optional repetition prefix, then the
/// mnemonic for @c model, then a space, then whichever operands apply to the operation.
///
/// @c offset_length and @c immediate_length are passed through to the operand printer as hints
/// for the number of hexadecimal digits to print; @c offset_length is also used as the digit
/// count for any printed displacement.
///
/// The result may carry trailing whitespace, since the space after the mnemonic is added
/// unconditionally.
template<bool is_32bit>
std::string InstructionSet::x86::to_string(
Instruction<is_32bit> instruction,
Model model,
int offset_length,
int immediate_length
) {
std::string operation;
// Add a repetition prefix; it'll be one of 'rep', 'repe' or 'repne'.
switch(instruction.repetition()) {
case Repetition::None: break;
case Repetition::RepE:
// RepE is printed as plain 'rep' for MOVS, STOS and LODS, and as 'repe' for everything else.
switch(instruction.operation) {
default:
operation += "repe ";
break;
case Operation::MOVS:
case Operation::STOS:
case Operation::LODS:
operation += "rep ";
break;
}
break;
case Repetition::RepNE:
operation += "repne ";
break;
}
// Add operation itself.
operation += to_string(instruction.operation, instruction.operation_size(), model);
operation += " ";
// Deal with a few special cases up front.
switch(instruction.operation) {
// General case: print the destination (only if the operation displays more than one operand),
// then the source, each only if it is not Source::None; then any displacement.
default: {
const int operands = max_displayed_operands(instruction.operation);
const bool displacement = has_displacement(instruction.operation);
const bool print_first = operands > 1 && instruction.destination().source() != Source::None;
if(print_first) {
operation += to_string(instruction.destination(), instruction, offset_length, immediate_length);
}
if(operands > 0 && instruction.source().source() != Source::None) {
if(print_first) operation += ", ";
operation += to_string(instruction.source(), instruction, offset_length, immediate_length);
}
if(displacement) {
operation += to_hex(instruction.displacement(), offset_length);
}
} break;
// Far calls and jumps: an immediate target is printed as 'far 0xSSSS:0xOOOO', using C-style
// prefixes rather than the 'h' suffix; any other target is printed as an ordinary operand.
case Operation::CALLfar:
case Operation::JMPfar: {
switch(instruction.destination().source()) {
case Source::Immediate:
operation += "far 0x";
operation += to_hex(instruction.segment(), 4, false);
operation += ":0x";
operation += to_hex(instruction.offset(), 4, false);
break;
default:
operation += to_string(instruction.destination(), instruction, offset_length, immediate_length);
break;
}
} break;
case Operation::LDS:
case Operation::LES: // The test set labels the pointer type as dword, which I guess is technically accurate.
// A full 32 bits will be loaded from that address in 16-bit mode.
operation += to_string(instruction.destination(), instruction, offset_length, immediate_length);
operation += ", ";
operation += to_string(instruction.source(), instruction, offset_length, immediate_length, InstructionSet::x86::DataSize::DWord);
break;
// IN: a port given as a direct address is printed as a bare two-digit hex number; otherwise the
// port operand is printed at word size regardless of the instruction's own operation size.
case Operation::IN:
operation += to_string(instruction.destination(), instruction, offset_length, immediate_length);
operation += ", ";
switch(instruction.source().source()) {
case Source::DirectAddress:
operation += to_hex(instruction.offset(), 2, true);
break;
default:
operation += to_string(instruction.source(), instruction, offset_length, immediate_length, InstructionSet::x86::DataSize::Word);
break;
}
break;
// OUT: as per IN, but the port is the destination and is therefore printed first.
case Operation::OUT:
switch(instruction.destination().source()) {
case Source::DirectAddress:
operation += to_hex(instruction.offset(), 2, true);
break;
default:
operation += to_string(instruction.destination(), instruction, offset_length, immediate_length, InstructionSet::x86::DataSize::Word);
break;
}
operation += ", ";
operation += to_string(instruction.source(), instruction, offset_length, immediate_length);
break;
// Rolls and shifts list eCX as a source on the understanding that everyone knows that rolls and shifts
// use CL even when they're shifting or rolling a word-sized quantity.
case Operation::RCL: case Operation::RCR:
case Operation::ROL: case Operation::ROR:
case Operation::SAL: case Operation::SAR:
case Operation::SHR:
case Operation::SETMO: case Operation::SETMOC:
operation += to_string(instruction.destination(), instruction, offset_length, immediate_length);
switch(instruction.source().source()) {
case Source::None: break;
case Source::eCX: operation += ", cl"; break;
case Source::Immediate:
// Providing an immediate operand of 1 is a little future-proofing by the decoder; the '1'
// is actually implicit on a real 8088. So omit it.
if(instruction.operand() == 1) break;
[[fallthrough]];
default:
operation += ", ";
operation += to_string(instruction.source(), instruction, offset_length, immediate_length);
break;
}
break;
}
return operation;
}
// Although advertised, 32-bit printing is incomplete.
//
//template std::string InstructionSet::x86::to_string(
// Instruction<true> instruction,
// Model model,
// int offset_length,
// int immediate_length
//);
// Explicit instantiation of the instruction printer for Instruction<false>, i.e. is_32bit = false;
// the template is defined in this file, so this is the only instantiation it provides.
template std::string InstructionSet::x86::to_string(
Instruction<false> instruction,
Model model,
int offset_length,
int immediate_length
);

View File

@ -9,8 +9,11 @@
#ifndef InstructionSets_x86_Instruction_h
#define InstructionSets_x86_Instruction_h
#include "Model.hpp"
#include <cstddef>
#include <cstdint>
#include <string>
#include <type_traits>
namespace InstructionSet::x86 {
@ -82,19 +85,19 @@ enum class Operation: uint8_t {
/// Reads from the port specified by source to the destination.
IN,
/// Writes from the port specified by destination from the source.
/// Writes to the port specified by destination from the source.
OUT,
// Various jumps; see the displacement to calculate targets.
JO, JNO, JB, JNB, JE, JNE, JBE, JNBE,
JO, JNO, JB, JNB, JZ, JNZ, JBE, JNBE,
JS, JNS, JP, JNP, JL, JNL, JLE, JNLE,
/// Far call; see the segment() and offset() fields.
CALLfar,
/// Relative call; see displacement().
CALLrel,
/// Near call.
CALLabs,
/// Relative call; see displacement().
CALLrel,
/// Far call; if destination is Source::Immediate then see the segment() and offset() fields; otherwise take segment and offset by indirection.
CALLfar,
/// Return from interrupt.
IRET,
/// Near return; if source is not ::None then it will be an ::Immediate indicating how many additional bytes to remove from the stack.
@ -105,10 +108,10 @@ enum class Operation: uint8_t {
JMPabs,
/// Near jump with a relative destination.
JMPrel,
/// Far jump to the indicated segment and offset.
/// Far jump; if destination is Source::Immediate then see the segment() and offset() fields; otherwise take segment and offset by indirection.
JMPfar,
/// Relative jump performed only if CX = 0; see the displacement.
JPCX,
JCXZ,
/// Generates a software interrupt of the level stated in the operand.
INT,
/// Generates a software interrupt of level 4 if overflow is set.
@ -153,7 +156,7 @@ enum class Operation: uint8_t {
XOR,
/// NOP; no further fields.
NOP,
/// POP from the stack to destination.
/// POP from the stack to source.
POP,
/// POP from the stack to the flags register.
POPF,
@ -161,19 +164,27 @@ enum class Operation: uint8_t {
PUSH,
/// PUSH the flags register to the stack.
PUSHF,
/// Rotate the destination left through carry the number of bits indicated by source; if the source is a register then implicitly its size is 1.
/// If it is ::None then the rotation is by a single position only.
RCL,
/// Rotate the destination right through carry the number of bits indicated by source; if the source is a register then implicitly its size is 1.
/// If it is ::None then the rotation is by a single position only.
RCR,
/// Rotate the destination left the number of bits indicated by source; if the source is a register then implicitly its size is 1.
/// If it is ::None then the rotation is by a single position only.
ROL,
/// Rotate the destination right the number of bits indicated by source; if the source is a register then implicitly its size is 1.
/// If it is ::None then the rotation is by a single position only.
ROR,
/// Arithmetic shift left the destination by the number of bits indicated by source; if the source is a register then implicitly its size is 1.
/// If it is ::None then the shift is by a single position only.
SAL,
/// Arithmetic shift right the destination by the number of bits indicated by source; if the source is a register then implicitly its size is 1.
/// If it is ::None then the shift is by a single position only.
SAR,
/// Logical shift right the destination by the number of bits indicated by source; if the source is a register then implicitly its size is 1.
/// If it is ::None then the shift is by a single position only.
SHR,
/// Clear carry flag; no source or destination provided.
@ -202,6 +213,18 @@ enum class Operation: uint8_t {
/// Load AL with DS:[AL+BX].
XLAT,
/// Set AL to FFh if carry is set; 00h otherwise.
SALC,
//
// 8086 exclusives.
//
/// Set destination to ~0 if CL is non-zero.
SETMOC,
/// Set destination to ~0.
SETMO,
//
// 80186 additions.
//
@ -210,7 +233,7 @@ enum class Operation: uint8_t {
/// stored at the location indicated by the source register, which will point to two
/// 16- or 32-bit words, the first being a signed lower bound and the second a signed upper bound.
/// Raises a bounds exception if not.
BOUND,
BOUND = SETMOC,
/// Create stack frame. See operand() for the nesting level and offset()
@ -339,6 +362,7 @@ enum class Operation: uint8_t {
MOVtoTr, MOVfromTr,
};
enum class DataSize: uint8_t {
Byte = 0,
Word = 1,
@ -433,6 +457,30 @@ enum class Repetition: uint8_t {
None, RepE, RepNE
};
/// @returns @c true if @c operation supports repetition mode @c repetition; @c false otherwise.
///
/// Operations not named below support no repetition mode at all, which includes reporting
/// @c false for Repetition::None.
constexpr bool supports(Operation operation, Repetition repetition) {
	switch(operation) {
		// Accepted with any repetition mode.
		case Operation::Invalid:	// Retain context here; it's used as an intermediate
									// state sometimes.
		case Operation::CMPS:
		case Operation::LODS:
		case Operation::MOVS:
		case Operation::SCAS:
		case Operation::STOS:
			return true;

		// Accepted with RepE only.
		case Operation::INS:
		case Operation::OUTS:
			return repetition == Repetition::RepE;

		// Accepted with RepNE only.
		case Operation::IDIV:
			return repetition == Repetition::RepNE;

		default:
			return false;
	}
}
/// Provides a 32-bit-style scale, index and base; to produce the address this represents,
/// calculate base() + (index() << scale()).
///
@ -451,7 +499,7 @@ class ScaleIndexBase {
constexpr ScaleIndexBase(int scale, Source index, Source base) noexcept :
sib_(uint8_t(
scale << 6 |
(int(index != Source::None ? index : Source::eSI) << 3) |
(int(index != Source::None ? index : Source::eSP) << 3) |
int(base)
)) {}
constexpr ScaleIndexBase(Source index, Source base) noexcept : ScaleIndexBase(0, index, base) {}
@ -556,6 +604,23 @@ class DataPointer {
return sib_.index();
}
/// @returns The default segment to use for this access: Source::None unless this pointer is
/// Source::Indirect, in which case the segment is selected by the base register — SS for
/// eBP and eSP, ES for eDI, and DS for anything else.
constexpr Source default_segment() const {
	// Only accesses with a base register have a default; that excludes IndirectNoBase.
	if(source_ != Source::Indirect) {
		return Source::None;
	}

	switch(base()) {
		case Source::eBP:
		case Source::eSP:	return Source::SS;
		case Source::eDI:	return Source::ES;
		default:			return Source::DS;
	}
}
template <bool obscure_indirectNoBase = false> constexpr Source base() const {
if constexpr (obscure_indirectNoBase) {
return (source_ <= Source::IndirectNoBase) ? Source::None : sib_.base();
@ -693,12 +758,12 @@ template<bool is_32bit> class Instruction {
return AddressSize(mem_exts_source_ >> 7);
}
/// @returns @c Source::DS if no segment override was found; the overridden segment otherwise.
/// @returns @c Source::None if no segment override was found; the overridden segment otherwise.
/// On x86 a segment override cannot modify the segment used as a destination in string instructions,
/// or that used by stack instructions, but this function does not spend the time necessary to provide
/// the correct default for those.
Source data_segment() const {
if(!has_length_extension()) return Source::DS;
if(!has_length_extension()) return Source::None;
return Source(
int(Source::ES) +
((length_extension() >> 1) & 7)
@ -781,9 +846,6 @@ template<bool is_32bit> class Instruction {
++extension;
}
if(has_length_extension()) {
// As per the rule stated for segment(), this class provides ::DS for any instruction
// that doesn't have a segment override.
if(segment_override == Source::None) segment_override = Source::DS;
extensions_[extension] = ImmediateT(
(length << 6) | (int(repetition) << 4) | ((int(segment_override) & 7) << 1) | int(lock)
);
@ -795,6 +857,56 @@ template<bool is_32bit> class Instruction {
static_assert(sizeof(Instruction<true>) <= 16);
static_assert(sizeof(Instruction<false>) <= 10);
//
// Disassembly aids.
//
/// @returns @c true if @c operation uses a @c displacement().
bool has_displacement(Operation operation);
/// @returns The maximum number of operands to print in a disassembly of @c operation;
/// i.e. 2 for both source() and destination(), 1 for source() alone, 0 for neither. This is a maximum
/// only — if either source is Source::None then it should not be printed.
int max_displayed_operands(Operation operation);
/// Provides the idiomatic name of the @c Operation given an operation @c DataSize and processor @c Model.
std::string to_string(Operation, DataSize, Model);
/// @returns @c true if the idiomatic name of @c Operation implies the data size (e.g. stosb), @c false otherwise (e.g. mov).
bool mnemonic_implies_data_size(Operation);
/// Provides the name of the @c DataSize, i.e. 'byte', 'word' or 'dword'.
std::string to_string(DataSize);
/// Provides the name of the @c Source at @c DataSize, e.g. for Source::eAX it might return AL, AX or EAX.
std::string to_string(Source, DataSize);
/// Provides the printable version of @c pointer as an appendage for @c instruction.
///
/// See notes below re: @c offset_length and @c immediate_length.
/// If @c operation_size is the default value of @c ::None, it'll be taken from the @c instruction.
template <bool is_32bit>
std::string to_string(
DataPointer pointer,
Instruction<is_32bit> instruction,
int offset_length,
int immediate_length,
DataSize operation_size = InstructionSet::x86::DataSize::None
);
/// Provides the printable version of @c instruction.
///
/// Internally, instructions do not retain the original sizes of offsets/displacements or immediates so the following are available:
///
/// If @c offset_length is '2' or '4', truncates any printed offset to 2 or 4 digits if it is compatible with being that length.
/// If @c immediate_length is '2' or '4', truncates any printed immediate value to 2 or 4 digits if it is compatible with being that length.
template<bool is_32bit>
std::string to_string(
Instruction<is_32bit> instruction,
Model model,
int offset_length = 0,
int immediate_length = 0);
}
#endif /* InstructionSets_x86_Instruction_h */

View File

@ -7,6 +7,8 @@
objects = {
/* Begin PBXBuildFile section */
423BDC4A2AB24699008E37B6 /* 8088Tests.mm in Sources */ = {isa = PBXBuildFile; fileRef = 423BDC492AB24699008E37B6 /* 8088Tests.mm */; };
42437B332AC70833006DFED1 /* HDV.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B6FD0342923061300EC4760 /* HDV.cpp */; };
4281683A2A37AFB4008ECD27 /* DispatcherTests.mm in Sources */ = {isa = PBXBuildFile; fileRef = 428168392A37AFB4008ECD27 /* DispatcherTests.mm */; };
42A5E80C2ABBE04600A0DD5D /* NeskellTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 42A5E80B2ABBE04600A0DD5D /* NeskellTests.swift */; };
42A5E8442ABBE16F00A0DD5D /* illegal_rmw_test.bin in Resources */ = {isa = PBXBuildFile; fileRef = 42A5E8332ABBE16F00A0DD5D /* illegal_rmw_test.bin */; };
@ -26,6 +28,7 @@
42A5E8522ABBE16F00A0DD5D /* nop_test.bin in Resources */ = {isa = PBXBuildFile; fileRef = 42A5E8412ABBE16F00A0DD5D /* nop_test.bin */; };
42A5E8532ABBE16F00A0DD5D /* lax_test.bin in Resources */ = {isa = PBXBuildFile; fileRef = 42A5E8422ABBE16F00A0DD5D /* lax_test.bin */; };
42A5E8542ABBE16F00A0DD5D /* branch_backwards_test.bin in Resources */ = {isa = PBXBuildFile; fileRef = 42A5E8432ABBE16F00A0DD5D /* branch_backwards_test.bin */; };
42E5C3932AC46A7700DA093D /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 42E5C3922AC46A7700DA093D /* Carbon.framework */; };
4B018B89211930DE002A3937 /* 65C02_extended_opcodes_test.bin in Resources */ = {isa = PBXBuildFile; fileRef = 4B018B88211930DE002A3937 /* 65C02_extended_opcodes_test.bin */; };
4B01A6881F22F0DB001FD6E3 /* Z80MemptrTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4B01A6871F22F0DB001FD6E3 /* Z80MemptrTests.swift */; };
4B0333AF2094081A0050B93D /* AppleDSK.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B0333AD2094081A0050B93D /* AppleDSK.cpp */; };
@ -316,6 +319,9 @@
4B680CE223A5553100451D43 /* 68000ComparativeTests.mm in Sources */ = {isa = PBXBuildFile; fileRef = 4B680CE123A5553100451D43 /* 68000ComparativeTests.mm */; };
4B680CE423A555CA00451D43 /* 68000 Comparative Tests in Resources */ = {isa = PBXBuildFile; fileRef = 4B680CE323A555CA00451D43 /* 68000 Comparative Tests */; };
4B683B012727BE700043E541 /* Amiga Blitter Tests in Resources */ = {isa = PBXBuildFile; fileRef = 4B683B002727BE6F0043E541 /* Amiga Blitter Tests */; };
4B69DEB62AB79E4F0055B217 /* Instruction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B69DEB52AB79E4F0055B217 /* Instruction.cpp */; };
4B69DEB72AB79E4F0055B217 /* Instruction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B69DEB52AB79E4F0055B217 /* Instruction.cpp */; };
4B69DEB82AB79E4F0055B217 /* Instruction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B69DEB52AB79E4F0055B217 /* Instruction.cpp */; };
4B69FB3D1C4D908A00B5F0AA /* Tape.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B69FB3B1C4D908A00B5F0AA /* Tape.cpp */; };
4B69FB441C4D941400B5F0AA /* TapeUEF.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B69FB421C4D941400B5F0AA /* TapeUEF.cpp */; };
4B69FB461C4D950F00B5F0AA /* libz.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 4B69FB451C4D950F00B5F0AA /* libz.tbd */; };
@ -1119,6 +1125,7 @@
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
423BDC492AB24699008E37B6 /* 8088Tests.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = 8088Tests.mm; sourceTree = "<group>"; };
4281572E2AA0334300E16AA1 /* Carry.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Carry.hpp; sourceTree = "<group>"; };
428168372A16C25C008ECD27 /* LineLayout.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = LineLayout.hpp; sourceTree = "<group>"; };
428168392A37AFB4008ECD27 /* DispatcherTests.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = DispatcherTests.mm; sourceTree = "<group>"; };
@ -1143,6 +1150,7 @@
42AD552E2A0C4D5000ACE410 /* 68000.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = 68000.hpp; sourceTree = "<group>"; };
42AD55302A0C4D5000ACE410 /* 68000Storage.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = 68000Storage.hpp; sourceTree = "<group>"; };
42AD55312A0C4D5000ACE410 /* 68000Implementation.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = 68000Implementation.hpp; sourceTree = "<group>"; };
42E5C3922AC46A7700DA093D /* Carbon.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Carbon.framework; path = System/Library/Frameworks/Carbon.framework; sourceTree = SDKROOT; };
4B018B88211930DE002A3937 /* 65C02_extended_opcodes_test.bin */ = {isa = PBXFileReference; lastKnownFileType = archive.macbinary; name = 65C02_extended_opcodes_test.bin; path = "Klaus Dormann/65C02_extended_opcodes_test.bin"; sourceTree = "<group>"; };
4B01A6871F22F0DB001FD6E3 /* Z80MemptrTests.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Z80MemptrTests.swift; sourceTree = "<group>"; };
4B0333AD2094081A0050B93D /* AppleDSK.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = AppleDSK.cpp; sourceTree = "<group>"; };
@ -1475,6 +1483,7 @@
4B680CE323A555CA00451D43 /* 68000 Comparative Tests */ = {isa = PBXFileReference; lastKnownFileType = folder; path = "68000 Comparative Tests"; sourceTree = "<group>"; };
4B683B002727BE6F0043E541 /* Amiga Blitter Tests */ = {isa = PBXFileReference; lastKnownFileType = folder; path = "Amiga Blitter Tests"; sourceTree = "<group>"; };
4B698D1A1FE768A100696C91 /* SampleSource.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = SampleSource.hpp; sourceTree = "<group>"; };
4B69DEB52AB79E4F0055B217 /* Instruction.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = Instruction.cpp; sourceTree = "<group>"; };
4B69FB3B1C4D908A00B5F0AA /* Tape.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Tape.cpp; sourceTree = "<group>"; };
4B69FB3C1C4D908A00B5F0AA /* Tape.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = Tape.hpp; sourceTree = "<group>"; };
4B69FB421C4D941400B5F0AA /* TapeUEF.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = TapeUEF.cpp; sourceTree = "<group>"; };
@ -2296,6 +2305,7 @@
buildActionMask = 2147483647;
files = (
4BB8617124E22F5700A00E03 /* Accelerate.framework in Frameworks */,
42E5C3932AC46A7700DA093D /* Carbon.framework in Frameworks */,
4B50AF80242817F40099BBD7 /* QuartzCore.framework in Frameworks */,
4B69FB461C4D950F00B5F0AA /* libz.tbd in Frameworks */,
);
@ -2385,6 +2395,7 @@
4B055A761FAE78210060FFFF /* Frameworks */ = {
isa = PBXGroup;
children = (
42E5C3922AC46A7700DA093D /* Carbon.framework */,
4BB8617024E22F4900A00E03 /* Accelerate.framework */,
4B50AF7F242817F40099BBD7 /* QuartzCore.framework */,
4B055AF01FAE9C080060FFFF /* OpenGL.framework */,
@ -4334,6 +4345,7 @@
4B90467222C6FA31000E2074 /* TestRunner68000.hpp */,
4BC62FF128A149300036AE59 /* NSData+dataWithContentsOfGZippedFile.m */,
4BDA7F8229C4EA28007A10A5 /* 6809OperationMapperTests.mm */,
423BDC492AB24699008E37B6 /* 8088Tests.mm */,
4B04C898285E3DC800AA8FD6 /* 65816ComparativeTests.mm */,
4B90467522C6FD6E000E2074 /* 68000ArithmeticTests.mm */,
4B9D0C4A22C7D70900DE1AD3 /* 68000BCDTests.mm */,
@ -4976,9 +4988,10 @@
isa = PBXGroup;
children = (
4BEDA3B925B25563000C2DBD /* Decoder.cpp */,
4B69DEB52AB79E4F0055B217 /* Instruction.cpp */,
4BE3C69327C793EF000EAD28 /* DataPointerResolver.hpp */,
4BEDA3B825B25563000C2DBD /* Decoder.hpp */,
4BEDA3DB25B2588F000C2DBD /* Instruction.hpp */,
4BE3C69327C793EF000EAD28 /* DataPointerResolver.hpp */,
4BE3C69527CBC540000EAD28 /* Model.hpp */,
);
path = x86;
@ -5838,6 +5851,7 @@
4B055A931FAE85B50060FFFF /* BinaryDump.cpp in Sources */,
4B89452D201967B4007DE474 /* Tape.cpp in Sources */,
4B055AD61FAE9B130060FFFF /* MemoryFuzzer.cpp in Sources */,
4B69DEB82AB79E4F0055B217 /* Instruction.cpp in Sources */,
4B055AC21FAE9AE30060FFFF /* KeyboardMachine.cpp in Sources */,
4B89453B201967B4007DE474 /* StaticAnalyser.cpp in Sources */,
4B055AEB1FAE9BA20060FFFF /* PartialMachineCycle.cpp in Sources */,
@ -5956,6 +5970,7 @@
4B2B3A4C1F9B8FA70062DABF /* MemoryFuzzer.cpp in Sources */,
4B9EC0EA26B384080060A31F /* Keyboard.cpp in Sources */,
4B7913CC1DFCD80E00175A82 /* Video.cpp in Sources */,
4B69DEB62AB79E4F0055B217 /* Instruction.cpp in Sources */,
4B7962A02819681F008130F9 /* Decoder.cpp in Sources */,
4BC57CD92436A62900FBC404 /* State.cpp in Sources */,
4BDA00E622E699B000AC3CD0 /* CSMachine.mm in Sources */,
@ -6156,6 +6171,7 @@
4B778F0E23A5EC4F0000D260 /* Tape.cpp in Sources */,
4B778F2D23A5EF190000D260 /* MFMDiskController.cpp in Sources */,
4B7752C228217F5C0073E2C5 /* Spectrum.cpp in Sources */,
423BDC4A2AB24699008E37B6 /* 8088Tests.mm in Sources */,
4B778F2723A5EEF60000D260 /* BinaryDump.cpp in Sources */,
4BFCA1241ECBDCB400AC40C1 /* AllRAMProcessor.cpp in Sources */,
4B778F5223A5F22F0000D260 /* StaticAnalyser.cpp in Sources */,
@ -6298,6 +6314,7 @@
4BDA7F8329C4EA28007A10A5 /* 6809OperationMapperTests.mm in Sources */,
4B778F5723A5F2BB0000D260 /* ZX8081.cpp in Sources */,
4B778F2F23A5F0B10000D260 /* ScanTarget.cpp in Sources */,
4B69DEB72AB79E4F0055B217 /* Instruction.cpp in Sources */,
4BE90FFD22D5864800FB464D /* MacintoshVideoTests.mm in Sources */,
4B4F478A25367EDC004245B8 /* 65816AddressingTests.swift in Sources */,
4B778F0B23A5EC150000D260 /* TapeUEF.cpp in Sources */,
@ -6313,6 +6330,7 @@
4B778F4323A5F1B00000D260 /* ImplicitSectors.cpp in Sources */,
4B7752B128217EA30073E2C5 /* StaticAnalyser.cpp in Sources */,
4B778F5123A5F2290000D260 /* StaticAnalyser.cpp in Sources */,
42437B332AC70833006DFED1 /* HDV.cpp in Sources */,
4B7752C028217F3D0073E2C5 /* Line.cpp in Sources */,
4B7C7A00282C3BCA002D6C0B /* 68000flamewingTests.mm in Sources */,
4B778F0223A5EBA40000D260 /* MFMSectorDump.cpp in Sources */,

View File

@ -0,0 +1,188 @@
//
// 8088Tests.mm
// Clock SignalTests
//
// Created by Thomas Harte on 13/09/2023.
// Copyright © 2023 Thomas Harte. All rights reserved.
//
#import <XCTest/XCTest.h>
#include <array>
#include <cassert>
#include <iostream>
#include <sstream>
#include <fstream>
#include "NSData+dataWithContentsOfGZippedFile.h"
#include "../../../InstructionSets/x86/Decoder.hpp"
namespace {
// The tests themselves are not duplicated in this repository;
// provide their real path here.
//
// NOTE: this is an absolute path into one particular user's home directory, so it needs to be
// edited locally; -testFiles lists the contents of this directory to discover test files.
constexpr char TestSuiteHome[] = "/Users/tharte/Projects/ProcessorTests/8088/v1";
}
@interface i8088Tests : XCTestCase
@end
@implementation i8088Tests
/// Collects the full paths of all test files within @c TestSuiteHome, sorted by path.
///
/// A file is included if its name ends in 'json.gz', it is not in @c ignoreList and, if
/// @c allowList is non-empty, it is in @c allowList. If the directory cannot be read then
/// the result is an empty array.
- (NSArray<NSString *> *)testFiles {
	NSString *directory = [NSString stringWithUTF8String:TestSuiteHome];

	// Add file names here to restrict testing to just those files; leave empty to test everything.
	NSSet *allowList = [NSSet setWithArray:@[
	]];

	// Set to a collection of file names in order to exclude those files.
	NSSet *ignoreList = nil;

	NSMutableArray<NSString *> *fullPaths = [[NSMutableArray alloc] init];
	NSArray<NSString *> *contents = [[NSFileManager defaultManager] contentsOfDirectoryAtPath:directory error:nil];
	for(NSString *name in contents) {
		NSString *leaf = [name lastPathComponent];

		if(allowList.count && ![allowList containsObject:leaf]) {
			continue;
		}
		if([ignoreList containsObject:leaf]) {
			continue;
		}
		if(![name hasSuffix:@"json.gz"]) {
			continue;
		}

		[fullPaths addObject:[directory stringByAppendingPathComponent:name]];
	}

	return [fullPaths sortedArrayUsingSelector:@selector(compare:)];
}
/// Disassembles @c instruction as an 8086 instruction, using the supplied length hints for offsets
/// and immediates, and returns the result with leading and trailing whitespace removed.
- (NSString *)toString:(const InstructionSet::x86::Instruction<false> &)instruction offsetLength:(int)offsetLength immediateLength:(int)immediateLength {
	const auto disassembly = to_string(instruction, InstructionSet::x86::Model::i8086, offsetLength, immediateLength);

	// The printer may leave whitespace after the mnemonic when there are no operands; strip it.
	NSString *untrimmed = [NSString stringWithUTF8String:disassembly.c_str()];
	return [untrimmed stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceCharacterSet]];
}
/// Decodes the instruction bytes held by @c test and compares both the decoded length and the textual
/// form of the result against what the test expects.
///
/// @param test A test dictionary; its @c "bytes" entry supplies the encoding and its @c "name" entry the expected text.
/// @param file The file the test was loaded from; used only within failure messages.
/// @param assert If set, any mismatch is also reported through @c XCTAssert; otherwise it is only returned.
/// @returns @c true if the decoded length equals the number of bytes supplied and the text matched
/// (possibly after the clerical adjustments described below); @c false otherwise.
- (bool)applyDecodingTest:(NSDictionary *)test file:(NSString *)file assert:(BOOL)assert {
	// Copy the encoded bytes into a plain vector first; that keeps them easy to inspect when
	// single-stepping through a decode in the debugger.
	NSArray<NSNumber *> *encoding = test[@"bytes"];
	std::vector<uint8_t> bytes;
	bytes.reserve(encoding.count);
	for(NSUInteger index = 0; index < encoding.count; index++) {
		bytes.push_back([encoding[index] intValue]);
	}

	// Lazily produces a hex dump of the bytes, for use in failure messages only.
	const auto describe_bytes = [&]() -> NSString * {
		NSMutableString *dump = [[NSMutableString alloc] init];
		for(size_t index = 0; index < bytes.size(); index++) {
			[dump appendFormat:@"%02x ", bytes[index]];
		}
		return dump;
	};

	InstructionSet::x86::Decoder<InstructionSet::x86::Model::i8086> decoder;
	const auto decoded = decoder.decode(bytes.data(), bytes.size());

	// Stage one: the decoder must have consumed exactly the bytes supplied.
	if(assert) {
		XCTAssert(
			decoded.first == [encoding count],
			"Wrong length of instruction decoded for %@ — decoded %d rather than %lu from %@; file %@",
				test[@"name"],
				decoded.first,
				(unsigned long)[encoding count],
				describe_bytes(),
				file
		);
	}
	if(decoded.first != [encoding count]) {
		return false;
	}

	// Stage two: compare textual forms. The original offset length isn't retained by the decoder; that has
	// no functional effect but changes how offsets are printed in the test set, so every combination of
	// printed offset and immediate length is accepted.
	NSSet<NSString *> *decodings = [NSSet setWithObjects:
		[self toString:decoded.second offsetLength:4 immediateLength:4],
		[self toString:decoded.second offsetLength:2 immediateLength:4],
		[self toString:decoded.second offsetLength:0 immediateLength:4],
		[self toString:decoded.second offsetLength:4 immediateLength:2],
		[self toString:decoded.second offsetLength:2 immediateLength:2],
		[self toString:decoded.second offsetLength:0 immediateLength:2],
		nil];

	NSString *const expected = test[@"name"];
	bool isEqual = [decodings containsObject:expected];

	if(!isEqual) {
		// Clerical reconciliation; each substitution below is governed by one bit of a mask, and every
		// non-empty combination is tried, from all three down to just the last:
		//
		//	bit 4:	TEMPORARY HACK — the test set writes 'bp+si' where it means 'bp+di', while also
		//			using 'bp+si' correctly elsewhere; until that is fixed, permit either reading.
		//	bit 2:	the test set distinguishes SHL from SAL; the decoder doesn't.
		//	bit 1:	the decoder regards INT3 and INT 3 as the same thing.
		NSArray<NSArray<NSString *> *> *const substitutions = @[
			@[@"bp+si", @"bp+di"],
			@[@"shl", @"sal"],
			@[@"int3", @"int 03h"],
		];

		for(int mask = 7; !isEqual && mask; --mask) {
			NSString *candidate = [expected stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceCharacterSet]];

			int bit = 4;
			for(NSArray<NSString *> *substitution in substitutions) {
				if(mask & bit) {
					candidate = [candidate stringByReplacingOccurrencesOfString:substitution[0] withString:substitution[1]];
				}
				bit >>= 1;
			}

			isEqual = [decodings containsObject:candidate];
		}
	}

	if(assert) {
		XCTAssert(
			isEqual,
			"%@ doesn't match %@ or similar, was %@ within %@",
				test[@"name"],
				[decodings anyObject],
				describe_bytes(),
				file
		);
	}

	return isEqual;
}
/// Applies the decoding test to every test in every file returned by @c -testFiles.
///
/// Within each file, testing stops at the first failure; the failing test is then decoded a second time
/// without assertions, purely to give a convenient place for a debugger breakpoint. Files that can't be
/// read, or that don't parse to a JSON array, are reported as failures rather than being skipped silently.
/// A summary of all failing files is logged at the end.
- (void)testDecoding {
	NSMutableSet<NSString *> *failures = [[NSMutableSet alloc] init];
	NSArray<NSString *> *testFiles = [self testFiles];

	for(NSString *file in testFiles) {
		// NSJSONSerialization raises an exception if given nil data, which would abort the whole test
		// method; report an unreadable file as an ordinary failure and carry on with the rest instead.
		NSData *data = [NSData dataWithContentsOfGZippedFile:file];
		if(!data) {
			XCTFail(@"Couldn't read %@", file);
			[failures addObject:file];
			continue;
		}

		// A parse failure yields nil, which would otherwise look like a file of zero tests that all
		// passed; anything other than an array can't be enumerated as a list of test dictionaries.
		NSError *error = nil;
		NSArray<NSDictionary *> *testsInFile = [NSJSONSerialization JSONObjectWithData:data options:0 error:&error];
		if(![testsInFile isKindOfClass:[NSArray class]]) {
			XCTFail(@"Couldn't parse %@ as a JSON array of tests: %@", file, error);
			[failures addObject:file];
			continue;
		}

		NSUInteger successes = 0;
		for(NSDictionary *test in testsInFile) {
			// A single failure per instruction is fine.
			if(![self applyDecodingTest:test file:file assert:YES]) {
				[failures addObject:file];

				// Attempt a second decoding, to provide a debugger hook.
				[self applyDecodingTest:test file:file assert:NO];
				break;
			}
			++successes;
		}

		if(successes != [testsInFile count]) {
			NSLog(@"Failed after %lu successes", (unsigned long)successes);
		}
	}

	NSLog(
		@"%lu failures out of %lu tests: %@",
		(unsigned long)failures.count,
		(unsigned long)testFiles.count,
		[[failures allObjects] sortedArrayUsingSelector:@selector(caseInsensitiveCompare:)]
	);
}
@end

View File

@ -533,11 +533,11 @@ decode(const std::initializer_list<uint8_t> &stream, bool set_32_bit = false) {
// cmp ecx,DWORD PTR [ebp+0x2c87445f]
// jecxz 0x00000084 (from 0x82)
// sahf
// je 0x000000f3 (from 0x85)
// jz 0x000000f3 (from 0x85)
test(instructions[52], DataSize::DWord, Operation::CMP, ScaleIndexBase(Source::eBP), Source::eCX, 0, 0x2c87445f);
test(instructions[53], Operation::JPCX, 0, 0x02);
test(instructions[53], Operation::JCXZ, 0, 0x02);
test(instructions[54], Operation::SAHF);
test(instructions[55], Operation::JE, 0, 0x6e);
test(instructions[55], Operation::JZ, 0, 0x6e);
// sbb ecx,DWORD PTR [edi+0x433c54d]
// lahf
@ -555,7 +555,7 @@ decode(const std::initializer_list<uint8_t> &stream, bool set_32_bit = false) {
test_far(instructions[60], Operation::CALLfar, 0xe21b, 0x97d0f58a);
test(instructions[61], Operation::PUSHA);
test(instructions[62], DataSize::Byte, Operation::MOV, Source::Immediate, Source::eAX, 0xcf);
test(instructions[63], Operation::JPCX, 0, 0xd4 - 0x9d);
test(instructions[63], Operation::JCXZ, 0, 0xd4 - 0x9d);
}
- (void)testSourceModRegRM1 {