diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp index d24d188a3..a8c3f92d4 100644 --- a/InstructionSets/x86/Decoder.cpp +++ b/InstructionSets/x86/Decoder.cpp @@ -31,11 +31,16 @@ std::pair::InstructionT> Decoder::decode(con // MARK: - Prefixes (if present) and the opcode. +/// Sets the operation and verifies that the current repetition, if any, is compatible, discarding it otherwise. +#define SetOperation(op) \ + operation_ = op; \ + repetition_ = supports(op, repetition_) ? repetition_ : Repetition::None + /// Helper macro for those that follow. #define SetOpSrcDestSize(op, src, dest, size) \ - operation_ = Operation::op; \ - source_ = Source::src; \ - destination_ = Source::dest; \ + SetOperation(Operation::op); \ + source_ = Source::src; \ + destination_ = Source::dest; \ operation_size_ = size /// Covers anything which is complete as soon as the opcode is encountered. @@ -53,19 +58,19 @@ std::pair::InstructionT> Decoder::decode(con /// Handles instructions of the form Ax, jjkk where the latter is implicitly an address. #define RegAddr(op, dest, op_size, addr_size) \ SetOpSrcDestSize(op, DirectAddress, dest, op_size); \ - operand_size_ = addr_size; \ + displacement_size_ = addr_size; \ phase_ = Phase::DisplacementOrOperand /// Handles instructions of the form jjkk, Ax where the former is implicitly an address. #define AddrReg(op, source, op_size, addr_size) \ SetOpSrcDestSize(op, source, DirectAddress, op_size); \ - operand_size_ = addr_size; \ + displacement_size_ = addr_size; \ destination_ = Source::DirectAddress; \ phase_ = Phase::DisplacementOrOperand /// Covers both `mem/reg, reg` and `reg, mem/reg`. 
#define MemRegReg(op, format, size) \ - operation_ = Operation::op; \ + SetOperation(Operation::op); \ phase_ = Phase::ModRegRM; \ modregrm_format_ = ModRegRMFormat::format; \ operand_size_ = DataSize::None; \ @@ -73,27 +78,28 @@ std::pair::InstructionT> Decoder::decode(con /// Handles JO, JNO, JB, etc — anything with only a displacement. #define Displacement(op, size) \ - operation_ = Operation::op; \ + SetOperation(Operation::op); \ phase_ = Phase::DisplacementOrOperand; \ displacement_size_ = size /// Handles PUSH [immediate], etc — anything with only an immediate operand. #define Immediate(op, size) \ - operation_ = Operation::op; \ + SetOperation(Operation::op); \ source_ = Source::Immediate; \ phase_ = Phase::DisplacementOrOperand; \ operand_size_ = size /// Handles far CALL and far JMP — fixed four or six byte operand operations. #define Far(op) \ - operation_ = Operation::op; \ + SetOperation(Operation::op); \ phase_ = Phase::DisplacementOrOperand; \ operand_size_ = DataSize::Word; \ + destination_ = Source::Immediate; \ displacement_size_ = data_size(default_address_size_) /// Handles ENTER — a fixed three-byte operation. #define Displacement16Operand8(op) \ - operation_ = Operation::op; \ + SetOperation(Operation::op); \ phase_ = Phase::DisplacementOrOperand; \ displacement_size_ = DataSize::Word; \ operand_size_ = DataSize::Byte @@ -133,7 +139,7 @@ std::pair::InstructionT> Decoder::decode(con PartialBlock(0x00, ADD); break; case 0x06: Complete(PUSH, ES, None, data_size_); break; - case 0x07: Complete(POP, None, ES, data_size_); break; + case 0x07: Complete(POP, ES, None, data_size_); break; PartialBlock(0x08, OR); break; case 0x0e: Complete(PUSH, CS, None, data_size_); break; @@ -141,17 +147,20 @@ std::pair::InstructionT> Decoder::decode(con // The 286 onwards have a further set of instructions // prefixed with $0f. 
case 0x0f: - RequiresMin(i80286); - phase_ = Phase::InstructionPageF; + if constexpr (model < Model::i80286) { + Complete(POP, CS, None, data_size_); + } else { + phase_ = Phase::InstructionPageF; + } break; PartialBlock(0x10, ADC); break; case 0x16: Complete(PUSH, SS, None, DataSize::Word); break; - case 0x17: Complete(POP, None, SS, DataSize::Word); break; + case 0x17: Complete(POP, SS, None, DataSize::Word); break; PartialBlock(0x18, SBB); break; case 0x1e: Complete(PUSH, DS, None, DataSize::Word); break; - case 0x1f: Complete(POP, None, DS, DataSize::Word); break; + case 0x1f: Complete(POP, DS, None, DataSize::Word); break; PartialBlock(0x20, AND); break; case 0x26: segment_override_ = Source::ES; break; @@ -189,80 +198,132 @@ std::pair::InstructionT> Decoder::decode(con #undef RegisterBlock case 0x60: - RequiresMin(i80186); - Complete(PUSHA, None, None, data_size_); + if constexpr (model < Model::i80186) { + Displacement(JO, DataSize::Byte); + } else { + Complete(PUSHA, None, None, data_size_); + } break; case 0x61: - RequiresMin(i80186); - Complete(POPA, None, None, data_size_); + if constexpr (model < Model::i80186) { + Displacement(JNO, DataSize::Byte); + } else { + Complete(POPA, None, None, data_size_); + } break; case 0x62: - RequiresMin(i80186); - MemRegReg(BOUND, Reg_MemReg, data_size_); + if constexpr (model < Model::i80186) { + Displacement(JB, DataSize::Byte); + } else { + MemRegReg(BOUND, Reg_MemReg, data_size_); + } break; case 0x63: - RequiresMin(i80286); - MemRegReg(ARPL, MemReg_Reg, DataSize::Word); + if constexpr (model < Model::i80286) { + Displacement(JNB, DataSize::Byte); + } else { + MemRegReg(ARPL, MemReg_Reg, DataSize::Word); + } break; case 0x64: - RequiresMin(i80386); - segment_override_ = Source::FS; + if constexpr (model < Model::i80286) { // Same threshold as 0x65-0x67: Jcc alias only before the 80286; an 80286 is rejected by RequiresMin below. + Displacement(JZ, DataSize::Byte); + } else { + RequiresMin(i80386); + segment_override_ = Source::FS; + } break; case 0x65: + if constexpr (model < Model::i80286) { + Displacement(JNZ,
DataSize::Byte); + break; + } RequiresMin(i80386); segment_override_ = Source::GS; break; case 0x66: + if constexpr (model < Model::i80286) { + Displacement(JBE, DataSize::Byte); + break; + } RequiresMin(i80386); data_size_ = DataSize(int(default_data_size_) ^ int(DataSize::Word) ^ int(DataSize::DWord)); break; case 0x67: + if constexpr (model < Model::i80286) { + Displacement(JNBE, DataSize::Byte); + break; + } RequiresMin(i80386); address_size_ = AddressSize(int(default_address_size_) ^ int(AddressSize::b16) ^ int(AddressSize::b32)); break; case 0x68: - RequiresMin(i80286); - Immediate(PUSH, data_size_); - operation_size_ = data_size_; + if constexpr (model < Model::i80286) { + Displacement(JS, DataSize::Byte); + } else { + Immediate(PUSH, data_size_); + operation_size_ = data_size_; + } break; case 0x69: - RequiresMin(i80286); - MemRegReg(IMUL_3, Reg_MemReg, data_size_); - operand_size_ = data_size_; + if constexpr (model < Model::i80286) { + Displacement(JNS, DataSize::Byte); + } else { + MemRegReg(IMUL_3, Reg_MemReg, data_size_); + operand_size_ = data_size_; + } break; case 0x6a: - RequiresMin(i80286); - Immediate(PUSH, DataSize::Byte); + if constexpr (model < Model::i80286) { + Displacement(JP, DataSize::Byte); + } else { + Immediate(PUSH, DataSize::Byte); + } break; case 0x6b: - RequiresMin(i80286); - MemRegReg(IMUL_3, Reg_MemReg, data_size_); - operand_size_ = DataSize::Byte; - sign_extend_ = true; + if constexpr (model < Model::i80286) { + Displacement(JNP, DataSize::Byte); + } else { + MemRegReg(IMUL_3, Reg_MemReg, data_size_); + operand_size_ = DataSize::Byte; + sign_extend_operand_ = true; + } break; case 0x6c: // INSB - RequiresMin(i80186); - Complete(INS, None, None, DataSize::Byte); + if constexpr (model < Model::i80186) { + Displacement(JL, DataSize::Byte); + } else { + Complete(INS, None, None, DataSize::Byte); + } break; case 0x6d: // INSW/INSD - RequiresMin(i80186); - Complete(INS, None, None, data_size_); + if constexpr (model < Model::i80186) 
{ + Displacement(JNL, DataSize::Byte); + } else { + Complete(INS, None, None, data_size_); + } break; case 0x6e: // OUTSB - RequiresMin(i80186); - Complete(OUTS, None, None, DataSize::Byte); + if constexpr (model < Model::i80186) { + Displacement(JLE, DataSize::Byte); + } else { + Complete(OUTS, None, None, DataSize::Byte); + } break; case 0x6f: // OUTSW/OUSD - RequiresMin(i80186); - Complete(OUTS, None, None, data_size_); + if constexpr (model < Model::i80186) { + Displacement(JNLE, DataSize::Byte); + } else { + Complete(OUTS, None, None, data_size_); + } break; case 0x70: Displacement(JO, DataSize::Byte); break; case 0x71: Displacement(JNO, DataSize::Byte); break; case 0x72: Displacement(JB, DataSize::Byte); break; case 0x73: Displacement(JNB, DataSize::Byte); break; - case 0x74: Displacement(JE, DataSize::Byte); break; - case 0x75: Displacement(JNE, DataSize::Byte); break; + case 0x74: Displacement(JZ, DataSize::Byte); break; + case 0x75: Displacement(JNZ, DataSize::Byte); break; case 0x76: Displacement(JBE, DataSize::Byte); break; case 0x77: Displacement(JNBE, DataSize::Byte); break; case 0x78: Displacement(JS, DataSize::Byte); break; @@ -345,11 +406,23 @@ std::pair::InstructionT> Decoder::decode(con case 0xbe: RegData(MOV, eSI, data_size_); break; case 0xbf: RegData(MOV, eDI, data_size_); break; - case 0xc0: case 0xc1: - RequiresMin(i80186); - ShiftGroup(); - source_ = Source::Immediate; - operand_size_ = DataSize::Byte; + case 0xc0: + if constexpr (model >= Model::i80186) { + ShiftGroup(); + source_ = Source::Immediate; + operand_size_ = DataSize::Byte; + } else { + RegData(RETnear, None, data_size_); + } + break; + case 0xc1: + if constexpr (model >= Model::i80186) { + ShiftGroup(); + source_ = Source::Immediate; + operand_size_ = DataSize::Byte; // The shift count is an imm8 even when the shifted operand is word-sized. + } else { + Complete(RETnear, None, None, DataSize::None); + } break; case 0xc2: RegData(RETnear, None, data_size_); break; case 0xc3: Complete(RETnear, None, None, DataSize::None); break; @@ -359,12 +432,18 @@
std::pair::InstructionT> Decoder::decode(con case 0xc7: MemRegReg(MOV, MemRegMOV, data_size_); break; case 0xc8: - RequiresMin(i80186); - Displacement16Operand8(ENTER); + if constexpr (model >= Model::i80186) { + Displacement16Operand8(ENTER); + } else { + RegData(RETfar, None, data_size_); + } break; case 0xc9: - RequiresMin(i80186); - Complete(LEAVE, None, None, DataSize::None); + if constexpr (model >= Model::i80186) { + Complete(LEAVE, None, None, DataSize::None); + } else { + Complete(RETfar, None, None, DataSize::DWord); + } break; case 0xca: RegData(RETfar, None, data_size_); break; @@ -382,8 +461,6 @@ std::pair::InstructionT> Decoder::decode(con case 0xd0: case 0xd1: ShiftGroup(); - source_ = Source::Immediate; - operand_ = 1; break; case 0xd2: case 0xd3: ShiftGroup(); @@ -391,32 +468,32 @@ std::pair::InstructionT> Decoder::decode(con break; case 0xd4: RegData(AAM, eAX, DataSize::Byte); break; case 0xd5: RegData(AAD, eAX, DataSize::Byte); break; - // Unused: 0xd6. + case 0xd6: Complete(SALC, None, None, DataSize::Byte); break; case 0xd7: Complete(XLAT, None, None, DataSize::Byte); break; - case 0xd8: MemRegReg(ESC, MemReg_Reg, DataSize::None); break; - case 0xd9: MemRegReg(ESC, MemReg_Reg, DataSize::None); break; - case 0xda: MemRegReg(ESC, MemReg_Reg, DataSize::None); break; - case 0xdb: MemRegReg(ESC, MemReg_Reg, DataSize::None); break; - case 0xdc: MemRegReg(ESC, MemReg_Reg, DataSize::None); break; - case 0xdd: MemRegReg(ESC, MemReg_Reg, DataSize::None); break; - case 0xde: MemRegReg(ESC, MemReg_Reg, DataSize::None); break; - case 0xdf: MemRegReg(ESC, MemReg_Reg, DataSize::None); break; + case 0xd8: MemRegReg(ESC, Reg_MemReg, data_size_); break; + case 0xd9: MemRegReg(ESC, Reg_MemReg, data_size_); break; + case 0xda: MemRegReg(ESC, Reg_MemReg, data_size_); break; + case 0xdb: MemRegReg(ESC, Reg_MemReg, data_size_); break; + case 0xdc: MemRegReg(ESC, Reg_MemReg, data_size_); break; + case 0xdd: MemRegReg(ESC, Reg_MemReg, data_size_); break; + case 0xde: 
MemRegReg(ESC, Reg_MemReg, data_size_); break; + case 0xdf: MemRegReg(ESC, Reg_MemReg, data_size_); break; case 0xe0: Displacement(LOOPNE, DataSize::Byte); break; case 0xe1: Displacement(LOOPE, DataSize::Byte); break; case 0xe2: Displacement(LOOP, DataSize::Byte); break; - case 0xe3: Displacement(JPCX, DataSize::Byte); break; + case 0xe3: Displacement(JCXZ, DataSize::Byte); break; case 0xe4: RegAddr(IN, eAX, DataSize::Byte, DataSize::Byte); break; case 0xe5: RegAddr(IN, eAX, data_size_, DataSize::Byte); break; case 0xe6: AddrReg(OUT, eAX, DataSize::Byte, DataSize::Byte); break; case 0xe7: AddrReg(OUT, eAX, data_size_, DataSize::Byte); break; - case 0xe8: Displacement(CALLrel, data_size_); break; - case 0xe9: Displacement(JMPrel, data_size_); break; - case 0xea: Far(JMPfar); break; - case 0xeb: Displacement(JMPrel, DataSize::Byte); break; + case 0xe8: Displacement(CALLrel, data_size(address_size_)); break; + case 0xe9: Displacement(JMPrel, data_size(address_size_)); break; + case 0xea: Far(JMPfar); break; + case 0xeb: Displacement(JMPrel, DataSize::Byte); break; case 0xec: Complete(IN, eDX, eAX, DataSize::Byte); break; case 0xed: Complete(IN, eDX, eAX, data_size_); break; @@ -497,8 +574,8 @@ std::pair::InstructionT> Decoder::decode(con case 0x71: RequiresMin(i80386); Displacement(JNO, data_size_); break; case 0x72: RequiresMin(i80386); Displacement(JB, data_size_); break; case 0x73: RequiresMin(i80386); Displacement(JNB, data_size_); break; - case 0x74: RequiresMin(i80386); Displacement(JE, data_size_); break; - case 0x75: RequiresMin(i80386); Displacement(JNE, data_size_); break; + case 0x74: RequiresMin(i80386); Displacement(JZ, data_size_); break; + case 0x75: RequiresMin(i80386); Displacement(JNZ, data_size_); break; case 0x76: RequiresMin(i80386); Displacement(JBE, data_size_); break; case 0x77: RequiresMin(i80386); Displacement(JNBE, data_size_); break; case 0x78: RequiresMin(i80386); Displacement(JS, data_size_); break; @@ -640,6 +717,10 @@ 
std::pair::InstructionT> Decoder::decode(con operation_ == Operation::LFS) { undefined(); } + } else if(rm == 6 && mod == 0) { + // There's no BP direct; BP with ostensibly no offset means 'direct address' mode. + displacement_size_ = data_size(address_size_); + memreg = Source::DirectAddress; } else { const DataSize sizes[] = { DataSize::None, @@ -647,29 +728,33 @@ std::pair::InstructionT> Decoder::decode(con data_size(address_size_) }; displacement_size_ = sizes[mod]; - memreg = Source::Indirect; - if(address_size_ == AddressSize::b32) { + if(is_32bit(model) && address_size_ == AddressSize::b32) { // 32-bit decoding: the range of potential indirections is expanded, // and may segue into obtaining a SIB. sib_ = ScaleIndexBase(0, Source::None, reg_table[rm]); expects_sib = rm == 4; // Indirect via eSP isn't directly supported; it's the // escape indicator for reading a SIB. + memreg = Source::Indirect; } else { // Classic 16-bit decoding: mode picks a displacement size, // and a few fixed index+base pairs are defined. + // + // A base of eAX is meaningless, with the source type being the indicator + // that it should be ignored. ScaleIndexBase can't store a base of Source::None. 
constexpr ScaleIndexBase rm_table[8] = { - ScaleIndexBase(0, Source::eBX, Source::eSI), - ScaleIndexBase(0, Source::eBX, Source::eDI), - ScaleIndexBase(0, Source::eBP, Source::eSI), - ScaleIndexBase(0, Source::eBP, Source::eDI), - ScaleIndexBase(0, Source::None, Source::eSI), - ScaleIndexBase(0, Source::None, Source::eDI), + ScaleIndexBase(0, Source::eSI, Source::eBX), + ScaleIndexBase(0, Source::eDI, Source::eBX), + ScaleIndexBase(0, Source::eSI, Source::eBP), + ScaleIndexBase(0, Source::eDI, Source::eBP), + ScaleIndexBase(0, Source::eSI, Source::eAX), + ScaleIndexBase(0, Source::eDI, Source::eAX), ScaleIndexBase(0, Source::None, Source::eBP), - ScaleIndexBase(0, Source::None, Source::eBX), + ScaleIndexBase(0, Source::eBX, Source::eAX), }; sib_ = rm_table[rm]; + memreg = (rm >= 4 && rm != 6) ? Source::IndirectNoBase : Source::Indirect; } } @@ -686,58 +771,86 @@ std::pair::InstructionT> Decoder::decode(con } break; case ModRegRMFormat::MemRegTEST_to_IDIV: - source_ = destination_ = memreg; + source_ = memreg; switch(reg) { - default: undefined(); + default: + // case 1 is treated as another form of TEST on the 8086. + // (and, I guess, the 80186?) 
+ if constexpr (model >= Model::i80286) { + undefined(); + } + [[fallthrough]]; - case 0: operation_ = Operation::TEST; break; - case 2: operation_ = Operation::NOT; break; - case 3: operation_ = Operation::NEG; break; - case 4: operation_ = Operation::MUL; break; - case 5: operation_ = Operation::IMUL_1; break; - case 6: operation_ = Operation::DIV; break; - case 7: operation_ = Operation::IDIV; break; + case 0: + destination_ = memreg; + source_ = Source::Immediate; + operand_size_ = operation_size_; + SetOperation(Operation::TEST); + break; + case 2: SetOperation(Operation::NOT); break; + case 3: SetOperation(Operation::NEG); break; + case 4: SetOperation(Operation::MUL); break; + case 5: SetOperation(Operation::IMUL_1); break; + case 6: SetOperation(Operation::DIV); break; + case 7: SetOperation(Operation::IDIV); break; } break; case ModRegRMFormat::Seg_MemReg: - case ModRegRMFormat::MemReg_Seg: + case ModRegRMFormat::MemReg_Seg: { + // On the 8086, only two bits of reg are used. + const int masked_reg = model >= Model::i80286 ? reg : reg & 3; + // The 16-bit chips have four segment registers; // the 80386 onwards has six. - if(!is_32bit(model) && reg > 3) { - undefined(); - } else if(reg > 5) { - undefined(); + if constexpr (is_32bit(model)) { + if(masked_reg > 5) { + undefined(); + } + } else { + if(masked_reg > 3) { + undefined(); + } } if(modregrm_format_ == ModRegRMFormat::Seg_MemReg) { source_ = memreg; - destination_ = seg_table[reg]; + destination_ = seg_table[masked_reg]; // 80286 and later disallow MOV to CS. 
if(model >= Model::i80286 && destination_ == Source::CS) { undefined(); } } else { - source_ = seg_table[reg]; + source_ = seg_table[masked_reg]; destination_ = memreg; } - break; + } break; case ModRegRMFormat::MemRegROL_to_SAR: destination_ = memreg; switch(reg) { - default: undefined(); + default: + if constexpr (model == Model::i8086) { + if(source_ == Source::eCX) { + SetOperation(Operation::SETMOC); + } else { + SetOperation(Operation::SETMO); + } + } else { + undefined(); + } + break; - case 0: operation_ = Operation::ROL; break; - case 1: operation_ = Operation::ROR; break; - case 2: operation_ = Operation::RCL; break; - case 3: operation_ = Operation::RCR; break; - case 4: operation_ = Operation::SAL; break; - case 5: operation_ = Operation::SHR; break; - case 7: operation_ = Operation::SAR; break; + case 0: SetOperation(Operation::ROL); break; + case 1: SetOperation(Operation::ROR); break; + case 2: SetOperation(Operation::RCL); break; + case 3: SetOperation(Operation::RCR); break; + case 4: SetOperation(Operation::SAL); break; + case 5: SetOperation(Operation::SHR); break; + case 7: SetOperation(Operation::SAR); break; } break; @@ -747,8 +860,8 @@ std::pair::InstructionT> Decoder::decode(con switch(reg) { default: undefined(); - case 0: operation_ = Operation::INC; break; - case 1: operation_ = Operation::DEC; break; + case 0: SetOperation(Operation::INC); break; + case 1: SetOperation(Operation::DEC); break; } break; @@ -756,16 +869,23 @@ std::pair::InstructionT> Decoder::decode(con source_ = destination_ = memreg; switch(reg) { - default: undefined(); + default: + // case 7 is treated as another form of PUSH on the 8086. + // (and, I guess, the 80186?) 
+ if constexpr (model >= Model::i80286) { + undefined(); + } + [[fallthrough]]; + case 6: SetOperation(Operation::PUSH); break; - case 0: operation_ = Operation::INC; break; - case 1: operation_ = Operation::DEC; break; - case 2: operation_ = Operation::CALLabs; break; - case 3: operation_ = Operation::CALLfar; break; - case 4: operation_ = Operation::JMPabs; break; - case 5: operation_ = Operation::JMPfar; break; - case 6: operation_ = Operation::PUSH; break; + case 0: SetOperation(Operation::INC); break; + case 1: SetOperation(Operation::DEC); break; + case 2: SetOperation(Operation::CALLabs); break; + case 3: SetOperation(Operation::CALLfar); break; + case 4: SetOperation(Operation::JMPabs); break; + case 5: SetOperation(Operation::JMPfar); break; } + // TODO: CALLfar and JMPfar aren't correct above; find out what is. break; case ModRegRMFormat::MemRegSingleOperand: @@ -787,17 +907,17 @@ std::pair::InstructionT> Decoder::decode(con source_ = Source::Immediate; destination_ = memreg; operand_size_ = (modregrm_format_ == ModRegRMFormat::MemRegADD_to_CMP_SignExtend) ? DataSize::Byte : operation_size_; - sign_extend_ = true; // Will be effective only if modregrm_format_ == ModRegRMFormat::MemRegADD_to_CMP_SignExtend. + sign_extend_operand_ = true; // Will be effective only if modregrm_format_ == ModRegRMFormat::MemRegADD_to_CMP_SignExtend. 
switch(reg) { - default: operation_ = Operation::ADD; break; - case 1: operation_ = Operation::OR; break; - case 2: operation_ = Operation::ADC; break; - case 3: operation_ = Operation::SBB; break; - case 4: operation_ = Operation::AND; break; - case 5: operation_ = Operation::SUB; break; - case 6: operation_ = Operation::XOR; break; - case 7: operation_ = Operation::CMP; break; + default: SetOperation(Operation::ADD); break; + case 1: SetOperation(Operation::OR); break; + case 2: SetOperation(Operation::ADC); break; + case 3: SetOperation(Operation::SBB); break; + case 4: SetOperation(Operation::AND); break; + case 5: SetOperation(Operation::SUB); break; + case 6: SetOperation(Operation::XOR); break; + case 7: SetOperation(Operation::CMP); break; } break; @@ -807,12 +927,12 @@ std::pair::InstructionT> Decoder::decode(con switch(reg) { default: undefined(); - case 0: operation_ = Operation::SLDT; break; - case 1: operation_ = Operation::STR; break; - case 2: operation_ = Operation::LLDT; break; - case 3: operation_ = Operation::LTR; break; - case 4: operation_ = Operation::VERR; break; - case 5: operation_ = Operation::VERW; break; + case 0: SetOperation(Operation::SLDT); break; + case 1: SetOperation(Operation::STR); break; + case 2: SetOperation(Operation::LLDT); break; + case 3: SetOperation(Operation::LTR); break; + case 4: SetOperation(Operation::VERR); break; + case 5: SetOperation(Operation::VERW); break; } break; @@ -822,12 +942,12 @@ std::pair::InstructionT> Decoder::decode(con switch(reg) { default: undefined(); - case 0: operation_ = Operation::SGDT; break; - case 1: operation_ = Operation::SIDT; break; - case 2: operation_ = Operation::LGDT; break; - case 3: operation_ = Operation::LIDT; break; - case 4: operation_ = Operation::SMSW; break; - case 6: operation_ = Operation::LMSW; break; + case 0: SetOperation(Operation::SGDT); break; + case 1: SetOperation(Operation::SIDT); break; + case 2: SetOperation(Operation::LGDT); break; + case 3: 
SetOperation(Operation::LIDT); break; + case 4: SetOperation(Operation::SMSW); break; + case 6: SetOperation(Operation::LMSW); break; } break; @@ -839,10 +959,10 @@ std::pair::InstructionT> Decoder::decode(con switch(reg) { default: undefined(); - case 4: operation_ = Operation::BT; break; - case 5: operation_ = Operation::BTS; break; - case 6: operation_ = Operation::BTR; break; - case 7: operation_ = Operation::BTC; break; + case 4: SetOperation(Operation::BT); break; + case 5: SetOperation(Operation::BTS); break; + case 6: SetOperation(Operation::BTR); break; + case 7: SetOperation(Operation::BTC); break; } break; @@ -857,6 +977,7 @@ std::pair::InstructionT> Decoder::decode(con } #undef undefined +#undef SetOperation // MARK: - ScaleIndexBase @@ -894,16 +1015,28 @@ std::pair::InstructionT> Decoder::decode(con if(bytes_to_consume == outstanding_bytes) { phase_ = Phase::ReadyToPost; - switch(displacement_size_) { - case DataSize::None: displacement_ = 0; break; - case DataSize::Byte: displacement_ = int8_t(inward_data_); break; - case DataSize::Word: displacement_ = int16_t(inward_data_); break; - case DataSize::DWord: displacement_ = int32_t(inward_data_); break; + // TODO: whether the displacement is signed appears to depend on the opcode. + // Find an appropriate table. 
+ + if(!sign_extend_displacement_) { + switch(displacement_size_) { + case DataSize::None: displacement_ = 0; break; + case DataSize::Byte: displacement_ = decltype(displacement_)(uint8_t(inward_data_)); break; + case DataSize::Word: displacement_ = decltype(displacement_)(uint16_t(inward_data_)); break; + case DataSize::DWord: displacement_ = decltype(displacement_)(uint32_t(inward_data_)); break; + } + } else { + switch(displacement_size_) { + case DataSize::None: displacement_ = 0; break; + case DataSize::Byte: displacement_ = int8_t(inward_data_); break; + case DataSize::Word: displacement_ = int16_t(inward_data_); break; + case DataSize::DWord: displacement_ = int32_t(inward_data_); break; + } } inward_data_ >>= bit_size(displacement_size_); // Use inequality of sizes as a test for necessary sign extension. - if(operand_size_ == data_size_ || !sign_extend_) { + if(operand_size_ == data_size_ || !sign_extend_operand_) { operand_ = decltype(operand_)(inward_data_); } else { switch(operand_size_) { diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp index cdc5299bf..354d4accd 100644 --- a/InstructionSets/x86/Decoder.hpp +++ b/InstructionSets/x86/Decoder.hpp @@ -193,8 +193,10 @@ template class Decoder { DataSize operand_size_ = DataSize::None; // i.e. size of in-stream operand, if any. DataSize operation_size_ = DataSize::None; // i.e. size of data manipulated by the operation. - bool sign_extend_ = false; // If set then sign extend the operand up to the operation size; + bool sign_extend_operand_ = false; // If set then sign extend the operand up to the operation size; // otherwise it'll be zero-padded. + bool sign_extend_displacement_ = false; // Much as above; 'displacement' is used internally for both + // displacements and offsets, so signage will vary. // Prefix capture fields. 
Repetition repetition_ = Repetition::None; @@ -222,7 +224,8 @@ template class Decoder { sib_ = ScaleIndexBase(); next_inward_data_shift_ = 0; inward_data_ = 0; - sign_extend_ = false; + sign_extend_operand_ = false; + sign_extend_displacement_ = false; } }; diff --git a/InstructionSets/x86/Instruction.cpp b/InstructionSets/x86/Instruction.cpp new file mode 100644 index 000000000..c247a4f11 --- /dev/null +++ b/InstructionSets/x86/Instruction.cpp @@ -0,0 +1,567 @@ +// +// Instruction.cpp +// Clock Signal +// +// Created by Thomas Harte on 17/09/2023. +// Copyright © 2023 Thomas Harte. All rights reserved. +// + +#include "Instruction.hpp" + +#include +#include +#include + +using namespace InstructionSet::x86; + +bool InstructionSet::x86::has_displacement(Operation operation) { + switch(operation) { + default: return false; + + case Operation::JO: case Operation::JNO: + case Operation::JB: case Operation::JNB: + case Operation::JZ: case Operation::JNZ: + case Operation::JBE: case Operation::JNBE: + case Operation::JS: case Operation::JNS: + case Operation::JP: case Operation::JNP: + case Operation::JL: case Operation::JNL: + case Operation::JLE: case Operation::JNLE: + case Operation::LOOPNE: case Operation::LOOPE: + case Operation::LOOP: case Operation::JCXZ: + case Operation::CALLrel: case Operation::JMPrel: + return true; + } +} + +int InstructionSet::x86::max_displayed_operands(Operation operation) { + switch(operation) { + default: return 2; + + case Operation::INC: case Operation::DEC: + case Operation::POP: case Operation::PUSH: + case Operation::MUL: case Operation::IMUL_1: + case Operation::IDIV: case Operation::DIV: + case Operation::ESC: + case Operation::AAM: case Operation::AAD: + case Operation::INT: + case Operation::JMPabs: case Operation::JMPfar: + case Operation::CALLabs: case Operation::CALLfar: + case Operation::NEG: case Operation::NOT: + case Operation::RETnear: + case Operation::RETfar: + return 1; + + // Pedantically, these have an displacement 
rather than an operand. + case Operation::JO: case Operation::JNO: + case Operation::JB: case Operation::JNB: + case Operation::JZ: case Operation::JNZ: + case Operation::JBE: case Operation::JNBE: + case Operation::JS: case Operation::JNS: + case Operation::JP: case Operation::JNP: + case Operation::JL: case Operation::JNL: + case Operation::JLE: case Operation::JNLE: + case Operation::LOOPNE: case Operation::LOOPE: + case Operation::LOOP: case Operation::JCXZ: + case Operation::CALLrel: case Operation::JMPrel: + // Genuine zero-operand instructions: + case Operation::CMPS: case Operation::LODS: + case Operation::MOVS: case Operation::SCAS: + case Operation::STOS: + case Operation::CLC: case Operation::CLD: + case Operation::CLI: + case Operation::STC: case Operation::STD: + case Operation::STI: + case Operation::CMC: + case Operation::LAHF: case Operation::SAHF: + case Operation::AAA: case Operation::AAS: + case Operation::DAA: case Operation::DAS: + case Operation::CBW: case Operation::CWD: + case Operation::INTO: + case Operation::PUSHF: case Operation::POPF: + case Operation::IRET: + case Operation::NOP: + case Operation::XLAT: + case Operation::SALC: + case Operation::Invalid: + return 0; + } +} + +std::string InstructionSet::x86::to_string(Operation operation, DataSize size, Model model) { + switch(operation) { + case Operation::AAA: return "aaa"; + case Operation::AAD: return "aad"; + case Operation::AAM: return "aam"; + case Operation::AAS: return "aas"; + case Operation::DAA: return "daa"; + case Operation::DAS: return "das"; + + case Operation::CBW: return "cbw"; + case Operation::CWD: return "cwd"; + case Operation::ESC: return "esc"; + + case Operation::HLT: return "hlt"; + case Operation::WAIT: return "wait"; + + case Operation::ADC: return "adc"; + case Operation::ADD: return "add"; + case Operation::SBB: return "sbb"; + case Operation::SUB: return "sub"; + case Operation::MUL: return "mul"; + case Operation::IMUL_1: return "imul"; + case 
Operation::DIV: return "div"; + case Operation::IDIV: return "idiv"; + + case Operation::INC: return "inc"; + case Operation::DEC: return "dec"; + + case Operation::IN: return "in"; + case Operation::OUT: return "out"; + + case Operation::JO: return "jo"; + case Operation::JNO: return "jno"; + case Operation::JB: return "jb"; + case Operation::JNB: return "jnb"; + case Operation::JZ: return "jz"; + case Operation::JNZ: return "jnz"; + case Operation::JBE: return "jbe"; + case Operation::JNBE: return "jnbe"; + case Operation::JS: return "js"; + case Operation::JNS: return "jns"; + case Operation::JP: return "jp"; + case Operation::JNP: return "jnp"; + case Operation::JL: return "jl"; + case Operation::JNL: return "jnl"; + case Operation::JLE: return "jle"; + case Operation::JNLE: return "jnle"; + + case Operation::CALLabs: return "call"; + case Operation::CALLrel: return "call"; + case Operation::CALLfar: return "callf"; + case Operation::IRET: return "iret"; + case Operation::RETfar: return "retf"; + case Operation::RETnear: return "retn"; + case Operation::JMPabs: return "jmp"; + case Operation::JMPrel: return "jmp"; + case Operation::JMPfar: return "jmpf"; + case Operation::JCXZ: return "jcxz"; + case Operation::INT: return "int"; + case Operation::INTO: return "into"; + + case Operation::LAHF: return "lahf"; + case Operation::SAHF: return "sahf"; + case Operation::LDS: return "lds"; + case Operation::LES: return "les"; + case Operation::LEA: return "lea"; + + case Operation::CMPS: { + constexpr char sizes[][6] = { "cmpsb", "cmpsw", "cmpsd", "?" }; + return sizes[static_cast(size)]; + } + case Operation::LODS: { + constexpr char sizes[][6] = { "lodsb", "lodsw", "lodsd", "?" }; + return sizes[static_cast(size)]; + } + case Operation::MOVS: { + constexpr char sizes[][6] = { "movsb", "movsw", "movsd", "?" }; + return sizes[static_cast(size)]; + } + case Operation::SCAS: { + constexpr char sizes[][6] = { "scasb", "scasw", "scasd", "?" 
}; + return sizes[static_cast(size)]; + } + case Operation::STOS: { + constexpr char sizes[][6] = { "stosb", "stosw", "stosd", "?" }; + return sizes[static_cast(size)]; + } + + case Operation::LOOP: return "loop"; + case Operation::LOOPE: return "loope"; + case Operation::LOOPNE: return "loopne"; + + case Operation::MOV: return "mov"; + case Operation::NEG: return "neg"; + case Operation::NOT: return "not"; + case Operation::AND: return "and"; + case Operation::OR: return "or"; + case Operation::XOR: return "xor"; + case Operation::NOP: return "nop"; + case Operation::POP: return "pop"; + case Operation::POPF: return "popf"; + case Operation::PUSH: return "push"; + case Operation::PUSHF: return "pushf"; + case Operation::RCL: return "rcl"; + case Operation::RCR: return "rcr"; + case Operation::ROL: return "rol"; + case Operation::ROR: return "ror"; + case Operation::SAL: return "sal"; + case Operation::SAR: return "sar"; + case Operation::SHR: return "shr"; + + case Operation::CLC: return "clc"; + case Operation::CLD: return "cld"; + case Operation::CLI: return "cli"; + case Operation::STC: return "stc"; + case Operation::STD: return "std"; + case Operation::STI: return "sti"; + case Operation::CMC: return "cmc"; + + case Operation::CMP: return "cmp"; + case Operation::TEST: return "test"; + + case Operation::XCHG: return "xchg"; + case Operation::XLAT: return "xlat"; + case Operation::SALC: return "salc"; + + case Operation::SETMO: + if(model == Model::i8086) { + return "setmo"; + } else { + return "enter"; + } + + case Operation::SETMOC: + if(model == Model::i8086) { + return "setmoc"; + } else { + return "bound"; + } + + case Operation::Invalid: return "invalid"; + + default: + assert(false); + return ""; + } +} + +bool InstructionSet::x86::mnemonic_implies_data_size(Operation operation) { + switch(operation) { + default: return false; + + case Operation::CMPS: + case Operation::LODS: + case Operation::MOVS: + case Operation::SCAS: + case Operation::STOS: + case 
Operation::JMPrel: + case Operation::LEA: + return true; + } +} + +std::string InstructionSet::x86::to_string(DataSize size) { + constexpr char sizes[][6] = { "byte", "word", "dword", "?" }; + return sizes[static_cast(size)]; +} + +std::string InstructionSet::x86::to_string(Source source, DataSize size) { + switch(source) { + case Source::eAX: { + constexpr char sizes[][4] = { "al", "ax", "eax", "?" }; + return sizes[static_cast(size)]; + } + case Source::eCX: { + constexpr char sizes[][4] = { "cl", "cx", "ecx", "?" }; + return sizes[static_cast(size)]; + } + case Source::eDX: { + constexpr char sizes[][4] = { "dl", "dx", "edx", "?" }; + return sizes[static_cast(size)]; + } + case Source::eBX: { + constexpr char sizes[][4] = { "bl", "bx", "ebx", "?" }; + return sizes[static_cast(size)]; + } + case Source::eSPorAH: { + constexpr char sizes[][4] = { "ah", "sp", "esp", "?" }; + return sizes[static_cast(size)]; + } + case Source::eBPorCH: { + constexpr char sizes[][4] = { "ch", "bp", "ebp", "?" }; + return sizes[static_cast(size)]; + } + case Source::eSIorDH: { + constexpr char sizes[][4] = { "dh", "si", "esi", "?" }; + return sizes[static_cast(size)]; + } + case Source::eDIorBH: { + constexpr char sizes[][4] = { "bh", "di", "edi", "?" 
}; + return sizes[static_cast(size)]; + } + + case Source::ES: return "es"; + case Source::CS: return "cs"; + case Source::SS: return "ss"; + case Source::DS: return "ds"; + case Source::FS: return "fs"; + case Source::GS: return "gs"; + + case Source::None: return "0"; + case Source::DirectAddress: return "DirectAccess"; + case Source::Immediate: return "Immediate"; + case Source::Indirect: return "Indirect"; + case Source::IndirectNoBase: return "IndirectNoBase"; + + default: return "???"; + } +} + +namespace { + +std::string to_hex(int value, int digits, bool with_suffix = true) { + auto stream = std::stringstream(); + stream << std::setfill('0') << std::uppercase << std::hex << std::setw(digits); + switch(digits) { + case 2: stream << +uint8_t(value); break; + case 4: stream << +uint16_t(value); break; + default: stream << value; break; + } + if (with_suffix) stream << 'h'; + return stream.str(); +}; + +} + +template +std::string InstructionSet::x86::to_string( + DataPointer pointer, + Instruction instruction, + int offset_length, + int immediate_length, + DataSize operation_size +) { + if(operation_size == InstructionSet::x86::DataSize::None) operation_size = instruction.operation_size(); + + std::string operand; + + auto append = [](std::stringstream &stream, auto value, int length, const char *prefix) { + switch(length) { + case 0: + if(!value) { + break; + } + [[fallthrough]]; + case 2: + // If asked to pretend the offset was originally two digits then treat either of an unsigned + // 8-bit value or a sign-extended 8-bit value as having been originally 8-bit. + // + // This kicks the issue of whether sign was extended appropriately to functionality tests.
+ if( + !(value & 0xff00) || + ((value & 0xff80) == 0xff80) || + ((value & 0xff80) == 0x0000) + ) { + stream << prefix << to_hex(value, 2); + break; + } + [[fallthrough]]; + default: + stream << prefix << to_hex(value, 4); + break; + } + }; + + using Source = InstructionSet::x86::Source; + const Source source = pointer.source(); + switch(source) { + // to_string handles all direct register names correctly. + default: return InstructionSet::x86::to_string(source, operation_size); + + case Source::Immediate: { + std::stringstream stream; + append(stream, instruction.operand(), immediate_length, ""); + return stream.str(); + } + + case Source::DirectAddress: + case Source::Indirect: + case Source::IndirectNoBase: { + std::stringstream stream; + + if(!InstructionSet::x86::mnemonic_implies_data_size(instruction.operation)) { + stream << InstructionSet::x86::to_string(operation_size) << ' '; + } + + Source segment = instruction.data_segment(); + if(segment == Source::None) { + segment = pointer.default_segment(); + if(segment == Source::None) { + segment = Source::DS; + } + } + stream << InstructionSet::x86::to_string(segment, InstructionSet::x86::DataSize::None) << ':'; + + stream << '['; + bool addOffset = false; + switch(source) { + default: break; + case Source::Indirect: + stream << InstructionSet::x86::to_string(pointer.base(), data_size(instruction.address_size())); + if(pointer.index() != Source::None) { + stream << '+' << InstructionSet::x86::to_string(pointer.index(), data_size(instruction.address_size())); + } + addOffset = true; + break; + case Source::IndirectNoBase: + stream << InstructionSet::x86::to_string(pointer.index(), data_size(instruction.address_size())); + addOffset = true; + break; + case Source::DirectAddress: + stream << to_hex(instruction.offset(), 4); + break; + } + if(addOffset) { + append(stream, instruction.offset(), offset_length, "+"); + } + stream << ']'; + return stream.str(); + } + } + + return operand; +}; + +template +std::string 
InstructionSet::x86::to_string( + Instruction instruction, + Model model, + int offset_length, + int immediate_length +) { + std::string operation; + + // Add a repetition prefix; it'll be one of 'rep', 'repe' or 'repne'. + switch(instruction.repetition()) { + case Repetition::None: break; + case Repetition::RepE: + switch(instruction.operation) { + default: + operation += "repe "; + break; + + case Operation::MOVS: + case Operation::STOS: + case Operation::LODS: + operation += "rep "; + break; + } + break; + case Repetition::RepNE: + operation += "repne "; + break; + } + + // Add operation itself. + operation += to_string(instruction.operation, instruction.operation_size(), model); + operation += " "; + + // Deal with a few special cases up front. + switch(instruction.operation) { + default: { + const int operands = max_displayed_operands(instruction.operation); + const bool displacement = has_displacement(instruction.operation); + const bool print_first = operands > 1 && instruction.destination().source() != Source::None; + if(print_first) { + operation += to_string(instruction.destination(), instruction, offset_length, immediate_length); + } + if(operands > 0 && instruction.source().source() != Source::None) { + if(print_first) operation += ", "; + operation += to_string(instruction.source(), instruction, offset_length, immediate_length); + } + if(displacement) { + operation += to_hex(instruction.displacement(), offset_length); + } + } break; + + case Operation::CALLfar: + case Operation::JMPfar: { + switch(instruction.destination().source()) { + case Source::Immediate: + operation += "far 0x"; + operation += to_hex(instruction.segment(), 4, false); + operation += ":0x"; + operation += to_hex(instruction.offset(), 4, false); + break; + default: + operation += to_string(instruction.destination(), instruction, offset_length, immediate_length); + break; + } + } break; + + case Operation::LDS: + case Operation::LES: // The test set labels the pointer type as dword, 
which I guess is technically accurate. + // A full 32 bits will be loaded from that address in 16-bit mode. + operation += to_string(instruction.destination(), instruction, offset_length, immediate_length); + operation += ", "; + operation += to_string(instruction.source(), instruction, offset_length, immediate_length, InstructionSet::x86::DataSize::DWord); + break; + + case Operation::IN: + operation += to_string(instruction.destination(), instruction, offset_length, immediate_length); + operation += ", "; + switch(instruction.source().source()) { + case Source::DirectAddress: + operation += to_hex(instruction.offset(), 2, true); + break; + default: + operation += to_string(instruction.source(), instruction, offset_length, immediate_length, InstructionSet::x86::DataSize::Word); + break; + } + break; + + case Operation::OUT: + switch(instruction.destination().source()) { + case Source::DirectAddress: + operation += to_hex(instruction.offset(), 2, true); + break; + default: + operation += to_string(instruction.destination(), instruction, offset_length, immediate_length, InstructionSet::x86::DataSize::Word); + break; + } + operation += ", "; + operation += to_string(instruction.source(), instruction, offset_length, immediate_length); + break; + + // Rolls and shifts list eCX as a source on the understanding that everyone knows that rolls and shifts + // use CL even when they're shifting or rolling a word-sized quantity. 
+ case Operation::RCL: case Operation::RCR: + case Operation::ROL: case Operation::ROR: + case Operation::SAL: case Operation::SAR: + case Operation::SHR: + case Operation::SETMO: case Operation::SETMOC: + operation += to_string(instruction.destination(), instruction, offset_length, immediate_length); + switch(instruction.source().source()) { + case Source::None: break; + case Source::eCX: operation += ", cl"; break; + case Source::Immediate: + // Providing an immediate operand of 1 is a little future-proofing by the decoder; the '1' + // is actually implicit on a real 8088. So omit it. + if(instruction.operand() == 1) break; + [[fallthrough]]; + default: + operation += ", "; + operation += to_string(instruction.source(), instruction, offset_length, immediate_length); + break; + } + break; + } + + return operation; +} + +// Although advertised, 32-bit printing is incomplete. +// +//template std::string InstructionSet::x86::to_string( +// Instruction instruction, +// Model model, +// int offset_length, +// int immediate_length +//); + +template std::string InstructionSet::x86::to_string( + Instruction instruction, + Model model, + int offset_length, + int immediate_length +); diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp index 2c6e7bd8a..d5b78d9b9 100644 --- a/InstructionSets/x86/Instruction.hpp +++ b/InstructionSets/x86/Instruction.hpp @@ -9,8 +9,11 @@ #ifndef InstructionSets_x86_Instruction_h #define InstructionSets_x86_Instruction_h +#include "Model.hpp" + #include #include +#include #include namespace InstructionSet::x86 { @@ -82,19 +85,19 @@ enum class Operation: uint8_t { /// Reads from the port specified by source to the destination. IN, - /// Writes from the port specified by destination from the source. + /// Writes to the port specified by destination from the source. OUT, // Various jumps; see the displacement to calculate targets. 
- JO, JNO, JB, JNB, JE, JNE, JBE, JNBE, + JO, JNO, JB, JNB, JZ, JNZ, JBE, JNBE, JS, JNS, JP, JNP, JL, JNL, JLE, JNLE, - /// Far call; see the segment() and offset() fields. - CALLfar, - /// Relative call; see displacement(). - CALLrel, /// Near call. CALLabs, + /// Relative call; see displacement(). + CALLrel, + /// Far call; if destination is Source::Immediate then see the segment() and offset() fields; otherwise take segment and offset by indirection. + CALLfar, /// Return from interrupt. IRET, /// Near return; if source is not ::None then it will be an ::Immediate indicating how many additional bytes to remove from the stack. @@ -105,10 +108,10 @@ enum class Operation: uint8_t { JMPabs, /// Near jump with a relative destination. JMPrel, - /// Far jump to the indicated segment and offset. + /// Far jump; if destination is Source::Immediate then see the segment() and offset() fields; otherwise take segment and offset by indirection. JMPfar, /// Relative jump performed only if CX = 0; see the displacement. - JPCX, + JCXZ, /// Generates a software interrupt of the level stated in the operand. INT, /// Generates a software interrupt of level 4 if overflow is set. @@ -153,7 +156,7 @@ enum class Operation: uint8_t { XOR, /// NOP; no further fields. NOP, - /// POP from the stack to destination. + /// POP from the stack to source. POP, /// POP from the stack to the flags register. POPF, @@ -161,19 +164,27 @@ enum class Operation: uint8_t { PUSH, /// PUSH the flags register to the stack. PUSHF, + /// Rotate the destination left through carry the number of bits indicated by source; if the source is a register then implicitly its size is 1. + /// If it is ::None then the rotation is by a single position only. RCL, /// Rotate the destination right through carry the number of bits indicated by source; if the source is a register then implicitly its size is 1. + /// If it is ::None then the rotation is by a single position only. 
RCR, /// Rotate the destination left the number of bits indicated by source; if the source is a register then implicitly its size is 1. + /// If it is ::None then the rotation is by a single position only. ROL, /// Rotate the destination right the number of bits indicated by source; if the source is a register then implicitly its size is 1. + /// If it is ::None then the rotation is by a single position only. ROR, /// Arithmetic shift left the destination by the number of bits indicated by source; if the source is a register then implicitly its size is 1. + /// If it is ::None then the shift is by a single position only. SAL, /// Arithmetic shift right the destination by the number of bits indicated by source; if the source is a register then implicitly its size is 1. + /// If it is ::None then the shift is by a single position only. SAR, /// Logical shift right the destination by the number of bits indicated by source; if the source is a register then implicitly its size is 1. + /// If it is ::None then the shift is by a single position only. SHR, /// Clear carry flag; no source or destination provided. @@ -202,6 +213,18 @@ enum class Operation: uint8_t { /// Load AL with DS:[AL+BX]. XLAT, + /// Set AL to FFh if carry is set; 00h otherwise. + SALC, + + // + // 8086 exclusives. + // + + /// Set destination to ~0 if CL is non-zero. + SETMOC, + /// Set destination to ~0. + SETMO, + // // 80186 additions. // @@ -210,7 +233,7 @@ enum class Operation: uint8_t { /// stored at the location indicated by the source register, which will point to two /// 16- or 32-bit words, the first being a signed lower bound and the signed upper. /// Raises a bounds exception if not. - BOUND, + BOUND = SETMOC, /// Create stack frame. 
See operand() for the nesting level and offset() @@ -339,6 +362,7 @@ enum class Operation: uint8_t { MOVtoTr, MOVfromTr, }; + enum class DataSize: uint8_t { Byte = 0, Word = 1, @@ -433,6 +457,30 @@ enum class Repetition: uint8_t { None, RepE, RepNE }; +/// @returns @c true if @c operation supports repetition mode @c repetition; @c false otherwise. +constexpr bool supports(Operation operation, Repetition repetition) { + switch(operation) { + default: return false; + + case Operation::INS: + case Operation::OUTS: + return repetition == Repetition::RepE; + + case Operation::Invalid: // Retain context here; it's used as an intermediate + // state sometimes. + case Operation::CMPS: + case Operation::LODS: + case Operation::MOVS: + case Operation::SCAS: + case Operation::STOS: + return true; + + case Operation::IDIV: + return repetition == Repetition::RepNE; + } +} + + /// Provides a 32-bit-style scale, index and base; to produce the address this represents, /// calcluate base() + (index() << scale()). /// @@ -451,7 +499,7 @@ class ScaleIndexBase { constexpr ScaleIndexBase(int scale, Source index, Source base) noexcept : sib_(uint8_t( scale << 6 | - (int(index != Source::None ? index : Source::eSI) << 3) | + (int(index != Source::None ? index : Source::eSP) << 3) | int(base) )) {} constexpr ScaleIndexBase(Source index, Source base) noexcept : ScaleIndexBase(0, index, base) {} @@ -556,6 +604,23 @@ class DataPointer { return sib_.index(); } + /// @returns The default segment to use for this access. + constexpr Source default_segment() const { + switch(source_) { + default: + case Source::IndirectNoBase: + return Source::None; + + case Source::Indirect: + switch(base()) { + default: return Source::DS; + case Source::eBP: + case Source::eSP: return Source::SS; + case Source::eDI: return Source::ES; + } + } + } + template constexpr Source base() const { if constexpr (obscure_indirectNoBase) { return (source_ <= Source::IndirectNoBase) ? 
Source::None : sib_.base(); @@ -693,12 +758,12 @@ template class Instruction { return AddressSize(mem_exts_source_ >> 7); } - /// @returns @c Source::DS if no segment override was found; the overridden segment otherwise. + /// @returns @c Source::None if no segment override was found; the overridden segment otherwise. /// On x86 a segment override cannot modify the segment used as a destination in string instructions, /// or that used by stack instructions, but this function does not spend the time necessary to provide /// the correct default for those. Source data_segment() const { - if(!has_length_extension()) return Source::DS; + if(!has_length_extension()) return Source::None; return Source( int(Source::ES) + ((length_extension() >> 1) & 7) @@ -781,9 +846,6 @@ template class Instruction { ++extension; } if(has_length_extension()) { - // As per the rule stated for segment(), this class provides ::DS for any instruction - // that doesn't have a segment override. - if(segment_override == Source::None) segment_override = Source::DS; extensions_[extension] = ImmediateT( (length << 6) | (int(repetition) << 4) | ((int(segment_override) & 7) << 1) | int(lock) ); @@ -795,6 +857,56 @@ template class Instruction { static_assert(sizeof(Instruction) <= 16); static_assert(sizeof(Instruction) <= 10); +// +// Disassembly aids. +// + +/// @returns @c true if @c operation uses a @c displacement(). +bool has_displacement(Operation operation); + +/// @returns The maximum number of operands to print in a disassembly of @c operation; +/// i.e. 2 for both source() and destination(), 1 for source() alone, 0 for neither. This is a maximum +/// only — if either source is Source::None then it should not be printed. +int max_displayed_operands(Operation operation); + +/// Provides the idiomatic name of the @c Operation given an operation @c DataSize and processor @c Model. 
+std::string to_string(Operation, DataSize, Model); + +/// @returns @c true if the idiomatic name of @c Operation implies the data size (e.g. stosb), @c false otherwise (e.g. mov). +bool mnemonic_implies_data_size(Operation); + +/// Provides the name of the @c DataSize, i.e. 'byte', 'word' or 'dword'. +std::string to_string(DataSize); + +/// Provides the name of the @c Source at @c DataSize, e.g. for Source::eAX it might return AL, AX or EAX. +std::string to_string(Source, DataSize); + +/// Provides the printable version of @c pointer as an appendage for @c instruction. +/// +/// See notes below re: @c offset_length and @c immediate_length. +/// If @c operation_size is the default value of @c ::None, it'll be taken from the @c instruction. +template +std::string to_string( + DataPointer pointer, + Instruction instruction, + int offset_length, + int immediate_length, + DataSize operation_size = InstructionSet::x86::DataSize::None +); + +/// Provides the printable version of @c instruction. +/// +/// Internally, instructions do not retain the original sizes of offsets/displacements or immediates so the following are available: +/// +/// If @c offset_length is '2' or '4', truncates any printed offset to 2 or 4 digits if it is compatible with being that length. +/// If @c immediate_length is '2' or '4', truncates any printed immediate value to 2 or 4 digits if it is compatible with being that length.
+template +std::string to_string( + Instruction instruction, + Model model, + int offset_length = 0, + int immediate_length = 0); + } #endif /* InstructionSets_x86_Instruction_h */ diff --git a/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj b/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj index b08cdeb18..81c6ba4b8 100644 --- a/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj +++ b/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj @@ -7,6 +7,8 @@ objects = { /* Begin PBXBuildFile section */ + 423BDC4A2AB24699008E37B6 /* 8088Tests.mm in Sources */ = {isa = PBXBuildFile; fileRef = 423BDC492AB24699008E37B6 /* 8088Tests.mm */; }; + 42437B332AC70833006DFED1 /* HDV.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B6FD0342923061300EC4760 /* HDV.cpp */; }; 4281683A2A37AFB4008ECD27 /* DispatcherTests.mm in Sources */ = {isa = PBXBuildFile; fileRef = 428168392A37AFB4008ECD27 /* DispatcherTests.mm */; }; 42A5E80C2ABBE04600A0DD5D /* NeskellTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 42A5E80B2ABBE04600A0DD5D /* NeskellTests.swift */; }; 42A5E8442ABBE16F00A0DD5D /* illegal_rmw_test.bin in Resources */ = {isa = PBXBuildFile; fileRef = 42A5E8332ABBE16F00A0DD5D /* illegal_rmw_test.bin */; }; @@ -26,6 +28,7 @@ 42A5E8522ABBE16F00A0DD5D /* nop_test.bin in Resources */ = {isa = PBXBuildFile; fileRef = 42A5E8412ABBE16F00A0DD5D /* nop_test.bin */; }; 42A5E8532ABBE16F00A0DD5D /* lax_test.bin in Resources */ = {isa = PBXBuildFile; fileRef = 42A5E8422ABBE16F00A0DD5D /* lax_test.bin */; }; 42A5E8542ABBE16F00A0DD5D /* branch_backwards_test.bin in Resources */ = {isa = PBXBuildFile; fileRef = 42A5E8432ABBE16F00A0DD5D /* branch_backwards_test.bin */; }; + 42E5C3932AC46A7700DA093D /* Carbon.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 42E5C3922AC46A7700DA093D /* Carbon.framework */; }; 4B018B89211930DE002A3937 /* 65C02_extended_opcodes_test.bin in Resources */ = {isa = PBXBuildFile; fileRef = 4B018B88211930DE002A3937 /* 
65C02_extended_opcodes_test.bin */; }; 4B01A6881F22F0DB001FD6E3 /* Z80MemptrTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4B01A6871F22F0DB001FD6E3 /* Z80MemptrTests.swift */; }; 4B0333AF2094081A0050B93D /* AppleDSK.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B0333AD2094081A0050B93D /* AppleDSK.cpp */; }; @@ -316,6 +319,9 @@ 4B680CE223A5553100451D43 /* 68000ComparativeTests.mm in Sources */ = {isa = PBXBuildFile; fileRef = 4B680CE123A5553100451D43 /* 68000ComparativeTests.mm */; }; 4B680CE423A555CA00451D43 /* 68000 Comparative Tests in Resources */ = {isa = PBXBuildFile; fileRef = 4B680CE323A555CA00451D43 /* 68000 Comparative Tests */; }; 4B683B012727BE700043E541 /* Amiga Blitter Tests in Resources */ = {isa = PBXBuildFile; fileRef = 4B683B002727BE6F0043E541 /* Amiga Blitter Tests */; }; + 4B69DEB62AB79E4F0055B217 /* Instruction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B69DEB52AB79E4F0055B217 /* Instruction.cpp */; }; + 4B69DEB72AB79E4F0055B217 /* Instruction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B69DEB52AB79E4F0055B217 /* Instruction.cpp */; }; + 4B69DEB82AB79E4F0055B217 /* Instruction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B69DEB52AB79E4F0055B217 /* Instruction.cpp */; }; 4B69FB3D1C4D908A00B5F0AA /* Tape.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B69FB3B1C4D908A00B5F0AA /* Tape.cpp */; }; 4B69FB441C4D941400B5F0AA /* TapeUEF.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B69FB421C4D941400B5F0AA /* TapeUEF.cpp */; }; 4B69FB461C4D950F00B5F0AA /* libz.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = 4B69FB451C4D950F00B5F0AA /* libz.tbd */; }; @@ -1119,6 +1125,7 @@ /* End PBXCopyFilesBuildPhase section */ /* Begin PBXFileReference section */ + 423BDC492AB24699008E37B6 /* 8088Tests.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = 8088Tests.mm; sourceTree = ""; }; 4281572E2AA0334300E16AA1 /* Carry.hpp */ = {isa = PBXFileReference; 
lastKnownFileType = sourcecode.cpp.h; path = Carry.hpp; sourceTree = ""; }; 428168372A16C25C008ECD27 /* LineLayout.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = LineLayout.hpp; sourceTree = ""; }; 428168392A37AFB4008ECD27 /* DispatcherTests.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = DispatcherTests.mm; sourceTree = ""; }; @@ -1143,6 +1150,7 @@ 42AD552E2A0C4D5000ACE410 /* 68000.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = 68000.hpp; sourceTree = ""; }; 42AD55302A0C4D5000ACE410 /* 68000Storage.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = 68000Storage.hpp; sourceTree = ""; }; 42AD55312A0C4D5000ACE410 /* 68000Implementation.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = 68000Implementation.hpp; sourceTree = ""; }; + 42E5C3922AC46A7700DA093D /* Carbon.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Carbon.framework; path = System/Library/Frameworks/Carbon.framework; sourceTree = SDKROOT; }; 4B018B88211930DE002A3937 /* 65C02_extended_opcodes_test.bin */ = {isa = PBXFileReference; lastKnownFileType = archive.macbinary; name = 65C02_extended_opcodes_test.bin; path = "Klaus Dormann/65C02_extended_opcodes_test.bin"; sourceTree = ""; }; 4B01A6871F22F0DB001FD6E3 /* Z80MemptrTests.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Z80MemptrTests.swift; sourceTree = ""; }; 4B0333AD2094081A0050B93D /* AppleDSK.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = AppleDSK.cpp; sourceTree = ""; }; @@ -1475,6 +1483,7 @@ 4B680CE323A555CA00451D43 /* 68000 Comparative Tests */ = {isa = PBXFileReference; lastKnownFileType = folder; path = "68000 Comparative Tests"; sourceTree = ""; }; 4B683B002727BE6F0043E541 /* Amiga Blitter Tests */ 
= {isa = PBXFileReference; lastKnownFileType = folder; path = "Amiga Blitter Tests"; sourceTree = ""; }; 4B698D1A1FE768A100696C91 /* SampleSource.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = SampleSource.hpp; sourceTree = ""; }; + 4B69DEB52AB79E4F0055B217 /* Instruction.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = Instruction.cpp; sourceTree = ""; }; 4B69FB3B1C4D908A00B5F0AA /* Tape.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Tape.cpp; sourceTree = ""; }; 4B69FB3C1C4D908A00B5F0AA /* Tape.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = Tape.hpp; sourceTree = ""; }; 4B69FB421C4D941400B5F0AA /* TapeUEF.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = TapeUEF.cpp; sourceTree = ""; }; @@ -2296,6 +2305,7 @@ buildActionMask = 2147483647; files = ( 4BB8617124E22F5700A00E03 /* Accelerate.framework in Frameworks */, + 42E5C3932AC46A7700DA093D /* Carbon.framework in Frameworks */, 4B50AF80242817F40099BBD7 /* QuartzCore.framework in Frameworks */, 4B69FB461C4D950F00B5F0AA /* libz.tbd in Frameworks */, ); @@ -2385,6 +2395,7 @@ 4B055A761FAE78210060FFFF /* Frameworks */ = { isa = PBXGroup; children = ( + 42E5C3922AC46A7700DA093D /* Carbon.framework */, 4BB8617024E22F4900A00E03 /* Accelerate.framework */, 4B50AF7F242817F40099BBD7 /* QuartzCore.framework */, 4B055AF01FAE9C080060FFFF /* OpenGL.framework */, @@ -4334,6 +4345,7 @@ 4B90467222C6FA31000E2074 /* TestRunner68000.hpp */, 4BC62FF128A149300036AE59 /* NSData+dataWithContentsOfGZippedFile.m */, 4BDA7F8229C4EA28007A10A5 /* 6809OperationMapperTests.mm */, + 423BDC492AB24699008E37B6 /* 8088Tests.mm */, 4B04C898285E3DC800AA8FD6 /* 65816ComparativeTests.mm */, 4B90467522C6FD6E000E2074 /* 68000ArithmeticTests.mm */, 4B9D0C4A22C7D70900DE1AD3 /* 68000BCDTests.mm */, @@ -4976,9 +4988,10 @@ isa = PBXGroup; children = 
( 4BEDA3B925B25563000C2DBD /* Decoder.cpp */, + 4B69DEB52AB79E4F0055B217 /* Instruction.cpp */, + 4BE3C69327C793EF000EAD28 /* DataPointerResolver.hpp */, 4BEDA3B825B25563000C2DBD /* Decoder.hpp */, 4BEDA3DB25B2588F000C2DBD /* Instruction.hpp */, - 4BE3C69327C793EF000EAD28 /* DataPointerResolver.hpp */, 4BE3C69527CBC540000EAD28 /* Model.hpp */, ); path = x86; @@ -5838,6 +5851,7 @@ 4B055A931FAE85B50060FFFF /* BinaryDump.cpp in Sources */, 4B89452D201967B4007DE474 /* Tape.cpp in Sources */, 4B055AD61FAE9B130060FFFF /* MemoryFuzzer.cpp in Sources */, + 4B69DEB82AB79E4F0055B217 /* Instruction.cpp in Sources */, 4B055AC21FAE9AE30060FFFF /* KeyboardMachine.cpp in Sources */, 4B89453B201967B4007DE474 /* StaticAnalyser.cpp in Sources */, 4B055AEB1FAE9BA20060FFFF /* PartialMachineCycle.cpp in Sources */, @@ -5956,6 +5970,7 @@ 4B2B3A4C1F9B8FA70062DABF /* MemoryFuzzer.cpp in Sources */, 4B9EC0EA26B384080060A31F /* Keyboard.cpp in Sources */, 4B7913CC1DFCD80E00175A82 /* Video.cpp in Sources */, + 4B69DEB62AB79E4F0055B217 /* Instruction.cpp in Sources */, 4B7962A02819681F008130F9 /* Decoder.cpp in Sources */, 4BC57CD92436A62900FBC404 /* State.cpp in Sources */, 4BDA00E622E699B000AC3CD0 /* CSMachine.mm in Sources */, @@ -6156,6 +6171,7 @@ 4B778F0E23A5EC4F0000D260 /* Tape.cpp in Sources */, 4B778F2D23A5EF190000D260 /* MFMDiskController.cpp in Sources */, 4B7752C228217F5C0073E2C5 /* Spectrum.cpp in Sources */, + 423BDC4A2AB24699008E37B6 /* 8088Tests.mm in Sources */, 4B778F2723A5EEF60000D260 /* BinaryDump.cpp in Sources */, 4BFCA1241ECBDCB400AC40C1 /* AllRAMProcessor.cpp in Sources */, 4B778F5223A5F22F0000D260 /* StaticAnalyser.cpp in Sources */, @@ -6298,6 +6314,7 @@ 4BDA7F8329C4EA28007A10A5 /* 6809OperationMapperTests.mm in Sources */, 4B778F5723A5F2BB0000D260 /* ZX8081.cpp in Sources */, 4B778F2F23A5F0B10000D260 /* ScanTarget.cpp in Sources */, + 4B69DEB72AB79E4F0055B217 /* Instruction.cpp in Sources */, 4BE90FFD22D5864800FB464D /* MacintoshVideoTests.mm in Sources */, 
4B4F478A25367EDC004245B8 /* 65816AddressingTests.swift in Sources */, 4B778F0B23A5EC150000D260 /* TapeUEF.cpp in Sources */, @@ -6313,6 +6330,7 @@ 4B778F4323A5F1B00000D260 /* ImplicitSectors.cpp in Sources */, 4B7752B128217EA30073E2C5 /* StaticAnalyser.cpp in Sources */, 4B778F5123A5F2290000D260 /* StaticAnalyser.cpp in Sources */, + 42437B332AC70833006DFED1 /* HDV.cpp in Sources */, 4B7752C028217F3D0073E2C5 /* Line.cpp in Sources */, 4B7C7A00282C3BCA002D6C0B /* 68000flamewingTests.mm in Sources */, 4B778F0223A5EBA40000D260 /* MFMSectorDump.cpp in Sources */, diff --git a/OSBindings/Mac/Clock SignalTests/8088Tests.mm b/OSBindings/Mac/Clock SignalTests/8088Tests.mm new file mode 100644 index 000000000..a7f81b22f --- /dev/null +++ b/OSBindings/Mac/Clock SignalTests/8088Tests.mm @@ -0,0 +1,188 @@ +// +// 8088Tests.m +// Clock SignalTests +// +// Created by Thomas Harte on 13/09/2023. +// Copyright © 2023 Thomas Harte. All rights reserved. +// + +#import + +#include +#include + +#include +#include +#include + +#include "NSData+dataWithContentsOfGZippedFile.h" + +#include "../../../InstructionSets/x86/Decoder.hpp" + +namespace { + +// The tests themselves are not duplicated in this repository; +// provide their real path here. 
+constexpr char TestSuiteHome[] = "/Users/tharte/Projects/ProcessorTests/8088/v1"; + +} + +@interface i8088Tests : XCTestCase +@end + +@implementation i8088Tests + +- (NSArray *)testFiles { + NSString *path = [NSString stringWithUTF8String:TestSuiteHome]; + NSSet *allowList = [NSSet setWithArray:@[ + ]]; + + NSSet *ignoreList = nil; + + NSArray *files = [[NSFileManager defaultManager] contentsOfDirectoryAtPath:path error:nil]; + files = [files filteredArrayUsingPredicate:[NSPredicate predicateWithBlock:^BOOL(NSString* evaluatedObject, NSDictionary *) { + if(allowList.count && ![allowList containsObject:[evaluatedObject lastPathComponent]]) { + return NO; + } + if([ignoreList containsObject:[evaluatedObject lastPathComponent]]) { + return NO; + } + return [evaluatedObject hasSuffix:@"json.gz"]; + }]]; + + NSMutableArray *fullPaths = [[NSMutableArray alloc] init]; + for(NSString *file in files) { + [fullPaths addObject:[path stringByAppendingPathComponent:file]]; + } + + return [fullPaths sortedArrayUsingSelector:@selector(compare:)]; +} + +- (NSString *)toString:(const InstructionSet::x86::Instruction &)instruction offsetLength:(int)offsetLength immediateLength:(int)immediateLength { + const auto operation = to_string(instruction, InstructionSet::x86::Model::i8086, offsetLength, immediateLength); + return [[NSString stringWithUTF8String:operation.c_str()] stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceCharacterSet]]; +} + +- (bool)applyDecodingTest:(NSDictionary *)test file:(NSString *)file assert:(BOOL)assert { + using Decoder = InstructionSet::x86::Decoder; + Decoder decoder; + + // Build a vector of the instruction bytes; this makes manual step debugging easier. 
+ NSArray *encoding = test[@"bytes"]; + std::vector data; + data.reserve(encoding.count); + for(NSNumber *number in encoding) { + data.push_back([number intValue]); + } + auto hex_instruction = [&]() -> NSString * { + NSMutableString *hexInstruction = [[NSMutableString alloc] init]; + for(uint8_t byte: data) { + [hexInstruction appendFormat:@"%02x ", byte]; + } + return hexInstruction; + }; + + const auto decoded = decoder.decode(data.data(), data.size()); + if(assert) { + XCTAssert( + decoded.first == [encoding count], + "Wrong length of instruction decoded for %@ — decoded %d rather than %lu from %@; file %@", + test[@"name"], + decoded.first, + (unsigned long)[encoding count], + hex_instruction(), + file + ); + } + if(decoded.first != [encoding count]) { + return false; + } + + // The decoder doesn't preserve the original offset length, which makes no functional difference but + // does affect the way that offsets are printed in the test set. + NSSet *decodings = [NSSet setWithObjects: + [self toString:decoded.second offsetLength:4 immediateLength:4], + [self toString:decoded.second offsetLength:2 immediateLength:4], + [self toString:decoded.second offsetLength:0 immediateLength:4], + [self toString:decoded.second offsetLength:4 immediateLength:2], + [self toString:decoded.second offsetLength:2 immediateLength:2], + [self toString:decoded.second offsetLength:0 immediateLength:2], + nil]; + + auto compare_decoding = [&](NSString *name) -> bool { + return [decodings containsObject:name]; + }; + + bool isEqual = compare_decoding(test[@"name"]); + + // Attempt clerical reconciliation: + // + // TEMPORARY HACK: the test set incorrectly states 'bp+si' whenever it means 'bp+di'. + // Though it also uses 'bp+si' correctly when it means 'bp+si'. Until fixed, take + // a pass on potential issues there. + // + // SEPARATELY: The test suite retains a distinction between SHL and SAL, which the decoder doesn't. So consider that + // a potential point of difference. 
+ // + // Also, the decoder treats INT3 and INT 3 as the same thing. So allow for a meshing of those. + int adjustment = 7; + while(!isEqual && adjustment) { + NSString *alteredName = [test[@"name"] stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceCharacterSet]]; + + if(adjustment & 4) { + alteredName = [alteredName stringByReplacingOccurrencesOfString:@"bp+si" withString:@"bp+di"]; + } + if(adjustment & 2) { + alteredName = [alteredName stringByReplacingOccurrencesOfString:@"shl" withString:@"sal"]; + } + if(adjustment & 1) { + alteredName = [alteredName stringByReplacingOccurrencesOfString:@"int3" withString:@"int 03h"]; + } + + isEqual = compare_decoding(alteredName); + --adjustment; + } + + if(assert) { + XCTAssert( + isEqual, + "%@ doesn't match %@ or similar, was %@ within %@", + test[@"name"], + [decodings anyObject], + hex_instruction(), + file + ); + } + + return isEqual; +} + +- (void)testDecoding { + NSMutableSet *failures = [[NSMutableSet alloc] init]; + NSArray *testFiles = [self testFiles]; + + for(NSString *file in testFiles) { + NSData *data = [NSData dataWithContentsOfGZippedFile:file]; + NSArray *testsInFile = [NSJSONSerialization JSONObjectWithData:data options:0 error:nil]; + NSUInteger successes = 0; + for(NSDictionary *test in testsInFile) { + // A single failure per instruction is fine. + if(![self applyDecodingTest:test file:file assert:YES]) { + [failures addObject:file]; + + // Attempt a second decoding, to provide a debugger hook. 
+ [self applyDecodingTest:test file:file assert:NO]; + + break; + } + ++successes; + } + if(successes != [testsInFile count]) { + NSLog(@"Failed after %ld successes", successes); + } + } + + NSLog(@"%ld failures out of %ld tests: %@", failures.count, testFiles.count, [[failures allObjects] sortedArrayUsingSelector:@selector(caseInsensitiveCompare:)]); +} + +@end diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm index e1080c3bf..7544cf22a 100644 --- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm +++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm @@ -533,11 +533,11 @@ decode(const std::initializer_list &stream, bool set_32_bit = false) { // cmp ecx,DWORD PTR [ebp+0x2c87445f] // jecxz 0x00000084 (from 0x82) // sahf - // je 0x000000f3 (from 0x85) + // jz 0x000000f3 (from 0x85) test(instructions[52], DataSize::DWord, Operation::CMP, ScaleIndexBase(Source::eBP), Source::eCX, 0, 0x2c87445f); - test(instructions[53], Operation::JPCX, 0, 0x02); + test(instructions[53], Operation::JCXZ, 0, 0x02); test(instructions[54], Operation::SAHF); - test(instructions[55], Operation::JE, 0, 0x6e); + test(instructions[55], Operation::JZ, 0, 0x6e); // sbb ecx,DWORD PTR [edi+0x433c54d] // lahf @@ -555,7 +555,7 @@ decode(const std::initializer_list &stream, bool set_32_bit = false) { test_far(instructions[60], Operation::CALLfar, 0xe21b, 0x97d0f58a); test(instructions[61], Operation::PUSHA); test(instructions[62], DataSize::Byte, Operation::MOV, Source::Immediate, Source::eAX, 0xcf); - test(instructions[63], Operation::JPCX, 0, 0xd4 - 0x9d); + test(instructions[63], Operation::JCXZ, 0, 0xd4 - 0x9d); } - (void)testSourceModRegRM1 {