diff --git a/InstructionSets/x86/DataPointerResolver.hpp b/InstructionSets/x86/DataPointerResolver.hpp
new file mode 100644
index 000000000..2a2197a25
--- /dev/null
+++ b/InstructionSets/x86/DataPointerResolver.hpp
@@ -0,0 +1,320 @@
+//
+// DataPointerResolver.hpp
+// Clock Signal
+//
+// Created by Thomas Harte on 24/02/2022.
+// Copyright © 2022 Thomas Harte. All rights reserved.
+//
+
+#ifndef DataPointerResolver_hpp
+#define DataPointerResolver_hpp
+
+#include "Instruction.hpp"
+#include "Model.hpp"
+
+#include <cassert>
+
+namespace InstructionSet {
+namespace x86 {
+
+/// Unlike @c Source, describes only registers, and breaks
+/// them down by conventional name — so AL, AH, AX and EAX are all
+/// listed separately and uniquely, rather than being eAX+size or
+/// eSPorAH with a size of 1.
+enum class Register: uint8_t {
+    // 8-bit registers.
+    AL, AH,
+    CL, CH,
+    DL, DH,
+    BL, BH,
+
+    // 16-bit registers.
+    AX, CX, DX, BX,
+    SP, BP, SI, DI,
+    ES, CS, SS, DS,
+    FS, GS,
+
+    // 32-bit registers.
+    EAX, ECX, EDX, EBX,
+    ESP, EBP, ESI, EDI,
+
+    // Sentinel: returned by register_for_source when no such register exists.
+    None
+};
+
+/// @returns @c true if @c r is the same size as @c DataT; @c false otherwise.
+/// @discussion Provided primarily to aid in asserts; if the decoder and resolver are both
+/// working then it shouldn't be necessary to test this in register files.
+template <typename DataT> constexpr bool is_sized(Register r) {
+    static_assert(sizeof(DataT) == 4 || sizeof(DataT) == 2 || sizeof(DataT) == 1);
+
+    if constexpr (sizeof(DataT) == 4) {
+        return r >= Register::EAX && r < Register::None;
+    }
+
+    if constexpr (sizeof(DataT) == 2) {
+        return r >= Register::AX && r < Register::EAX;
+    }
+
+    if constexpr (sizeof(DataT) == 1) {
+        return r >= Register::AL && r < Register::AX;
+    }
+
+    return false;
+}
+
+/// @returns the proper @c Register given @c source and data of size @c sizeof(DataT),
+/// or Register::None if no such register exists (e.g. asking for a 32-bit version of CS).
+template <typename DataT> constexpr Register register_for_source(Source source) {
+    static_assert(sizeof(DataT) == 4 || sizeof(DataT) == 2 || sizeof(DataT) == 1);
+
+    if constexpr (sizeof(DataT) == 4) {
+        switch(source) {
+            case Source::eAX: return Register::EAX;
+            case Source::eCX: return Register::ECX;
+            case Source::eDX: return Register::EDX;
+            case Source::eBX: return Register::EBX;
+            case Source::eSPorAH: return Register::ESP;
+            case Source::eBPorCH: return Register::EBP;
+            case Source::eSIorDH: return Register::ESI;
+            case Source::eDIorBH: return Register::EDI;
+
+            default: break;
+        }
+    }
+
+    if constexpr (sizeof(DataT) == 2) {
+        switch(source) {
+            case Source::eAX: return Register::AX;
+            case Source::eCX: return Register::CX;
+            case Source::eDX: return Register::DX;
+            case Source::eBX: return Register::BX;
+            case Source::eSPorAH: return Register::SP;
+            case Source::eBPorCH: return Register::BP;
+            case Source::eSIorDH: return Register::SI;
+            case Source::eDIorBH: return Register::DI;
+            case Source::ES: return Register::ES;
+            case Source::CS: return Register::CS;
+            case Source::SS: return Register::SS;
+            case Source::DS: return Register::DS;
+            case Source::FS: return Register::FS;
+            case Source::GS: return Register::GS;
+
+            default: break;
+        }
+    }
+
+    if constexpr (sizeof(DataT) == 1) {
+        switch(source) {
+            case Source::eAX: return Register::AL;
+            case Source::eCX: return Register::CL;
+            case Source::eDX: return Register::DL;
+            case Source::eBX: return Register::BL;
+            case Source::eSPorAH: return Register::AH;
+            case Source::eBPorCH: return Register::CH;
+            case Source::eSIorDH: return Register::DH;
+            case Source::eDIorBH: return Register::BH;
+
+            default: break;
+        }
+    }
+
+    return Register::None;
+}
+
+/// Reads from or writes to the source or target identified by a DataPointer, relying upon two user-supplied classes:
+///
+/// * a register bank; and
+/// * a memory pool.
+///
+/// The register bank should implement `template <typename DataT, Register> DataT read()` and `template <typename DataT, Register> void write(DataT)`.
+/// Those functions will be called only with registers and data types that are appropriate to the @c model.
+///
+/// The memory pool should implement `template <typename DataT> DataT read(Source segment, uint32_t address)` and
+/// `template <typename DataT> void write(Source segment, uint32_t address, DataT value)`.
+template class DataPointerResolver {
+    public:
+    public:
+        /// Reads the data pointed to by @c pointer, referencing @c instruction, @c memory and @c registers as necessary.
+        template <typename DataT> static DataT read(
+            RegistersT &registers,
+            MemoryT &memory,
+            const Instruction &instruction,
+            DataPointer pointer);
+
+        /// Writes @c value to the data pointed to by @c pointer, referencing @c instruction, @c memory and @c registers as necessary.
+        template <typename DataT> static void write(
+            RegistersT &registers,
+            MemoryT &memory,
+            const Instruction &instruction,
+            DataPointer pointer,
+            DataT value);
+
+        /// Computes the effective address of @c pointer including any displacement applied by @c instruction.
+        /// @c pointer must be of type Source::Indirect.
+        template
+        static uint32_t effective_address(
+            RegistersT &registers,
+            const Instruction &instruction,
+            DataPointer pointer);
+
+    private:
+        template <bool is_write, typename DataT> static void access(
+            RegistersT &registers,
+            MemoryT &memory,
+            const Instruction &instruction,
+            DataPointer pointer,
+            DataT &value);
+};
+
+
+//
+// Implementation begins here.
+//
+
+template
+template <typename DataT> DataT DataPointerResolver::read(
+    RegistersT &registers,
+    MemoryT &memory,
+    const Instruction &instruction,
+    DataPointer pointer) {
+        DataT result;
+        access<false>(registers, memory, instruction, pointer, result);
+        return result;
+    }
+
+template
+template <typename DataT> void DataPointerResolver::write(
+    RegistersT &registers,
+    MemoryT &memory,
+    const Instruction &instruction,
+    DataPointer pointer,
+    DataT value) {
+        access<true>(registers, memory, instruction, pointer, value);
+    }
+
+#define rw(v, r, is_write) \
+    case Source::r: \
+        using VType = typename std::remove_reference<decltype(v)>::type; \
+        if constexpr (is_write) { \
+            registers.template write<VType, register_for_source<VType>(Source::r)>(v); \
+        } else { \
+            v = registers.template read<VType, register_for_source<VType>(Source::r)>(); \
+        } \
+        break;
+
+#define ALLREGS(v, i) rw(v, eAX, i); rw(v, eCX, i); \
+                      rw(v, eDX, i); rw(v, eBX, i); \
+                      rw(v, eSPorAH, i); rw(v, eBPorCH, i); \
+                      rw(v, eSIorDH, i); rw(v, eDIorBH, i); \
+                      rw(v, ES, i); rw(v, CS, i); \
+                      rw(v, SS, i); rw(v, DS, i); \
+                      rw(v, FS, i); rw(v, GS, i);
+
+template
+template
+uint32_t DataPointerResolver::effective_address(
+    RegistersT &registers,
+    const Instruction &instruction,
+    DataPointer pointer) {
+        using AddressT = typename Instruction::AddressT;
+        AddressT base = 0, index = 0;
+
+        if constexpr (has_base) {
+            switch(pointer.base()) {
+                default: break;
+                ALLREGS(base, false);
+            }
+        }
+
+        switch(pointer.index()) {
+            default: break;
+            ALLREGS(index, false);
+        }
+
+        uint32_t address = index;
+        if constexpr (model >= Model::i80386) {
+            address <<= pointer.scale();
+        } else {
+            assert(!pointer.scale());
+        }
+
+        // Always compute address as 32-bit.
+        // TODO: verify use of memory_mask around here.
+        // Also I think possibly an exception is supposed to be generated
+        // if the programmer is in 32-bit mode and has asked for 16-bit
+        // address computation but generated e.g. a 17-bit result. Look into
+        // that when working on execution. For now the goal is merely decoding
+        // and this code exists both to verify the presence of all necessary
+        // fields and to help to explore the best breakdown of storage
+        // within Instruction.
+        constexpr uint32_t memory_masks[] = {0x0000'ffff, 0xffff'ffff};
+        const uint32_t memory_mask = memory_masks[int(instruction.address_size())];
+        address = (address & memory_mask) + (base & memory_mask) + instruction.displacement();
+        return address;
+    }
+
+template
+template <bool is_write, typename DataT> void DataPointerResolver::access(
+    RegistersT &registers,
+    MemoryT &memory,
+    const Instruction &instruction,
+    DataPointer pointer,
+    DataT &value) {
+        const Source source = pointer.source();
+
+        switch(source) {
+            default:
+                if constexpr (!is_write) {
+                    value = 0;
+                }
+            return;
+
+            ALLREGS(value, is_write);
+
+            case Source::DirectAddress:
+                if constexpr(is_write) {
+                    memory.template write<DataT>(instruction.data_segment(), instruction.displacement(), value);
+                } else {
+                    value = memory.template read<DataT>(instruction.data_segment(), instruction.displacement());
+                }
+            break;
+            case Source::Immediate:
+                value = DataT(instruction.operand());
+            break;
+
+#define indirect(has_base) { \
+    const auto address = effective_address \
+        (registers, instruction, pointer); \
+ \
+    if constexpr (is_write) { \
+        memory.template write<DataT>( \
+            instruction.data_segment(), \
+            address, \
+            value \
+        ); \
+    } else { \
+        value = memory.template read<DataT>( \
+            instruction.data_segment(), \
+            address \
+        ); \
+    } \
+}
+            case Source::IndirectNoBase:
+                indirect(false);
+            break;
+
+            case Source::Indirect:
+                indirect(true);
+            break;
+#undef indirect
+
+        }
+    }
+#undef ALLREGS
+#undef rw
+
+}
+}
+
+#endif /* DataPointerResolver_hpp */
diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp index 4414684d9..ae8447e14 100644 --- a/InstructionSets/x86/Decoder.cpp +++ b/InstructionSets/x86/Decoder.cpp @@ -14,11 +14,20 @@ using namespace InstructionSet::x86; -// Only 8086 is suppoted for now. 
-Decoder::Decoder(Model) {} - -std::pair Decoder::decode(const uint8_t *source, size_t length) { - const uint8_t *const end = source + length; +template +std::pair::InstructionT> Decoder::decode(const uint8_t *source, size_t length) { + // Instruction length limits: + // + // 8086/80186: none* + // 80286: 10 bytes + // 80386: 15 bytes + // + // * but, can treat internally as a limit of 65536 bytes — after that distance the IP will + // be back to wherever it started, so it's safe to spit out a NOP and reset parsing + // without any loss of context. This reduces the risk of the decoder tricking a caller into + // an infinite loop. + constexpr int max_instruction_length = model >= Model::i80386 ? 15 : (model == Model::i80286 ? 10 : 65536); + const uint8_t *const end = source + std::min(length, size_t(max_instruction_length - consumed_)); // MARK: - Prefixes (if present) and the opcode. @@ -39,105 +48,138 @@ std::pair Decoder::decode(const uint8_t * SetOpSrcDestSize(op, DirectAddress, dest, size); \ source_ = Source::Immediate; \ operand_size_ = size; \ - phase_ = Phase::AwaitingDisplacementOrOperand + phase_ = Phase::DisplacementOrOperand /// Handles instructions of the form Ax, jjkk where the latter is implicitly an address. #define RegAddr(op, dest, op_size, addr_size) \ SetOpSrcDestSize(op, DirectAddress, dest, op_size); \ operand_size_ = addr_size; \ - phase_ = Phase::AwaitingDisplacementOrOperand + phase_ = Phase::DisplacementOrOperand /// Handles instructions of the form jjkk, Ax where the former is implicitly an address. #define AddrReg(op, source, op_size, addr_size) \ SetOpSrcDestSize(op, source, DirectAddress, op_size); \ operand_size_ = addr_size; \ destination_ = Source::DirectAddress; \ - phase_ = Phase::AwaitingDisplacementOrOperand + phase_ = Phase::DisplacementOrOperand /// Covers both `mem/reg, reg` and `reg, mem/reg`. 
#define MemRegReg(op, format, size) \ operation_ = Operation::op; \ phase_ = Phase::ModRegRM; \ modregrm_format_ = ModRegRMFormat::format; \ - operand_size_ = 0; \ + operand_size_ = DataSize::None; \ operation_size_ = size -/// Handles JO, JNO, JB, etc — jumps with a single byte displacement. -#define Jump(op) \ +/// Handles JO, JNO, JB, etc — anything with only a displacement. +#define Displacement(op, size) \ operation_ = Operation::op; \ - phase_ = Phase::AwaitingDisplacementOrOperand; \ - displacement_size_ = 1 + phase_ = Phase::DisplacementOrOperand; \ + displacement_size_ = size -/// Handles far CALL and far JMP — fixed four byte operand operations. +/// Handles PUSH [immediate], etc — anything with only an immediate operand. +#define Immediate(op, size) \ + operation_ = Operation::op; \ + source_ = Source::Immediate; \ + phase_ = Phase::DisplacementOrOperand; \ + operand_size_ = size + +/// Handles far CALL and far JMP — fixed four or six byte operand operations. #define Far(op) \ operation_ = Operation::op; \ - phase_ = Phase::AwaitingDisplacementOrOperand; \ - operand_size_ = 4; \ + phase_ = Phase::DisplacementOrOperand; \ + operand_size_ = DataSize::Word; \ + displacement_size_ = data_size(default_address_size_) + +/// Handles ENTER — a fixed three-byte operation. +#define Displacement16Operand8(op) \ + operation_ = Operation::op; \ + phase_ = Phase::DisplacementOrOperand; \ + displacement_size_ = DataSize::Word; \ + operand_size_ = DataSize::Byte + +/// Sets up the operation size, oncoming phase and modregrm format for a member of the shift group (i.e. 'group 2'). 
+#define ShiftGroup() { \ + const DataSize sizes[] = {DataSize::Byte, data_size_}; \ + phase_ = Phase::ModRegRM; \ + modregrm_format_ = ModRegRMFormat::MemRegROL_to_SAR; \ + operation_size_ = sizes[instr & 1]; \ +} + +#define undefined() { \ + const auto result = std::make_pair(consumed_, InstructionT()); \ + reset_parsing(); \ + return result; \ +} + +#define Requires(x) if constexpr (model != Model::x) undefined(); +#define RequiresMin(x) if constexpr (model < Model::x) undefined(); while(phase_ == Phase::Instruction && source != end) { - // Retain the instruction byte, in case additional decoding is deferred - // to the ModRegRM byte. - instr_ = *source; + const uint8_t instr = *source; ++source; ++consumed_; - switch(instr_) { - default: { - const auto result = std::make_pair(consumed_, Instruction()); - reset_parsing(); - return result; - } + switch(instr) { + default: undefined(); -#define PartialBlock(start, operation) \ - case start + 0x00: MemRegReg(operation, MemReg_Reg, 1); break; \ - case start + 0x01: MemRegReg(operation, MemReg_Reg, 2); break; \ - case start + 0x02: MemRegReg(operation, Reg_MemReg, 1); break; \ - case start + 0x03: MemRegReg(operation, Reg_MemReg, 2); break; \ - case start + 0x04: RegData(operation, AL, 1); break; \ - case start + 0x05: RegData(operation, AX, 2) +#define PartialBlock(start, operation) \ + case start + 0x00: MemRegReg(operation, MemReg_Reg, DataSize::Byte); break; \ + case start + 0x01: MemRegReg(operation, MemReg_Reg, data_size_); break; \ + case start + 0x02: MemRegReg(operation, Reg_MemReg, DataSize::Byte); break; \ + case start + 0x03: MemRegReg(operation, Reg_MemReg, data_size_); break; \ + case start + 0x04: RegData(operation, eAX, DataSize::Byte); break; \ + case start + 0x05: RegData(operation, eAX, data_size_) - PartialBlock(0x00, ADD); break; - case 0x06: Complete(PUSH, ES, None, 2); break; - case 0x07: Complete(POP, None, ES, 2); break; + PartialBlock(0x00, ADD); break; + case 0x06: Complete(PUSH, ES, None, 
data_size_); break; + case 0x07: Complete(POP, None, ES, data_size_); break; - PartialBlock(0x08, OR); break; - case 0x0e: Complete(PUSH, CS, None, 2); break; + PartialBlock(0x08, OR); break; + case 0x0e: Complete(PUSH, CS, None, data_size_); break; - PartialBlock(0x10, ADC); break; - case 0x16: Complete(PUSH, SS, None, 2); break; - case 0x17: Complete(POP, None, SS, 2); break; + // The 286 onwards have a further set of instructions + // prefixed with $0f. + case 0x0f: + RequiresMin(i80286); + phase_ = Phase::InstructionPageF; + break; - PartialBlock(0x18, SBB); break; - case 0x1e: Complete(PUSH, DS, None, 2); break; - case 0x1f: Complete(POP, None, DS, 2); break; + PartialBlock(0x10, ADC); break; + case 0x16: Complete(PUSH, SS, None, DataSize::Word); break; + case 0x17: Complete(POP, None, SS, DataSize::Word); break; - PartialBlock(0x20, AND); break; - case 0x26: segment_override_ = Source::ES; break; - case 0x27: Complete(DAA, AL, AL, 1); break; + PartialBlock(0x18, SBB); break; + case 0x1e: Complete(PUSH, DS, None, DataSize::Word); break; + case 0x1f: Complete(POP, None, DS, DataSize::Word); break; - PartialBlock(0x28, SUB); break; - case 0x2e: segment_override_ = Source::CS; break; - case 0x2f: Complete(DAS, AL, AL, 1); break; + PartialBlock(0x20, AND); break; + case 0x26: segment_override_ = Source::ES; break; + case 0x27: Complete(DAA, eAX, eAX, DataSize::Byte); break; - PartialBlock(0x30, XOR); break; - case 0x36: segment_override_ = Source::SS; break; - case 0x37: Complete(AAA, AL, AX, 1); break; + PartialBlock(0x28, SUB); break; + case 0x2e: segment_override_ = Source::CS; break; + case 0x2f: Complete(DAS, eAX, eAX, DataSize::Byte); break; - PartialBlock(0x38, CMP); break; - case 0x3e: segment_override_ = Source::DS; break; - case 0x3f: Complete(AAS, AL, AX, 1); break; + PartialBlock(0x30, XOR); break; + case 0x36: segment_override_ = Source::SS; break; + case 0x37: Complete(AAA, eAX, eAX, DataSize::Word); break; + + PartialBlock(0x38, CMP); break; + case 
0x3e: segment_override_ = Source::DS; break; + case 0x3f: Complete(AAS, eAX, eAX, DataSize::Word); break; #undef PartialBlock -#define RegisterBlock(start, operation) \ - case start + 0x00: Complete(operation, AX, AX, 2); break; \ - case start + 0x01: Complete(operation, CX, CX, 2); break; \ - case start + 0x02: Complete(operation, DX, DX, 2); break; \ - case start + 0x03: Complete(operation, BX, BX, 2); break; \ - case start + 0x04: Complete(operation, SP, SP, 2); break; \ - case start + 0x05: Complete(operation, BP, BP, 2); break; \ - case start + 0x06: Complete(operation, SI, SI, 2); break; \ - case start + 0x07: Complete(operation, DI, DI, 2) +#define RegisterBlock(start, operation) \ + case start + 0x00: Complete(operation, eAX, eAX, data_size_); break; \ + case start + 0x01: Complete(operation, eCX, eCX, data_size_); break; \ + case start + 0x02: Complete(operation, eDX, eDX, data_size_); break; \ + case start + 0x03: Complete(operation, eBX, eBX, data_size_); break; \ + case start + 0x04: Complete(operation, eSP, eSP, data_size_); break; \ + case start + 0x05: Complete(operation, eBP, eBP, data_size_); break; \ + case start + 0x06: Complete(operation, eSI, eSI, data_size_); break; \ + case start + 0x07: Complete(operation, eDI, eDI, data_size_) RegisterBlock(0x40, INC); break; RegisterBlock(0x48, DEC); break; @@ -146,182 +188,414 @@ std::pair Decoder::decode(const uint8_t * #undef RegisterBlock - // 0x60–0x6f: not used. 
+ case 0x60: + RequiresMin(i80186); + Complete(PUSHA, None, None, data_size_); + break; + case 0x61: + RequiresMin(i80186); + Complete(POPA, None, None, data_size_); + break; + case 0x62: + RequiresMin(i80186); + MemRegReg(BOUND, Reg_MemReg, data_size_); + break; + case 0x63: + RequiresMin(i80286); + MemRegReg(ARPL, MemReg_Reg, DataSize::Word); + break; + case 0x64: + RequiresMin(i80386); + segment_override_ = Source::FS; + break; + case 0x65: + RequiresMin(i80386); + segment_override_ = Source::GS; + break; + case 0x66: + RequiresMin(i80386); + data_size_ = DataSize(int(default_data_size_) ^ int(DataSize::Word) ^ int(DataSize::DWord)); + break; + case 0x67: + RequiresMin(i80386); + address_size_ = AddressSize(int(default_address_size_) ^ int(AddressSize::b16) ^ int(AddressSize::b32)); + break; + case 0x68: + RequiresMin(i80286); + Immediate(PUSH, data_size_); + operation_size_ = data_size_; + break; + case 0x69: + RequiresMin(i80286); + MemRegReg(IMUL_3, Reg_MemReg, data_size_); + operand_size_ = data_size_; + break; + case 0x6a: + RequiresMin(i80286); + Immediate(PUSH, DataSize::Byte); + break; + case 0x6b: + RequiresMin(i80286); + MemRegReg(IMUL_3, Reg_MemReg, data_size_); + operand_size_ = DataSize::Byte; + sign_extend_ = true; + break; + case 0x6c: // INSB + RequiresMin(i80186); + Complete(INS, None, None, DataSize::Byte); + break; + case 0x6d: // INSW/INSD + RequiresMin(i80186); + Complete(INS, None, None, data_size_); + break; + case 0x6e: // OUTSB + RequiresMin(i80186); + Complete(OUTS, None, None, DataSize::Byte); + break; + case 0x6f: // OUTSW/OUTSD + RequiresMin(i80186); + Complete(OUTS, None, None, data_size_); + break; - case 0x70: Jump(JO); break; - case 0x71: Jump(JNO); break; - case 0x72: Jump(JB); break; - case 0x73: Jump(JNB); break; - case 0x74: Jump(JE); break; - case 0x75: Jump(JNE); break; - case 0x76: Jump(JBE); break; - case 0x77: Jump(JNBE); break; - case 0x78: Jump(JS); break; - case 0x79: Jump(JNS); break; - case 0x7a: Jump(JP); break; - 
case 0x7b: Jump(JNP); break; - case 0x7c: Jump(JL); break; - case 0x7d: Jump(JNL); break; - case 0x7e: Jump(JLE); break; - case 0x7f: Jump(JNLE); break; + case 0x70: Displacement(JO, DataSize::Byte); break; + case 0x71: Displacement(JNO, DataSize::Byte); break; + case 0x72: Displacement(JB, DataSize::Byte); break; + case 0x73: Displacement(JNB, DataSize::Byte); break; + case 0x74: Displacement(JE, DataSize::Byte); break; + case 0x75: Displacement(JNE, DataSize::Byte); break; + case 0x76: Displacement(JBE, DataSize::Byte); break; + case 0x77: Displacement(JNBE, DataSize::Byte); break; + case 0x78: Displacement(JS, DataSize::Byte); break; + case 0x79: Displacement(JNS, DataSize::Byte); break; + case 0x7a: Displacement(JP, DataSize::Byte); break; + case 0x7b: Displacement(JNP, DataSize::Byte); break; + case 0x7c: Displacement(JL, DataSize::Byte); break; + case 0x7d: Displacement(JNL, DataSize::Byte); break; + case 0x7e: Displacement(JLE, DataSize::Byte); break; + case 0x7f: Displacement(JNLE, DataSize::Byte); break; - case 0x80: MemRegReg(Invalid, MemRegADD_to_CMP, 1); break; - case 0x81: MemRegReg(Invalid, MemRegADD_to_CMP, 2); break; - case 0x82: MemRegReg(Invalid, MemRegADC_to_CMP, 1); break; - case 0x83: MemRegReg(Invalid, MemRegADC_to_CMP, 2); break; + case 0x80: MemRegReg(Invalid, MemRegADD_to_CMP, DataSize::Byte); break; + case 0x81: MemRegReg(Invalid, MemRegADD_to_CMP, data_size_); break; + case 0x82: MemRegReg(Invalid, MemRegADD_to_CMP_SignExtend, DataSize::Byte); break; + case 0x83: MemRegReg(Invalid, MemRegADD_to_CMP_SignExtend, data_size_); break; - case 0x84: MemRegReg(TEST, MemReg_Reg, 1); break; - case 0x85: MemRegReg(TEST, MemReg_Reg, 2); break; - case 0x86: MemRegReg(XCHG, Reg_MemReg, 1); break; - case 0x87: MemRegReg(XCHG, Reg_MemReg, 2); break; - case 0x88: MemRegReg(MOV, MemReg_Reg, 1); break; - case 0x89: MemRegReg(MOV, MemReg_Reg, 2); break; - case 0x8a: MemRegReg(MOV, Reg_MemReg, 1); break; - case 0x8b: MemRegReg(MOV, Reg_MemReg, 2); break; - // 
0x8c: not used. - case 0x8d: MemRegReg(LEA, Reg_MemReg, 2); break; - case 0x8e: MemRegReg(MOV, SegReg, 2); break; - case 0x8f: MemRegReg(POP, MemRegPOP, 2); break; + case 0x84: MemRegReg(TEST, MemReg_Reg, DataSize::Byte); break; + case 0x85: MemRegReg(TEST, MemReg_Reg, data_size_); break; + case 0x86: MemRegReg(XCHG, Reg_MemReg, DataSize::Byte); break; + case 0x87: MemRegReg(XCHG, Reg_MemReg, data_size_); break; + case 0x88: MemRegReg(MOV, MemReg_Reg, DataSize::Byte); break; + case 0x89: MemRegReg(MOV, MemReg_Reg, data_size_); break; + case 0x8a: MemRegReg(MOV, Reg_MemReg, DataSize::Byte); break; + case 0x8b: MemRegReg(MOV, Reg_MemReg, data_size_); break; + case 0x8c: MemRegReg(MOV, MemReg_Seg, DataSize::Word); break; + case 0x8d: MemRegReg(LEA, Reg_MemReg, data_size_); break; + case 0x8e: MemRegReg(MOV, Seg_MemReg, DataSize::Word); break; + case 0x8f: MemRegReg(POP, MemRegSingleOperand, data_size_); break; - case 0x90: Complete(NOP, None, None, 0); break; // Or XCHG AX, AX? - case 0x91: Complete(XCHG, AX, CX, 2); break; - case 0x92: Complete(XCHG, AX, DX, 2); break; - case 0x93: Complete(XCHG, AX, BX, 2); break; - case 0x94: Complete(XCHG, AX, SP, 2); break; - case 0x95: Complete(XCHG, AX, BP, 2); break; - case 0x96: Complete(XCHG, AX, SI, 2); break; - case 0x97: Complete(XCHG, AX, DI, 2); break; + case 0x90: Complete(NOP, None, None, DataSize::None); break; // Or XCHG AX, AX? 
+ case 0x91: Complete(XCHG, eAX, eCX, data_size_); break; + case 0x92: Complete(XCHG, eAX, eDX, data_size_); break; + case 0x93: Complete(XCHG, eAX, eBX, data_size_); break; + case 0x94: Complete(XCHG, eAX, eSP, data_size_); break; + case 0x95: Complete(XCHG, eAX, eBP, data_size_); break; + case 0x96: Complete(XCHG, eAX, eSI, data_size_); break; + case 0x97: Complete(XCHG, eAX, eDI, data_size_); break; - case 0x98: Complete(CBW, AL, AH, 1); break; - case 0x99: Complete(CWD, AX, DX, 2); break; - case 0x9a: Far(CALLF); break; - case 0x9b: Complete(WAIT, None, None, 0); break; - case 0x9c: Complete(PUSHF, None, None, 2); break; - case 0x9d: Complete(POPF, None, None, 2); break; - case 0x9e: Complete(SAHF, None, None, 1); break; - case 0x9f: Complete(LAHF, None, None, 1); break; + case 0x98: Complete(CBW, eAX, AH, data_size_); break; + case 0x99: Complete(CWD, eAX, eDX, data_size_); break; + case 0x9a: Far(CALLfar); break; + case 0x9b: Complete(WAIT, None, None, DataSize::None); break; + case 0x9c: Complete(PUSHF, None, None, data_size_); break; + case 0x9d: Complete(POPF, None, None, data_size_); break; + case 0x9e: Complete(SAHF, None, None, DataSize::Byte); break; + case 0x9f: Complete(LAHF, None, None, DataSize::Byte); break; - case 0xa0: RegAddr(MOV, AL, 1, 1); break; - case 0xa1: RegAddr(MOV, AX, 2, 2); break; - case 0xa2: AddrReg(MOV, AL, 1, 1); break; - case 0xa3: AddrReg(MOV, AX, 2, 2); break; + case 0xa0: RegAddr(MOV, eAX, DataSize::Byte, data_size(address_size_)); break; + case 0xa1: RegAddr(MOV, eAX, data_size_, data_size(address_size_)); break; + case 0xa2: AddrReg(MOV, eAX, DataSize::Byte, data_size(address_size_)); break; + case 0xa3: AddrReg(MOV, eAX, data_size_, data_size(address_size_)); break; - case 0xa4: Complete(MOVS, None, None, 1); break; - case 0xa5: Complete(MOVS, None, None, 2); break; - case 0xa6: Complete(CMPS, None, None, 1); break; - case 0xa7: Complete(CMPS, None, None, 2); break; - case 0xa8: RegData(TEST, AL, 1); break; - case 0xa9: 
RegData(TEST, AX, 2); break; - case 0xaa: Complete(STOS, None, None, 1); break; - case 0xab: Complete(STOS, None, None, 2); break; - case 0xac: Complete(LODS, None, None, 1); break; - case 0xad: Complete(LODS, None, None, 2); break; - case 0xae: Complete(SCAS, None, None, 1); break; - case 0xaf: Complete(SCAS, None, None, 2); break; + case 0xa4: Complete(MOVS, None, None, DataSize::Byte); break; + case 0xa5: Complete(MOVS, None, None, data_size_); break; + case 0xa6: Complete(CMPS, None, None, DataSize::Byte); break; + case 0xa7: Complete(CMPS, None, None, data_size_); break; + case 0xa8: RegData(TEST, eAX, DataSize::Byte); break; + case 0xa9: RegData(TEST, eAX, data_size_); break; + case 0xaa: Complete(STOS, None, None, DataSize::Byte); break; + case 0xab: Complete(STOS, None, None, data_size_); break; + case 0xac: Complete(LODS, None, None, DataSize::Byte); break; + case 0xad: Complete(LODS, None, None, data_size_); break; + case 0xae: Complete(SCAS, None, None, DataSize::Byte); break; + case 0xaf: Complete(SCAS, None, None, data_size_); break; - case 0xb0: RegData(MOV, AL, 1); break; - case 0xb1: RegData(MOV, CL, 1); break; - case 0xb2: RegData(MOV, DL, 1); break; - case 0xb3: RegData(MOV, BL, 1); break; - case 0xb4: RegData(MOV, AH, 1); break; - case 0xb5: RegData(MOV, CH, 1); break; - case 0xb6: RegData(MOV, DH, 1); break; - case 0xb7: RegData(MOV, BH, 1); break; - case 0xb8: RegData(MOV, AX, 2); break; - case 0xb9: RegData(MOV, CX, 2); break; - case 0xba: RegData(MOV, DX, 2); break; - case 0xbb: RegData(MOV, BX, 2); break; - case 0xbc: RegData(MOV, SP, 2); break; - case 0xbd: RegData(MOV, BP, 2); break; - case 0xbe: RegData(MOV, SI, 2); break; - case 0xbf: RegData(MOV, DI, 2); break; + case 0xb0: RegData(MOV, eAX, DataSize::Byte); break; + case 0xb1: RegData(MOV, eCX, DataSize::Byte); break; + case 0xb2: RegData(MOV, eDX, DataSize::Byte); break; + case 0xb3: RegData(MOV, eBX, DataSize::Byte); break; + case 0xb4: RegData(MOV, AH, DataSize::Byte); break; + case 
0xb5: RegData(MOV, CH, DataSize::Byte); break; + case 0xb6: RegData(MOV, DH, DataSize::Byte); break; + case 0xb7: RegData(MOV, BH, DataSize::Byte); break; + case 0xb8: RegData(MOV, eAX, data_size_); break; + case 0xb9: RegData(MOV, eCX, data_size_); break; + case 0xba: RegData(MOV, eDX, data_size_); break; + case 0xbb: RegData(MOV, eBX, data_size_); break; + case 0xbc: RegData(MOV, eSP, data_size_); break; + case 0xbd: RegData(MOV, eBP, data_size_); break; + case 0xbe: RegData(MOV, eSI, data_size_); break; + case 0xbf: RegData(MOV, eDI, data_size_); break; - case 0xc2: RegData(RETN, None, 2); break; - case 0xc3: Complete(RETN, None, None, 2); break; - case 0xc4: MemRegReg(LES, Reg_MemReg, 2); break; - case 0xc5: MemRegReg(LDS, Reg_MemReg, 2); break; - case 0xc6: MemRegReg(MOV, MemRegMOV, 1); break; - case 0xc7: MemRegReg(MOV, MemRegMOV, 2); break; + case 0xc0: case 0xc1: + RequiresMin(i80186); + ShiftGroup(); + source_ = Source::Immediate; + operand_size_ = DataSize::Byte; + break; + case 0xc2: RegData(RETnear, None, data_size_); break; + case 0xc3: Complete(RETnear, None, None, DataSize::None); break; + case 0xc4: MemRegReg(LES, Reg_MemReg, data_size_); break; + case 0xc5: MemRegReg(LDS, Reg_MemReg, data_size_); break; + case 0xc6: MemRegReg(MOV, MemRegMOV, DataSize::Byte); break; + case 0xc7: MemRegReg(MOV, MemRegMOV, data_size_); break; - case 0xca: RegData(RETF, None, 2); break; - case 0xcb: Complete(RETF, None, None, 4); break; + case 0xc8: + RequiresMin(i80186); + Displacement16Operand8(ENTER); + break; + case 0xc9: + RequiresMin(i80186); + Complete(LEAVE, None, None, DataSize::None); + break; - case 0xcc: Complete(INT3, None, None, 0); break; - case 0xcd: RegData(INT, None, 1); break; - case 0xce: Complete(INTO, None, None, 0); break; - case 0xcf: Complete(IRET, None, None, 0); break; + case 0xca: RegData(RETfar, None, data_size_); break; + case 0xcb: Complete(RETfar, None, None, DataSize::DWord); break; + + case 0xcc: + // Encode INT3 as though it were INT 
with an + // immediate operand of 3. + Complete(INT, Immediate, None, DataSize::Byte); + operand_ = 3; + break; + case 0xcd: RegData(INT, None, DataSize::Byte); break; + case 0xce: Complete(INTO, None, None, DataSize::None); break; + case 0xcf: Complete(IRET, None, None, DataSize::None); break; case 0xd0: case 0xd1: - phase_ = Phase::ModRegRM; - modregrm_format_ = ModRegRMFormat::MemRegROL_to_SAR; - operation_size_ = 1 + (instr_ & 1); + ShiftGroup(); source_ = Source::Immediate; operand_ = 1; break; case 0xd2: case 0xd3: - phase_ = Phase::ModRegRM; - modregrm_format_ = ModRegRMFormat::MemRegROL_to_SAR; - operation_size_ = 1 + (instr_ & 1); - source_ = Source::CL; + ShiftGroup(); + source_ = Source::eCX; break; - case 0xd4: RegData(AAM, AX, 1); break; - case 0xd5: RegData(AAD, AX, 1); break; + case 0xd4: RegData(AAM, eAX, DataSize::Byte); break; + case 0xd5: RegData(AAD, eAX, DataSize::Byte); break; + // Unused: 0xd6. + case 0xd7: Complete(XLAT, None, None, DataSize::Byte); break; - case 0xd7: Complete(XLAT, None, None, 1); break; + case 0xd8: MemRegReg(ESC, MemReg_Reg, DataSize::None); break; + case 0xd9: MemRegReg(ESC, MemReg_Reg, DataSize::None); break; + case 0xda: MemRegReg(ESC, MemReg_Reg, DataSize::None); break; + case 0xdb: MemRegReg(ESC, MemReg_Reg, DataSize::None); break; + case 0xdc: MemRegReg(ESC, MemReg_Reg, DataSize::None); break; + case 0xdd: MemRegReg(ESC, MemReg_Reg, DataSize::None); break; + case 0xde: MemRegReg(ESC, MemReg_Reg, DataSize::None); break; + case 0xdf: MemRegReg(ESC, MemReg_Reg, DataSize::None); break; - case 0xd8: MemRegReg(ESC, MemReg_Reg, 0); break; - case 0xd9: MemRegReg(ESC, MemReg_Reg, 0); break; - case 0xda: MemRegReg(ESC, MemReg_Reg, 0); break; - case 0xdb: MemRegReg(ESC, MemReg_Reg, 0); break; - case 0xdc: MemRegReg(ESC, MemReg_Reg, 0); break; - case 0xdd: MemRegReg(ESC, MemReg_Reg, 0); break; - case 0xde: MemRegReg(ESC, MemReg_Reg, 0); break; - case 0xdf: MemRegReg(ESC, MemReg_Reg, 0); break; + case 0xe0: Displacement(LOOPNE, 
DataSize::Byte); break; + case 0xe1: Displacement(LOOPE, DataSize::Byte); break; + case 0xe2: Displacement(LOOP, DataSize::Byte); break; + case 0xe3: Displacement(JPCX, DataSize::Byte); break; - case 0xe0: Jump(LOOPNE); break; - case 0xe1: Jump(LOOPE); break; - case 0xe2: Jump(LOOP); break; - case 0xe3: Jump(JPCX); break; + case 0xe4: RegAddr(IN, eAX, DataSize::Byte, DataSize::Byte); break; + case 0xe5: RegAddr(IN, eAX, data_size_, DataSize::Byte); break; + case 0xe6: AddrReg(OUT, eAX, DataSize::Byte, DataSize::Byte); break; + case 0xe7: AddrReg(OUT, eAX, data_size_, DataSize::Byte); break; - case 0xe4: RegAddr(IN, AL, 1, 1); break; - case 0xe5: RegAddr(IN, AX, 2, 1); break; - case 0xe6: AddrReg(OUT, AL, 1, 1); break; - case 0xe7: AddrReg(OUT, AX, 2, 1); break; + case 0xe8: Displacement(CALLrel, data_size_); break; + case 0xe9: Displacement(JMPrel, data_size_); break; + case 0xea: Far(JMPfar); break; + case 0xeb: Displacement(JMPrel, DataSize::Byte); break; - case 0xe8: RegData(CALLD, None, 2); break; - case 0xe9: RegData(JMPN, None, 2); break; - case 0xea: Far(JMPF); break; - case 0xeb: Jump(JMPN); break; + case 0xec: Complete(IN, eDX, eAX, DataSize::Byte); break; + case 0xed: Complete(IN, eDX, eAX, data_size_); break; + case 0xee: Complete(OUT, eAX, eDX, DataSize::Byte); break; + case 0xef: Complete(OUT, eAX, eDX, data_size_); break; - case 0xec: Complete(IN, DX, AL, 1); break; - case 0xed: Complete(IN, DX, AX, 1); break; - case 0xee: Complete(OUT, AL, DX, 1); break; - case 0xef: Complete(OUT, AX, DX, 2); break; + case 0xf0: lock_ = true; break; + // Unused: 0xf1 + case 0xf2: repetition_ = Repetition::RepNE; break; + case 0xf3: repetition_ = Repetition::RepE; break; - case 0xf4: Complete(HLT, None, None, 1); break; - case 0xf5: Complete(CMC, None, None, 1); break; - case 0xf6: MemRegReg(Invalid, MemRegTEST_to_IDIV, 1); break; - case 0xf7: MemRegReg(Invalid, MemRegTEST_to_IDIV, 2); break; + case 0xf4: Complete(HLT, None, None, DataSize::None); break; + case 0xf5: 
Complete(CMC, None, None, DataSize::None); break; + case 0xf6: MemRegReg(Invalid, MemRegTEST_to_IDIV, DataSize::Byte); break; + case 0xf7: MemRegReg(Invalid, MemRegTEST_to_IDIV, data_size_); break; - case 0xf8: Complete(CLC, None, None, 1); break; - case 0xf9: Complete(STC, None, None, 1); break; - case 0xfa: Complete(CLI, None, None, 1); break; - case 0xfb: Complete(STI, None, None, 1); break; - case 0xfc: Complete(CLD, None, None, 1); break; - case 0xfd: Complete(STD, None, None, 1); break; + case 0xf8: Complete(CLC, None, None, DataSize::None); break; + case 0xf9: Complete(STC, None, None, DataSize::None); break; + case 0xfa: Complete(CLI, None, None, DataSize::None); break; + case 0xfb: Complete(STI, None, None, DataSize::None); break; + case 0xfc: Complete(CLD, None, None, DataSize::None); break; + case 0xfd: Complete(STD, None, None, DataSize::None); break; - case 0xfe: MemRegReg(Invalid, MemRegINC_DEC, 1); break; - case 0xff: MemRegReg(Invalid, MemRegINC_to_PUSH, 1); break; - - // Other prefix bytes. - case 0xf0: lock_ = true; break; - case 0xf2: repetition_ = Repetition::RepNE; break; - case 0xf3: repetition_ = Repetition::RepE; break; + case 0xfe: MemRegReg(Invalid, MemRegINC_DEC, DataSize::Byte); break; + case 0xff: MemRegReg(Invalid, MemRegINC_to_PUSH, data_size_); break; } } + // MARK: - Additional F page of instructions. + + if(phase_ == Phase::InstructionPageF && source != end) { + // Update the instruction acquired. + const uint8_t instr = *source; + ++source; + ++consumed_; + + // NB: to reach here, the instruction set must be at least + // that of an 80286. 
+ switch(instr) { + default: undefined(); + + case 0x00: MemRegReg(Invalid, MemRegSLDT_to_VERW, data_size_); break; + case 0x01: MemRegReg(Invalid, MemRegSGDT_to_LMSW, data_size_); break; + case 0x02: MemRegReg(LAR, Reg_MemReg, data_size_); break; + case 0x03: MemRegReg(LSL, Reg_MemReg, data_size_); break; + case 0x05: + Requires(i80286); + Complete(LOADALL, None, None, DataSize::None); + break; + case 0x06: Complete(CLTS, None, None, DataSize::Byte); break; + + case 0x20: + RequiresMin(i80386); + MemRegReg(MOVfromCr, Reg_MemReg, DataSize::DWord); + break; + case 0x21: + RequiresMin(i80386); + MemRegReg(MOVfromDr, Reg_MemReg, DataSize::DWord); + break; + case 0x22: + RequiresMin(i80386); + MemRegReg(MOVtoCr, Reg_MemReg, DataSize::DWord); + break; + case 0x23: + RequiresMin(i80386); + MemRegReg(MOVtoDr, Reg_MemReg, DataSize::DWord); + break; + case 0x24: + RequiresMin(i80386); + MemRegReg(MOVfromTr, Reg_MemReg, DataSize::DWord); + break; + case 0x26: + RequiresMin(i80386); + MemRegReg(MOVtoTr, Reg_MemReg, DataSize::DWord); + break; + + case 0x70: RequiresMin(i80386); Displacement(JO, data_size_); break; + case 0x71: RequiresMin(i80386); Displacement(JNO, data_size_); break; + case 0x72: RequiresMin(i80386); Displacement(JB, data_size_); break; + case 0x73: RequiresMin(i80386); Displacement(JNB, data_size_); break; + case 0x74: RequiresMin(i80386); Displacement(JE, data_size_); break; + case 0x75: RequiresMin(i80386); Displacement(JNE, data_size_); break; + case 0x76: RequiresMin(i80386); Displacement(JBE, data_size_); break; + case 0x77: RequiresMin(i80386); Displacement(JNBE, data_size_); break; + case 0x78: RequiresMin(i80386); Displacement(JS, data_size_); break; + case 0x79: RequiresMin(i80386); Displacement(JNS, data_size_); break; + case 0x7a: RequiresMin(i80386); Displacement(JP, data_size_); break; + case 0x7b: RequiresMin(i80386); Displacement(JNP, data_size_); break; + case 0x7c: RequiresMin(i80386); Displacement(JL, data_size_); break; + case 0x7d: 
RequiresMin(i80386); Displacement(JNL, data_size_); break; + case 0x7e: RequiresMin(i80386); Displacement(JLE, data_size_); break; + case 0x7f: RequiresMin(i80386); Displacement(JNLE, data_size_); break; + +#define Set(x) \ + RequiresMin(i80386); \ + MemRegReg(SET##x, MemRegSingleOperand, DataSize::Byte); + + case 0x90: Set(O); break; + case 0x91: Set(NO); break; + case 0x92: Set(B); break; + case 0x93: Set(NB); break; + case 0x94: Set(Z); break; + case 0x95: Set(NZ); break; + case 0x96: Set(BE); break; + case 0x97: Set(NBE); break; + case 0x98: Set(S); break; + case 0x99: Set(NS); break; + case 0x9a: Set(P); break; + case 0x9b: Set(NP); break; + case 0x9c: Set(L); break; + case 0x9d: Set(NL); break; + case 0x9e: Set(LE); break; + case 0x9f: Set(NLE); break; + +#undef Set + + case 0xa0: RequiresMin(i80386); Complete(PUSH, FS, None, data_size_); break; + case 0xa1: RequiresMin(i80386); Complete(POP, FS, None, data_size_); break; + case 0xa3: RequiresMin(i80386); MemRegReg(BT, MemReg_Reg, data_size_); break; + case 0xa4: + RequiresMin(i80386); + MemRegReg(SHLDimm, Reg_MemReg, data_size_); + operand_size_ = DataSize::Byte; + break; + case 0xa5: + RequiresMin(i80386); + MemRegReg(SHLDCL, MemReg_Reg, data_size_); + break; + case 0xa8: RequiresMin(i80386); Complete(PUSH, GS, None, data_size_); break; + case 0xa9: RequiresMin(i80386); Complete(POP, GS, None, data_size_); break; + case 0xab: RequiresMin(i80386); MemRegReg(BTS, MemReg_Reg, data_size_); break; + case 0xac: + RequiresMin(i80386); + MemRegReg(SHRDimm, Reg_MemReg, data_size_); + operand_size_ = DataSize::Byte; + break; + case 0xad: + RequiresMin(i80386); + MemRegReg(SHRDCL, MemReg_Reg, data_size_); + break; + case 0xaf: + RequiresMin(i80386); + MemRegReg(IMUL_2, Reg_MemReg, data_size_); + break; + + case 0xb2: RequiresMin(i80386); MemRegReg(LSS, Reg_MemReg, data_size_); break; + case 0xb3: RequiresMin(i80386); MemRegReg(BTR, MemReg_Reg, data_size_); break; + case 0xb4: RequiresMin(i80386); MemRegReg(LFS, 
Reg_MemReg, data_size_); break; + case 0xb5: RequiresMin(i80386); MemRegReg(LGS, Reg_MemReg, data_size_); break; + case 0xb6: + RequiresMin(i80386); + MemRegReg(MOVZX, Reg_MemReg, DataSize::Byte); + break; + case 0xb7: + RequiresMin(i80386); + MemRegReg(MOVZX, Reg_MemReg, DataSize::Word); + break; + case 0xba: RequiresMin(i80386); MemRegReg(Invalid, MemRegBT_to_BTC, data_size_); break; + case 0xbb: RequiresMin(i80386); MemRegReg(BTC, MemReg_Reg, data_size_); break; + case 0xbc: RequiresMin(i80386); MemRegReg(BSF, MemReg_Reg, data_size_); break; + case 0xbd: RequiresMin(i80386); MemRegReg(BSR, MemReg_Reg, data_size_); break; + case 0xbe: + RequiresMin(i80386); + MemRegReg(MOVSX, Reg_MemReg, DataSize::Byte); + break; + case 0xbf: + RequiresMin(i80386); + MemRegReg(MOVSX, Reg_MemReg, DataSize::Word); + break; + } + } + +#undef Requires +#undef RequiresMin +#undef ShiftGroup +#undef Displacement16Operand8 #undef Far -#undef Jump +#undef Immediate +#undef Displacement #undef MemRegReg #undef AddrReg #undef RegAddr @@ -335,54 +609,68 @@ std::pair Decoder::decode(const uint8_t * const uint8_t mod = *source >> 6; // i.e. mode. const uint8_t reg = (*source >> 3) & 7; // i.e. register. const uint8_t rm = *source & 7; // i.e. register/memory. + bool expects_sib = false; ++source; ++consumed_; Source memreg; - constexpr Source reg_table[3][8] = { - {}, - { - Source::AL, Source::CL, Source::DL, Source::BL, - Source::AH, Source::CH, Source::DH, Source::BH, - }, { - Source::AX, Source::CX, Source::DX, Source::BX, - Source::SP, Source::BP, Source::SI, Source::DI, - } + + // These tables are fairly redundant due to the register ordering within + // Source, but act to improve readability and permit further Source + // reordering in the future. 
+ constexpr Source reg_table[8] = { + Source::eAX, Source::eCX, Source::eDX, Source::eBX, + Source::eSPorAH, Source::eBPorCH, Source::eSIorDH, Source::eDIorBH, + }; + constexpr Source seg_table[6] = { + Source::ES, Source::CS, Source::SS, Source::DS, Source::FS, Source::GS }; - switch(mod) { - case 0: { - constexpr Source rm_table[8] = { - Source::IndBXPlusSI, Source::IndBXPlusDI, - Source::IndBPPlusSI, Source::IndBPPlusDI, - Source::IndSI, Source::IndDI, - Source::DirectAddress, Source::IndBX, - }; - memreg = rm_table[rm]; - } break; - - default: { - constexpr Source rm_table[8] = { - Source::IndBXPlusSI, Source::IndBXPlusDI, - Source::IndBPPlusSI, Source::IndBPPlusDI, - Source::IndSI, Source::IndDI, - Source::IndBP, Source::IndBX, - }; - memreg = rm_table[rm]; - - displacement_size_ = 1 + (mod == 2); - } break; + // Mode 3 is the same regardless of 16/32-bit mode. So deal with that up front. + if(mod == 3) { // Other operand is just a register. - case 3: - memreg = reg_table[operation_size_][rm]; + memreg = reg_table[rm]; - // LES and LDS accept a memory argument only, not a register. - if(operation_ == Operation::LES || operation_ == Operation::LDS) { - const auto result = std::make_pair(consumed_, Instruction()); - reset_parsing(); - return result; - } - break; + // LES, LDS, etc accept a memory argument only, not a register. + if( + operation_ == Operation::LES || + operation_ == Operation::LDS || + operation_ == Operation::LGS || + operation_ == Operation::LSS || + operation_ == Operation::LFS) { + undefined(); + } + } else { + const DataSize sizes[] = { + DataSize::None, + DataSize::Byte, + data_size(address_size_) + }; + displacement_size_ = sizes[mod]; + memreg = Source::Indirect; + + if(address_size_ == AddressSize::b32) { + // 32-bit decoding: the range of potential indirections is expanded, + // and may segue into obtaining a SIB. 
+ sib_ = ScaleIndexBase(0, Source::None, reg_table[rm]); + expects_sib = rm == 4; // Indirect via eSP isn't directly supported; it's the + // escape indicator for reading a SIB. + } else { + // Classic 16-bit decoding: mode picks a displacement size, + // and a few fixed index+base pairs are defined. + constexpr ScaleIndexBase rm_table[8] = { + ScaleIndexBase(0, Source::eBX, Source::eSI), + ScaleIndexBase(0, Source::eBX, Source::eDI), + ScaleIndexBase(0, Source::eBP, Source::eSI), + ScaleIndexBase(0, Source::eBP, Source::eDI), + ScaleIndexBase(0, Source::None, Source::eSI), + ScaleIndexBase(0, Source::None, Source::eDI), + ScaleIndexBase(0, Source::None, Source::eBP), + ScaleIndexBase(0, Source::None, Source::eBX), + }; + + sib_ = rm_table[rm]; + } } switch(modregrm_format_) { @@ -390,9 +678,9 @@ std::pair Decoder::decode(const uint8_t * case ModRegRMFormat::MemReg_Reg: { if(modregrm_format_ == ModRegRMFormat::Reg_MemReg) { source_ = memreg; - destination_ = reg_table[operation_size_][reg]; + destination_ = reg_table[reg]; } else { - source_ = reg_table[operation_size_][reg]; + source_ = reg_table[reg]; destination_ = memreg; } } break; @@ -401,55 +689,54 @@ std::pair Decoder::decode(const uint8_t * source_ = destination_ = memreg; switch(reg) { - default: { - const auto result = std::make_pair(consumed_, Instruction()); - reset_parsing(); - return result; - } + default: undefined(); case 0: operation_ = Operation::TEST; break; case 2: operation_ = Operation::NOT; break; case 3: operation_ = Operation::NEG; break; case 4: operation_ = Operation::MUL; break; - case 5: operation_ = Operation::IMUL; break; + case 5: operation_ = Operation::IMUL_1; break; case 6: operation_ = Operation::DIV; break; case 7: operation_ = Operation::IDIV; break; } break; - case ModRegRMFormat::SegReg: { - source_ = memreg; - - constexpr Source seg_table[4] = { - Source::ES, Source::CS, - Source::SS, Source::DS, - }; - - if(reg & 4) { - const auto result = std::make_pair(consumed_, 
Instruction()); - reset_parsing(); - return result; + case ModRegRMFormat::Seg_MemReg: + case ModRegRMFormat::MemReg_Seg: + // The 16-bit chips have four segment registers; + // the 80386 onwards has six. + if(!is_32bit(model) && reg > 3) { + undefined(); + } else if(reg > 5) { + undefined(); } - destination_ = seg_table[reg]; - } break; + if(modregrm_format_ == ModRegRMFormat::Seg_MemReg) { + source_ = memreg; + destination_ = seg_table[reg]; + + // 80286 and later disallow MOV to CS. + if(model >= Model::i80286 && destination_ == Source::CS) { + undefined(); + } + } else { + source_ = seg_table[reg]; + destination_ = memreg; + } + break; case ModRegRMFormat::MemRegROL_to_SAR: destination_ = memreg; switch(reg) { - default: { - const auto result = std::make_pair(consumed_, Instruction()); - reset_parsing(); - return result; - } + default: undefined(); case 0: operation_ = Operation::ROL; break; - case 2: operation_ = Operation::ROR; break; - case 3: operation_ = Operation::RCL; break; - case 4: operation_ = Operation::RCR; break; - case 5: operation_ = Operation::SAL; break; - case 6: operation_ = Operation::SHR; break; + case 1: operation_ = Operation::ROR; break; + case 2: operation_ = Operation::RCL; break; + case 3: operation_ = Operation::RCR; break; + case 4: operation_ = Operation::SAL; break; + case 5: operation_ = Operation::SHR; break; case 7: operation_ = Operation::SAR; break; } break; @@ -458,11 +745,7 @@ std::pair Decoder::decode(const uint8_t * source_ = destination_ = memreg; switch(reg) { - default: { - const auto result = std::make_pair(consumed_, Instruction()); - reset_parsing(); - return result; - } + default: undefined(); case 0: operation_ = Operation::INC; break; case 1: operation_ = Operation::DEC; break; @@ -473,36 +756,23 @@ std::pair Decoder::decode(const uint8_t * source_ = destination_ = memreg; switch(reg) { - default: { - const auto result = std::make_pair(consumed_, Instruction()); - reset_parsing(); - return result; - } + default: 
undefined(); - case 0: operation_ = Operation::INC; break; - case 1: operation_ = Operation::DEC; break; - case 2: operation_ = Operation::CALLN; break; - case 3: - operation_ = Operation::CALLF; - operand_size_ = 4; - source_ = Source::Immediate; - break; - case 4: operation_ = Operation::JMPN; break; - case 5: - operation_ = Operation::JMPF; - operand_size_ = 4; - source_ = Source::Immediate; - break; + case 0: operation_ = Operation::INC; break; + case 1: operation_ = Operation::DEC; break; + case 2: operation_ = Operation::CALLabs; break; + case 3: operation_ = Operation::CALLfar; break; + case 4: operation_ = Operation::JMPabs; break; + case 5: operation_ = Operation::JMPfar; break; case 6: operation_ = Operation::PUSH; break; } break; - case ModRegRMFormat::MemRegPOP: + case ModRegRMFormat::MemRegSingleOperand: source_ = destination_ = memreg; if(reg != 0) { - reset_parsing(); - return std::make_pair(consumed_, Instruction()); + undefined(); } break; @@ -513,8 +783,11 @@ std::pair Decoder::decode(const uint8_t * break; case ModRegRMFormat::MemRegADD_to_CMP: + case ModRegRMFormat::MemRegADD_to_CMP_SignExtend: + source_ = Source::Immediate; destination_ = memreg; - operand_size_ = operation_size_; + operand_size_ = (modregrm_format_ == ModRegRMFormat::MemRegADD_to_CMP_SignExtend) ? DataSize::Byte : operation_size_; + sign_extend_ = true; // Will be effective only if modregrm_format_ == ModRegRMFormat::MemRegADD_to_CMP_SignExtend. switch(reg) { default: operation_ = Operation::ADD; break; @@ -528,45 +801,91 @@ std::pair Decoder::decode(const uint8_t * } break; - case ModRegRMFormat::MemRegADC_to_CMP: - destination_ = memreg; - source_ = Source::Immediate; - operand_size_ = 1; // ... and always 1; it'll be sign extended if - // the operation requires it. 
+ case ModRegRMFormat::MemRegSLDT_to_VERW: + destination_ = source_ = memreg; switch(reg) { - default: { - const auto result = std::make_pair(consumed_, Instruction()); - reset_parsing(); - return result; - } + default: undefined(); - case 0: operation_ = Operation::ADD; break; - case 2: operation_ = Operation::ADC; break; - case 3: operation_ = Operation::SBB; break; - case 5: operation_ = Operation::SUB; break; - case 7: operation_ = Operation::CMP; break; + case 0: operation_ = Operation::SLDT; break; + case 1: operation_ = Operation::STR; break; + case 2: operation_ = Operation::LLDT; break; + case 3: operation_ = Operation::LTR; break; + case 4: operation_ = Operation::VERR; break; + case 5: operation_ = Operation::VERW; break; + } + break; + + case ModRegRMFormat::MemRegSGDT_to_LMSW: + destination_ = source_ = memreg; + + switch(reg) { + default: undefined(); + + case 0: operation_ = Operation::SGDT; break; + case 1: operation_ = Operation::SIDT; break; + case 2: operation_ = Operation::LGDT; break; + case 3: operation_ = Operation::LIDT; break; + case 4: operation_ = Operation::SMSW; break; + case 6: operation_ = Operation::LMSW; break; + } + break; + + case ModRegRMFormat::MemRegBT_to_BTC: + destination_ = memreg; + source_ = Source::Immediate; + operand_size_ = DataSize::Byte; + + switch(reg) { + default: undefined(); + + case 4: operation_ = Operation::BT; break; + case 5: operation_ = Operation::BTS; break; + case 6: operation_ = Operation::BTR; break; + case 7: operation_ = Operation::BTC; break; } break; default: assert(false); } - phase_ = (displacement_size_ + operand_size_) ? Phase::AwaitingDisplacementOrOperand : Phase::ReadyToPost; + if(expects_sib && (source_ == Source::Indirect | destination_ == Source::Indirect)) { + phase_ = Phase::ScaleIndexBase; + } else { + phase_ = (displacement_size_ != DataSize::None || operand_size_ != DataSize::None) ? 
Phase::DisplacementOrOperand : Phase::ReadyToPost; + } + } + +#undef undefined + + // MARK: - ScaleIndexBase + + if(phase_ == Phase::ScaleIndexBase && source != end) { + sib_ = *source; + ++source; + ++consumed_; + + // Potentially record the lack of a base. + if(displacement_size_ == DataSize::None && (uint8_t(sib_)&7) == 5) { + source_ = (source_ == Source::Indirect) ? Source::IndirectNoBase : source_; + destination_ = (destination_ == Source::Indirect) ? Source::IndirectNoBase : destination_; + } + + phase_ = (displacement_size_ != DataSize::None || operand_size_ != DataSize::None) ? Phase::DisplacementOrOperand : Phase::ReadyToPost; } // MARK: - Displacement and operand. - if(phase_ == Phase::AwaitingDisplacementOrOperand && source != end) { - const int required_bytes = displacement_size_ + operand_size_; + if(phase_ == Phase::DisplacementOrOperand) { + const auto required_bytes = int(byte_size(displacement_size_) + byte_size(operand_size_)); const int outstanding_bytes = required_bytes - operand_bytes_; const int bytes_to_consume = std::min(int(end - source), outstanding_bytes); - // TODO: I can surely do better than this? for(int c = 0; c < bytes_to_consume; c++) { - inward_data_ = (inward_data_ >> 8) | (uint64_t(source[0]) << 56); + inward_data_ |= decltype(inward_data_)(source[0]) << next_inward_data_shift_; ++source; + next_inward_data_shift_ += 8; } consumed_ += bytes_to_consume; @@ -575,28 +894,28 @@ std::pair Decoder::decode(const uint8_t * if(bytes_to_consume == outstanding_bytes) { phase_ = Phase::ReadyToPost; - switch(operand_size_) { - default: operand_ = 0; break; - case 1: - operand_ = inward_data_ >> 56; inward_data_ <<= 8; - - // Sign extend if a single byte operand is feeding a two-byte instruction. - if(operation_size_ == 2 && operation_ != Operation::IN && operation_ != Operation::OUT) { - operand_ |= (operand_ & 0x80) ? 
0xff00 : 0x0000; - } - break; - case 4: displacement_size_ = 2; [[fallthrough]]; - case 2: operand_ = inward_data_ >> 48; inward_data_ <<= 16; break; - break; - } switch(displacement_size_) { - default: displacement_ = 0; break; - case 1: displacement_ = int8_t(inward_data_ >> 56); break; - case 2: displacement_ = int16_t(inward_data_ >> 48); break; + case DataSize::None: displacement_ = 0; break; + case DataSize::Byte: displacement_ = int8_t(inward_data_); break; + case DataSize::Word: displacement_ = int16_t(inward_data_); break; + case DataSize::DWord: displacement_ = int32_t(inward_data_); break; + } + inward_data_ >>= bit_size(displacement_size_); + + // Use inequality of sizes as a test for necessary sign extension. + if(operand_size_ == data_size_ || !sign_extend_) { + operand_ = decltype(operand_)(inward_data_); + } else { + switch(operand_size_) { + case DataSize::None: operand_ = 0; break; + case DataSize::Byte: operand_ = decltype(operand_)(int8_t(inward_data_)); break; + case DataSize::Word: operand_ = decltype(operand_)(int16_t(inward_data_)); break; + case DataSize::DWord: operand_ = decltype(operand_)(int32_t(inward_data_)); break; + } } } else { // Provide a genuine measure of further bytes required. - return std::make_pair(-(outstanding_bytes - bytes_to_consume), Instruction()); + return std::make_pair(-(outstanding_bytes - bytes_to_consume), InstructionT()); } } @@ -605,21 +924,58 @@ std::pair Decoder::decode(const uint8_t * if(phase_ == Phase::ReadyToPost) { const auto result = std::make_pair( consumed_, - Instruction( + InstructionT( operation_, source_, destination_, + sib_, lock_, + address_size_, segment_override_, repetition_, - Size(operation_size_), - displacement_, - operand_) + DataSize(operation_size_), + static_cast(displacement_), + static_cast(operand_), + consumed_ + ) ); reset_parsing(); return result; } + // Check for a too-long instruction. 
+ if(consumed_ == max_instruction_length) { + std::pair result; + if(max_instruction_length == 65536) { + result = std::make_pair(consumed_, InstructionT(Operation::NOP, consumed_)); + } else { + result = std::make_pair(consumed_, InstructionT()); + } + reset_parsing(); + return result; + } + // i.e. not done yet. - return std::make_pair(0, Instruction()); + return std::make_pair(0, InstructionT()); } + +template void Decoder::set_32bit_protected_mode(bool enabled) { + if constexpr (!is_32bit(model)) { + assert(!enabled); + return; + } + + if(enabled) { + default_address_size_ = address_size_ = AddressSize::b32; + default_data_size_ = data_size_ = DataSize::DWord; + } else { + default_address_size_ = address_size_ = AddressSize::b16; + default_data_size_ = data_size_ = DataSize::Word; + } +} + +// Ensure all possible decoders are built. +template class InstructionSet::x86::Decoder; +template class InstructionSet::x86::Decoder; +template class InstructionSet::x86::Decoder; +template class InstructionSet::x86::Decoder; diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp index 5a21878cd..02af8bfba 100644 --- a/InstructionSets/x86/Decoder.hpp +++ b/InstructionSets/x86/Decoder.hpp @@ -10,6 +10,7 @@ #define InstructionSets_x86_Decoder_hpp #include "Instruction.hpp" +#include "Model.hpp" #include #include @@ -17,38 +18,54 @@ namespace InstructionSet { namespace x86 { -enum class Model { - i8086, -}; - /*! Implements Intel x86 instruction decoding. This is an experimental implementation; it has not yet undergone significant testing. */ -class Decoder { +template class Decoder { public: - Decoder(Model model); + using InstructionT = Instruction; /*! - @returns an @c Instruction plus a size; a positive size to indicate successful decoding; a - negative size specifies the [negatived] number of further bytes the caller should ideally - collect before calling again. 
The caller is free to call with fewer, but may not get a decoded - instruction in response, and the decoder may still not be able to complete decoding - even if given that number of bytes. + @returns an @c Instruction plus a size; a positive size indicates successful decoding of + an instruction that was that many bytes long in total; a negative size specifies the [negatived] + minimum number of further bytes the caller should ideally collect before calling again. The + caller is free to call with fewer, but may not get a decoded instruction in response, and the + decoder may still not be able to complete decoding even if given that number of bytes. + + Successful decoding is defined to mean that all decoding steps are complete. The output + may still be an illegal instruction (indicated by Operation::Invalid), if the byte sequence + supplied cannot form a valid instruction. + + @discussion although instructions also contain an indicator of their length, on chips prior + to the 80286 there is no limit to instruction length and that could in theory overflow the available + storage, which can describe instructions only up to 1kb in size. + + The 80286 and 80386 have instruction length limits of 10 and 15 bytes respectively, so + cannot overflow the field. */ - std::pair decode(const uint8_t *source, size_t length); + std::pair decode(const uint8_t *source, size_t length); + + /*! + Enables or disables 32-bit protected mode. Meaningful only if the @c Model supports it. + */ + void set_32bit_protected_mode(bool); private: enum class Phase { /// Captures all prefixes and continues until an instruction byte is encountered. Instruction, + /// Having encountered a 0x0f first instruction byte, waits for the next byte fully to determine the instruction. + InstructionPageF, /// Receives a ModRegRM byte and either populates the source_ and dest_ fields appropriately /// or completes decoding of the instruction, as per the instruction format. 
ModRegRM, + /// Awaits n 80386+-style scale-index-base byte ('SIB'), indicating the form of indirect addressing. + ScaleIndexBase, /// Waits for sufficiently many bytes to pass for the required displacement and operand to be captured. /// Cf. displacement_size_ and operand_size_. - AwaitingDisplacementOrOperand, + DisplacementOrOperand, /// Forms and returns an Instruction, and resets parsing state. ReadyToPost } phase_ = Phase::Instruction; @@ -59,29 +76,27 @@ class Decoder { /// are packaged into an Instruction. enum class ModRegRMFormat: uint8_t { // Parse the ModRegRM for mode, register and register/memory fields - // and populate the source_ and destination_ fields appropriate. + // and populate the source_ and destination_ fields appropriately. MemReg_Reg, Reg_MemReg, // Parse for mode and register/memory fields, populating both - // source_ and destination_ fields with the result. Use the 'register' - // field to pick an operation from the TEST/NOT/NEG/MUL/IMUL/DIV/IDIV group. - MemRegTEST_to_IDIV, - - // Parse for mode and register/memory fields, populating both - // source_ and destination_ fields with the result. Use the 'register' - // field to check for the POP operation. - MemRegPOP, + // source_ and destination_ fields with the single register/memory result. + MemRegSingleOperand, // Parse for mode and register/memory fields, populating both // the destination_ field with the result and setting source_ to Immediate. - // Use the 'register' field to check for the MOV operation. MemRegMOV, // Parse for mode and register/memory fields, populating the - // destination_ field with the result. Use the 'register' field - // to pick an operation from the ROL/ROR/RCL/RCR/SAL/SHR/SAR group. - MemRegROL_to_SAR, + // source_ field with the result. Fills destination_ with a segment + // register based on the reg field. 
+ Seg_MemReg, + MemReg_Seg, + + // + // 'Group 1' + // // Parse for mode and register/memory fields, populating the // destination_ field with the result. Use the 'register' field @@ -89,32 +104,76 @@ class Decoder { // waits for an operand equal to the operation size. MemRegADD_to_CMP, + // Acts exactly as MemRegADD_to_CMP but the operand is fixed in size + // at a single byte, which is sign extended to the operation size. + MemRegADD_to_CMP_SignExtend, + + // + // 'Group 2' + // + // Parse for mode and register/memory fields, populating the - // source_ field with the result. Fills destination_ with a segment - // register based on the reg field. - SegReg, + // destination_ field with the result. Use the 'register' field + // to pick an operation from the ROL/ROR/RCL/RCR/SAL/SHR/SAR group. + MemRegROL_to_SAR, + + // + // 'Group 3' + // + + // Parse for mode and register/memory fields, populating both + // source_ and destination_ fields with the result. Use the 'register' + // field to pick an operation from the TEST/NOT/NEG/MUL/IMUL/DIV/IDIV group. + MemRegTEST_to_IDIV, + + // + // 'Group 4' + // // Parse for mode and register/memory fields, populating the // source_ and destination_ fields with the result. Uses the // 'register' field to pick INC or DEC. MemRegINC_DEC, + // + // 'Group 5' + // + // Parse for mode and register/memory fields, populating the // source_ and destination_ fields with the result. Uses the // 'register' field to pick from INC/DEC/CALL/JMP/PUSH, altering // the source to ::Immediate and setting an operand size if necessary. MemRegINC_to_PUSH, - // Parse for mode and register/memory fields, populating the - // source_ and destination_ fields with the result. Uses the - // 'register' field to pick from ADD/ADC/SBB/SUB/CMP, altering - // the source to ::Immediate and setting an appropriate operand size. 
- MemRegADC_to_CMP, + // + // 'Group 6' + // + + // Parse for mode and register/memory field, populating both source_ + // and destination_ fields with the result. Uses the 'register' field + // to pick from SLDT/STR/LLDT/LTR/VERR/VERW. + MemRegSLDT_to_VERW, + + // + // 'Group 7' + // + + // Parse for mode and register/memory field, populating both source_ + // and destination_ fields with the result. Uses the 'register' field + // to pick from SGDT/LGDT/SMSW/LMSW. + MemRegSGDT_to_LMSW, + + // + // 'Group 8' + // + + // Parse for mode and register/memory field, populating destination, + // and prepare to read a single byte as source. + MemRegBT_to_BTC, } modregrm_format_ = ModRegRMFormat::MemReg_Reg; // Ephemeral decoding state. Operation operation_ = Operation::Invalid; - uint8_t instr_ = 0x00; // TODO: is this desired, versus loading more context into ModRegRMFormat? int consumed_ = 0, operand_bytes_ = 0; // Source and destination locations. @@ -122,30 +181,49 @@ class Decoder { Source destination_ = Source::None; // Immediate fields. - int16_t displacement_ = 0; - uint16_t operand_ = 0; + int32_t displacement_ = 0; + uint32_t operand_ = 0; uint64_t inward_data_ = 0; + int next_inward_data_shift_ = 0; + + // Indirection style. + ScaleIndexBase sib_; // Facts about the instruction. - int displacement_size_ = 0; // i.e. size of in-stream displacement, if any. - int operand_size_ = 0; // i.e. size of in-stream operand, if any. - int operation_size_ = 0; // i.e. size of data manipulated by the operation. + DataSize displacement_size_ = DataSize::None; // i.e. size of in-stream displacement, if any. + DataSize operand_size_ = DataSize::None; // i.e. size of in-stream operand, if any. + DataSize operation_size_ = DataSize::None; // i.e. size of data manipulated by the operation. + + bool sign_extend_ = false; // If set then sign extend the operand up to the operation size; + // otherwise it'll be zero-padded. // Prefix capture fields. 
Repetition repetition_ = Repetition::None; bool lock_ = false; Source segment_override_ = Source::None; + // 32-bit/16-bit selection. + AddressSize default_address_size_ = AddressSize::b16; + DataSize default_data_size_ = DataSize::Word; + AddressSize address_size_ = AddressSize::b16; + DataSize data_size_ = DataSize::Word; + /// Resets size capture and all fields with default values. void reset_parsing() { consumed_ = operand_bytes_ = 0; - displacement_size_ = operand_size_ = 0; + displacement_size_ = operand_size_ = operation_size_ = DataSize::None; displacement_ = operand_ = 0; lock_ = false; + address_size_ = default_address_size_; + data_size_ = default_data_size_; segment_override_ = Source::None; repetition_ = Repetition::None; phase_ = Phase::Instruction; source_ = destination_ = Source::None; + sib_ = ScaleIndexBase(); + next_inward_data_shift_ = 0; + inward_data_ = 0; + sign_extend_ = false; } }; diff --git a/InstructionSets/x86/Documentation/80386 opcode map.html b/InstructionSets/x86/Documentation/80386 opcode map.html new file mode 100644 index 000000000..3bab74c42 --- /dev/null +++ b/InstructionSets/x86/Documentation/80386 opcode map.html @@ -0,0 +1,917 @@ + + + + + 80386 Opcode Map + + + +

Codes for Addressing Method

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ADirect address; the instruction has no MODRM field; the address of the operand is encoded in the instruction; no base register, index register, or scaling factor can be applied; e.g., far JMP (EA).
CThe reg field of the MODRM field selects a control register; e.g., MOV (0F20, 0F22).
DThe reg field of the MODRM field selects a debug register; e.g., MOV (0F21, 0F23).
EA MODRM field follows the opcode and specifies the operand. The operand is either a general register or a memory address. If it is a memory address, the address is computed from a segment register and any of the following values: a base register, an index register, a scaling factor, a displacement.
FFlags register
GThe reg field of the MODRM field selects a general register; e.g., ADD (00).
IImmediate data. The value of the operand is encoded in subsequent bytes of the instruction.
JThe instruction contains a relative offset to be added to the instruction-pointer register; e.g., JMP short, LOOP.
MThe MODRM field may refer only to memory; e.g., BOUND, LES, LDS, LSS, LFS, LGS.
OThe instruction has no MODRM field; the offset of the operand is coded as a word or dword (depending on address size attribute) in the instruction. No base register, index register, or scaling factor can be applied; e.g., MOV (A0–A3).
RThe mod field of the MODRM field may refer only to a general register; e.g., MOV(0F20–0F24, 0F26).
SThe reg field of the MODRM field selects a segment register; e.g., MOV (8C, 8E).
TThe reg field of the MODRM field selects a test register; e.g., MOV (0F24, 0F26).
XMemory addressed by DS:SI; e.g., MOVS, CMPS, OUTS, LODS, SCAS.
YMemory addressed by ES:DI; e.g., MOVS, CMPS, INS, STOS.
+ +

Codes for Operand Type

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
aTwo one-word operands in memory or two dword operands in memory, depending on operand size attribute (used only by BOUND).
bByte (regardless of operand size attribute).
cByte or word, depending on operand size attribute.
dDword (regardless of operand size attribute).
p32-bit or 48-bit pointer, depending on operand size attribute.
sSix-byte pseudo-descriptor.
vWord or dword, depending on operand size attribute.
wWord (regardless of operand size attribute).
+ +

Register Codes

+ + When an operand is a specific register encoded in the opcode, the register is identified by its name; e.g., AX, CL, or ESI. The name of the register indicates whether the register is 32, 16, or 8 bits wide. A register identifier of the form eXX is used when the width of the register depends on the operand size attribute. For example, eAX indicates that the AX register is used when the operand size attribute is 16, and the EAX register is used when the operand size attribute is 32. + +

One-byte 80386 Opcode Map

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
x0x1x2x3x4x5x6x7x8x9xAxBxCxDxExF
0xADDPUSH ESPOP ESORPUSH CS2-byte escape codes
Eb, GbEv, GvGb, EbGv, EvAL, IbeAX, IvEb, GbEv, GvGb, EbGv, EvAL, IbeAX, Iv
1xADCPUSH SSPOP SSSBBPUSH DSPOP DS
Eb, GbEv, GvGb, EbGv, EvAL, IbeAX, IvEb, GbEv, GvGb, EbGv, EvAL, IbeAX, Iv
2xANDSEG =ESPOP ESSUBSEG =CSDAS
Eb, GbEv, GvGb, EbGv, EvAL, IbeAX, IvEb, GbEv, GvGb, EbGv, EvAL, IbeAX, Iv
3xXORSEG =SSAAACMPSEG =DSAAS
Eb, GbEv, GvGb, EbGv, EvAL, IbeAX, IvEb, GbEv, GvGb, EbGv, EvAL, IbeAX, Iv
4xINC general registerDEC general register
eAXeCXeDXeBXeSPeBPeSIeDIeAXeCXeDXeBXeSPeBPeSIeDI
5xPUSH general registerPOP general register
eAXeCXeDXeBXeSPeBPeSIeDIeAXeCXeDXeBXeSPeBPeSIeDI
6xPUSHAPOPABOUND Gv, MaARPL Gv, MaSEG =FSSEG =GSOperand SizeAddress SizePUSH IvIMUL GvEvIvPUSH IbIMUL GvEvIbINSB Yb, DxINSW/D Yv, DxOUTSB Dx, XbOUTSW/D Dx, Xb
7xShort-displacement jump on condition (Jb)
JOJNOJBJNBJZJNZJBEJNBEJSJNSJPJNPJLJNLJLEJNLE
8xImmediate Grp1Grp1 Ev, IbTESTXCHGMOVMOV Ew, SwLEA Gv, MMOV Sw, EwPOP Ev
Eb, IbEv, IvEb, GbEv, GvEb, GbEv, GvEb, GbEv, GvGb, EbGv, Ev
9xNOPXCHG word or double-word register with eAXCBWCWDCALL ApWAITPUSHF FvPOPF FvSAHFLAHF
eCXeDXeBXeSPeBPeSIeDI
AxMOVMOVSB Xb, YvMOVSW/D Xv, YvCMPSB Xb, YbCMPSW/D Xv, YvTESTSTOSB Yb, ALSTOSW/D Yv, eAXLDSB AL, XbLDSW/D eAX, YvSCASB AL, XbSCASW/D eAX, Xv
AL, ObeAX, OvOb, ALOv, eAXAL, IbeAX, Iv
BxMOV immediate byte into byte registerMOV immediate word or double into word or double register
ALCLDLBLAHCHDHBHeAXeCXeDXeBXeSPeBPeSIeDI
CxShift Grp2RET nearLES Gv, MpLDS Gv, MpMOVENTERLEAVERET farINT 3INT IbINTOIRET
Eb, IbEv, IvIwEb, IbEv, IvIw
DxShift Grp2AAMAADXLATESC (Escape to coprocessor instruction set)
Eb, 1Ev, 1Eb, CLEv, CL
ExLOOPNE JbLOOPE JbLOOP JbJCXZ JbINOUTCALL JvJMPINOUT
AL, IbeAX, IbIb, ALIb, eAXJvApJbAL, DXeAX, DXDX, ALDX, eAX
FxLOCKREPNEREP / REPEHLTCMCUnary Grp3CLCSTCCLISTICLDSTDINC/DEC Grp4Indirect Grp5
EbEv
+ +

Two-Byte 80386 Opcode Map (First byte is 0FH)

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
x0x1x2x3x4x5x6x7x8x9xAxBxCxDxExF
0xGrp6Grp7LAR Gv, EwLSL Gv, EwCLTS
1x
2xMOV Cd, RdMOV Dd, RdMOV Rd, CdMOV Rd, DdMOV Td, RdMOV Rd, Td
8xLong-displacement jump on condition (Jv)
JOJNOJBJNBJZJNZJBEJNBEJSJNSJPJNPJLJNLJLEJNLE
9xByte set on condition (Eb)
SETOSETNOSETBSETNBSETZSETNZSETBESETNBESETSSETNSSETPSETNPSETLSETNLSETLESETNLE
AxPUSH FSPOP FSBT Ev, GvSHLD EvGvIbSHLD EvGvCLPUSH GSPOP GSBTS Ev, GvSHRD EvGvIbSHRD EvGvCLIMUL Gv, Ev
BxLSS MpBTR Ev, GvLFS MpLGS MpMOVZXGrp8 Ev, IbBTC Ev, GvBSF Gv, EvBSR Gv, EvMOVSX
Gv, EbGv, EwGv, EbGv, Ew
Fx
+

Opcodes Determined by Bits 5, 4, 3 of MODRM Field

+ + + + + + +
modnnnR/M
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
000001010011100101110111
Group 1ADDORADCSBBANDSUBXORCMP
Group 2ROLRORRCLRCRSHLSHRSAR
Group 3TEST Ib/IvNOTNEGMUL AL/eAXIMUL AL/EAXDIV AL/eAXIDIV AL/eAX
Group 4INC EbDEC Eb
Group 5INC EvDEC EvCALL EvCALL EpJMP EvJMP EpPUSH Ev
Group 6SLDT EwSTR EwLLDT EwLTR EwVERR EwVERW Ew
Group 7SGDT MsSIDT MsLGDT MsLIDT MsSMSW EwLMSW Ew
Group 8BTBTSBTRBTC
+ + diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp index 3723139b6..d34acb44c 100644 --- a/InstructionSets/x86/Instruction.hpp +++ b/InstructionSets/x86/Instruction.hpp @@ -9,7 +9,9 @@ #ifndef InstructionSets_x86_Instruction_h #define InstructionSets_x86_Instruction_h +#include #include +#include namespace InstructionSet { namespace x86 { @@ -23,6 +25,10 @@ namespace x86 { enum class Operation: uint8_t { Invalid, + // + // 8086 instructions. + // + /// ASCII adjust after addition; source will be AL and destination will be AX. AAA, /// ASCII adjust before division; destination will be AX and source will be a multiplier. @@ -36,9 +42,13 @@ enum class Operation: uint8_t { /// Decimal adjust after subtraction; source and destination will be AL. DAS, - /// Convert byte into word; source will be AL, destination will be AH. + /// If data size is word, convert byte into word; source will be AL, destination will be AH. + /// If data size is DWord, convert word to dword; AX will be expanded to fill EAX. + /// In both cases, conversion will be by sign extension. CBW, - /// Convert word to double word; source will be AX and destination will be DX. + /// If data size is Word, converts word to double word; source will be AX and destination will be DX. + /// If data size is DWord, converts double word to quad word (i.e. CDW); source will be EAX and destination will be EDX:EAX. + /// In both cases, conversion will be by sign extension. CWD, /// Escape, for a coprocessor; perform the bus cycles necessary to read the source and destination and perform a NOP. @@ -59,8 +69,8 @@ enum class Operation: uint8_t { SUB, /// Unsigned multiply; multiplies the source value by AX or AL, storing the result in DX:AX or AX. MUL, - /// Signed multiply; multiplies the source value by AX or AL, storing the result in DX:AX or AX. - IMUL, + /// Single operand signed multiply; multiplies the source value by AX or AL, storing the result in DX:AX or AX. 
+ IMUL_1, /// Unsigned divide; divide the source value by AX or AL, storing the quotient in AL and the remainder in AH. DIV, /// Signed divide; divide the source value by AX or AL, storing the quotient in AL and the remainder in AH. @@ -81,27 +91,27 @@ enum class Operation: uint8_t { JS, JNS, JP, JNP, JL, JNL, JLE, JNLE, /// Far call; see the segment() and offset() fields. - CALLF, - /// Displacement call; followed by a 16-bit operand providing a call offset. - CALLD, + CALLfar, + /// Relative call; see displacement(). + CALLrel, /// Near call. - CALLN, + CALLabs, /// Return from interrupt. IRET, /// Near return; if source is not ::None then it will be an ::Immediate indicating how many additional bytes to remove from the stack. - RETF, + RETfar, /// Far return; if source is not ::None then it will be an ::Immediate indicating how many additional bytes to remove from the stack. - RETN, - /// Near jump; if an operand is not ::None then it gives an absolute destination; otherwise see the displacement. - JMPN, + RETnear, + /// Near jump with an absolute destination. + JMPabs, + /// Near jump with a relative destination. + JMPrel, /// Far jump to the indicated segment and offset. - JMPF, + JMPfar, /// Relative jump performed only if CX = 0; see the displacement. JPCX, /// Generates a software interrupt of the level stated in the operand. INT, - /// Generates a software interrupt of level 3. - INT3, /// Generates a software interrupt of level 4 if overflow is set. INTO, @@ -152,19 +162,19 @@ enum class Operation: uint8_t { PUSH, /// PUSH the flags register to the stack. PUSHF, - /// Rotate the destination left through carry the number of bits indicated by source. + /// Rotate the destination left through carry the number of bits indicated by source; if the source is a register then implicitly its size is 1. RCL, - /// Rotate the destination right through carry the number of bits indicated by source. 
+ /// Rotate the destination right through carry the number of bits indicated by source; if the source is a register then implicitly its size is 1. RCR, - /// Rotate the destination left the number of bits indicated by source. + /// Rotate the destination left the number of bits indicated by source; if the source is a register then implicitly its size is 1. ROL, - /// Rotate the destination right the number of bits indicated by source. + /// Rotate the destination right the number of bits indicated by source; if the source is a register then implicitly its size is 1. ROR, - /// Arithmetic shift left the destination by the number of bits indicated by source. + /// Arithmetic shift left the destination by the number of bits indicated by source; if the source is a register then implicitly its size is 1. SAL, - /// Arithmetic shift right the destination by the number of bits indicated by source. + /// Arithmetic shift right the destination by the number of bits indicated by source; if the source is a register then implicitly its size is 1. SAR, - /// Logical shift right the destination by the number of bits indicated by source. + /// Logical shift right the destination by the number of bits indicated by source; if the source is a register then implicitly its size is 1. SHR, /// Clear carry flag; no source or destination provided. @@ -192,110 +202,599 @@ enum class Operation: uint8_t { /// Load AL with DS:[AL+BX]. XLAT, + + // + // 80186 additions. + // + + /// Checks whether the signed value in the destination register is within the bounds + /// stored at the location indicated by the source register, which will point to two + /// 16- or 32-bit words, the first being a signed lower bound and the signed upper. + /// Raises a bounds exception if not. + BOUND, + + + /// Create stack frame. See operand() for the nesting level and offset() + /// for the dynamic storage size. + ENTER, + /// Procedure exit; copies BP to SP, then pops a new BP from the stack. 
+ LEAVE, + + /// Inputs a byte, word or double word from the port specified by DX, writing it to + /// ES:[e]DI and incrementing or decrementing [e]DI as per the + /// current EFLAGS DF flag. + INS, + /// Outputs a byte, word or double word from ES:[e]DI to the port specified by DX, + /// incrementing or decrementing [e]DI as per the current EFLAGS DF flag.] + OUTS, + + /// Pushes all general purpose registers to the stack, in the order: + /// AX, CX, DX, BX, [original] SP, BP, SI, DI. + PUSHA, + /// Pops all general purpose registers from the stack, in the reverse of + /// the PUSHA order, i.e. DI, SI, BP, [final] SP, BX, DX, CX, AX. + POPA, + + // + // 80286 additions. + // + + // TODO: expand detail on all operations below. + + /// Adjusts requested privilege level. + ARPL, + /// Clears the task-switched flag. + CLTS, + /// Loads access rights. + LAR, + + /// Loads the global descriptor table. + LGDT, + /// Loads the interrupt descriptor table. + LIDT, + /// Loads the local descriptor table. + LLDT, + /// Stores the global descriptor table. + SGDT, + /// Stores the interrupt descriptor table. + SIDT, + /// Stores the local descriptor table. + SLDT, + + /// Verifies a segment for reading. + VERR, + /// Verifies a segment for writing. + VERW, + + /// Loads the machine status word. + LMSW, + /// Stores the machine status word. + SMSW, + /// Loads a segment limit + LSL, + /// Loads the task register. + LTR, + /// Stores the task register. + STR, + + /// Three-operand form of IMUL; multiply the immediate by the source and write to the destination. + IMUL_3, + + /// Undocumented (but used); loads all registers, including internal ones. + LOADALL, + + // + // 80386 additions. + // + + /// Loads a pointer to FS. + LFS, + /// Loads a pointer to GS. + LGS, + /// Loads a pointer to SS. + LSS, + + /// Shift left double. + SHLDimm, + SHLDCL, + /// Shift right double. + SHRDimm, + SHRDCL, + + /// Bit scan forwards. + BSF, + /// Bit scan reverse. + BSR, + /// Bit test. 
+ BT, + /// Bit test and complement. + BTC, + /// Bit test and reset. + BTR, + /// Bit test and set. + BTS, + + /// Move from the source to the destination, extending the source with zeros. + /// The instruction data size dictates the size of the source; the destination will + /// be either 16- or 32-bit depending on the current processor operating mode. + MOVZX, + /// Move from the source to the destination, applying a sign extension. + /// The instruction data size dictates the size of the source; the destination will + /// be either 16- or 32-bit depending on the current processor operating mode. + MOVSX, + + /// Two-operand form of IMUL; multiply the source by the destination and write to the destination. + IMUL_2, + + // Various conditional sets; each sets the byte at the location given by the operand + // to $ff if the condition is met; $00 otherwise. + SETO, SETNO, SETB, SETNB, SETZ, SETNZ, SETBE, SETNBE, + SETS, SETNS, SETP, SETNP, SETL, SETNL, SETLE, SETNLE, + + // Various special-case moves (i.e. those where it is impractical to extend the + // Source enum, so the requirement for special handling is loaded into the operation). + // In all cases the Cx, Dx and Tx Source aliases can be used to reinterpret the relevant + // source or destination. 
+ MOVtoCr, MOVfromCr, + MOVtoDr, MOVfromDr, + MOVtoTr, MOVfromTr, }; -enum class Size: uint8_t { - Implied = 0, - Byte = 1, - Word = 2, - DWord = 4, +enum class DataSize: uint8_t { + Byte = 0, + Word = 1, + DWord = 2, + None = 3, }; +constexpr int byte_size(DataSize size) { + return (1 << int(size)) & 7; +} + +constexpr int bit_size(DataSize size) { + return (8 << int(size)) & 0x3f; +} + +enum class AddressSize: uint8_t { + b16 = 0, + b32 = 1, +}; + +constexpr DataSize data_size(AddressSize size) { + return DataSize(int(size) + 1); +} + +constexpr int byte_size(AddressSize size) { + return 2 << int(size); +} + +constexpr int bit_size(AddressSize size) { + return 16 << int(size); +} + enum class Source: uint8_t { + // These are in SIB order; this matters for packing later on. + + /// AL, AX or EAX depending on size. + eAX, + /// CL, CX or ECX depending on size. + eCX, + /// DL, DX or EDX depending on size. + eDX, + /// BL, BX or EBX depending on size. + eBX, + /// AH if size is 1; SP or ESP otherwise. + eSPorAH, + /// CH if size is 1; BP or EBP otherwise. + eBPorCH, + /// DH if size is 1; SI or ESI otherwise. + eSIorDH, + /// BH if size is 1; DI or EDI otherwise. + eDIorBH, + + // Aliases for the dual-purpose enums. + eSP = eSPorAH, AH = eSPorAH, + eBP = eBPorCH, CH = eBPorCH, + eSI = eSIorDH, DH = eSIorDH, + eDI = eDIorBH, BH = eDIorBH, + + // Aliases for control, test and debug registers. + C0 = 0, C1 = 1, C2 = 2, C3 = 3, C4 = 4, C5 = 5, C6 = 6, C7 = 7, + T0 = 0, T1 = 1, T2 = 2, T3 = 3, T4 = 4, T5 = 5, T6 = 6, T7 = 7, + D0 = 0, D1 = 1, D2 = 2, D3 = 3, D4 = 4, D5 = 5, D6 = 6, D7 = 7, + + // Selectors. + ES, CS, SS, DS, FS, GS, + + /// @c None can be treated as a source that produces 0 when encountered; + /// it is semantically valid to receive it with that meaning in some contexts — + /// e.g. to indicate no index in indirect addressing. + /// It's listed here in order to allow an [optional] segment override to fit into three bits.
None, - CS, DS, ES, SS, - AL, AH, AX, - BL, BH, BX, - CL, CH, CX, - DL, DH, DX, - - SI, DI, - BP, SP, - - IndBXPlusSI, - IndBXPlusDI, - IndBPPlusSI, - IndBPPlusDI, - IndSI, - IndDI, + /// The address included within this instruction should be used as the source. DirectAddress, - IndBP, - IndBX, - Immediate + /// The immediate value included within this instruction should be used as the source. + Immediate, + + /// The ScaleIndexBase associated with this source should be used. + Indirect = 0b11000, + // Elsewhere, as an implementation detail, the low three bits of an indirect source + // are reused; (Indirect-1) is also used as a sentinel value but is not a valid member + // of the enum and isn't exposed externally. + + /// The ScaleIndexBase associated with this source should be used, but + /// its base should be ignored (and is guaranteed to be zero if the default + /// getter is used). + IndirectNoBase = Indirect - 1, }; enum class Repetition: uint8_t { None, RepE, RepNE }; -class Instruction { +/// Provides a 32-bit-style scale, index and base; to produce the address this represents, +/// calculate base() + (index() << scale()). +/// +/// This form of indirect addressing is used to describe both 16- and 32-bit indirect addresses, +/// even though it is a superset of that supported prior to the 80386. +/// +/// This class can represent only exactly what a SIB byte can — a scale of 0 to 3, a base +/// that is any one of the eight general purpose registers, and an index that is one of the seven +/// general purpose registers excluding eSP or is ::None. +/// +/// It cannot natively describe a base of ::None. +class ScaleIndexBase { public: - Operation operation = Operation::Invalid; + constexpr ScaleIndexBase() noexcept {} + constexpr ScaleIndexBase(uint8_t sib) noexcept : sib_(sib) {} + constexpr ScaleIndexBase(int scale, Source index, Source base) noexcept : + sib_(uint8_t( + scale << 6 | + (int(index != Source::None ?
index : Source::eSI) << 3) | + int(base) + )) {} + constexpr ScaleIndexBase(Source index, Source base) noexcept : ScaleIndexBase(0, index, base) {} + constexpr explicit ScaleIndexBase(Source base) noexcept : ScaleIndexBase(0, Source::None, base) {} - bool operator ==(const Instruction &rhs) const { + /// @returns the power of two by which to multiply @c index() before adding it to @c base(). + constexpr int scale() const { + return sib_ >> 6; + } + + /// @returns the @c index for this address; this is guaranteed to be one of eAX, eBX, eCX, eDX, None, eBP, eSI or eDI. + constexpr Source index() const { + constexpr Source sources[] = { + Source::eAX, Source::eCX, Source::eDX, Source::eBX, Source::None, Source::eBP, Source::eSI, Source::eDI, + }; + static_assert(sizeof(sources) == 8); + return sources[(sib_ >> 3) & 0x7]; + } + + /// @returns the @c base for this address; this is guaranteed to be one of eAX, eBX, eCX, eDX, eSP, eBP, eSI or eDI. + constexpr Source base() const { + return Source(sib_ & 0x7); + } + + constexpr uint8_t without_base() const { + return sib_ & ~0x3; + } + + bool operator ==(const ScaleIndexBase &rhs) const { + // Permit either exact equality or index and base being equal + // but transposed with a scale of 1. return - repetition_size_ == rhs.repetition_size_ && - sources_ == rhs.sources_ && - displacement_ == rhs.displacement_ && - operand_ == rhs.operand_; + (sib_ == rhs.sib_) || + ( + !scale() && !rhs.scale() && + rhs.index() == base() && + rhs.base() == index() + ); + } + + operator uint8_t() const { + return sib_; } private: - // b0, b1: a Repetition; - // b2+: operation size. - uint8_t repetition_size_ = 0; + // Data is stored directly as an 80386 SIB byte. + uint8_t sib_ = 0; +}; +static_assert(sizeof(ScaleIndexBase) == 1); +static_assert(alignof(ScaleIndexBase) == 1); - // b0–b5: source; - // b6–b11: destination; - // b12–b14: segment override; - // b15: lock. 
- uint16_t sources_ = 0; +/// Provides the location of an operand's source or destination. +/// +/// Callers should use .source() as a first point of entry. If it directly nominates a register +/// then use the register contents directly. If it indicates ::DirectAddress or ::Immediate +/// then ask the instruction for the address or immediate value that was provided in +/// the instruction. +/// +/// If .source() indicates ::Indirect then use base(), index() and scale() to construct an address. +/// +/// In all cases, the applicable segment is indicated by the instruction. +class DataPointer { + public: + /// Constructs a DataPointer referring to the given source; it shouldn't be ::Indirect. + constexpr DataPointer(Source source) noexcept : source_(source) {} - // Unpackable fields. - int16_t displacement_ = 0; - uint16_t operand_ = 0; // ... or used to store a segment for far operations. + /// Constructs a DataPointer with a source of ::Indirect and the specified sib. + constexpr DataPointer(ScaleIndexBase sib) noexcept : sib_(sib) {} + + /// Constructs a DataPointer with a source and SIB; use the source to indicate + /// whether the base field of the SIB is effective. + constexpr DataPointer(Source source, ScaleIndexBase sib) noexcept : source_(source), sib_(sib) {} + + /// Constructs an indirect DataPointer referencing the given base, index and scale. + /// Automatically maps Source::Indirect to Source::IndirectNoBase if base is Source::None. + constexpr DataPointer(Source base, Source index, int scale) noexcept : + source_(base != Source::None ? Source::Indirect : Source::IndirectNoBase), + sib_(scale, index, base) {} + + constexpr bool operator ==(const DataPointer &rhs) const { + // Require a SIB match only if source_ is ::Indirect or ::IndirectNoBase.
+ return + source_ == rhs.source_ && ( + source_ < Source::IndirectNoBase || + (source_ == Source::Indirect && sib_ == rhs.sib_) || + (source_ == Source::IndirectNoBase && sib_.without_base() == rhs.sib_.without_base()) + ); + } + + template constexpr Source source() const { + if constexpr (obscure_indirectNoBase) { + return (source_ >= Source::IndirectNoBase) ? Source::Indirect : source_; + } + return source_; + } + + constexpr int scale() const { + return sib_.scale(); + } + + constexpr Source index() const { + return sib_.index(); + } + + template constexpr Source base() const { + if constexpr (obscure_indirectNoBase) { + return (source_ <= Source::IndirectNoBase) ? Source::None : sib_.base(); + } + return sib_.base(); + } + + private: + Source source_ = Source::Indirect; + ScaleIndexBase sib_; +}; + +template class Instruction { + public: + Operation operation = Operation::Invalid; + + bool operator ==(const Instruction &rhs) const { + if( operation != rhs.operation || + mem_exts_source_ != rhs.mem_exts_source_ || + source_data_dest_sib_ != rhs.source_data_dest_sib_) { + return false; + } + + // Have already established above that this and RHS have the + // same extensions, if any. + const int extension_count = has_length_extension() + has_displacement() + has_operand(); + for(int c = 0; c < extension_count; c++) { + if(extensions_[c] != rhs.extensions_[c]) return false; + } + + return true; + } + + using DisplacementT = typename std::conditional::type; + using ImmediateT = typename std::conditional::type; + using AddressT = ImmediateT; + + private: + // Packing and encoding of fields is admittedly somewhat convoluted; what this + // achieves is that instructions will be sized: + // + // four bytes + up to three extension words + // (two bytes for 16-bit instructions, four for 32) + // + // Two of the extension words are used to retain an operand and displacement + // if the instruction has those. 
The other can store sizes greater than 15 + // bytes (for earlier processors), plus any repetition, segment override or + // repetition prefixes. + + // b7: address size; + // b6: has displacement; + // b5: has operand; + // [b4, b0]: source. + uint8_t mem_exts_source_ = 0; + + bool has_displacement() const { + return mem_exts_source_ & (1 << 6); + } + bool has_operand() const { + return mem_exts_source_ & (1 << 5); + } + + // [b15, b14]: data size; + // [b13, b10]: source length (0 => has length extension); + // [b9, b5]: top five of SIB; + // [b4, b0]: dest. + uint16_t source_data_dest_sib_ = 1 << 10; // So that ::Invalid doesn't seem to have a length extension. + + bool has_length_extension() const { + return !((source_data_dest_sib_ >> 10) & 15); + } + + // {operand}, {displacement}, {length extension}. + // + // If length extension is present then: + // + // [b15, b6]: source length; + // [b5, b4]: repetition; + // [b3, b1]: segment override; + // b0: lock. + ImmediateT extensions_[3]{}; + + ImmediateT operand_extension() const { + return extensions_[0]; + } + ImmediateT displacement_extension() const { + return extensions_[(mem_exts_source_ >> 5) & 1]; + } + ImmediateT length_extension() const { + return extensions_[((mem_exts_source_ >> 5) & 1) + ((mem_exts_source_ >> 6) & 1)]; + } public: - Source source() const { return Source(sources_ & 0x3f); } - Source destination() const { return Source((sources_ >> 6) & 0x3f); } - bool lock() const { return sources_ & 0x8000; } - Source segment_override() const { return Source((sources_ >> 12) & 7); } + /// @returns The number of bytes used for meaningful content within this class. A receiver must use at least @c sizeof(Instruction) bytes + /// to store an @c Instruction but is permitted to reuse the trailing sizeof(Instruction) - packing_size() for any purpose it likes. Teleologically, + /// this allows a denser packing of instructions into containers. 
+ size_t packing_size() const { + return + offsetof(Instruction, extensions) + + (has_displacement() + has_operand() + has_length_extension()) * sizeof(ImmediateT); - Repetition repetition() const { return Repetition(repetition_size_ & 3); } - Size operation_size() const { return Size(repetition_size_ >> 2); } + // To consider in the future: the length extension is always the last one, + // and uses only 8 bits of content within 32-bit instructions, so it'd be + // possible further to trim the packing size on little endian machines. + // + // ... but is that a speed improvement? How much space does it save, and + // is it enough to undo the costs of unaligned data? + } - uint16_t segment() const { return uint16_t(operand_); } - uint16_t offset() const { return uint16_t(displacement_); } + private: + // A lookup table to help with stripping parts of the SIB that have been + // hidden within the source/destination fields. + static constexpr uint8_t sib_masks[] = { + 0x1f, 0x1f, 0x1f, 0x18 + }; - int16_t displacement() const { return displacement_; } - uint16_t operand() const { return operand_; } + public: + DataPointer source() const { + return DataPointer( + Source(mem_exts_source_ & sib_masks[(mem_exts_source_ >> 3) & 3]), + ((source_data_dest_sib_ >> 2) & 0xf8) | (mem_exts_source_ & 0x07) + ); + } + DataPointer destination() const { + return DataPointer( + Source(source_data_dest_sib_ & sib_masks[(source_data_dest_sib_ >> 3) & 3]), + ((source_data_dest_sib_ >> 2) & 0xf8) | (source_data_dest_sib_ & 0x07) + ); + } + bool lock() const { + return has_length_extension() && length_extension()&1; + } - Instruction() noexcept {} - Instruction( + AddressSize address_size() const { + return AddressSize(mem_exts_source_ >> 7); + } + + /// @returns @c Source::DS if no segment override was found; the overridden segment otherwise. 
+ /// On x86 a segment override cannot modify the segment used as a destination in string instructions, + /// or that used by stack instructions, but this function does not spend the time necessary to provide + /// the correct default for those. + Source data_segment() const { + if(!has_length_extension()) return Source::DS; + return Source( + int(Source::ES) + + ((length_extension() >> 1) & 7) + ); + } + + Repetition repetition() const { + if(!has_length_extension()) return Repetition::None; + return Repetition((length_extension() >> 4) & 3); + } + DataSize operation_size() const { + return DataSize(source_data_dest_sib_ >> 14); + } + + int length() const { + const int short_length = (source_data_dest_sib_ >> 10) & 15; + if(short_length) return short_length; + return length_extension() >> 6; + } + + ImmediateT operand() const { + const ImmediateT ops[] = {0, operand_extension()}; + return ops[has_operand()]; + } + DisplacementT displacement() const { + return DisplacementT(offset()); + } + + uint16_t segment() const { + return uint16_t(operand()); + } + ImmediateT offset() const { + const ImmediateT offsets[] = {0, displacement_extension()}; + return offsets[has_displacement()]; + } + + constexpr Instruction() noexcept {} + constexpr Instruction(Operation operation, int length) noexcept : + Instruction(operation, Source::None, Source::None, ScaleIndexBase(), false, AddressSize::b16, Source::None, Repetition::None, DataSize::None, 0, 0, length) {} + constexpr Instruction( Operation operation, Source source, Source destination, + ScaleIndexBase sib, bool lock, + AddressSize address_size, Source segment_override, Repetition repetition, - Size operation_size, - int16_t displacement, - uint16_t operand) noexcept : + DataSize data_size, + DisplacementT displacement, + ImmediateT operand, + int length) noexcept : operation(operation), - repetition_size_(uint8_t((int(operation_size) << 2) | int(repetition))), - sources_(uint16_t( + mem_exts_source_(uint8_t( + 
(int(address_size) << 7) | + (displacement ? 0x40 : 0x00) | + (operand ? 0x20 : 0x00) | int(source) | - (int(destination) << 6) | - (int(segment_override) << 12) | - (int(lock) << 15) + (source == Source::Indirect ? (uint8_t(sib) & 7) : 0) )), - displacement_(displacement), - operand_(operand) {} + source_data_dest_sib_(uint16_t( + (int(data_size) << 14) | + (( + (lock || (segment_override != Source::None) || (length > 15) || (repetition != Repetition::None)) + ) ? 0 : (length << 10)) | + ((uint8_t(sib) & 0xf8) << 2) | + int(destination) | + (destination == Source::Indirect ? (uint8_t(sib) & 7) : 0) + )) { + + // Decisions on whether to include operand, displacement and/or size extension words + // have implicitly been made in the int packing above; honour them here. + int extension = 0; + if(has_operand()) { + extensions_[extension] = operand; + ++extension; + } + if(has_displacement()) { + extensions_[extension] = ImmediateT(displacement); + ++extension; + } + if(has_length_extension()) { + // As per the rule stated for segment(), this class provides ::DS for any instruction + // that doesn't have a segment override. + if(segment_override == Source::None) segment_override = Source::DS; + extensions_[extension] = ImmediateT( + (length << 6) | (int(repetition) << 4) | ((int(segment_override) & 7) << 1) | int(lock) + ); + ++extension; + } + } }; -static_assert(sizeof(Instruction) <= 8); +static_assert(sizeof(Instruction) <= 16); +static_assert(sizeof(Instruction) <= 10); } } diff --git a/InstructionSets/x86/Model.hpp b/InstructionSets/x86/Model.hpp new file mode 100644 index 000000000..304214475 --- /dev/null +++ b/InstructionSets/x86/Model.hpp @@ -0,0 +1,27 @@ +// +// Model.hpp +// Clock Signal +// +// Created by Thomas Harte on 27/02/2022. +// Copyright © 2022 Thomas Harte. All rights reserved. 
+// + +#ifndef Model_h +#define Model_h + +namespace InstructionSet { +namespace x86 { + +enum class Model { + i8086, + i80186, + i80286, + i80386, +}; + +static constexpr bool is_32bit(Model model) { return model >= Model::i80386; } + +} +} + +#endif /* Model_h */ diff --git a/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj b/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj index 7962a7be7..ff3d75cda 100644 --- a/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj +++ b/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj @@ -981,6 +981,7 @@ 4BE21219253FCE9C00435408 /* AppleIIgs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4BE21214253FCE9C00435408 /* AppleIIgs.cpp */; }; 4BE2121A253FCE9C00435408 /* AppleIIgs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4BE21214253FCE9C00435408 /* AppleIIgs.cpp */; }; 4BE34438238389E10058E78F /* AtariSTVideoTests.mm in Sources */ = {isa = PBXBuildFile; fileRef = 4BE34437238389E10058E78F /* AtariSTVideoTests.mm */; }; + 4BE3C69727CC32DC000EAD28 /* x86DataPointerTests.mm in Sources */ = {isa = PBXBuildFile; fileRef = 4BE3C69627CC32DC000EAD28 /* x86DataPointerTests.mm */; }; 4BE76CF922641ED400ACD6FA /* QLTests.mm in Sources */ = {isa = PBXBuildFile; fileRef = 4BE76CF822641ED300ACD6FA /* QLTests.mm */; }; 4BE8EB6625C750B50040BC40 /* DAT.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4BE8EB6425C750B50040BC40 /* DAT.cpp */; }; 4BE90FFD22D5864800FB464D /* MacintoshVideoTests.mm in Sources */ = {isa = PBXBuildFile; fileRef = 4BE90FFC22D5864800FB464D /* MacintoshVideoTests.mm */; }; @@ -2074,10 +2075,12 @@ 4BE3231520532AA7006EF799 /* Target.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Target.hpp; sourceTree = ""; }; 4BE3231620532BED006EF799 /* Target.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Target.hpp; sourceTree = ""; }; 4BE34437238389E10058E78F /* AtariSTVideoTests.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = 
sourcecode.cpp.objcpp; path = AtariSTVideoTests.mm; sourceTree = ""; }; + 4BE3C69327C793EF000EAD28 /* DataPointerResolver.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = DataPointerResolver.hpp; sourceTree = ""; }; + 4BE3C69527CBC540000EAD28 /* Model.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Model.hpp; sourceTree = ""; }; + 4BE3C69627CC32DC000EAD28 /* x86DataPointerTests.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = x86DataPointerTests.mm; sourceTree = ""; }; 4BE76CF822641ED300ACD6FA /* QLTests.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = QLTests.mm; sourceTree = ""; }; 4BE845201F2FF7F100A5EA22 /* CRTC6845.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = CRTC6845.hpp; sourceTree = ""; }; 4BE8EB5425C0E9D40040BC40 /* Disassembler.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Disassembler.hpp; sourceTree = ""; }; - 4BE8EB5525C0EA490040BC40 /* Sizes.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Sizes.hpp; sourceTree = ""; }; 4BE8EB6425C750B50040BC40 /* DAT.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = DAT.cpp; sourceTree = ""; }; 4BE8EB6525C750B50040BC40 /* DAT.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = DAT.hpp; sourceTree = ""; }; 4BE90FFC22D5864800FB464D /* MacintoshVideoTests.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = MacintoshVideoTests.mm; sourceTree = ""; }; @@ -4120,7 +4123,6 @@ children = ( 4B85322922778E4200F26553 /* Comparative68000.hpp */, 4B90467222C6FA31000E2074 /* TestRunner68000.hpp */, - 4BF7019F26FFD32300996424 /* AmigaBlitterTests.mm */, 4B90467522C6FD6E000E2074 /* 68000ArithmeticTests.mm */, 4B9D0C4A22C7D70900DE1AD3 /* 68000BCDTests.mm */, 
4B90467322C6FADD000E2074 /* 68000BitwiseTests.mm */, @@ -4129,6 +4131,7 @@ 4BC5C3DF22C994CC00795658 /* 68000MoveTests.mm */, 4B9D0C4E22C7E0CF00DE1AD3 /* 68000RollShiftTests.mm */, 4BD388872239E198002D14B5 /* 68000Tests.mm */, + 4BF7019F26FFD32300996424 /* AmigaBlitterTests.mm */, 4B924E981E74D22700B76AF1 /* AtariStaticAnalyserTests.mm */, 4BE34437238389E10058E78F /* AtariSTVideoTests.mm */, 4BB2A9AE1E13367E001A5C23 /* CRCTests.mm */, @@ -4149,6 +4152,7 @@ 4B8DD3672633B2D400B3C866 /* SpectrumVideoContentionTests.mm */, 4B2AF8681E513FC20027EE29 /* TIATests.mm */, 4B1D08051E0F7A1100763741 /* TimeTests.mm */, + 4BE3C69627CC32DC000EAD28 /* x86DataPointerTests.mm */, 4BEE4BD325A26E2B00011BD2 /* x86DecoderTests.mm */, 4BDA8234261E8E000021AA19 /* Z80ContentionTests.mm */, 4BB73EB81B587A5100552FC2 /* Info.plist */, @@ -4707,7 +4711,6 @@ 4BEDA42925B3C26B000C2DBD /* AccessType.hpp */, 4BEDA45425B5ECAB000C2DBD /* CachingExecutor.hpp */, 4BE8EB5425C0E9D40040BC40 /* Disassembler.hpp */, - 4BE8EB5525C0EA490040BC40 /* Sizes.hpp */, 4BEDA3B625B25563000C2DBD /* README.md */, 4BEDA40925B2844B000C2DBD /* M50740 */, 4BEDA3B325B25563000C2DBD /* PowerPC */, @@ -4733,6 +4736,8 @@ 4BEDA3B925B25563000C2DBD /* Decoder.cpp */, 4BEDA3B825B25563000C2DBD /* Decoder.hpp */, 4BEDA3DB25B2588F000C2DBD /* Instruction.hpp */, + 4BE3C69327C793EF000EAD28 /* DataPointerResolver.hpp */, + 4BE3C69527CBC540000EAD28 /* Model.hpp */, ); path = x86; sourceTree = ""; @@ -5917,6 +5922,7 @@ 4B778F5C23A5F3070000D260 /* MSX.cpp in Sources */, 4B778F0323A5EBB00000D260 /* FAT12.cpp in Sources */, 4B778F4023A5F1910000D260 /* z8530.cpp in Sources */, + 4BE3C69727CC32DC000EAD28 /* x86DataPointerTests.mm in Sources */, 4B778EFD23A5EB8E0000D260 /* AppleDSK.cpp in Sources */, 4B778EFB23A5EB7E0000D260 /* HFE.cpp in Sources */, 4BC751B21D157E61006C31D9 /* 6522Tests.swift in Sources */, @@ -6118,9 +6124,7 @@ CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; - CODE_SIGN_IDENTITY = "-"; CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = DV3346VVUN; FRAMEWORK_SEARCH_PATHS = ( "$(inherited)", "$(USER_LIBRARY_DIR)/Frameworks", @@ -6143,9 +6147,7 @@ CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; CLANG_WARN_DOCUMENTATION_COMMENTS = YES; CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; - CODE_SIGN_IDENTITY = "-"; CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = DV3346VVUN; FRAMEWORK_SEARCH_PATHS = ( "$(inherited)", "$(USER_LIBRARY_DIR)/Frameworks", @@ -6190,9 +6192,9 @@ CLANG_WARN_SUSPICIOUS_MOVE = YES; CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; - CODE_SIGN_IDENTITY = "-"; COPY_PHASE_STRIP = NO; DEBUG_INFORMATION_FORMAT = dwarf; + DEVELOPMENT_TEAM = DV3346VVUN; ENABLE_STRICT_OBJC_MSGSEND = YES; ENABLE_TESTABILITY = YES; GCC_C_LANGUAGE_STANDARD = gnu99; @@ -6249,9 +6251,9 @@ CLANG_WARN_SUSPICIOUS_MOVE = YES; CLANG_WARN_UNREACHABLE_CODE = YES; CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; - CODE_SIGN_IDENTITY = "Mac Developer"; COPY_PHASE_STRIP = NO; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + DEVELOPMENT_TEAM = DV3346VVUN; ENABLE_NS_ASSERTIONS = NO; ENABLE_STRICT_OBJC_MSGSEND = YES; GCC_C_LANGUAGE_STANDARD = gnu99; @@ -6286,9 +6288,7 @@ CLANG_WARN_SEMICOLON_BEFORE_METHOD_BODY = YES; CLANG_WARN_SUSPICIOUS_IMPLICIT_CONVERSION = YES; CODE_SIGN_ENTITLEMENTS = "Clock Signal/Clock Signal.entitlements"; - CODE_SIGN_IDENTITY = "-"; CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = DV3346VVUN; ENABLE_APP_SANDBOX = YES; ENABLE_HARDENED_RUNTIME = YES; FRAMEWORK_SEARCH_PATHS = ( @@ -6318,7 +6318,6 @@ ); PRODUCT_BUNDLE_IDENTIFIER = "TH.Clock-Signal"; PRODUCT_NAME = "$(TARGET_NAME)"; - PROVISIONING_PROFILE_SPECIFIER = ""; SWIFT_OBJC_BRIDGING_HEADER = "Clock Signal/ClockSignal-Bridging-Header.h"; SWIFT_OPTIMIZATION_LEVEL = "-Onone"; }; @@ -6337,9 +6336,7 @@ CLANG_WARN_SEMICOLON_BEFORE_METHOD_BODY = YES; 
CLANG_WARN_SUSPICIOUS_IMPLICIT_CONVERSION = YES; CODE_SIGN_ENTITLEMENTS = "Clock Signal/Clock Signal.entitlements"; - CODE_SIGN_IDENTITY = "-"; CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = DV3346VVUN; ENABLE_APP_SANDBOX = YES; ENABLE_HARDENED_RUNTIME = YES; FRAMEWORK_SEARCH_PATHS = ( @@ -6371,7 +6368,6 @@ ); PRODUCT_BUNDLE_IDENTIFIER = "TH.Clock-Signal"; PRODUCT_NAME = "$(TARGET_NAME)"; - PROVISIONING_PROFILE_SPECIFIER = ""; SWIFT_OBJC_BRIDGING_HEADER = "Clock Signal/ClockSignal-Bridging-Header.h"; }; name = Release; @@ -6382,10 +6378,7 @@ BUNDLE_LOADER = "$(TEST_HOST)"; CLANG_CXX_LANGUAGE_STANDARD = "c++17"; CLANG_ENABLE_MODULES = YES; - CODE_SIGN_ENTITLEMENTS = "Clock Signal/Clock Signal.entitlements"; - CODE_SIGN_IDENTITY = "Apple Development"; CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = ""; ENABLE_HARDENED_RUNTIME = NO; INFOPLIST_FILE = "Clock SignalTests/Info.plist"; LD_RUNPATH_SEARCH_PATHS = ( @@ -6396,7 +6389,6 @@ MACOSX_DEPLOYMENT_TARGET = 11.0; PRODUCT_BUNDLE_IDENTIFIER = "TH.Clock-SignalTests"; PRODUCT_NAME = "$(TARGET_NAME)"; - PROVISIONING_PROFILE_SPECIFIER = ""; SWIFT_OBJC_BRIDGING_HEADER = "Clock SignalTests/Bridges/Clock SignalTests-Bridging-Header.h"; SWIFT_OPTIMIZATION_LEVEL = "-Onone"; SWIFT_VERSION = 5.0; @@ -6410,10 +6402,7 @@ BUNDLE_LOADER = "$(TEST_HOST)"; CLANG_CXX_LANGUAGE_STANDARD = "c++17"; CLANG_ENABLE_MODULES = YES; - CODE_SIGN_ENTITLEMENTS = "Clock Signal/Clock Signal.entitlements"; - CODE_SIGN_IDENTITY = "Apple Development"; CODE_SIGN_STYLE = Automatic; - DEVELOPMENT_TEAM = ""; ENABLE_HARDENED_RUNTIME = NO; GCC_OPTIMIZATION_LEVEL = 2; INFOPLIST_FILE = "Clock SignalTests/Info.plist"; @@ -6426,7 +6415,6 @@ ONLY_ACTIVE_ARCH = YES; PRODUCT_BUNDLE_IDENTIFIER = "TH.Clock-SignalTests"; PRODUCT_NAME = "$(TARGET_NAME)"; - PROVISIONING_PROFILE_SPECIFIER = ""; SWIFT_OBJC_BRIDGING_HEADER = "Clock SignalTests/Bridges/Clock SignalTests-Bridging-Header.h"; SWIFT_VERSION = 5.0; TEST_HOST = "$(BUILT_PRODUCTS_DIR)/Clock 
Signal.app/Contents/MacOS/Clock Signal"; diff --git a/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm b/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm new file mode 100644 index 000000000..8db794516 --- /dev/null +++ b/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm @@ -0,0 +1,103 @@ +// +// x86DataPointerTests.m +// Clock Signal +// +// Created by Thomas Harte on 27/02/2022. +// Copyright 2022 Thomas Harte. All rights reserved. +// + +#import + +#include "../../../InstructionSets/x86/DataPointerResolver.hpp" +#include + +using namespace InstructionSet::x86; + +@interface x86DataPointerTests : XCTestCase +@end + +@implementation x86DataPointerTests + +- (void)test16bitSize1 { + const DataPointer indirectPointer( + Source::eAX, Source::eDI, 0 + ); + const DataPointer registerPointer( + Source::eBX + ); + + struct Registers { + uint16_t ax = 0x1234, di = 0x00ee; + uint8_t bl = 0xaa; + + template DataT read() { + assert(is_sized(r)); + switch(r) { + case Register::AX: return ax; + case Register::BL: return bl; + case Register::DI: return di; + default: return 0; + } + } + template void write(DataT value) { + assert(is_sized(r)); + switch(r) { + case Register::BL: bl = value; break; + default: assert(false); + } + } + } registers; + + struct Memory { + std::map data; + + template DataT read(Source, uint32_t address) { + if(address == 0x1234 + 0x00ee) return 0xff; + return 0; + } + template void write(Source, uint32_t address, DataT value) { + data[address] = value; + } + } memory; + + // TODO: construct this more formally; the code below just assumes size = 1, which is not a contractual guarantee. 
+ const auto instruction = Instruction(); + + using Resolver = DataPointerResolver; + const uint8_t memoryValue = Resolver::read( + registers, + memory, + instruction, + indirectPointer + ); + registers.ax = 0x0100; + Resolver::write( + registers, + memory, + instruction, + indirectPointer, + 0xef + ); + + XCTAssertEqual(memoryValue, 0xff); + XCTAssertEqual(memory.data[0x01ee], 0xef); + + const uint8_t registerValue = Resolver::read( + registers, + memory, + instruction, + registerPointer + ); + Resolver::write( + registers, + memory, + instruction, + registerPointer, + 0x93 + ); + + XCTAssertEqual(registerValue, 0xaa); + XCTAssertEqual(registers.bl, 0x93); +} + +@end diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm index 8f5cf18c6..77118a266 100644 --- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm +++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm @@ -9,91 +9,54 @@ #import #include +#include #include #include "../../../InstructionSets/x86/Decoder.hpp" +#include "../../../InstructionSets/x86/DataPointerResolver.hpp" + +using namespace InstructionSet::x86; namespace { - using Operation = InstructionSet::x86::Operation; - using Instruction = InstructionSet::x86::Instruction; - using Source = InstructionSet::x86::Source; - using Size = InstructionSet::x86::Size; -} - -@interface x86DecoderTests : XCTestCase -@end - -/*! - Tests 8086 decoding by throwing a bunch of randomly-generated - word streams and checking that the result matches what I got from a - disassembler elsewhere. -*/ -@implementation x86DecoderTests { - std::vector instructions; -} // MARK: - Specific instruction asserts. 
-- (void)assert:(Instruction &)instruction operation:(Operation)operation { +template void test(const InstructionT &instruction, DataSize size, Operation operation) { + XCTAssertEqual(instruction.operation_size(), InstructionSet::x86::DataSize(size)); XCTAssertEqual(instruction.operation, operation); } -- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size { +template void test( + const InstructionT &instruction, + DataSize size, + Operation operation, + std::optional source, + std::optional destination = std::nullopt, + std::optional operand = std::nullopt, + std::optional displacement = std::nullopt) { + + XCTAssertEqual(instruction.operation_size(), InstructionSet::x86::DataSize(size)); XCTAssertEqual(instruction.operation, operation); - XCTAssertEqual(instruction.operation_size(), InstructionSet::x86::Size(size)); + if(source) XCTAssert(instruction.source() == *source); + if(destination) XCTAssert(instruction.destination() == *destination); + if(operand) XCTAssertEqual(instruction.operand(), *operand); + if(displacement) XCTAssertEqual(instruction.displacement(), *displacement); } -- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size source:(Source)source destination:(Source)destination displacement:(int16_t)displacement { +template void test( + const InstructionT &instruction, + Operation operation, + std::optional operand = std::nullopt, + std::optional displacement = std::nullopt) { XCTAssertEqual(instruction.operation, operation); - XCTAssertEqual(instruction.operation_size(), InstructionSet::x86::Size(size)); - XCTAssertEqual(instruction.source(), source); - XCTAssertEqual(instruction.destination(), destination); - XCTAssertEqual(instruction.displacement(), displacement); + if(operand) XCTAssertEqual(instruction.operand(), *operand); + if(displacement) XCTAssertEqual(instruction.displacement(), *displacement); } -- (void)assert:(Instruction &)instruction operation:(Operation)operation 
size:(int)size source:(Source)source destination:(Source)destination displacement:(int16_t)displacement operand:(uint16_t)operand { - [self assert:instruction operation:operation size:size source:source destination:destination displacement:displacement]; - XCTAssertEqual(instruction.operand(), operand); -} - -- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size source:(Source)source destination:(Source)destination operand:(uint16_t)operand { - [self assert:instruction operation:operation size:size source:source destination:destination displacement:0 operand:operand]; -} - -- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size source:(Source)source destination:(Source)destination { - [self assert:instruction operation:operation size:size source:source destination:destination displacement:0]; -} - -- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size source:(Source)source { - XCTAssertEqual(instruction.operation, operation); - XCTAssertEqual(instruction.operation_size(), InstructionSet::x86::Size(size)); - XCTAssertEqual(instruction.source(), source); -} - -- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size destination:(Source)destination { - [self assert:instruction operation:operation size:size]; - XCTAssertEqual(instruction.destination(), destination); -} - -- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size operand:(uint16_t)operand destination:(Source)destination { - [self assert:instruction operation:operation size:size]; - XCTAssertEqual(instruction.destination(), destination); - XCTAssertEqual(instruction.source(), Source::Immediate); - XCTAssertEqual(instruction.operand(), operand); - XCTAssertEqual(instruction.displacement(), 0); -} - -- (void)assert:(Instruction &)instruction operation:(Operation)operation displacement:(int16_t)displacement { - XCTAssertEqual(instruction.operation, 
operation); - XCTAssertEqual(instruction.displacement(), displacement); -} - -- (void)assert:(Instruction &)instruction operation:(Operation)operation operand:(uint16_t)operand { - XCTAssertEqual(instruction.operation, operation); - XCTAssertEqual(instruction.operand(), operand); - XCTAssertEqual(instruction.displacement(), 0); -} - -- (void)assert:(Instruction &)instruction operation:(Operation)operation segment:(uint16_t)segment offset:(uint16_t)offset { +template void test_far( + const InstructionT &instruction, + Operation operation, + uint16_t segment, + typename InstructionT::DisplacementT offset) { XCTAssertEqual(instruction.operation, operation); XCTAssertEqual(instruction.segment(), segment); XCTAssertEqual(instruction.offset(), offset); @@ -101,9 +64,32 @@ namespace { // MARK: - Decoder -- (void)decode:(const std::initializer_list &)stream { +template +std::vector::InstructionT> +decode(const CollectionT &stream, bool set_32_bit = false) { + // Build instructions list with a byte-by-byte decoding. + std::vector::InstructionT> instructions; + + InstructionSet::x86::Decoder decoder; + decoder.set_32bit_protected_mode(set_32_bit); + + for(uint8_t item: stream) { + const auto [size, next] = decoder.decode(&item, 1); + if(size > 0) { + instructions.push_back(next); + } + } + + return instructions; +} + +template +std::vector::InstructionT> +decode(const std::initializer_list &stream, bool set_32_bit = false) { // Decode by offering up all data at once. - InstructionSet::x86::Decoder decoder(InstructionSet::x86::Model::i8086); + std::vector::InstructionT> instructions; + InstructionSet::x86::Decoder decoder; + decoder.set_32bit_protected_mode(set_32_bit); instructions.clear(); const uint8_t *byte = stream.begin(); while(byte != stream.end()) { @@ -114,23 +100,35 @@ namespace { } // Grab a byte-at-a-time decoding and check that it matches the previous. 
- { - InstructionSet::x86::Decoder decoder(InstructionSet::x86::Model::i8086); + const auto byte_instructions = decode(std::vector{stream}, set_32_bit); - auto previous_instruction = instructions.begin(); - for(auto item: stream) { - const auto [size, next] = decoder.decode(&item, 1); - if(size > 0) { - XCTAssert(next == *previous_instruction); - ++previous_instruction; - } - } + XCTAssertEqual(byte_instructions.size(), instructions.size()); + + auto previous_instruction = instructions.begin(); + auto byte_instruction = byte_instructions.begin(); + while(previous_instruction != instructions.end()) { + XCTAssert(*previous_instruction == *byte_instruction); + + ++previous_instruction; + ++byte_instruction; } + + return instructions; } -// MARK: - Tests +} -- (void)testSequence1 { +@interface x86DecoderTests : XCTestCase +@end + +/*! + Tests 8086 decoding by throwing a bunch of randomly-generated + word streams and checking that the result matches what I got from a + disassembler elsewhere. +*/ +@implementation x86DecoderTests + +- (void)test16BitSequence { // Sequences the Online Disassembler believes to exist but The 8086 Book does not: // // 0x6a 0x65 push $65 @@ -139,7 +137,7 @@ namespace { // 0x6c insb (%dx), %es:(%di) // 0xc9 leave // - [self decode:{ + const auto instructions = decode({ 0x2d, 0x77, 0xea, 0x72, 0xfc, 0x4b, 0xb5, 0x28, 0xc3, 0xca, 0x26, 0x48, /* 0x65, 0x6d, */ 0x7b, 0x9f, 0xc2, 0x65, 0x42, 0x4e, 0xef, 0x70, 0x20, 0x94, 0xc4, 0xd4, 0x93, 0x43, 0x3c, 0x8e, /* 0x6a, 0x65, */ 0x1a, 0x78, 0x45, 0x10, 0x7f, 0x3c, 0x19, 0x5a, 0x16, 0x31, 0x64, 0x2c, 0xe7, 0xc6, 0x7d, 0xb0, @@ -148,7 +146,7 @@ namespace { 0xbd, 0xa1, 0x12, 0xc5, 0x29, /* 0xc9, */ 0x9e, 0xd8, 0xf3, 0xcf, 0x92, 0x39, 0x5d, 0x90, 0x15, 0xc3, 0xb8, 0xad, 0xe8, 0xc8, 0x16, 0x4a, 0xb0, 0x9e, 0xf9, 0xbf, 0x56, 0xea, 0x4e, 0xfd, 0xe4, 0x5a, 0x23, 0xaa, 0x2c, 0x5b, 0x2a, 0xd2, 0xf7, 0x5f, 0x18, 0x86, 0x90, 0x25, 0x64, 0xb7, 0xc3 - }]; + }); // 63 instructions are expected. 
XCTAssertEqual(instructions.size(), 63); @@ -157,29 +155,29 @@ namespace { // jb 0x00000001 // dec %bx // mov $0x28,%ch - [self assert:instructions[0] operation:Operation::SUB size:2 operand:0xea77 destination:Source::AX]; - [self assert:instructions[1] operation:Operation::JB displacement:0xfffc]; - [self assert:instructions[2] operation:Operation::DEC size:2 source:Source::BX destination:Source::BX]; - [self assert:instructions[3] operation:Operation::MOV size:1 operand:0x28 destination:Source::CH]; + test(instructions[0], DataSize::Word, Operation::SUB, Source::Immediate, Source::eAX, 0xea77); + test(instructions[1], Operation::JB, std::nullopt, 0xfffc); + test(instructions[2], DataSize::Word, Operation::DEC, Source::eBX, Source::eBX); + test(instructions[3], DataSize::Byte, Operation::MOV, Source::Immediate, Source::CH, 0x28); // ret // lret $0x4826 // [[ omitted: gs insw (%dx),%es:(%di) ]] // jnp 0xffffffaf // ret $0x4265 - [self assert:instructions[4] operation:Operation::RETN]; - [self assert:instructions[5] operation:Operation::RETF operand:0x4826]; - [self assert:instructions[6] operation:Operation::JNP displacement:0xff9f]; - [self assert:instructions[7] operation:Operation::RETN operand:0x4265]; + test(instructions[4], Operation::RETnear); + test(instructions[5], Operation::RETfar, 0x4826); + test(instructions[6], Operation::JNP, std::nullopt, 0xff9f); + test(instructions[7], Operation::RETnear, 0x4265); // dec %si // out %ax,(%dx) // jo 0x00000037 // xchg %ax,%sp - [self assert:instructions[8] operation:Operation::DEC size:2 source:Source::SI destination:Source::SI]; - [self assert:instructions[9] operation:Operation::OUT size:2 source:Source::AX destination:Source::DX]; - [self assert:instructions[10] operation:Operation::JO displacement:0x20]; - [self assert:instructions[11] operation:Operation::XCHG size:2 source:Source::AX destination:Source::SP]; + test(instructions[8], DataSize::Word, Operation::DEC, Source::eSI, Source::eSI); + 
test(instructions[9], DataSize::Word, Operation::OUT, Source::eAX, Source::eDX); + test(instructions[10], Operation::JO, std::nullopt, 0x20); + test(instructions[11], DataSize::Word, Operation::XCHG, Source::eAX, Source::eSP); // ODA has: // c4 (bad) @@ -190,145 +188,496 @@ namespace { // // c4 d4 (bad) // 93 XCHG AX, BX - [self assert:instructions[12] operation:Operation::Invalid]; - [self assert:instructions[13] operation:Operation::XCHG size:2 source:Source::AX destination:Source::BX]; + test(instructions[12], Operation::Invalid); + test(instructions[13], DataSize::Word, Operation::XCHG, Source::eAX, Source::eBX); // inc %bx // cmp $0x8e,%al // [[ omitted: push $0x65 ]] // sbb 0x45(%bx,%si),%bh // adc %bh,0x3c(%bx) - [self assert:instructions[14] operation:Operation::INC size:2 source:Source::BX destination:Source::BX]; - [self assert:instructions[15] operation:Operation::CMP size:1 operand:0x8e destination:Source::AL]; - [self assert:instructions[16] operation:Operation::SBB size:1 source:Source::IndBXPlusSI destination:Source::BH displacement:0x45]; - [self assert:instructions[17] operation:Operation::ADC size:1 source:Source::BH destination:Source::IndBX displacement:0x3c]; + test(instructions[14], DataSize::Word, Operation::INC, Source::eBX, Source::eBX); + test(instructions[15], DataSize::Byte, Operation::CMP, Source::Immediate, Source::eAX, 0x8e); + test(instructions[16], DataSize::Byte, Operation::SBB, ScaleIndexBase(Source::eBX, Source::eSI), Source::BH, std::nullopt, 0x45); + test(instructions[17], DataSize::Byte, Operation::ADC, Source::BH, ScaleIndexBase(Source::eBX), std::nullopt, 0x3c); // sbb %bx,0x16(%bp,%si) // xor %sp,0x2c(%si) // out %ax,$0xc6 // jge 0xffffffe0 - [self assert:instructions[18] operation:Operation::SBB size:2 source:Source::BX destination:Source::IndBPPlusSI displacement:0x16]; - [self assert:instructions[19] operation:Operation::XOR size:2 source:Source::SP destination:Source::IndSI displacement:0x2c]; - [self 
assert:instructions[20] operation:Operation::OUT size:2 source:Source::AX destination:Source::DirectAddress operand:0xc6]; - [self assert:instructions[21] operation:Operation::JNL displacement:0xffb0]; + test(instructions[18], DataSize::Word, Operation::SBB, Source::eBX, ScaleIndexBase(Source::eBP, Source::eSI), std::nullopt, 0x16); + test(instructions[19], DataSize::Word, Operation::XOR, Source::eSP, ScaleIndexBase(Source::eSI), std::nullopt, 0x2c); + test(instructions[20], DataSize::Word, Operation::OUT, Source::eAX, Source::DirectAddress, 0xc6); + test(instructions[21], Operation::JNL, std::nullopt, 0xffb0); // mov $0x49,%ch // [[ omitted: addr32 popa ]] // mov $0xcbc0,%dx // adc $0x7e,%al // jno 0x0000000b - [self assert:instructions[22] operation:Operation::MOV size:1 operand:0x49 destination:Source::CH]; - [self assert:instructions[23] operation:Operation::MOV size:2 operand:0xcbc0 destination:Source::DX]; - [self assert:instructions[24] operation:Operation::ADC size:1 operand:0x7e destination:Source::AL]; - [self assert:instructions[25] operation:Operation::JNO displacement:0xffd0]; + test(instructions[22], DataSize::Byte, Operation::MOV, Source::Immediate, Source::CH, 0x49); + test(instructions[23], DataSize::Word, Operation::MOV, Source::Immediate, Source::eDX, 0xcbc0); + test(instructions[24], DataSize::Byte, Operation::ADC, Source::Immediate, Source::eAX, 0x7e); + test(instructions[25], Operation::JNO, std::nullopt, 0xffd0); // push %ax // js 0x0000007b // add (%di),%bx // in $0xc9,%ax - [self assert:instructions[26] operation:Operation::PUSH size:2 source:Source::AX]; - [self assert:instructions[27] operation:Operation::JS displacement:0x3d]; - [self assert:instructions[28] operation:Operation::ADD size:2 source:Source::IndDI destination:Source::BX]; - [self assert:instructions[29] operation:Operation::IN size:2 source:Source::DirectAddress destination:Source::AX operand:0xc9]; + test(instructions[26], DataSize::Word, Operation::PUSH, Source::eAX); + 
test(instructions[27], Operation::JS, std::nullopt, 0x3d); + test(instructions[28], DataSize::Word, Operation::ADD, ScaleIndexBase(Source::eDI), Source::eBX); + test(instructions[29], DataSize::Word, Operation::IN, Source::DirectAddress, Source::eAX, 0xc9); // xchg %ax,%di // ret // fwait // out %al,$0xd3 - [self assert:instructions[30] operation:Operation::XCHG size:2 source:Source::AX destination:Source::DI]; - [self assert:instructions[31] operation:Operation::RETN]; - [self assert:instructions[32] operation:Operation::WAIT]; - [self assert:instructions[33] operation:Operation::OUT size:1 source:Source::AL destination:Source::DirectAddress operand:0xd3]; + test(instructions[30], DataSize::Word, Operation::XCHG, Source::eAX, Source::eDI); + test(instructions[31], Operation::RETnear); + test(instructions[32], Operation::WAIT); + test(instructions[33], DataSize::Byte, Operation::OUT, Source::eAX, Source::DirectAddress, 0xd3); // [[ omitted: insb (%dx),%es:(%di) ]] // pop %ax // dec %bp // jbe 0xffffffcc // inc %sp - [self assert:instructions[34] operation:Operation::POP size:2 destination:Source::AX]; - [self assert:instructions[35] operation:Operation::DEC size:2 source:Source::BP destination:Source::BP]; - [self assert:instructions[36] operation:Operation::JBE displacement:0xff80]; - [self assert:instructions[37] operation:Operation::INC size:2 source:Source::SP destination:Source::SP]; + test(instructions[34], DataSize::Word, Operation::POP, Source::eAX, Source::eAX); + test(instructions[35], DataSize::Word, Operation::DEC, Source::eBP, Source::eBP); + test(instructions[36], Operation::JBE, std::nullopt, 0xff80); + test(instructions[37], DataSize::Word, Operation::INC, Source::eSP, Source::eSP); // (bad) // lahf // movsw %ds:(%si),%es:(%di) // mov $0x12a1,%bp - [self assert:instructions[38] operation:Operation::Invalid]; - [self assert:instructions[39] operation:Operation::LAHF]; - [self assert:instructions[40] operation:Operation::MOVS size:2]; - [self 
assert:instructions[41] operation:Operation::MOV size:2 operand:0x12a1 destination:Source::BP]; + test(instructions[38], Operation::Invalid); + test(instructions[39], Operation::LAHF); + test(instructions[40], DataSize::Word, Operation::MOVS); // Arguments are implicit. + test(instructions[41], DataSize::Word, Operation::MOV, Source::Immediate, Source::eBP, 0x12a1); // lds (%bx,%di),%bp // [[ omitted: leave ]] // sahf // fdiv %st(3),%st // iret - [self assert:instructions[42] operation:Operation::LDS size:2]; - [self assert:instructions[43] operation:Operation::SAHF]; - [self assert:instructions[44] operation:Operation::ESC]; - [self assert:instructions[45] operation:Operation::IRET]; + test(instructions[42], DataSize::Word, Operation::LDS); + test(instructions[43], Operation::SAHF); + test(instructions[44], Operation::ESC); + test(instructions[45], Operation::IRET); // xchg %ax,%dx // cmp %bx,-0x70(%di) // adc $0xb8c3,%ax // lods %ds:(%si),%ax - [self assert:instructions[46] operation:Operation::XCHG size:2 source:Source::AX destination:Source::DX]; - [self assert:instructions[47] operation:Operation::CMP size:2 source:Source::BX destination:Source::IndDI displacement:0xff90]; - [self assert:instructions[48] operation:Operation::ADC size:2 operand:0xb8c3 destination:Source::AX]; - [self assert:instructions[49] operation:Operation::LODS size:2]; + test(instructions[46], DataSize::Word, Operation::XCHG, Source::eAX, Source::eDX); + test(instructions[47], DataSize::Word, Operation::CMP, Source::eBX, ScaleIndexBase(Source::eDI), std::nullopt, 0xff90); + test(instructions[48], DataSize::Word, Operation::ADC, Source::Immediate, Source::eAX, 0xb8c3); + test(instructions[49], DataSize::Word, Operation::LODS); // call 0x0000172d // dec %dx // mov $0x9e,%al // stc - [self assert:instructions[50] operation:Operation::CALLD operand:0x16c8]; - [self assert:instructions[51] operation:Operation::DEC size:2 source:Source::DX destination:Source::DX]; - [self 
assert:instructions[52] operation:Operation::MOV size:1 operand:0x9e destination:Source::AL]; - [self assert:instructions[53] operation:Operation::STC]; + test(instructions[50], Operation::CALLrel, 0, 0x16c8); + test(instructions[51], DataSize::Word, Operation::DEC, Source::eDX, Source::eDX); + test(instructions[52], DataSize::Byte, Operation::MOV, Source::Immediate, Source::eAX, 0x9e); + test(instructions[53], Operation::STC); // mov $0xea56,%di // dec %si // std // in $0x5a,%al - [self assert:instructions[54] operation:Operation::MOV size:2 operand:0xea56 destination:Source::DI]; - [self assert:instructions[55] operation:Operation::DEC size:2 source:Source::SI destination:Source::SI]; - [self assert:instructions[56] operation:Operation::STD]; - [self assert:instructions[57] operation:Operation::IN size:1 source:Source::DirectAddress destination:Source::AL operand:0x5a]; + test(instructions[54], DataSize::Word, Operation::MOV, Source::Immediate, Source::eDI, 0xea56); + test(instructions[55], DataSize::Word, Operation::DEC, Source::eSI, Source::eSI); + test(instructions[56], Operation::STD); + test(instructions[57], DataSize::Byte, Operation::IN, Source::DirectAddress, Source::eAX, 0x5a); // and 0x5b2c(%bp,%si),%bp // sub %dl,%dl // negw 0x18(%bx) // xchg %dl,0x6425(%bx,%si) - [self assert:instructions[58] operation:Operation::AND size:2 source:Source::IndBPPlusSI destination:Source::BP displacement:0x5b2c]; - [self assert:instructions[59] operation:Operation::SUB size:1 source:Source::DL destination:Source::DL]; - [self assert:instructions[60] operation:Operation::NEG size:2 source:Source::IndBX destination:Source::IndBX displacement:0x18]; - [self assert:instructions[61] operation:Operation::XCHG size:1 source:Source::IndBXPlusSI destination:Source::DL displacement:0x6425]; + test(instructions[58], DataSize::Word, Operation::AND, ScaleIndexBase(Source::eBP, Source::eSI), Source::eBP, std::nullopt, 0x5b2c); + test(instructions[59], DataSize::Byte, Operation::SUB, 
Source::eDX, Source::eDX); + test(instructions[60], DataSize::Word, Operation::NEG, ScaleIndexBase(Source::eBX), ScaleIndexBase(Source::eBX), std::nullopt, 0x18); + test(instructions[61], DataSize::Byte, Operation::XCHG, ScaleIndexBase(Source::eBX, Source::eSI), Source::eDX, std::nullopt, 0x6425); // mov $0xc3,%bh - [self assert:instructions[62] operation:Operation::MOV size:1 operand:0xc3 destination:Source::BH]; + test(instructions[62], DataSize::Byte, Operation::MOV, Source::Immediate, Source::BH, 0xc3); } - (void)test83 { - [self decode:{ + const auto instructions = decode({ 0x83, 0x10, 0x80, // adcw $0xff80,(%bx,%si) 0x83, 0x3b, 0x04, // cmpw $0x4,(%bp,%di) 0x83, 0x2f, 0x09, // subw $0x9,(%bx) - }]; + }); XCTAssertEqual(instructions.size(), 3); - [self assert:instructions[0] operation:Operation::ADC size:2 source:Source::Immediate destination:Source::IndBXPlusSI operand:0xff80]; - [self assert:instructions[1] operation:Operation::CMP size:2 source:Source::Immediate destination:Source::IndBPPlusDI operand:0x4]; - [self assert:instructions[2] operation:Operation::SUB size:2 source:Source::Immediate destination:Source::IndBX operand:0x9]; + test(instructions[0], DataSize::Word, Operation::ADC, Source::Immediate, ScaleIndexBase(Source::eBX, Source::eSI), 0xff80); + test(instructions[1], DataSize::Word, Operation::CMP, Source::Immediate, ScaleIndexBase(Source::eBP, Source::eDI), 0x4); + test(instructions[2], DataSize::Word, Operation::SUB, Source::Immediate, ScaleIndexBase(Source::eBX), 0x9); } - (void)testFar { - [self decode:{ + const auto instructions = decode({ 0x9a, 0x12, 0x34, 0x56, 0x78, // lcall 0x7856, 0x3412 - }]; + }); XCTAssertEqual(instructions.size(), 1); - [self assert:instructions[0] operation:Operation::CALLF segment:0x7856 offset:0x3412]; + test_far(instructions[0], Operation::CALLfar, 0x7856, 0x3412); +} + +- (void)testLDSLESEtc { + auto run_test = [](bool is_32, DataSize size) { + const auto instructions = decode({ + 0xc5, 0x33, // 16-bit: lds 
si, (bp, di); 32-bit: lds esi, (ebx) + 0xc4, 0x17, // 16-bit: les dx, (bx); 32-bit: les edx, (edi) + 0x0f, 0xb2, 0x17, // 16-bit: lss dx, (bx); 32-bit: lss edx, (edi) + }, is_32); + + XCTAssertEqual(instructions.size(), 3); + if(is_32) { + test(instructions[0], size, Operation::LDS, ScaleIndexBase(Source::eBX), Source::eSI); + test(instructions[1], size, Operation::LES, ScaleIndexBase(Source::eDI), Source::eDX); + test(instructions[2], size, Operation::LSS, ScaleIndexBase(Source::eDI), Source::eDX); + } else { + test(instructions[0], size, Operation::LDS, ScaleIndexBase(Source::eBP, Source::eDI), Source::eSI); + test(instructions[1], size, Operation::LES, ScaleIndexBase(Source::eBX), Source::eDX); + test(instructions[2], size, Operation::LSS, ScaleIndexBase(Source::eBX), Source::eDX); + } + }; + + run_test(false, DataSize::Word); + run_test(true, DataSize::DWord); +} + +- (void)testSIB { + const auto instructions = decode({ + // add edx, -0x7d(ebp + eax*2) + 0x01, 0x54, 0x45, 0x83, + + // add edx, -0x80(si) + 0x67, 0x01, 0x54, 0x80, + }, true); + + XCTAssertEqual(instructions.size(), 2); + test(instructions[0], DataSize::DWord, Operation::ADD, Source::eDX, ScaleIndexBase(1, Source::eAX, Source::eBP), 0x00, -125); + test(instructions[1], DataSize::DWord, Operation::ADD, Source::eDX, ScaleIndexBase(Source::eSI), 0x00, -128); + XCTAssertEqual(instructions[1].address_size(), AddressSize::b16); +} + +- (void)testJMP { + const auto instructions = decode({ + // JMP +0x00efcdab + 0xe9, 0xab, 0xcd, 0xef, 0x00, + // JMP 0xc389:0x67452301 + 0xea, 0x01, 0x23, 0x45, 0x67, 0x89, 0xc3, + // JMP -79 + 0xeb, 0xb1, + // JMP DWORD (edx) + 0xff, 0x22, + // JMP FWORD (eax) + 0xff, 0x28, + }, true); + + XCTAssertEqual(instructions.size(), 5); + test(instructions[0], Operation::JMPrel, 0, 0xefcdab); + test_far(instructions[1], Operation::JMPfar, 0xc389, 0x67452301); + test(instructions[2], Operation::JMPrel, 0, -79); + test(instructions[3], DataSize::DWord, Operation::JMPabs, 
ScaleIndexBase(Source::eDX)); + test(instructions[4], DataSize::DWord, Operation::JMPfar, ScaleIndexBase(Source::eAX)); +} + +- (void)test32bitSequence { + const auto instructions = decode({ + 0x2e, 0x42, 0x0c, 0x09, 0x81, 0x47, 0xbe, 0xa9, 0x3a, 0x68, 0x9f, 0xf0, 0x7a, 0xe2, 0x3e, 0xb4, + 0xc1, 0x1f, 0xaa, 0x60, 0xb4, 0xe1, 0x91, 0xdc, 0xf6, 0x62, 0x90, 0x90, 0xdf, 0xcd, 0xf9, 0x0f, + 0xbb, 0x71, 0x4b, 0x58, 0x55, 0x38, 0x2c, 0xf9, 0x50, 0xfe, 0xce, 0xe0, 0xc1, 0xda, 0x83, 0x8c, + 0x19, 0x0c, 0x9b, 0x89, 0x13, 0x34, 0x45, 0xc5, 0x11, 0xa2, 0xd3, 0xa6, 0xdb, 0xe4, 0x1f, 0xa5, + 0x79, 0xf3, 0x7d, 0x1c, 0xb8, 0xda, 0x6b, 0x76, 0x8a, 0x79, 0x28, 0x52, 0xcd, 0xc4, 0xe9, 0xba, + 0x11, 0xcf, 0x29, 0x09, 0x46, 0x1a, 0xc0, 0x5d, 0x88, 0x34, 0xa5, 0x83, 0xe2, 0xd0, 0xf5, 0x44, + 0x9d, 0xa5, 0xc1, 0x5e, 0x4f, 0x07, 0x51, 0xd4, 0xed, 0xb0, 0x69, 0xd7, 0x00, 0xc5, 0x51, 0xfb, + 0x68, 0x85, 0x3a, 0x8b, 0x69, 0x28, 0x0c, 0xec, 0xb1, 0xb7, 0x3b, 0x8d, 0x5f, 0x44, 0x87, 0x2c, + 0xe3, 0x02, 0x9e, 0x74, 0x6e, 0x1b, 0x8f, 0x4d, 0xc5, 0x33, 0x04, 0x9f, 0xac, 0xc0, 0xc9, 0x60, + 0x9a, 0x8a, 0xf5, 0xd0, 0x97, 0x1b, 0xe2, 0x64, 0x60, 0xb0, 0xcf, 0xe3, 0x37, + }, true); + + XCTAssertEqual(instructions.size(), 64); + + // cs inc edx + // or al,0x9 + // add DWORD PTR [edi-0x42],0x9f683aa9 + // lock jp 0xfffffff0 (from 0000000e) + test(instructions[0], DataSize::DWord, Operation::INC, Source::eDX); + XCTAssertEqual(instructions[0].data_segment(), Source::CS); + test(instructions[1], DataSize::Byte, Operation::OR, Source::Immediate, Source::eAX, 0x9); + test(instructions[2], DataSize::DWord, Operation::ADD, Source::Immediate, ScaleIndexBase(Source::eDI), 0x9f683aa9, -0x42); + test(instructions[3], Operation::JP, 0, -30); + XCTAssert(instructions[3].lock()); + + // ds mov ah,0xc1 + // pop ds + // stos BYTE PTR es:[edi],al + // pusha + test(instructions[4], DataSize::Byte, Operation::MOV, Source::Immediate, Source::AH, 0xc1); + XCTAssertEqual(instructions[4].data_segment(), Source::DS); + 
test(instructions[5], DataSize::Word, Operation::POP, Source::None, Source::DS); + test(instructions[6], DataSize::Byte, Operation::STOS); + test(instructions[7], Operation::PUSHA); + + // mov ah,0xe1 + // xchg ecx,eax + // fdivr st(6),st + // bound edx,QWORD PTR [eax-0x6322070] + test(instructions[8], DataSize::Byte, Operation::MOV, Source::Immediate, Source::AH, 0xe1); + test(instructions[9], DataSize::DWord, Operation::XCHG, Source::eAX, Source::eCX); + test(instructions[10], DataSize::None, Operation::ESC); + test(instructions[11], DataSize::DWord, Operation::BOUND, ScaleIndexBase(Source::eAX), Source::eDX, 0, -0x6322070); + + // btc DWORD PTR [ecx+0x4b],esi + // pop eax + // push ebp + // cmp BYTE PTR [ecx+edi*8],ch + test(instructions[12], DataSize::DWord, Operation::BTC, Source::eSI, ScaleIndexBase(Source::eCX), 0, 0x4b); + test(instructions[13], DataSize::DWord, Operation::POP, Source::eAX, Source::eAX); + test(instructions[14], DataSize::DWord, Operation::PUSH, Source::eBP); + test(instructions[15], DataSize::Byte, Operation::CMP, Source::CH, ScaleIndexBase(3, Source::eDI, Source::eCX)); + + // Possibly TODO: pick a lane on whether PUSH/POP duplicate source and destination. + // It doesn't really matter outside of these tests though. 
+ + // push eax + // dec dh + // loopne 0xffffffee (from 0x2d) + // fiadd DWORD PTR [ebx-0x64f3e674] + test(instructions[16], DataSize::DWord, Operation::PUSH, Source::eAX); + test(instructions[17], DataSize::Byte, Operation::DEC, Source::DH); + test(instructions[18], Operation::LOOPNE, 0, -63); + test(instructions[19], Operation::ESC); + + // mov DWORD PTR [ebx],edx + // xor al,0x45 + // lds edx,FWORD PTR [ecx] + // mov ds:0xe4dba6d3,al + test(instructions[20], DataSize::DWord, Operation::MOV, Source::eDX, ScaleIndexBase(Source::eBX)); + test(instructions[21], DataSize::Byte, Operation::XOR, Source::Immediate, Source::eAX, 0x45); + test(instructions[22], DataSize::DWord, Operation::LDS, ScaleIndexBase(Source::eCX), Source::eDX); + test(instructions[23], DataSize::Byte, Operation::MOV, Source::eAX, Source::DirectAddress, 0xe4dba6d3); + XCTAssertEqual(instructions[23].data_segment(), Source::DS); + + // pop ds + // movs DWORD PTR es:[edi],DWORD PTR ds:[esi] + // jns 0x00000035 (from 0x42) + // jge 0x00000060 (from 0x44) + test(instructions[24], DataSize::Word, Operation::POP, Source::None, Source::DS); + test(instructions[25], DataSize::DWord, Operation::MOVS); + test(instructions[26], Operation::JNS, 0, -0xd); + test(instructions[27], Operation::JNL, 0, 0x1c); + + // mov eax,0x8a766bda + // jns 0x00000073 (from 0x4b) + // push edx + // int 0xc4 + test(instructions[28], DataSize::DWord, Operation::MOV, Source::Immediate, Source::eAX, 0x8a766bda); + test(instructions[29], Operation::JNS, 0, 0x28); + test(instructions[30], DataSize::DWord, Operation::PUSH, Source::eDX); + test(instructions[31], Operation::INT, 0xc4); + + // jmp 0x29cf120d (from 0x53) + // or DWORD PTR [esi+0x1a],eax + // rcr BYTE PTR [ebp-0x78],0x34 + // movs DWORD PTR es:[edi],DWORD PTR ds:[esi] + test(instructions[32], Operation::JMPrel, 0, 0x29cf120d - 0x53); + test(instructions[33], DataSize::DWord, Operation::OR, Source::eAX, ScaleIndexBase(Source::eSI), 0, 0x1a); + test(instructions[34], 
DataSize::Byte, Operation::RCR, Source::Immediate, ScaleIndexBase(Source::eBP), 0x34, -0x78); + test(instructions[35], DataSize::DWord, Operation::MOVS); + + // and edx,0xffffffd0 + // cmc + // inc esp + // popf + test(instructions[36], DataSize::DWord, Operation::AND, Source::Immediate, Source::eDX); + test(instructions[37], DataSize::None, Operation::CMC); + test(instructions[38], DataSize::DWord, Operation::INC, Source::eSP); + test(instructions[39], DataSize::DWord, Operation::POPF); + + // movs DWORD PTR es:[edi],DWORD PTR ds:[esi] + // rcr DWORD PTR [esi+0x4f],0x7 + // push ecx + // aam 0xed + test(instructions[40], DataSize::DWord, Operation::MOVS); + test(instructions[41], DataSize::DWord, Operation::RCR, Source::Immediate, ScaleIndexBase(Source::eSI), 0x07, 0x4f); + test(instructions[42], DataSize::DWord, Operation::PUSH, Source::eCX); + test(instructions[43], Operation::AAM, 0xed); + + // mov al,0x69 + // xlat BYTE PTR ds:[ebx] + // add ch,al + // push ecx + test(instructions[44], DataSize::Byte, Operation::MOV, Source::Immediate, Source::eAX, 0x69); + test(instructions[45], Operation::XLAT); + test(instructions[46], DataSize::Byte, Operation::ADD, Source::eAX, Source::CH); + test(instructions[47], DataSize::DWord, Operation::PUSH, Source::eCX); + + // sti + // push 0x698b3a85 + // sub BYTE PTR [esp+ebp*8],cl + // mov cl,0xb7 + test(instructions[48], Operation::STI); + test(instructions[49], DataSize::DWord, Operation::PUSH, Source::Immediate, Source::None, 0x698b3a85); + test(instructions[50], DataSize::Byte, Operation::SUB, Source::eCX, ScaleIndexBase(3, Source::eBP, Source::eSP)); + test(instructions[51], DataSize::Byte, Operation::MOV, Source::Immediate, Source::eCX, 0xb7); + + // cmp ecx,DWORD PTR [ebp+0x2c87445f] + // jecxz 0x00000084 (from 0x82) + // sahf + // je 0x000000f3 (from 0x85) + test(instructions[52], DataSize::DWord, Operation::CMP, ScaleIndexBase(Source::eBP), Source::eCX, 0, 0x2c87445f); + test(instructions[53], Operation::JPCX, 0, 
0x02); + test(instructions[54], Operation::SAHF); + test(instructions[55], Operation::JE, 0, 0x6e); + + // sbb ecx,DWORD PTR [edi+0x433c54d] + // lahf + // lods al,BYTE PTR ds:[esi] + // ror cl,0x60 + test(instructions[56], DataSize::DWord, Operation::SBB, ScaleIndexBase(Source::eDI), Source::eCX, 0, 0x433c54d); + test(instructions[57], Operation::LAHF); + test(instructions[58], Operation::LODS); + test(instructions[59], DataSize::Byte, Operation::ROR, Source::Immediate, Source::eCX, 0x60); + + // call 0xe21b:0x97d0f58a + // fs pusha + // mov al,0xcf + // jecxz 0x000000d4 (from 0x9d) + test_far(instructions[60], Operation::CALLfar, 0xe21b, 0x97d0f58a); + test(instructions[61], Operation::PUSHA); + test(instructions[62], DataSize::Byte, Operation::MOV, Source::Immediate, Source::eAX, 0xcf); + test(instructions[63], Operation::JPCX, 0, 0xd4 - 0x9d); +} + +- (void)testSourceModRegRM1 { + const auto instructions = decode({ + 0x62, 0x90, 0x90, 0xdf, 0xcd, 0xf9 + }, true); + + XCTAssertEqual(instructions.size(), 1); + test(instructions[0], DataSize::DWord, Operation::BOUND, ScaleIndexBase(Source::eAX), Source::eDX, 0, -0x6322070); +} + +- (void)testSourceModRegRM2 { + const auto instructions = decode({ + 0x81, 0x47, 0xbe, 0xa9, 0x3a, 0x68, 0x9f + }, true); + + XCTAssertEqual(instructions.size(), 1); + test(instructions[0], DataSize::DWord, Operation::ADD, Source::Immediate, ScaleIndexBase(Source::eDI), 0x9f683aa9, -0x42); +} + +- (void)test8086LengthLimit { + const std::vector all_prefix(65536, 0x26); + const auto instructions = decode(all_prefix); + XCTAssertEqual(instructions.size(), 1); + test(instructions[0], Operation::NOP); +} + +- (void)test286LengthLimit { + const auto instructions = decode({ + 0x90, + 0x26, 0x90, + 0x26, 0x26, 0x90, + 0x26, 0x26, 0x26, 0x90, + 0x26, 0x26, 0x26, 0x26, 0x90, + 0x26, 0x26, 0x26, 0x26, 0x26, 0x90, + 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90, + 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90, + 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 
0x26, 0x90, + 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90, + 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90, + }); + + XCTAssertEqual(instructions.size(), 12); + test(instructions[0], Operation::NOP); + test(instructions[1], Operation::NOP); + test(instructions[2], Operation::NOP); + test(instructions[3], Operation::NOP); + test(instructions[4], Operation::NOP); + test(instructions[5], Operation::NOP); + test(instructions[6], Operation::NOP); + test(instructions[7], Operation::NOP); + test(instructions[8], Operation::NOP); + test(instructions[9], Operation::NOP); + test(instructions[10], Operation::Invalid); + test(instructions[11], Operation::NOP); +} + + - (void)test386LengthLimit { + const auto instructions = decode({ + 0x90, + 0x26, 0x90, + 0x26, 0x26, 0x90, + 0x26, 0x26, 0x26, 0x90, + 0x26, 0x26, 0x26, 0x26, 0x90, + 0x26, 0x26, 0x26, 0x26, 0x26, 0x90, + 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90, + 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90, + 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90, + 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90, + 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90, + 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90, + 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90, + 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90, + 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90, + 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90, + }); + + XCTAssertEqual(instructions.size(), 17); + test(instructions[0], Operation::NOP); + test(instructions[1], Operation::NOP); + test(instructions[2], Operation::NOP); + test(instructions[3], Operation::NOP); + test(instructions[4], Operation::NOP); + test(instructions[5], Operation::NOP); + test(instructions[6], Operation::NOP); + test(instructions[7], Operation::NOP); + 
test(instructions[8], Operation::NOP); + test(instructions[9], Operation::NOP); + test(instructions[10], Operation::NOP); + test(instructions[11], Operation::NOP); + test(instructions[12], Operation::NOP); + test(instructions[13], Operation::NOP); + test(instructions[14], Operation::NOP); + test(instructions[15], Operation::Invalid); + test(instructions[16], Operation::NOP); +} + +- (void)testAddressSizeModifier { + const auto instructions = decode({ + 0x67, 0xf3, 0x5d, 0x67, 0x3f, 0x67, 0x5a, 0x67, 0xea, 0x17, 0xa2, 0x38, 0x0b, 0xeb, 0xbc, 0x67, + 0x4c, 0x67, 0x3a, 0x1f, 0x67, 0x00, 0x8d, 0xf9, 0x43, 0x67, 0xb1, 0x7c, 0x67, 0x88, 0xd1, 0x67, + 0x31, 0xed, 0x67, 0x22, 0x00, 0x67, 0x79, 0xa7, 0x67, 0x87, 0x3c, 0x67, 0xd4, 0xa2, 0x67, 0x57, + 0x67, 0x02, 0x21, 0x67, 0x48, 0x67, 0x33, 0x5d, 0xd7, 0x67, 0x3c, 0xe1, 0x67, 0x91, 0x67, 0x1b, + 0x84, 0x43, 0x7f, 0x67, 0x15, 0xf6, 0x06, 0x2b, 0x6d + }, true); + + // Lazy: just check that the right number of operations came out. + // Since the potential issue is reading the wrong size of address, that'll do. + XCTAssertEqual(instructions.size(), 22); +} + +- (void)testAddressSizeModifierSIB { + const auto instructions = decode({ + // add dword ptr [bx + si + 256], eax + 0x67, 0x01, 0x80, 0x00, 0x01, + // add [eax + 256], eax + 0x01, 0x80, 0x00, 0x01, 0x00, 0x00 + }, true); + + XCTAssertEqual(instructions.size(), 2); + test(instructions[0], DataSize::DWord, Operation::ADD, Source::eAX, ScaleIndexBase(Source::eBX, Source::eSI), 0, 0x100); + test(instructions[1], DataSize::DWord, Operation::ADD, Source::eAX, ScaleIndexBase(Source::eAX), 0, 0x100); } @end