From 8e35a56ff7cb0d937c9a8d0c9887fced5ecf823f Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Thu, 26 Oct 2023 23:08:07 -0400 Subject: [PATCH] Include repetition in operation; simplify Instruction constructor. --- InstructionSets/x86/Decoder.cpp | 9 +- .../Implementation/PerformImplementation.hpp | 125 +++++++++++------- InstructionSets/x86/Instruction.cpp | 80 ++++++----- InstructionSets/x86/Instruction.hpp | 82 ++++++------ 4 files changed, 172 insertions(+), 124 deletions(-) diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp index 21d3370ab..442bfc37d 100644 --- a/InstructionSets/x86/Decoder.cpp +++ b/InstructionSets/x86/Decoder.cpp @@ -33,8 +33,7 @@ std::pair::InstructionT> Decoder::decode(con /// Sets the operation and verifies that the current repetition, if any, is compatible, discarding it otherwise. #define SetOperation(op) \ - operation_ = op; \ - repetition_ = supports(op, repetition_) ? repetition_ : Repetition::None + operation_ = rep_operation(op, repetition_); /// Helper macro for those that follow. #define SetOpSrcDestSize(op, src, dest, size) \ @@ -1052,11 +1051,9 @@ std::pair::InstructionT> Decoder::decode(con lock_, address_size_, segment_override_, - repetition_, operation_size_, static_cast(displacement_), - static_cast(operand_), - consumed_ + static_cast(operand_) ) ); reset_parsing(); @@ -1067,7 +1064,7 @@ std::pair::InstructionT> Decoder::decode(con if(consumed_ == max_instruction_length) { std::pair result; if(max_instruction_length == 65536) { - result = std::make_pair(consumed_, InstructionT(Operation::NOP, consumed_)); + result = std::make_pair(consumed_, InstructionT(Operation::NOP)); } else { result = std::make_pair(consumed_, InstructionT()); } diff --git a/InstructionSets/x86/Implementation/PerformImplementation.hpp b/InstructionSets/x86/Implementation/PerformImplementation.hpp index eb76cc5fb..95e17007d 100644 --- a/InstructionSets/x86/Implementation/PerformImplementation.hpp +++ b/InstructionSets/x86/Implementation/PerformImplementation.hpp @@ -1379,17 +1379,17 @@ void pushf(MemoryT &memory, RegistersT ®isters, Status &status) { push(value, memory, registers); } -template -bool repetition_over(const InstructionT &instruction, AddressT &eCX) { - return instruction.repetition() != Repetition::None && !eCX; +template +bool repetition_over(const AddressT &eCX) { + return repetition != Repetition::None && !eCX; } -template -void repeat_ene(const InstructionT &instruction, Status &status, AddressT &eCX, FlowControllerT &flow_controller) { +template +void repeat_ene(Status &status, AddressT &eCX, FlowControllerT &flow_controller) { if( - instruction.repetition() == Repetition::None || // No repetition => stop. + repetition == Repetition::None || // No repetition => stop. !(--eCX) || // [e]cx is zero after being decremented => stop. - (instruction.repetition() == Repetition::RepNE) == status.flag() + (repetition == Repetition::RepNE) == status.flag() // repe and !zero, or repne and zero => stop. ) { return; @@ -1397,20 +1397,20 @@ void repeat_ene(const InstructionT &instruction, Status &status, AddressT &eCX, flow_controller.repeat_last(); } -template -void repeat(const InstructionT &instruction, AddressT &eCX, FlowControllerT &flow_controller) { +template +void repeat(AddressT &eCX, FlowControllerT &flow_controller) { if( - instruction.repetition() == Repetition::None || // No repetition => stop. - !(--eCX) // [e]cx is zero after being decremented => stop. + repetition == Repetition::None || // No repetition => stop. + !(--eCX) // [e]cx is zero after being decremented => stop. ) { return; } flow_controller.repeat_last(); } -template +template void cmps(const InstructionT &instruction, AddressT &eCX, AddressT &eSI, AddressT &eDI, MemoryT &memory, Status &status, FlowControllerT &flow_controller) { - if(repetition_over(instruction, eCX)) { + if(repetition_over(eCX)) { return; } @@ -1424,12 +1424,12 @@ void cmps(const InstructionT &instruction, AddressT &eCX, AddressT &eSI, Address Primitive::sub(lhs, rhs, status); - repeat_ene(instruction, status, eCX, flow_controller); + repeat_ene(status, eCX, flow_controller); } -template -void scas(const InstructionT &instruction, AddressT &eCX, AddressT &eDI, IntT &eAX, MemoryT &memory, Status &status, FlowControllerT &flow_controller) { - if(repetition_over(instruction, eCX)) { +template +void scas(AddressT &eCX, AddressT &eDI, IntT &eAX, MemoryT &memory, Status &status, FlowControllerT &flow_controller) { + if(repetition_over(eCX)) { return; } @@ -1438,12 +1438,12 @@ void scas(const InstructionT &instruction, AddressT &eCX, AddressT &eDI, IntT &e Primitive::sub(eAX, rhs, status); - repeat_ene(instruction, status, eCX, flow_controller); + repeat_ene(status, eCX, flow_controller); } -template +template void lods(const InstructionT &instruction, AddressT &eCX, AddressT &eSI, IntT &eAX, MemoryT &memory, Status &status, FlowControllerT &flow_controller) { - if(repetition_over(instruction, eCX)) { + if(repetition_over(eCX)) { return; } @@ -1453,12 +1453,12 @@ void lods(const InstructionT &instruction, AddressT &eCX, AddressT &eSI, IntT &e eAX = memory.template access(source_segment, eSI); eSI += status.direction() * sizeof(IntT); - repeat(instruction, eCX, flow_controller); + repeat(eCX, flow_controller); } -template +template void movs(const InstructionT &instruction, AddressT &eCX, AddressT &eSI, AddressT &eDI, MemoryT &memory, Status &status, FlowControllerT &flow_controller) { - if(repetition_over(instruction, eCX)) { + if(repetition_over(eCX)) { return; } @@ -1470,24 +1470,24 @@ void movs(const InstructionT &instruction, AddressT &eCX, AddressT &eSI, Address eSI += status.direction() * sizeof(IntT); eDI += status.direction() * sizeof(IntT); - repeat(instruction, eCX, flow_controller); + repeat(eCX, flow_controller); } -template -void stos(const InstructionT &instruction, AddressT &eCX, AddressT &eDI, IntT &eAX, MemoryT &memory, Status &status, FlowControllerT &flow_controller) { - if(repetition_over(instruction, eCX)) { +template +void stos(AddressT &eCX, AddressT &eDI, IntT &eAX, MemoryT &memory, Status &status, FlowControllerT &flow_controller) { + if(repetition_over(eCX)) { return; } memory.template access(Source::ES, eDI) = eAX; eDI += status.direction() * sizeof(IntT); - repeat(instruction, eCX, flow_controller); + repeat(eCX, flow_controller); } -template +template void outs(const InstructionT &instruction, AddressT &eCX, uint16_t port, AddressT &eSI, MemoryT &memory, IOT &io, Status &status, FlowControllerT &flow_controller) { - if(repetition_over(instruction, eCX)) { + if(repetition_over(eCX)) { return; } @@ -1496,19 +1496,19 @@ void outs(const InstructionT &instruction, AddressT &eCX, uint16_t port, Address io.template out(port, memory.template access(source_segment, eSI)); eSI += status.direction() * sizeof(IntT); - repeat(instruction, eCX, flow_controller); + repeat(eCX, flow_controller); } -template -void ins(const InstructionT &instruction, AddressT &eCX, uint16_t port, AddressT &eDI, MemoryT &memory, IOT &io, Status &status, FlowControllerT &flow_controller) { - if(repetition_over(instruction, eCX)) { +template +void ins(AddressT &eCX, uint16_t port, AddressT &eDI, MemoryT &memory, IOT &io, Status &status, FlowControllerT &flow_controller) { + if(repetition_over(eCX)) { return; } memory.template access(Source::ES, eDI) = io.template in(port); eDI += status.direction() * sizeof(IntT); - repeat(instruction, eCX, flow_controller); + repeat(eCX, flow_controller); } template @@ -1774,25 +1774,58 @@ template < case Operation::PUSHF: Primitive::pushf(memory, registers, status); break; case Operation::CMPS: - Primitive::cmps(instruction, eCX(), eSI(), eDI(), memory, status, flow_controller); + Primitive::cmps(instruction, eCX(), eSI(), eDI(), memory, status, flow_controller); break; - case Operation::LODS: - Primitive::lods(instruction, eCX(), eSI(), pair_low(), memory, status, flow_controller); + case Operation::CMPS_REPE: + Primitive::cmps(instruction, eCX(), eSI(), eDI(), memory, status, flow_controller); break; - case Operation::MOVS: - Primitive::movs(instruction, eCX(), eSI(), eDI(), memory, status, flow_controller); - break; - case Operation::STOS: - Primitive::stos(instruction, eCX(), eDI(), pair_low(), memory, status, flow_controller); + case Operation::CMPS_REPNE: + Primitive::cmps(instruction, eCX(), eSI(), eDI(), memory, status, flow_controller); break; + case Operation::SCAS: - Primitive::scas(instruction, eCX(), eDI(), pair_low(), memory, status, flow_controller); + Primitive::scas(eCX(), eDI(), pair_low(), memory, status, flow_controller); break; + case Operation::SCAS_REPE: + Primitive::scas(eCX(), eDI(), pair_low(), memory, status, flow_controller); + break; + case Operation::SCAS_REPNE: + Primitive::scas(eCX(), eDI(), pair_low(), memory, status, flow_controller); + break; + + case Operation::LODS: + Primitive::lods(instruction, eCX(), eSI(), pair_low(), memory, status, flow_controller); + break; + case Operation::LODS_REP: + Primitive::lods(instruction, eCX(), eSI(), pair_low(), memory, status, flow_controller); + break; + + case Operation::MOVS: + Primitive::movs(instruction, eCX(), eSI(), eDI(), memory, status, flow_controller); + break; + case Operation::MOVS_REP: + Primitive::movs(instruction, eCX(), eSI(), eDI(), memory, status, flow_controller); + break; + + case Operation::STOS: + Primitive::stos(eCX(), eDI(), pair_low(), memory, status, flow_controller); + break; + case Operation::STOS_REP: + Primitive::stos(eCX(), eDI(), pair_low(), memory, status, flow_controller); + break; + case Operation::OUTS: - Primitive::outs(instruction, eCX(), registers.dx(), eSI(), memory, io, status, flow_controller); + Primitive::outs(instruction, eCX(), registers.dx(), eSI(), memory, io, status, flow_controller); break; + case Operation::OUTS_REP: + Primitive::outs(instruction, eCX(), registers.dx(), eSI(), memory, io, status, flow_controller); + break; + case Operation::INS: - Primitive::outs(instruction, eCX(), registers.dx(), eDI(), memory, io, status, flow_controller); + Primitive::ins(eCX(), registers.dx(), eDI(), memory, io, status, flow_controller); + break; + case Operation::INS_REP: + Primitive::ins(eCX(), registers.dx(), eDI(), memory, io, status, flow_controller); break; } diff --git a/InstructionSets/x86/Instruction.cpp b/InstructionSets/x86/Instruction.cpp index 91a8b5e31..824c36d96 100644 --- a/InstructionSets/x86/Instruction.cpp +++ b/InstructionSets/x86/Instruction.cpp @@ -160,22 +160,54 @@ std::string InstructionSet::x86::to_string(Operation operation, DataSize size, M constexpr char sizes[][6] = { "cmpsb", "cmpsw", "cmpsd", "?" }; return sizes[static_cast(size)]; } - case Operation::LODS: { - constexpr char sizes[][6] = { "lodsb", "lodsw", "lodsd", "?" }; + case Operation::CMPS_REPE: { + constexpr char sizes[][11] = { "repe cmpsb", "repe cmpsw", "repe cmpsd", "?" }; return sizes[static_cast(size)]; } - case Operation::MOVS: { - constexpr char sizes[][6] = { "movsb", "movsw", "movsd", "?" }; + case Operation::CMPS_REPNE: { + constexpr char sizes[][12] = { "repne cmpsb", "repne cmpsw", "repne cmpsd", "?" }; return sizes[static_cast(size)]; } + case Operation::SCAS: { constexpr char sizes[][6] = { "scasb", "scasw", "scasd", "?" }; return sizes[static_cast(size)]; } + case Operation::SCAS_REPE: { + constexpr char sizes[][11] = { "repe scasb", "repe scasw", "repe scasd", "?" }; + return sizes[static_cast(size)]; + } + case Operation::SCAS_REPNE: { + constexpr char sizes[][12] = { "repne scasb", "repne scasw", "repne scasd", "?" }; + return sizes[static_cast(size)]; + } + + case Operation::LODS: { + constexpr char sizes[][6] = { "lodsb", "lodsw", "lodsd", "?" }; + return sizes[static_cast(size)]; + } + case Operation::LODS_REP: { + constexpr char sizes[][10] = { "rep lodsb", "rep lodsw", "rep lodsd", "?" }; + return sizes[static_cast(size)]; + } + + case Operation::MOVS: { + constexpr char sizes[][6] = { "movsb", "movsw", "movsd", "?" }; + return sizes[static_cast(size)]; + } + case Operation::MOVS_REP: { + constexpr char sizes[][10] = { "rep movsb", "rep movsw", "rep movsd", "?" }; + return sizes[static_cast(size)]; + } + case Operation::STOS: { constexpr char sizes[][6] = { "stosb", "stosw", "stosd", "?" }; return sizes[static_cast(size)]; } + case Operation::STOS_REP: { + constexpr char sizes[][10] = { "rep stosb", "rep stosw", "rep stosd", "?" }; + return sizes[static_cast(size)]; + } case Operation::LOOP: return "loop"; case Operation::LOOPE: return "loope"; @@ -445,10 +477,21 @@ std::string InstructionSet::x86::to_string( default: break; case Operation::CMPS: + case Operation::CMPS_REPE: + case Operation::CMPS_REPNE: case Operation::SCAS: + case Operation::SCAS_REPE: + case Operation::SCAS_REPNE: case Operation::STOS: + case Operation::STOS_REP: case Operation::LODS: + case Operation::LODS_REP: case Operation::MOVS: + case Operation::MOVS_REP: + case Operation::INS: + case Operation::INS_REP: + case Operation::OUTS: + case Operation::OUTS_REP: switch(instruction.second.segment_override()) { default: break; case Source::ES: operation += "es "; break; @@ -461,35 +504,6 @@ std::string InstructionSet::x86::to_string( break; } - // Add a repetition prefix; it'll be one of 'rep', 'repe' or 'repne'. - switch(instruction.second.repetition()) { - case Repetition::None: break; - case Repetition::RepE: - switch(instruction.second.operation) { - case Operation::CMPS: - case Operation::SCAS: - operation += "repe "; - break; - - default: - operation += "rep "; - break; - } - break; - case Repetition::RepNE: - switch(instruction.second.operation) { - case Operation::CMPS: - case Operation::SCAS: - operation += "repne "; - break; - - default: - operation += "rep "; - break; - } - break; - } - // Add operation itself. operation += to_string(instruction.second.operation, instruction.second.operation_size(), model); operation += " "; diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp index 7a95dd540..bcd186a88 100644 --- a/InstructionSets/x86/Instruction.hpp +++ b/InstructionSets/x86/Instruction.hpp @@ -128,16 +128,23 @@ enum class Operation: uint8_t { /// Computes the effective address of the source and loads it into the destination. LEA, - /// Compare [bytes or words, per operation size]; source and destination implied to be DS:[SI] and ES:[DI]. - CMPS, - /// Load string; reads from DS:SI into AL or AX, subject to segment override. - LODS, /// Move string; moves a byte or word from DS:SI to ES:DI. If a segment override is provided, it overrides the the source. MOVS, - /// Scan string; reads a byte or word from DS:SI and compares it to AL or AX. - SCAS, + MOVS_REP, + /// Load string; reads from DS:SI into AL or AX, subject to segment override. + LODS, + LODS_REP, /// Store string; store AL or AX to ES:DI. STOS, + STOS_REP, + /// Compare [bytes or words, per operation size]; source and destination implied to be DS:[SI] and ES:[DI]. + CMPS, + CMPS_REPE, + CMPS_REPNE, + /// Scan string; reads a byte or word from DS:SI and compares it to AL or AX. + SCAS, + SCAS_REPE, + SCAS_REPNE, // Perform a possibly-conditional loop, decrementing CX. See the displacement. LOOP, LOOPE, LOOPNE, @@ -246,9 +253,11 @@ enum class Operation: uint8_t { /// ES:[e]DI and incrementing or decrementing [e]DI as per the /// current EFLAGS DF flag. INS, + INS_REP, /// Outputs a byte, word or double word from ES:[e]DI to the port specified by DX, /// incrementing or decrementing [e]DI as per the current EFLAGS DF flag. OUTS, + OUTS_REP, /// Pushes all general purpose registers to the stack, in the order: /// AX, CX, DX, BX, [original] SP, BP, SI, DI. @@ -465,31 +474,39 @@ enum class Repetition: uint8_t { }; /// @returns @c true if @c operation supports repetition mode @c repetition; @c false otherwise. -constexpr bool supports(Operation operation, [[maybe_unused]] Repetition repetition) { +constexpr Operation rep_operation(Operation operation, Repetition repetition) { switch(operation) { - default: return false; + default: return operation; - case Operation::Invalid: // Retain context here; it's used as an intermediate - // state sometimes. case Operation::INS: + return repetition != Repetition::None ? Operation::INS_REP : Operation::INS; case Operation::OUTS: - case Operation::CMPS: + return repetition != Repetition::None ? Operation::OUTS_REP : Operation::OUTS; case Operation::LODS: + return repetition != Repetition::None ? Operation::LODS_REP : Operation::LODS; case Operation::MOVS: - case Operation::SCAS: + return repetition != Repetition::None ? Operation::MOVS_REP : Operation::MOVS; case Operation::STOS: - return true; + return repetition != Repetition::None ? Operation::STOS_REP : Operation::STOS; - // TODO: my new understanding is that the 8086 and 8088 recognise rep and repne on - // IDIV — and possibly DIV — as a quirk, affecting the outcome (possibly negativing the result?). - // So the test below should be a function of model, if I come to a conclusion about whether I'm - // going for fidelity to the instruction set as generally implemented, or to Intel's specific implementation. -// case Operation::IDIV: -// return repetition == Repetition::RepNE; + case Operation::CMPS: + switch(repetition) { + default: + case Repetition::None: return Operation::CMPS; + case Repetition::RepE: return Operation::CMPS_REPE; + case Repetition::RepNE: return Operation::CMPS_REPNE; + } + + case Operation::SCAS: + switch(repetition) { + default: + case Repetition::None: return Operation::SCAS; + case Repetition::RepE: return Operation::SCAS_REPE; + case Repetition::RepNE: return Operation::SCAS_REPNE; + } } } - /// Provides a 32-bit-style scale, index and base; to produce the address this represents, /// calcluate base() + (index() << scale()). /// @@ -790,11 +807,6 @@ template class Instruction { ); } - Repetition repetition() const { - if(!has_length_extension()) return Repetition::None; - return Repetition((length_extension() >> 4) & 3); - } - /// @returns The data size of this operation — e.g. `MOV AX, BX` has a data size of `::Word` but `MOV EAX, EBX` has a data size of /// `::DWord`. This value is guaranteed never to be `DataSize::None` even for operations such as `CLI` that don't have operands and operate /// on data that is not a byte, word or double word. @@ -802,12 +814,6 @@ template class Instruction { return DataSize(source_data_dest_sib_ >> 14); } -// int length() const { -// const int short_length = (source_data_dest_sib_ >> 10) & 15; -// if(short_length) return short_length; -// return length_extension() >> 6; -// } - ImmediateT operand() const { const ImmediateT ops[] = {0, operand_extension()}; return ops[has_operand()]; @@ -825,8 +831,8 @@ template class Instruction { } constexpr Instruction() noexcept {} - constexpr Instruction(Operation operation, int length) noexcept : - Instruction(operation, Source::None, Source::None, ScaleIndexBase(), false, AddressSize::b16, Source::None, Repetition::None, DataSize::None, 0, 0, length) {} + constexpr Instruction(Operation operation) noexcept : + Instruction(operation, Source::None, Source::None, ScaleIndexBase(), false, AddressSize::b16, Source::None, DataSize::None, 0, 0) {} constexpr Instruction( Operation operation, Source source, @@ -835,11 +841,9 @@ template class Instruction { bool lock, AddressSize address_size, Source segment_override, - Repetition repetition, DataSize data_size, DisplacementT displacement, - ImmediateT operand, - int length) noexcept : + ImmediateT operand) noexcept : operation(operation), mem_exts_source_(uint8_t( (int(address_size) << 7) | @@ -851,8 +855,8 @@ template class Instruction { source_data_dest_sib_(uint16_t( (int(data_size) << 14) | (( - (lock || (segment_override != Source::None) || (length > 15) || (repetition != Repetition::None)) - ) ? 0 : (length << 10)) | + (lock || (segment_override != Source::None)) + ) ? 0 : (1 << 10)) | ((uint8_t(sib) & 0xf8) << 2) | int(destination) | (destination == Source::Indirect ? (uint8_t(sib) & 7) : 0) @@ -871,7 +875,7 @@ template class Instruction { } if(has_length_extension()) { extensions_[extension] = ImmediateT( - (length << 6) | (int(repetition) << 4) | ((int(segment_override) & 7) << 1) | int(lock) + ((int(segment_override) & 7) << 1) | int(lock) ); ++extension; }