From 11b6c1d4b58048b5ee52eef3d6418e0179f16f0e Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Sun, 3 Jan 2021 17:03:50 -0500 Subject: [PATCH] Proceeds to three instructions correctly decoded. 'Wow'. --- .../Mac/Clock SignalTests/x86DecoderTests.mm | 72 +++++++++++++++++++ Processors/Decoders/x86/x86.cpp | 52 ++++++++++++-- Processors/Decoders/x86/x86.hpp | 18 +++-- 3 files changed, 131 insertions(+), 11 deletions(-) diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm index 4dab3defb..9e7857364 100644 --- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm +++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm @@ -61,6 +61,78 @@ namespace { 0xb8, 0xad, 0xe8, 0xc8, 0x16, 0x4a, 0xb0, 0x9e, 0xf9, 0xbf, 0x56, 0xea, 0x4e, 0xfd, 0xe4, 0x5a, 0x23, 0xaa, 0x2c, 0x5b, 0x2a, 0xd2, 0xf7, 0x5f, 0x18, 0x86, 0x90, 0x25, 0x64, 0xb7, 0xc3 }]; + + // 68 instructions are expected. + XCTAssertEqual(instructions.size(), 68); + + // sub $0xea77,%ax + // jb 0x00000001 + // dec %bx + // mov $0x28,%ch + // ret + // lret $0x4826 + // gs insw (%dx),%es:(%di) + // jnp 0xffffffaf + // ret $0x4265 + // dec %si + // out %ax,(%dx) + // jo 0x00000037 + // xchg %ax,%sp + // (bad) + // aam $0x93 + // inc %bx + // cmp $0x8e,%al + // push $0x65 + // sbb 0x45(%bx,%si),%bh + // adc %bh,0x3c(%bx) + // sbb %bx,0x16(%bp,%si) + // xor %sp,0x2c(%si) + // out %ax,$0xc6 + // jge 0xffffffe0 + // mov $0x49,%ch + // addr32 popa + // mov $0xcbc0,%dx + // adc $0x7e,%al + // jno 0x0000000b + // push %ax + // js 0x0000007b + // add (%di),%bx + // in $0xc9,%ax + // xchg %ax,%di + // ret + // fwait + // out %al,$0xd3 + // insb (%dx),%es:(%di) + // pop %ax + // dec %bp + // jbe 0xffffffcc + // inc %sp + // (bad) + // lahf + // movsw %ds:(%si),%es:(%di) + // mov $0x12a1,%bp + // lds (%bx,%di),%bp + // leave + // sahf + // fdiv %st(3),%st + // iret + // xchg %ax,%dx + // cmp %bx,-0x70(%di) + // adc $0xb8c3,%ax + // lods %ds:(%si),%ax + // call 0x0000172d + // dec %dx + // mov $0x9e,%al + // stc + // mov $0xea56,%di + // dec %si + // std + // in $0x5a,%al + // and 0x5b2c(%bp,%si),%bp + // sub %dl,%dl + // negw 0x18(%bx) + // xchg %dl,0x6425(%bx,%si) + // mov $0xc3,%bh } @end diff --git a/Processors/Decoders/x86/x86.cpp b/Processors/Decoders/x86/x86.cpp index 8e9833235..55f25ed96 100644 --- a/Processors/Decoders/x86/x86.cpp +++ b/Processors/Decoders/x86/x86.cpp @@ -8,6 +8,7 @@ #include "x86.hpp" +#include #include using namespace CPU::Decoder::x86; @@ -16,7 +17,8 @@ using namespace CPU::Decoder::x86; Decoder::Decoder(Model) {} Instruction Decoder::decode(uint8_t *source, size_t length) { - uint8_t *const limit = source + length; + uint8_t *const begin = source; + uint8_t *const end = source + length; #define MapPartial(value, op, lrg, fmt, phs) \ case value: \ @@ -31,10 +33,11 @@ Instruction Decoder::decode(uint8_t *source, size_t length) { operation_ = Operation::op; \ source_ = Source::src; \ destination_ = Source::dest; \ + format_ = Format::Implied; \ phase_ = Phase::ReadyToPost; \ break - while(phase_ == Phase::Instruction && source != limit) { + while(phase_ == Phase::Instruction && source != end) { // Retain the instruction byte, in case additional decoding is deferred // to the ModRM byte. instr_ = *source; @@ -169,7 +172,7 @@ Instruction Decoder::decode(uint8_t *source, size_t length) { #undef MapInstr - if(phase_ == Phase::ModRM && source != limit) { + if(phase_ == Phase::ModRM && source != end) { const uint8_t mod = *source >> 6; // i.e. mode. const uint8_t reg = (*source >> 3) & 7; // i.e. register. const uint8_t rm = *source & 7; // i.e. register/memory. @@ -234,12 +237,51 @@ Instruction Decoder::decode(uint8_t *source, size_t length) { ++consumed_; } - if(phase_ == Phase::AwaitingOperands && source != limit) { + if(phase_ == Phase::AwaitingOperands && source != end) { // TODO: calculate number of expected operands. + const int required_bytes = large_operand_ ? 2 : 1; + + const int outstanding_bytes = required_bytes - operand_bytes_; + const int bytes_to_consume = std::min(int(end - source), outstanding_bytes); + source += bytes_to_consume; + consumed_ += bytes_to_consume; + operand_bytes_ += bytes_to_consume; + if(bytes_to_consume == outstanding_bytes) { + phase_ = Phase::ReadyToPost; + } else { + // Provide a genuine measure of further bytes required. + return Instruction(-(outstanding_bytes - bytes_to_consume)); + } } if(phase_ == Phase::ReadyToPost) { - // TODO: construct actual Instruction. + Instruction result; + switch(format_) { + case Format::Ac_Data: + if(large_operand_) { + result = Instruction(operation_, Size::Word, Source::AX, Source::Immediate, consumed_); + } else { + result = Instruction(operation_, Size::Byte, Source::AL, Source::Immediate, consumed_); + } + break; + + case Format::Disp: + result = Instruction(operation_, Size::Byte, Source::Immediate, Source::None, consumed_); + break; + + case Format::Implied: + result = Instruction(operation_, large_operand_ ? Size::Word : Size::Byte, source_, destination_, consumed_); + break; + + default: break; + } + + // Reset parser. + consumed_ = operand_bytes_ = 0; + lock_ = add_offset_ = large_offset_ = false; + phase_ = Phase::Instruction; + + return result; } return Instruction(); diff --git a/Processors/Decoders/x86/x86.hpp b/Processors/Decoders/x86/x86.hpp index 4ddbe0d1a..3c796d5b8 100644 --- a/Processors/Decoders/x86/x86.hpp +++ b/Processors/Decoders/x86/x86.hpp @@ -76,18 +76,23 @@ enum class Source: uint8_t { class Instruction { public: - const Operation operation = Operation::Invalid; - const Size operand_size = Size::Byte; + Operation operation = Operation::Invalid; + Size operand_size = Size::Byte; - const Source source = Source::AL; - const Source destination = Source::AL; + Source source = Source::AL; + Source destination = Source::AL; int size() const { return size_; } + Instruction() {} + Instruction(int size) : size_(size) {} + Instruction(Operation operation, Size operand_size, Source source, Source destination, int size) : + operation(operation), operand_size(operand_size), source(source), destination(destination), size_(size) {} + private: - int size_ = 0; + int size_ = -1; }; /*! @@ -117,6 +122,7 @@ struct Decoder { } phase_ = Phase::Instruction; enum class Format: uint8_t { + Implied, MemReg_Reg, Reg_MemReg, Ac_Data, @@ -134,8 +140,8 @@ struct Decoder { None, RepE, RepNE } repetition_ = Repetition::None; - int consumed_ = 0; + int operand_bytes_ = 0; Operation operation_ = Operation::Invalid; bool large_operand_ = false; Source source_ = Source::None;