//
// x86.hpp
// Clock Signal
//
// Created by Thomas Harte on 1/1/21.
// Copyright © 2021 Thomas Harte. All rights reserved.
//
|
|
|
|
|
|
|
|
#ifndef x86_hpp
|
|
|
|
#define x86_hpp
|
|
|
|
|
|
|
|
#include <cstddef>
|
|
|
|
#include <cstdint>
|
|
|
|
|
|
|
|
namespace CPU {
|
|
|
|
namespace Decoder {
|
|
|
|
namespace x86 {
|
|
|
|
|
|
|
|
/// Processor models that can be named when constructing a Decoder.
/// The 8086 is the only model currently listed.
enum class Model {
	i8086,
};
|
|
|
|
|
|
|
|
/// Enumerates the operations that a decoded Instruction can describe.
///
/// Enumerators take consecutive values starting from Invalid = 0, so the
/// declaration order below is significant to anything that stores or
/// compares the underlying value.
///
/// NOTE(review): neither LES nor LOOP appears in this list although both are
/// 8086 instructions — confirm whether that omission is intentional.
enum class Operation: uint8_t {
	/// The default; used by Instruction when no operation has been set.
	Invalid,

	AAA, AAD, AAM, AAS, ADC, ADD, AND, CALL, CBW, CLC, CLD, CLI, CMC,
	CMP, CMPS, CWD, DAA, DAS, DEC, DIV, ESC, HLT, IDIV, IMUL, IN,
	INC, INT, INTO, IRET,

	// Conditional jumps, listed as condition/negated-condition pairs.
	JO, JNO,
	JB, JNB,
	JE, JNE,
	JBE, JNBE,
	JS, JNS,
	JP, JNP,
	JL, JNL,
	JLE, JNLE,

	JMP, JCXZ,
	LAHF, LDS, LEA,
	LODS, LOOPE, LOOPNE, MOV, MOVS, MUL, NEG, NOP, NOT, OR, OUT,
	POP, POPF, PUSH, PUSHF, RCL, RCR, REP, ROL, ROR, SAHF,
	SAR, SBB, SCAS, SHL, SHR, STC, STD, STI, STOS, SUB, TEST,
	WAIT, XCHG, XLAT, XOR,

	// Presumably return from an inter-segment call and return from an
	// intra-segment call respectively — TODO confirm against the decoder.
	RETInter,
	RETIntra,
};
|
|
|
|
|
|
|
|
/// Operand sizes. Each explicit size has its width in bytes as its value;
/// 0 stands for a size that is implied rather than stated.
enum class Size: uint8_t {
	Implied	= 0,
	Byte	= 1,
	Word	= 2,
	DWord	= 4,
};
|
|
|
|
|
|
|
|
/// Enumerates the places an operand can come from or go to; used for both
/// the source and the destination of an Instruction.
///
/// Enumerators take consecutive values starting from None = 0, so the
/// declaration order below is significant to anything that relies on the
/// underlying value.
enum class Source: uint8_t {
	None,

	// General-purpose registers: low byte, high byte, full word.
	AL, AH, AX,
	BL, BH, BX,
	CL, CH, CX,
	DL, DH, DX,

	// Segment registers.
	CS, DS, ES, SS,

	// Index and pointer registers.
	SI, DI,
	BP, SP,

	// Memory operands: indirect via the named register(s), or via a
	// directly-specified address.
	IndBXPlusSI,
	IndBXPlusDI,
	IndBPPlusSI,
	IndBPPlusDI,
	IndSI,
	IndDI,
	DirectAddress,
	IndBP,
	IndBX,

	Immediate
};
|
|
|
|
|
|
|
|
class Instruction {
|
|
|
|
public:
|
2021-01-03 22:03:50 +00:00
|
|
|
Operation operation = Operation::Invalid;
|
|
|
|
Size operand_size = Size::Byte;
|
2021-01-03 00:16:07 +00:00
|
|
|
|
2021-01-03 22:03:50 +00:00
|
|
|
Source source = Source::AL;
|
|
|
|
Source destination = Source::AL;
|
2021-01-03 00:16:07 +00:00
|
|
|
|
|
|
|
int size() const {
|
|
|
|
return size_;
|
|
|
|
}
|
|
|
|
|
2021-01-03 22:03:50 +00:00
|
|
|
Instruction() {}
|
|
|
|
Instruction(int size) : size_(size) {}
|
|
|
|
Instruction(Operation operation, Size operand_size, Source source, Source destination, int size) :
|
|
|
|
operation(operation), operand_size(operand_size), source(source), destination(destination), size_(size) {}
|
|
|
|
|
2021-01-03 00:16:07 +00:00
|
|
|
private:
|
2021-01-03 22:03:50 +00:00
|
|
|
int size_ = -1;
|
2021-01-03 00:16:07 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
/*!
|
|
|
|
Implements Intel x86 instruction decoding.
|
|
|
|
|
|
|
|
This is an experimental implementation; it has not yet undergone significant testing.
|
|
|
|
*/
|
|
|
|
struct Decoder {
|
|
|
|
public:
|
|
|
|
Decoder(Model model);
|
|
|
|
|
|
|
|
/*!
|
|
|
|
@returns an @c Instruction with a positive size to indicate successful decoding; a
|
2021-01-03 02:19:45 +00:00
|
|
|
negative size specifies the [negatived] number of further bytes the caller should ideally
|
|
|
|
collect before calling again. The caller is free to call with fewer, but may not get a decoded
|
|
|
|
instruction in response, and the decoder may still not be able to complete decoding
|
|
|
|
even if given that number of bytes.
|
2021-01-03 00:16:07 +00:00
|
|
|
*/
|
|
|
|
Instruction decode(uint8_t *source, size_t length);
|
|
|
|
|
|
|
|
private:
|
|
|
|
enum class Phase {
|
2021-01-04 00:33:16 +00:00
|
|
|
/// Captures all prefixes and continues until an instruction byte is encountered.
|
2021-01-03 00:16:07 +00:00
|
|
|
Instruction,
|
2021-01-04 00:33:16 +00:00
|
|
|
/// Receives a ModRM byte and either populates the source_ and dest_ fields appropriately
|
|
|
|
/// or completes decoding of the instruction, as per the instruction format.
|
2021-01-03 00:16:07 +00:00
|
|
|
ModRM,
|
2021-01-04 00:33:16 +00:00
|
|
|
/// Waits for sufficiently many bytes to pass for all associated operands to be captured.
|
2021-01-03 00:16:07 +00:00
|
|
|
AwaitingOperands,
|
2021-01-04 00:33:16 +00:00
|
|
|
/// Forms and returns an Instruction, and resets parsing state.
|
2021-01-03 00:16:07 +00:00
|
|
|
ReadyToPost
|
|
|
|
} phase_ = Phase::Instruction;
|
|
|
|
|
2021-01-04 00:33:16 +00:00
|
|
|
/// During the ModRM phase, format dictates interpretation of the ModRM byte.
|
|
|
|
///
|
|
|
|
/// During the ReadyToPost phase, format determines how transiently-recorded fields
|
|
|
|
/// are packaged into an Instruction.
|
2021-01-03 00:16:07 +00:00
|
|
|
enum class Format: uint8_t {
|
2021-01-03 22:03:50 +00:00
|
|
|
Implied,
|
2021-01-04 00:33:16 +00:00
|
|
|
|
|
|
|
// In both cases: pass the ModRM for mode, register and register/memory
|
|
|
|
// flags and populate the source_ and destination_ fields appropriate.
|
|
|
|
// During the ModRM phase they'll be populated as source_ = register,
|
|
|
|
// destination_ = register/memory; the ReadyToPost phase should switch
|
|
|
|
// those around as necessary.
|
2021-01-03 00:16:07 +00:00
|
|
|
MemReg_Reg,
|
|
|
|
Reg_MemReg,
|
2021-01-04 00:33:16 +00:00
|
|
|
|
|
|
|
Reg_Data,
|
|
|
|
|
|
|
|
//
|
2021-01-03 00:29:43 +00:00
|
|
|
Reg_Addr,
|
2021-01-03 02:11:19 +00:00
|
|
|
Addr_Reg,
|
2021-01-04 00:33:16 +00:00
|
|
|
|
|
|
|
SegReg_MemReg,
|
2021-01-03 02:11:19 +00:00
|
|
|
Disp,
|
|
|
|
Addr
|
2021-01-03 00:16:07 +00:00
|
|
|
} format_ = Format::MemReg_Reg;
|
2021-01-03 02:11:19 +00:00
|
|
|
// TODO: figure out which Formats can be folded together,
|
|
|
|
// and which are improperly elided.
|
|
|
|
|
2021-01-04 00:33:16 +00:00
|
|
|
// Ephemeral decoding state.
|
2021-01-03 00:16:07 +00:00
|
|
|
Operation operation_ = Operation::Invalid;
|
2021-01-03 02:11:19 +00:00
|
|
|
bool large_operand_ = false;
|
2021-01-03 00:16:07 +00:00
|
|
|
Source source_ = Source::None;
|
|
|
|
Source destination_ = Source::None;
|
2021-01-03 00:29:43 +00:00
|
|
|
uint8_t instr_ = 0x00;
|
2021-01-03 02:11:19 +00:00
|
|
|
bool add_offset_ = false;
|
|
|
|
bool large_offset_ = false;
|
2021-01-03 22:28:29 +00:00
|
|
|
|
2021-01-04 00:33:16 +00:00
|
|
|
// Prefix capture fields.
|
|
|
|
enum class Repetition: uint8_t {
|
|
|
|
None, RepE, RepNE
|
|
|
|
} repetition_ = Repetition::None;
|
2021-01-03 22:28:29 +00:00
|
|
|
bool lock_ = false;
|
|
|
|
Source segment_override_ = Source::None;
|
2021-01-04 00:33:16 +00:00
|
|
|
|
|
|
|
// Size capture.
|
|
|
|
int consumed_ = 0;
|
|
|
|
int operand_bytes_ = 0;
|
|
|
|
|
|
|
|
/// Resets size capture and all fields with default values.
|
2021-01-03 22:28:29 +00:00
|
|
|
void reset_parsing() {
|
|
|
|
consumed_ = operand_bytes_ = 0;
|
|
|
|
lock_ = false;
|
|
|
|
segment_override_ = Source::None;
|
2021-01-04 00:33:16 +00:00
|
|
|
repetition_ = Repetition::None;
|
2021-01-03 22:28:29 +00:00
|
|
|
}
|
2021-01-03 00:16:07 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* x86_hpp */
|