//
// x86.hpp
// Clock Signal
//
// Created by Thomas Harte on 1/1/21.
// Copyright © 2021 Thomas Harte. All rights reserved.
//
|
|
|
|
|
|
|
|
|
|
#ifndef x86_hpp
|
|
|
|
|
#define x86_hpp
|
|
|
|
|
|
|
|
|
|
#include <cstddef>
#include <cstdint>
#include <utility>
|
|
|
|
|
|
|
|
|
|
namespace CPU {
|
|
|
|
|
namespace Decoder {
|
|
|
|
|
namespace x86 {
|
|
|
|
|
|
|
|
|
|
/// Selects the processor model; a value of this type is passed to the Decoder constructor.
/// Only the 8086 is currently enumerated.
enum class Model {
    i8086,
};
|
|
|
|
|
|
|
|
|
|
/// Enumerates every operation an Instruction may carry; stored in a single byte.
///
/// Enumerators take their numeric values from declaration order, starting at
/// Invalid = 0, so new entries should be appended rather than inserted if any
/// consumer depends on the existing values.
enum class Operation: uint8_t {
    Invalid,

    /// ASCII adjust after addition; source will be AL and destination will be AX.
    AAA,
    /// ASCII adjust before division; destination will be AX and source will be a multiplier.
    AAD,
    /// ASCII adjust after multiplication; destination will be AX and source will be a divider.
    AAM,
    /// ASCII adjust after subtraction; source will be AL and destination will be AX.
    AAS,
    /// Add with carry; source, destination, operand and displacement will be populated appropriately.
    ADC,
    /// Add; source, destination, operand and displacement will be populated appropriately.
    ADD,
    /// And; source, destination, operand and displacement will be populated appropriately.
    AND,
    /// Far call; followed by a 32-bit operand.
    CALLF,
    /// Displacement call; followed by a 16-bit operand providing a call offset.
    CALLD,
    /// NOTE(review): undocumented in the original; presumably a near call — confirm against the decoder.
    CALLN,
    /// Convert byte into word; source will be AL, destination will be AH.
    CBW,
    /// Clear carry flag; no source or destination provided.
    CLC,
    /// Clear direction flag; no source or destination provided.
    CLD,
    /// Clear interrupt flag; no source or destination provided.
    CLI,
    /// Complement carry flag; no source or destination provided.
    CMC,
    /// Compare; source, destination, operand and displacement will be populated appropriately.
    CMP,
    /// Compare [bytes or words, per operation size]; source and destination implied to be DS:[SI] and ES:[DI].
    CMPS,
    /// Convert word to double word; source will be AX and destination will be DX.
    CWD,
    /// Decimal adjust after addition; source and destination will be AL.
    DAA,
    /// Decimal adjust after subtraction; source and destination will be AL.
    DAS,
    /// Dec; source, destination, operand and displacement will be populated appropriately.
    DEC,

    // The remaining operations are not yet individually documented.
    DIV, ESC, HLT, IDIV, IMUL, IN,
    INC, INT, INT3, INTO, IRET,
    JO, JNO, JB, JNB, JE, JNE, JBE, JNBE,
    JS, JNS, JP, JNP, JL, JNL, JLE, JNLE,
    JMPN,
    JMPF,
    JCXZ,
    LAHF, LDS, LEA,
    LODS, LOOPE, LOOPNE, MOV, MOVS, MUL, NEG, NOP, NOT, OR, OUT,
    POP, POPF, PUSH, PUSHF, RCL, RCR, REP, ROL, ROR, SAHF,
    SAR, SBB, SCAS, SAL, SHR, STC, STD, STI, STOS, SUB, TEST,
    WAIT, XCHG, XLAT, XOR,
    LES, LOOP, JPCX,

    RETF,
    RETN,
};
|
|
|
|
|
|
|
|
|
|
/// Describes the size of data an operation works upon.
///
/// Other than Implied, each enumerator's numeric value equals its width in bytes.
enum class Size: uint8_t {
    Implied = 0,
    Byte = 1,
    Word = 2,
    DWord = 4,
};
|
|
|
|
|
|
|
|
|
|
/// Names a location that may act as an instruction's source, destination or
/// segment override.
///
/// Values follow declaration order from None = 0 to Immediate = 30. Instruction
/// packs a source or destination into six bits and a segment override into three,
/// so the segment registers must keep values below 8 and every enumerator must
/// stay below 64.
enum class Source: uint8_t {
    None,

    // Segment registers.
    CS, DS, ES, SS,

    // General-purpose registers: low byte, high byte, full word.
    AL, AH, AX,
    BL, BH, BX,
    CL, CH, CX,
    DL, DH, DX,

    // Index and pointer registers.
    SI, DI,
    BP, SP,

    // Memory operands.
    IndBXPlusSI,
    IndBXPlusDI,
    IndBPPlusSI,
    IndBPPlusDI,
    IndSI,
    IndDI,
    DirectAddress,
    IndBP,
    IndBX,

    Immediate
};
|
|
|
|
|
|
2021-01-06 02:25:12 +00:00
|
|
|
|
/// Records which repetition, if any, applies to an instruction.
/// Values fit within the two bits that Instruction reserves for them.
enum class Repetition: uint8_t {
    None, RepE, RepNE
};
|
|
|
|
|
|
2021-01-03 00:16:07 +00:00
|
|
|
|
class Instruction {
|
|
|
|
|
public:
|
2021-01-03 22:03:50 +00:00
|
|
|
|
Operation operation = Operation::Invalid;
|
2021-01-03 00:16:07 +00:00
|
|
|
|
|
2021-01-14 02:51:18 +00:00
|
|
|
|
bool operator ==(const Instruction &rhs) const {
|
|
|
|
|
return
|
|
|
|
|
repetition_size_ == rhs.repetition_size_ &&
|
|
|
|
|
sources_ == rhs.sources_ &&
|
|
|
|
|
displacement_ == rhs.displacement_ &&
|
|
|
|
|
operand_ == rhs.operand_;
|
|
|
|
|
}
|
|
|
|
|
|
2021-01-09 02:33:01 +00:00
|
|
|
|
private:
|
|
|
|
|
// b0, b1: a Repetition;
|
2021-01-09 03:22:07 +00:00
|
|
|
|
// b2+: operation size.
|
2021-01-09 02:33:01 +00:00
|
|
|
|
uint8_t repetition_size_ = 0;
|
2021-01-03 00:16:07 +00:00
|
|
|
|
|
2021-01-09 02:33:01 +00:00
|
|
|
|
// b0–b5: source;
|
2021-01-09 03:22:07 +00:00
|
|
|
|
// b6–b11: destination;
|
2021-01-09 02:33:01 +00:00
|
|
|
|
// b12–b14: segment override;
|
|
|
|
|
// b15: lock.
|
|
|
|
|
uint16_t sources_ = 0;
|
2021-01-03 00:16:07 +00:00
|
|
|
|
|
2021-01-09 02:33:01 +00:00
|
|
|
|
// Unpackable fields.
|
2021-01-09 03:38:56 +00:00
|
|
|
|
int16_t displacement_ = 0;
|
2021-01-09 03:22:07 +00:00
|
|
|
|
uint16_t operand_ = 0; // ... or used to store a segment for far operations.
|
2021-01-06 02:25:12 +00:00
|
|
|
|
|
2021-01-09 02:33:01 +00:00
|
|
|
|
public:
|
2021-01-09 03:22:07 +00:00
|
|
|
|
Source source() const { return Source(sources_ & 0x3f); }
|
|
|
|
|
Source destination() const { return Source((sources_ >> 6) & 0x3f); }
|
|
|
|
|
bool lock() const { return sources_ & 0x8000; }
|
|
|
|
|
Source segment_override() const { return Source((sources_ >> 12) & 7); }
|
2021-01-09 02:33:01 +00:00
|
|
|
|
|
|
|
|
|
Repetition repetition() const { return Repetition(repetition_size_ & 3); }
|
2021-01-09 03:22:07 +00:00
|
|
|
|
Size operation_size() const { return Size(repetition_size_ >> 2); }
|
|
|
|
|
|
|
|
|
|
uint16_t segment() const { return uint16_t(operand_); }
|
2021-01-09 02:33:01 +00:00
|
|
|
|
|
2021-01-09 03:38:56 +00:00
|
|
|
|
int16_t displacement() const { return displacement_; }
|
|
|
|
|
uint16_t operand() const { return operand_; }
|
2021-01-09 02:33:01 +00:00
|
|
|
|
|
2021-01-09 03:38:56 +00:00
|
|
|
|
Instruction() noexcept {}
|
2021-01-09 03:22:07 +00:00
|
|
|
|
Instruction(
|
|
|
|
|
Operation operation,
|
|
|
|
|
Source source,
|
|
|
|
|
Source destination,
|
|
|
|
|
bool lock,
|
|
|
|
|
Source segment_override,
|
|
|
|
|
Repetition repetition,
|
|
|
|
|
Size operation_size,
|
2021-01-09 03:38:56 +00:00
|
|
|
|
int16_t displacement,
|
|
|
|
|
uint16_t operand) noexcept :
|
2021-01-09 03:22:07 +00:00
|
|
|
|
operation(operation),
|
|
|
|
|
repetition_size_(uint8_t((int(operation_size) << 2) | int(repetition))),
|
|
|
|
|
sources_(uint16_t(
|
|
|
|
|
int(source) |
|
|
|
|
|
(int(destination) << 6) |
|
|
|
|
|
(int(segment_override) << 12) |
|
|
|
|
|
(int(lock) << 15)
|
|
|
|
|
)),
|
|
|
|
|
displacement_(displacement),
|
|
|
|
|
operand_(operand) {}
|
2021-01-03 00:16:07 +00:00
|
|
|
|
};
|
|
|
|
|
|
2021-01-09 03:22:07 +00:00
|
|
|
|
static_assert(sizeof(Instruction) <= 8);
|
|
|
|
|
|
2021-01-03 00:16:07 +00:00
|
|
|
|
/*!
|
|
|
|
|
Implements Intel x86 instruction decoding.
|
|
|
|
|
|
|
|
|
|
This is an experimental implementation; it has not yet undergone significant testing.
|
|
|
|
|
*/
|
|
|
|
|
struct Decoder {
|
|
|
|
|
public:
|
|
|
|
|
Decoder(Model model);
|
|
|
|
|
|
|
|
|
|
/*!
|
2021-01-09 03:22:07 +00:00
|
|
|
|
@returns an @c Instruction plus a size; a positive size to indicate successful decoding; a
|
2021-01-03 02:19:45 +00:00
|
|
|
|
negative size specifies the [negatived] number of further bytes the caller should ideally
|
|
|
|
|
collect before calling again. The caller is free to call with fewer, but may not get a decoded
|
|
|
|
|
instruction in response, and the decoder may still not be able to complete decoding
|
|
|
|
|
even if given that number of bytes.
|
2021-01-03 00:16:07 +00:00
|
|
|
|
*/
|
2021-01-09 03:22:07 +00:00
|
|
|
|
std::pair<int, Instruction> decode(const uint8_t *source, size_t length);
|
2021-01-03 00:16:07 +00:00
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
enum class Phase {
|
2021-01-04 00:33:16 +00:00
|
|
|
|
/// Captures all prefixes and continues until an instruction byte is encountered.
|
2021-01-03 00:16:07 +00:00
|
|
|
|
Instruction,
|
2021-01-06 02:25:12 +00:00
|
|
|
|
/// Receives a ModRegRM byte and either populates the source_ and dest_ fields appropriately
|
2021-01-04 00:33:16 +00:00
|
|
|
|
/// or completes decoding of the instruction, as per the instruction format.
|
2021-01-06 02:25:12 +00:00
|
|
|
|
ModRegRM,
|
|
|
|
|
/// Waits for sufficiently many bytes to pass for the required displacement and operand to be captured.
|
|
|
|
|
/// Cf. displacement_size_ and operand_size_.
|
|
|
|
|
AwaitingDisplacementOrOperand,
|
2021-01-04 00:33:16 +00:00
|
|
|
|
/// Forms and returns an Instruction, and resets parsing state.
|
2021-01-03 00:16:07 +00:00
|
|
|
|
ReadyToPost
|
|
|
|
|
} phase_ = Phase::Instruction;
|
|
|
|
|
|
2021-01-06 02:25:12 +00:00
|
|
|
|
/// During the ModRegRM phase, format dictates interpretation of the ModRegRM byte.
|
2021-01-04 00:33:16 +00:00
|
|
|
|
///
|
|
|
|
|
/// During the ReadyToPost phase, format determines how transiently-recorded fields
|
|
|
|
|
/// are packaged into an Instruction.
|
2021-01-06 02:25:12 +00:00
|
|
|
|
enum class ModRegRMFormat: uint8_t {
|
2021-01-06 02:34:35 +00:00
|
|
|
|
// Parse the ModRegRM for mode, register and register/memory fields
|
2021-01-06 02:25:12 +00:00
|
|
|
|
// and populate the source_ and destination_ fields appropriate.
|
2021-01-03 00:16:07 +00:00
|
|
|
|
MemReg_Reg,
|
|
|
|
|
Reg_MemReg,
|
2021-01-04 00:33:16 +00:00
|
|
|
|
|
2021-01-06 02:34:35 +00:00
|
|
|
|
// Parse for mode and register/memory fields, populating both
|
|
|
|
|
// source_ and destination_ fields with the result. Use the 'register'
|
|
|
|
|
// field to pick an operation from the TEST/NOT/NEG/MUL/IMUL/DIV/IDIV group.
|
|
|
|
|
MemRegTEST_to_IDIV,
|
|
|
|
|
|
2021-01-07 02:18:24 +00:00
|
|
|
|
// Parse for mode and register/memory fields, populating both
|
|
|
|
|
// source_ and destination_ fields with the result. Use the 'register'
|
|
|
|
|
// field to check for the POP operation.
|
|
|
|
|
MemRegPOP,
|
|
|
|
|
|
|
|
|
|
// Parse for mode and register/memory fields, populating both
|
|
|
|
|
// the destination_ field with the result and setting source_ to Immediate.
|
|
|
|
|
// Use the 'register' field to check for the MOV operation.
|
|
|
|
|
MemRegMOV,
|
|
|
|
|
|
2021-01-08 02:30:01 +00:00
|
|
|
|
// Parse for mode and register/memory fields, populating the
|
|
|
|
|
// destination_ field with the result. Use the 'register' field
|
|
|
|
|
// to pick an operation from the ROL/ROR/RCL/RCR/SAL/SHR/SAR group.
|
|
|
|
|
MemRegROL_to_SAR,
|
|
|
|
|
|
2021-01-09 03:50:59 +00:00
|
|
|
|
// Parse for mode and register/memory fields, populating the
|
|
|
|
|
// destination_ field with the result. Use the 'register' field
|
|
|
|
|
// to pick an operation from the ADD/OR/ADC/SBB/AND/SUB/XOR/CMP group and
|
|
|
|
|
// waits for an operand equal to the operation size.
|
|
|
|
|
MemRegADD_to_CMP,
|
|
|
|
|
|
2021-01-08 02:36:05 +00:00
|
|
|
|
// Parse for mode and register/memory fields, populating the
|
|
|
|
|
// source_ field with the result. Fills destination_ with a segment
|
|
|
|
|
// register based on the reg field.
|
|
|
|
|
SegReg,
|
|
|
|
|
|
2021-01-08 02:59:00 +00:00
|
|
|
|
// Parse for mode and register/memory fields, populating the
|
|
|
|
|
// source_ and destination_ fields with the result. Uses the
|
|
|
|
|
// 'register' field to pick INC or DEC.
|
|
|
|
|
MemRegINC_DEC,
|
|
|
|
|
|
|
|
|
|
// Parse for mode and register/memory fields, populating the
|
|
|
|
|
// source_ and destination_ fields with the result. Uses the
|
|
|
|
|
// 'register' field to pick from INC/DEC/CALL/JMP/PUSH, altering
|
|
|
|
|
// the source to ::Immediate and setting an operand size if necessary.
|
|
|
|
|
MemRegINC_to_PUSH,
|
2021-01-14 01:29:44 +00:00
|
|
|
|
|
|
|
|
|
// Parse for mode and register/memory fields, populating the
|
|
|
|
|
// source_ and destination_ fields with the result. Uses the
|
|
|
|
|
// 'register' field to pick from ADD/ADC/SBB/SUB/CMP, altering
|
|
|
|
|
// the source to ::Immediate and setting an appropriate operand size.
|
|
|
|
|
MemRegADC_to_CMP,
|
2021-01-06 02:25:12 +00:00
|
|
|
|
} modregrm_format_ = ModRegRMFormat::MemReg_Reg;
|
2021-01-03 02:11:19 +00:00
|
|
|
|
|
2021-01-04 00:33:16 +00:00
|
|
|
|
// Ephemeral decoding state.
|
2021-01-03 00:16:07 +00:00
|
|
|
|
Operation operation_ = Operation::Invalid;
|
2021-01-06 02:25:12 +00:00
|
|
|
|
uint8_t instr_ = 0x00; // TODO: is this desired, versus loading more context into ModRegRMFormat?
|
|
|
|
|
int consumed_ = 0, operand_bytes_ = 0;
|
|
|
|
|
|
|
|
|
|
// Source and destination locations.
|
2021-01-03 00:16:07 +00:00
|
|
|
|
Source source_ = Source::None;
|
|
|
|
|
Source destination_ = Source::None;
|
2021-01-06 02:25:12 +00:00
|
|
|
|
|
2021-01-09 03:38:56 +00:00
|
|
|
|
// Immediate fields.
|
|
|
|
|
int16_t displacement_ = 0;
|
|
|
|
|
uint16_t operand_ = 0;
|
2021-01-11 03:55:25 +00:00
|
|
|
|
uint64_t inward_data_ = 0;
|
2021-01-09 03:38:56 +00:00
|
|
|
|
|
2021-01-06 02:25:12 +00:00
|
|
|
|
// Facts about the instruction.
|
|
|
|
|
int displacement_size_ = 0; // i.e. size of in-stream displacement, if any.
|
|
|
|
|
int operand_size_ = 0; // i.e. size of in-stream operand, if any.
|
|
|
|
|
int operation_size_ = 0; // i.e. size of data manipulated by the operation.
|
2021-01-03 22:28:29 +00:00
|
|
|
|
|
2021-01-04 00:33:16 +00:00
|
|
|
|
// Prefix capture fields.
|
2021-01-06 02:25:12 +00:00
|
|
|
|
Repetition repetition_ = Repetition::None;
|
2021-01-03 22:28:29 +00:00
|
|
|
|
bool lock_ = false;
|
|
|
|
|
Source segment_override_ = Source::None;
|
2021-01-04 00:33:16 +00:00
|
|
|
|
|
|
|
|
|
/// Resets size capture and all fields with default values.
|
2021-01-03 22:28:29 +00:00
|
|
|
|
void reset_parsing() {
|
|
|
|
|
consumed_ = operand_bytes_ = 0;
|
2021-01-06 02:25:12 +00:00
|
|
|
|
displacement_size_ = operand_size_ = 0;
|
2021-01-13 02:49:22 +00:00
|
|
|
|
displacement_ = operand_ = 0;
|
2021-01-03 22:28:29 +00:00
|
|
|
|
lock_ = false;
|
|
|
|
|
segment_override_ = Source::None;
|
2021-01-04 00:33:16 +00:00
|
|
|
|
repetition_ = Repetition::None;
|
2021-01-13 02:49:22 +00:00
|
|
|
|
phase_ = Phase::Instruction;
|
2021-01-14 02:51:18 +00:00
|
|
|
|
source_ = destination_ = Source::None;
|
2021-01-03 22:28:29 +00:00
|
|
|
|
}
|
2021-01-03 00:16:07 +00:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#endif /* x86_hpp */
|