1
0
mirror of https://github.com/TomHarte/CLK.git synced 2024-11-26 23:52:26 +00:00
CLK/InstructionSets/x86/Decoder.hpp

224 lines
7.0 KiB
C++
Raw Normal View History

//
2021-01-15 23:16:01 +00:00
// Decoder.hpp
// Clock Signal
//
// Created by Thomas Harte on 01/01/21.
// Copyright © 2021 Thomas Harte. All rights reserved.
//
2021-01-15 23:16:01 +00:00
#ifndef InstructionSets_x86_Decoder_hpp
#define InstructionSets_x86_Decoder_hpp
2021-01-15 23:16:01 +00:00
#include "Instruction.hpp"
#include "Model.hpp"
2021-01-15 23:16:01 +00:00
2021-01-16 03:33:14 +00:00
#include <cstddef>
2021-01-15 23:16:01 +00:00
#include <utility>
namespace InstructionSet {
namespace x86 {
/*!
Implements Intel x86 instruction decoding.
This is an experimental implementation; it has not yet undergone significant testing.
*/
template <Model model> class Decoder {
public:
using InstructionT = Instruction<model >= Model::i80386>;
/*!
@returns an @c Instruction plus a size; a positive size to indicate successful decoding; a
negative size specifies the [negatived] number of further bytes the caller should ideally
collect before calling again. The caller is free to call with fewer, but may not get a decoded
instruction in response, and the decoder may still not be able to complete decoding
even if given that number of bytes.
*/
std::pair<int, InstructionT> decode(const uint8_t *source, size_t length);
/*!
Enables or disables 32-bit protected mode. Meaningful only if the @c Model supports it.
*/
void set_32bit_protected_mode(bool);
private:
enum class Phase {
/// Captures all prefixes and continues until an instruction byte is encountered.
Instruction,
/// Having encountered a 0x0f first instruction byte, waits for the next byte fully to determine the instruction.
InstructionPageF,
/// Receives a ModRegRM byte and either populates the source_ and dest_ fields appropriately
/// or completes decoding of the instruction, as per the instruction format.
ModRegRM,
/// Awaits n 80386+-style scale-index-base byte ('SIB'), indicating the form of indirect addressing.
ScaleIndexBase,
/// Waits for sufficiently many bytes to pass for the required displacement and operand to be captured.
/// Cf. displacement_size_ and operand_size_.
2022-01-31 13:14:33 +00:00
DisplacementOrOperand,
/// Forms and returns an Instruction, and resets parsing state.
ReadyToPost
} phase_ = Phase::Instruction;
/// During the ModRegRM phase, format dictates interpretation of the ModRegRM byte.
///
/// During the ReadyToPost phase, format determines how transiently-recorded fields
/// are packaged into an Instruction.
enum class ModRegRMFormat: uint8_t {
// Parse the ModRegRM for mode, register and register/memory fields
2022-03-05 22:00:48 +00:00
// and populate the source_ and destination_ fields appropriately.
MemReg_Reg,
Reg_MemReg,
// Parse for mode and register/memory fields, populating both
// source_ and destination_ fields with the result. Use the 'register'
// field to check for the POP operation.
MemRegPOP,
// Parse for mode and register/memory fields, populating both
// the destination_ field with the result and setting source_ to Immediate.
// Use the 'register' field to check for the MOV operation.
MemRegMOV,
// Parse for mode and register/memory fields, populating the
2022-03-05 21:44:26 +00:00
// source_ field with the result. Fills destination_ with a segment
// register based on the reg field.
2022-03-05 22:00:48 +00:00
Seg_MemReg,
MemReg_Seg,
2022-03-05 21:44:26 +00:00
//
// 'Group 1'
//
2021-01-09 03:50:59 +00:00
// Parse for mode and register/memory fields, populating the
// destination_ field with the result. Use the 'register' field
// to pick an operation from the ADD/OR/ADC/SBB/AND/SUB/XOR/CMP group and
// waits for an operand equal to the operation size.
MemRegADD_to_CMP,
2022-03-05 21:44:26 +00:00
// Acts exactly as MemRegADD_to_CMP but the operand is fixed in size
// at a single byte, which is sign extended to the operation size.
MemRegADD_to_CMP_SignExtend,
//
// 'Group 2'
//
2021-01-08 02:36:05 +00:00
// Parse for mode and register/memory fields, populating the
2022-03-05 21:44:26 +00:00
// destination_ field with the result. Use the 'register' field
// to pick an operation from the ROL/ROR/RCL/RCR/SAL/SHR/SAR group.
MemRegROL_to_SAR,
//
// 'Group 3'
//
// Parse for mode and register/memory fields, populating both
// source_ and destination_ fields with the result. Use the 'register'
// field to pick an operation from the TEST/NOT/NEG/MUL/IMUL/DIV/IDIV group.
MemRegTEST_to_IDIV,
//
// 'Group 4'
//
2021-01-08 02:36:05 +00:00
// Parse for mode and register/memory fields, populating the
// source_ and destination_ fields with the result. Uses the
// 'register' field to pick INC or DEC.
MemRegINC_DEC,
2022-03-05 21:44:26 +00:00
//
// 'Group 5'
//
// Parse for mode and register/memory fields, populating the
// source_ and destination_ fields with the result. Uses the
// 'register' field to pick from INC/DEC/CALL/JMP/PUSH, altering
// the source to ::Immediate and setting an operand size if necessary.
MemRegINC_to_PUSH,
2022-03-05 21:44:26 +00:00
//
// 'Group 6'
//
// Parse for mode and register/memory field, populating both source_
// and destination_ fields with the result. Uses the 'register' field
// to pick from SLDT/STR/LLDT/LTR/VERR/VERW.
MemRegSLDT_to_VERW,
2022-03-05 21:44:26 +00:00
//
// 'Group 7'
//
// Parse for mode and register/memory field, populating both source_
// and destination_ fields with the result. Uses the 'register' field
// to pick from SGDT/LGDT/SMSW/LMSW.
MemRegSGDT_to_LMSW,
2022-03-05 21:44:26 +00:00
//
// 'Group 8'
//
// TODO.
MemRegBT_to_BTC,
} modregrm_format_ = ModRegRMFormat::MemReg_Reg;
// Ephemeral decoding state.
Operation operation_ = Operation::Invalid;
int consumed_ = 0, operand_bytes_ = 0;
// Source and destination locations.
Source source_ = Source::None;
Source destination_ = Source::None;
// Immediate fields.
int32_t displacement_ = 0;
uint32_t operand_ = 0;
uint64_t inward_data_ = 0;
int next_inward_data_shift_ = 0;
// Indirection style.
ScaleIndexBase sib_;
// Facts about the instruction.
DataSize displacement_size_ = DataSize::None; // i.e. size of in-stream displacement, if any.
DataSize operand_size_ = DataSize::None; // i.e. size of in-stream operand, if any.
DataSize operation_size_ = DataSize::None; // i.e. size of data manipulated by the operation.
bool sign_extend_ = false; // If set then sign extend the operand up to the operation size;
// otherwise it'll be zero-padded.
// Prefix capture fields.
Repetition repetition_ = Repetition::None;
bool lock_ = false;
Source segment_override_ = Source::None;
// 32-bit/16-bit selection.
AddressSize default_address_size_ = AddressSize::b16;
DataSize default_data_size_ = DataSize::Word;
AddressSize address_size_ = AddressSize::b16;
DataSize data_size_ = DataSize::Word;
/// Resets size capture and all fields with default values.
void reset_parsing() {
consumed_ = operand_bytes_ = 0;
displacement_size_ = operand_size_ = operation_size_ = DataSize::None;
displacement_ = operand_ = 0;
lock_ = false;
address_size_ = default_address_size_;
data_size_ = default_data_size_;
segment_override_ = Source::None;
repetition_ = Repetition::None;
phase_ = Phase::Instruction;
source_ = destination_ = Source::None;
sib_ = ScaleIndexBase();
next_inward_data_shift_ = 0;
inward_data_ = 0;
sign_extend_ = false;
}
};
}
}
2021-01-15 23:16:01 +00:00
#endif /* InstructionSets_x86_Decoder_hpp */