1
0
mirror of https://github.com/TomHarte/CLK.git synced 2025-01-15 20:31:36 +00:00
CLK/InstructionSets/x86/Instruction.cpp
2023-11-09 11:55:04 -05:00

619 lines
19 KiB
C++

//
// Instruction.cpp
// Clock Signal
//
// Created by Thomas Harte on 17/09/2023.
// Copyright © 2023 Thomas Harte. All rights reserved.
//
#include "Instruction.hpp"
#include "../../Numeric/Carry.hpp"
#include <cassert>
#include <iomanip>
#include <sstream>
using namespace InstructionSet::x86;
bool InstructionSet::x86::has_displacement(Operation operation) {
switch(operation) {
default: return false;
case Operation::JO: case Operation::JNO:
case Operation::JB: case Operation::JNB:
case Operation::JZ: case Operation::JNZ:
case Operation::JBE: case Operation::JNBE:
case Operation::JS: case Operation::JNS:
case Operation::JP: case Operation::JNP:
case Operation::JL: case Operation::JNL:
case Operation::JLE: case Operation::JNLE:
case Operation::LOOPNE: case Operation::LOOPE:
case Operation::LOOP: case Operation::JCXZ:
case Operation::CALLrel: case Operation::JMPrel:
return true;
}
}
int InstructionSet::x86::max_displayed_operands(Operation operation) {
switch(operation) {
default: return 2;
case Operation::INC: case Operation::DEC:
case Operation::POP: case Operation::PUSH:
case Operation::MUL: case Operation::IMUL_1:
case Operation::IDIV: case Operation::DIV:
case Operation::ESC:
case Operation::AAM: case Operation::AAD:
case Operation::INT:
case Operation::JMPabs: case Operation::JMPfar:
case Operation::CALLabs: case Operation::CALLfar:
case Operation::NEG: case Operation::NOT:
case Operation::RETnear:
case Operation::RETfar:
return 1;
// Pedantically, these have an displacement rather than an operand.
case Operation::JO: case Operation::JNO:
case Operation::JB: case Operation::JNB:
case Operation::JZ: case Operation::JNZ:
case Operation::JBE: case Operation::JNBE:
case Operation::JS: case Operation::JNS:
case Operation::JP: case Operation::JNP:
case Operation::JL: case Operation::JNL:
case Operation::JLE: case Operation::JNLE:
case Operation::LOOPNE: case Operation::LOOPE:
case Operation::LOOP: case Operation::JCXZ:
case Operation::CALLrel: case Operation::JMPrel:
// Genuine zero-operand instructions:
case Operation::CMPS: case Operation::LODS:
case Operation::MOVS: case Operation::SCAS:
case Operation::STOS:
case Operation::CLC: case Operation::CLD:
case Operation::CLI:
case Operation::STC: case Operation::STD:
case Operation::STI:
case Operation::CMC:
case Operation::LAHF: case Operation::SAHF:
case Operation::AAA: case Operation::AAS:
case Operation::DAA: case Operation::DAS:
case Operation::CBW: case Operation::CWD:
case Operation::INTO:
case Operation::PUSHF: case Operation::POPF:
case Operation::IRET:
case Operation::NOP:
case Operation::XLAT:
case Operation::SALC:
case Operation::Invalid:
return 0;
}
}
std::string InstructionSet::x86::to_string(Operation operation, DataSize size, Model model) {
switch(operation) {
case Operation::AAA: return "aaa";
case Operation::AAD: return "aad";
case Operation::AAM: return "aam";
case Operation::AAS: return "aas";
case Operation::DAA: return "daa";
case Operation::DAS: return "das";
case Operation::CBW: return "cbw";
case Operation::CWD: return "cwd";
case Operation::ESC: return "esc";
case Operation::HLT: return "hlt";
case Operation::WAIT: return "wait";
case Operation::ADC: return "adc";
case Operation::ADD: return "add";
case Operation::SBB: return "sbb";
case Operation::SUB: return "sub";
case Operation::MUL: return "mul";
case Operation::IMUL_1: return "imul";
case Operation::DIV: return "div";
case Operation::IDIV: return "idiv";
case Operation::IDIV_REP: return "idiv";
case Operation::INC: return "inc";
case Operation::DEC: return "dec";
case Operation::IN: return "in";
case Operation::OUT: return "out";
case Operation::JO: return "jo";
case Operation::JNO: return "jno";
case Operation::JB: return "jb";
case Operation::JNB: return "jnb";
case Operation::JZ: return "jz";
case Operation::JNZ: return "jnz";
case Operation::JBE: return "jbe";
case Operation::JNBE: return "jnbe";
case Operation::JS: return "js";
case Operation::JNS: return "jns";
case Operation::JP: return "jp";
case Operation::JNP: return "jnp";
case Operation::JL: return "jl";
case Operation::JNL: return "jnl";
case Operation::JLE: return "jle";
case Operation::JNLE: return "jnle";
case Operation::CALLabs: return "call";
case Operation::CALLrel: return "call";
case Operation::CALLfar: return "callf";
case Operation::IRET: return "iret";
case Operation::RETfar: return "retf";
case Operation::RETnear: return "retn";
case Operation::JMPabs: return "jmp";
case Operation::JMPrel: return "jmp";
case Operation::JMPfar: return "jmpf";
case Operation::JCXZ: return "jcxz";
case Operation::INT: return "int";
case Operation::INTO: return "into";
case Operation::LAHF: return "lahf";
case Operation::SAHF: return "sahf";
case Operation::LDS: return "lds";
case Operation::LES: return "les";
case Operation::LEA: return "lea";
case Operation::CMPS: {
constexpr char sizes[][6] = { "cmpsb", "cmpsw", "cmpsd", "?" };
return sizes[static_cast<int>(size)];
}
case Operation::CMPS_REPE: {
constexpr char sizes[][11] = { "repe cmpsb", "repe cmpsw", "repe cmpsd", "?" };
return sizes[static_cast<int>(size)];
}
case Operation::CMPS_REPNE: {
constexpr char sizes[][12] = { "repne cmpsb", "repne cmpsw", "repne cmpsd", "?" };
return sizes[static_cast<int>(size)];
}
case Operation::SCAS: {
constexpr char sizes[][6] = { "scasb", "scasw", "scasd", "?" };
return sizes[static_cast<int>(size)];
}
case Operation::SCAS_REPE: {
constexpr char sizes[][11] = { "repe scasb", "repe scasw", "repe scasd", "?" };
return sizes[static_cast<int>(size)];
}
case Operation::SCAS_REPNE: {
constexpr char sizes[][12] = { "repne scasb", "repne scasw", "repne scasd", "?" };
return sizes[static_cast<int>(size)];
}
case Operation::LODS: {
constexpr char sizes[][6] = { "lodsb", "lodsw", "lodsd", "?" };
return sizes[static_cast<int>(size)];
}
case Operation::LODS_REP: {
constexpr char sizes[][10] = { "rep lodsb", "rep lodsw", "rep lodsd", "?" };
return sizes[static_cast<int>(size)];
}
case Operation::MOVS: {
constexpr char sizes[][6] = { "movsb", "movsw", "movsd", "?" };
return sizes[static_cast<int>(size)];
}
case Operation::MOVS_REP: {
constexpr char sizes[][10] = { "rep movsb", "rep movsw", "rep movsd", "?" };
return sizes[static_cast<int>(size)];
}
case Operation::STOS: {
constexpr char sizes[][6] = { "stosb", "stosw", "stosd", "?" };
return sizes[static_cast<int>(size)];
}
case Operation::STOS_REP: {
constexpr char sizes[][10] = { "rep stosb", "rep stosw", "rep stosd", "?" };
return sizes[static_cast<int>(size)];
}
case Operation::LOOP: return "loop";
case Operation::LOOPE: return "loope";
case Operation::LOOPNE: return "loopne";
case Operation::MOV: return "mov";
case Operation::NEG: return "neg";
case Operation::NOT: return "not";
case Operation::AND: return "and";
case Operation::OR: return "or";
case Operation::XOR: return "xor";
case Operation::NOP: return "nop";
case Operation::POP: return "pop";
case Operation::POPF: return "popf";
case Operation::PUSH: return "push";
case Operation::PUSHF: return "pushf";
case Operation::RCL: return "rcl";
case Operation::RCR: return "rcr";
case Operation::ROL: return "rol";
case Operation::ROR: return "ror";
case Operation::SAL: return "sal";
case Operation::SAR: return "sar";
case Operation::SHR: return "shr";
case Operation::CLC: return "clc";
case Operation::CLD: return "cld";
case Operation::CLI: return "cli";
case Operation::STC: return "stc";
case Operation::STD: return "std";
case Operation::STI: return "sti";
case Operation::CMC: return "cmc";
case Operation::CMP: return "cmp";
case Operation::TEST: return "test";
case Operation::XCHG: return "xchg";
case Operation::XLAT: return "xlat";
case Operation::SALC: return "salc";
case Operation::SETMO:
if(model == Model::i8086) {
return "setmo";
} else {
return "enter";
}
case Operation::SETMOC:
if(model == Model::i8086) {
return "setmoc";
} else {
return "bound";
}
case Operation::Invalid: return "invalid";
default:
assert(false);
return "";
}
}
bool InstructionSet::x86::mnemonic_implies_data_size(Operation operation) {
switch(operation) {
default: return false;
case Operation::CMPS:
case Operation::LODS:
case Operation::MOVS:
case Operation::SCAS:
case Operation::STOS:
case Operation::JMPrel:
case Operation::LEA:
return true;
}
}
std::string InstructionSet::x86::to_string(DataSize size) {
constexpr char sizes[][6] = { "byte", "word", "dword", "?" };
return sizes[static_cast<int>(size)];
}
std::string InstructionSet::x86::to_string(Source source, DataSize size) {
switch(source) {
case Source::eAX: {
constexpr char sizes[][4] = { "al", "ax", "eax", "?" };
return sizes[static_cast<int>(size)];
}
case Source::eCX: {
constexpr char sizes[][4] = { "cl", "cx", "ecx", "?" };
return sizes[static_cast<int>(size)];
}
case Source::eDX: {
constexpr char sizes[][4] = { "dl", "dx", "edx", "?" };
return sizes[static_cast<int>(size)];
}
case Source::eBX: {
constexpr char sizes[][4] = { "bl", "bx", "ebx", "?" };
return sizes[static_cast<int>(size)];
}
case Source::eSPorAH: {
constexpr char sizes[][4] = { "ah", "sp", "esp", "?" };
return sizes[static_cast<int>(size)];
}
case Source::eBPorCH: {
constexpr char sizes[][4] = { "ch", "bp", "ebp", "?" };
return sizes[static_cast<int>(size)];
}
case Source::eSIorDH: {
constexpr char sizes[][4] = { "dh", "si", "esi", "?" };
return sizes[static_cast<int>(size)];
}
case Source::eDIorBH: {
constexpr char sizes[][4] = { "bh", "di", "edi", "?" };
return sizes[static_cast<int>(size)];
}
case Source::ES: return "es";
case Source::CS: return "cs";
case Source::SS: return "ss";
case Source::DS: return "ds";
case Source::FS: return "fd";
case Source::GS: return "gs";
case Source::None: return "0";
case Source::DirectAddress: return "DirectAccess";
case Source::Immediate: return "Immediate";
case Source::Indirect: return "Indirect";
case Source::IndirectNoBase: return "IndirectNoBase";
default: return "???";
}
}
namespace {
std::string to_hex(int value, int digits, bool with_suffix = true) {
auto stream = std::stringstream();
stream << std::setfill('0') << std::uppercase << std::hex << std::setw(digits);
switch(digits) {
case 2: stream << +uint8_t(value); break;
case 4: stream << +uint16_t(value); break;
default: stream << value; break;
}
if (with_suffix) stream << 'h';
return stream.str();
};
template <typename IntT>
std::string to_hex(IntT value) {
auto stream = std::stringstream();
stream << std::uppercase << std::hex << +value << 'h';
return stream.str();
};
}
template <bool is_32bit>
std::string InstructionSet::x86::to_string(
DataPointer pointer,
Instruction<is_32bit> instruction,
int offset_length,
int immediate_length,
DataSize operation_size
) {
if(operation_size == InstructionSet::x86::DataSize::None) operation_size = instruction.operation_size();
std::string operand;
auto append = [](std::stringstream &stream, auto value, int length) {
switch(length) {
case 0:
if(!value) {
return;
}
[[fallthrough]];
case 2:
value &= 0xff;
break;
}
stream << std::uppercase << std::hex << value << 'h';
};
auto append_signed = [](std::stringstream &stream, auto value, int length) {
if(!value && !length) {
return;
}
const bool is_negative = Numeric::top_bit<decltype(value)>() & value;
const uint64_t abs_value = uint64_t(std::abs(int16_t(value))); // TODO: don't assume 16-bit.
stream << (is_negative ? '-' : '+') << std::uppercase << std::hex << abs_value << 'h';
};
using Source = InstructionSet::x86::Source;
const Source source = pointer.source();
switch(source) {
// to_string handles all direct register names correctly.
default: return InstructionSet::x86::to_string(source, operation_size);
case Source::Immediate: {
std::stringstream stream;
append(stream, instruction.operand(), immediate_length);
return stream.str();
}
case Source::DirectAddress:
case Source::Indirect:
case Source::IndirectNoBase: {
std::stringstream stream;
if(!InstructionSet::x86::mnemonic_implies_data_size(instruction.operation())) {
stream << InstructionSet::x86::to_string(operation_size) << ' ';
}
stream << '[';
stream << InstructionSet::x86::to_string(instruction.data_segment(), InstructionSet::x86::DataSize::None) << ':';
bool addOffset = false;
switch(source) {
default: break;
case Source::Indirect:
stream << InstructionSet::x86::to_string(pointer.base(), data_size(instruction.address_size()));
if(pointer.index() != Source::None) {
stream << '+' << InstructionSet::x86::to_string(pointer.index(), data_size(instruction.address_size()));
}
addOffset = true;
break;
case Source::IndirectNoBase:
stream << InstructionSet::x86::to_string(pointer.index(), data_size(instruction.address_size()));
addOffset = true;
break;
case Source::DirectAddress:
stream << std::uppercase << std::hex << instruction.offset() << 'h';
break;
}
if(addOffset) {
append_signed(stream, instruction.offset(), offset_length);
}
stream << ']';
return stream.str();
}
}
return operand;
};
template<bool is_32bit>
std::string InstructionSet::x86::to_string(
std::pair<int, Instruction<is_32bit>> instruction,
Model model,
int offset_length,
int immediate_length
) {
std::string operation;
// Add segment override, if any, ahead of some operations that won't otherwise print it.
switch(instruction.second.operation()) {
default: break;
case Operation::CMPS:
case Operation::CMPS_REPE:
case Operation::CMPS_REPNE:
case Operation::SCAS:
case Operation::SCAS_REPE:
case Operation::SCAS_REPNE:
case Operation::STOS:
case Operation::STOS_REP:
case Operation::LODS:
case Operation::LODS_REP:
case Operation::MOVS:
case Operation::MOVS_REP:
case Operation::INS:
case Operation::INS_REP:
case Operation::OUTS:
case Operation::OUTS_REP:
switch(instruction.second.data_segment()) {
default: break;
case Source::ES: operation += "es "; break;
case Source::CS: operation += "cs "; break;
case Source::DS: operation += "ds "; break;
case Source::SS: operation += "ss "; break;
case Source::GS: operation += "gs "; break;
case Source::FS: operation += "fs "; break;
}
break;
}
// Add operation itself.
operation += to_string(instruction.second.operation(), instruction.second.operation_size(), model);
operation += " ";
// Deal with a few special cases up front.
switch(instruction.second.operation()) {
default: {
const int operands = max_displayed_operands(instruction.second.operation());
const bool displacement = has_displacement(instruction.second.operation());
const bool print_first =
instruction.second.destination().source() != Source::None &&
(
operands > 1 ||
(operands > 0 && instruction.second.source().source() == Source::None)
);
if(print_first) {
operation += to_string(instruction.second.destination(), instruction.second, offset_length, immediate_length);
}
if(operands > 0 && instruction.second.source().source() != Source::None) {
if(print_first) operation += ", ";
operation += to_string(instruction.second.source(), instruction.second, offset_length, immediate_length);
}
if(displacement) {
operation += to_hex(instruction.second.displacement() + instruction.first, offset_length);
}
} break;
case Operation::CALLfar:
case Operation::JMPfar: {
switch(instruction.second.destination().source()) {
case Source::Immediate:
operation += to_hex(instruction.second.segment(), 4, false);
operation += "h:";
operation += to_hex(instruction.second.offset(), 4, false);
operation += "h";
break;
default:
operation += to_string(instruction.second.destination(), instruction.second, offset_length, immediate_length);
break;
}
} break;
case Operation::LDS:
case Operation::LES: // The test set labels the pointer type as dword, which I guess is technically accurate.
// A full 32 bits will be loaded from that address in 16-bit mode.
operation += to_string(instruction.second.destination(), instruction.second, offset_length, immediate_length);
operation += ", ";
operation += to_string(instruction.second.source(), instruction.second, offset_length, immediate_length, InstructionSet::x86::DataSize::DWord);
break;
case Operation::IN:
operation += to_string(instruction.second.destination(), instruction.second, offset_length, immediate_length);
operation += ", ";
switch(instruction.second.source().source()) {
case Source::DirectAddress:
operation += to_hex(uint8_t(instruction.second.offset()));
break;
default:
operation += to_string(instruction.second.source(), instruction.second, offset_length, immediate_length, InstructionSet::x86::DataSize::Word);
break;
}
break;
case Operation::OUT:
switch(instruction.second.destination().source()) {
case Source::DirectAddress:
operation += to_hex(uint8_t(instruction.second.offset()));
break;
default:
operation += to_string(instruction.second.destination(), instruction.second, offset_length, immediate_length, InstructionSet::x86::DataSize::Word);
break;
}
operation += ", ";
operation += to_string(instruction.second.source(), instruction.second, offset_length, immediate_length);
break;
// Rolls and shifts list eCX as a source on the understanding that everyone knows that rolls and shifts
// use CL even when they're shifting or rolling a word-sized quantity.
case Operation::RCL: case Operation::RCR:
case Operation::ROL: case Operation::ROR:
case Operation::SAL: case Operation::SAR:
case Operation::SHR:
case Operation::SETMO: case Operation::SETMOC:
operation += to_string(instruction.second.destination(), instruction.second, offset_length, immediate_length);
switch(instruction.second.source().source()) {
case Source::None: break;
case Source::eCX: operation += ", cl"; break;
case Source::Immediate:
// Providing an immediate operand of 1 is a little future-proofing by the decoder; the '1'
// is actually implicit on a real 8088. So omit it.
if(instruction.second.operand() == 1) break;
[[fallthrough]];
default:
operation += ", ";
operation += to_string(instruction.second.source(), instruction.second, offset_length, immediate_length);
break;
}
break;
}
return operation;
}
// Although advertised, 32-bit printing is incomplete.
//
//template std::string InstructionSet::x86::to_string(
// Instruction<true> instruction,
// Model model,
// int offset_length,
// int immediate_length
//);
template std::string InstructionSet::x86::to_string(
std::pair<int, Instruction<false>> instruction,
Model model,
int offset_length,
int immediate_length
);