Lock down Instruction type.

2026-04-20 10:17:05 +00:00 · 2025-04-27 21:43:46 -04:00
parent f766841fad
commit 6cf825d3d8
10 changed files with 80 additions and 67 deletions
@@ -30,6 +30,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(
 	// without any loss of context. This reduces the risk of the decoder tricking a caller into
 	// an infinite loop.
 	static constexpr int max_instruction_length = model >= Model::i80386 ? 15 : (model == Model::i80286 ? 10 : 65536);
+	static constexpr bool is_32bit = instruction_type(model) == InstructionType::Bits32;
 	const uint8_t *const end = source + std::min(length, size_t(max_instruction_length - consumed_));

 	// MARK: - Prefixes (if present) and the opcode.
@@ -689,7 +690,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(
 			};
 			displacement_size_ = sizes[mod];

-			if(is_32bit(model) && address_size_ == AddressSize::b32) {
+			if(is_32bit && address_size_ == AddressSize::b32) {
 				// 32-bit decoding: the range of potential indirections is expanded,
 				// and may segue into obtaining a SIB.
 				sib_ = ScaleIndexBase(0, Source::None, reg_table[rm]);
@@ -764,7 +765,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(

 				// The 16-bit chips have four segment registers;
 				// the 80386 onwards has six.
-				if constexpr (is_32bit(model)) {
+				if constexpr (is_32bit) {
 					if(masked_reg > 5) {
 						return undefined();
 					}
@@ -949,7 +950,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(

 	// MARK: - ScaleIndexBase

-	if constexpr (is_32bit(model)) {
+	if constexpr (is_32bit) {
 		if(phase_ == Phase::ScaleIndexBase && source != end) {
 			sib_ = *source;
 			++source;
@@ -1072,7 +1073,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(
 }

 template <Model model> void Decoder<model>::set_32bit_protected_mode(bool enabled) {
-	if constexpr (!is_32bit(model)) {
+	if constexpr (instruction_type(model) == InstructionType::Bits16) {
 		assert(!enabled);
 		return;
 	}
@@ -23,7 +23,7 @@ namespace InstructionSet::x86 {
 */
 template <Model model> class Decoder {
 public:
-	using InstructionT = Instruction<is_32bit(model)>;
+	using InstructionT = Instruction<instruction_type(model)>;

 	/*!
 		@returns an @c Instruction plus a size; a positive size indicates successful decoding of
@@ -105,9 +105,9 @@ void jump_absolute(
 	context.flow_controller.template jump<uint16_t>(target);
 }

-template <typename AddressT, typename InstructionT, typename ContextT>
+template <typename AddressT, InstructionType type, typename ContextT>
 void call_far(
-	InstructionT &instruction,
+	const Instruction<type> &instruction,
 	ContextT &context
 ) {
 	// TODO: eliminate 16-bit assumption below.
@@ -155,9 +155,9 @@ void call_far(
 	context.flow_controller.template jump<AddressT>(segment, offset);
 }

-template <typename InstructionT, typename ContextT>
+template <InstructionType type, typename ContextT>
 void jump_far(
-	InstructionT &instruction,
+	const Instruction<type> &instruction,
 	ContextT &context
 ) {
 	// TODO: eliminate 16-bit assumption below.
@@ -49,14 +49,14 @@ void ld(
 	}
 }

-template <typename IntT, typename InstructionT, typename ContextT>
+template <typename IntT, InstructionType type, typename ContextT>
 void lea(
-	const InstructionT &instruction,
+	const Instruction<type> &instruction,
 	write_t<IntT> destination,
 	ContextT &context
 ) {
 	// TODO: address size.
-	destination = IntT(address<uint16_t, AccessType::Read>(instruction, instruction.source(), context));
+	destination = IntT(address<uint16_t, AccessType::Read, type>(instruction, instruction.source(), context));
 }

 template <typename AddressT, typename InstructionT, typename ContextT>
@@ -531,16 +531,18 @@ template <
 // It'd be a substantial effort to find the most neat expression of that, I think, so it is not currently done.
 //
 template <
-	typename InstructionT,
+	InstructionType type,
 	typename ContextT
 > void perform(
-	const InstructionT &instruction,
+	const Instruction<type> &instruction,
 	ContextT &context
 ) {
 	const auto size = [](DataSize operation_size, AddressSize address_size) constexpr -> int {
 		return int(operation_size) + (int(address_size) << 2);
 	};

+	static constexpr bool supports_32bit = type != InstructionType::Bits16;
+
 	// Dispatch to a function specialised on data and address size.
 	switch(size(instruction.operation_size(), instruction.address_size())) {
 		// 16-bit combinations.
@@ -557,25 +559,29 @@ template <
 		// model combinations. So if a caller nominates a 16-bit model it can supply registers and memory objects
 		// that don't implement 32-bit registers or accesses.
 		case size(DataSize::Byte, AddressSize::b32):
-			if constexpr (is_32bit(ContextT::model)) {
+			assert(supports_32bit);
+			if constexpr (supports_32bit) {
 				perform<DataSize::Byte, AddressSize::b32>(instruction, context);
 				return;
 			}
 		break;
 		case size(DataSize::Word, AddressSize::b32):
-			if constexpr (is_32bit(ContextT::model)) {
+			assert(supports_32bit);
+			if constexpr (supports_32bit) {
 				perform<DataSize::Word, AddressSize::b32>(instruction, context);
 				return;
 			}
 		break;
 		case size(DataSize::DWord, AddressSize::b16):
-			if constexpr (is_32bit(ContextT::model)) {
+			assert(supports_32bit);
+			if constexpr (supports_32bit) {
 				perform<DataSize::DWord, AddressSize::b16>(instruction, context);
 				return;
 			}
 		break;
 		case size(DataSize::DWord, AddressSize::b32):
-			if constexpr (is_32bit(ContextT::model)) {
+			assert(supports_32bit);
+			if constexpr (supports_32bit) {
 				perform<DataSize::DWord, AddressSize::b32>(instruction, context);
 				return;
 			}
@@ -9,6 +9,7 @@
 #pragma once

 #include "InstructionSets/x86/AccessType.hpp"
+#include "InstructionSets/x86/Instruction.hpp"

 namespace InstructionSet::x86 {

@@ -19,9 +20,10 @@ namespace InstructionSet::x86 {
 ///
 /// If @c source is Source::Immediate then the appropriate portion of @c instruction's operand
 /// is copied to @c *immediate and @c immediate is returned.
-template <typename IntT, AccessType access, typename InstructionT, typename ContextT>
+template <typename IntT, AccessType access, InstructionType type, typename ContextT>
+requires is_x86_data_type<IntT>
 typename Accessor<IntT, access>::type resolve(
-	const InstructionT &instruction,
+	const Instruction<type> &instruction,
 	const Source source,
 	const DataPointer pointer,
 	ContextT &context,
@@ -31,9 +33,10 @@ typename Accessor<IntT, access>::type resolve(

 /// Calculates the absolute address for @c pointer given the registers and memory provided in @c context and taking any
 /// referenced offset from @c instruction.
-template <Source source, typename IntT, AccessType access, typename InstructionT, typename ContextT>
+template <Source source, typename IntT, AccessType access, InstructionType type, typename ContextT>
+requires is_x86_data_type<IntT>
 uint32_t address(
-	InstructionT &instruction,
+	const Instruction<type> &instruction,
 	DataPointer pointer,
 	ContextT &context
 ) {
@@ -44,7 +47,7 @@ uint32_t address(
 	uint32_t address;
 	uint16_t zero = 0;
 	address = resolve<uint16_t, AccessType::Read>(instruction, pointer.index(), pointer, context, &zero);
-	if constexpr (is_32bit(ContextT::model)) {
+	if constexpr (instruction_type(ContextT::model) != InstructionType::Bits16) {
 		address <<= pointer.scale();
 	}
 	address += instruction.offset();
@@ -59,8 +62,9 @@ uint32_t address(
 /// @c nullptr otherwise. @c access is currently unused but is intended to provide the hook upon which updates to
 /// segment registers can be tracked for protected modes.
 template <typename IntT, AccessType access, Source source, typename ContextT>
+requires is_x86_data_type<IntT>
 IntT *register_(ContextT &context) {
-	static constexpr bool supports_dword = is_32bit(ContextT::model);
+	static constexpr bool supports_dword = instruction_type(ContextT::model) != InstructionType::Bits16;

 	switch(source) {
 		case Source::eAX:
@@ -116,17 +120,18 @@ IntT *register_(ContextT &context) {
 		case Source::DS:	if constexpr (std::is_same_v<IntT, uint16_t>) return &context.registers.ds(); else return nullptr;

 		// 16-bit models don't have FS and GS.
-		case Source::FS:	if constexpr (is_32bit(ContextT::model) && std::is_same_v<IntT, uint16_t>) return &context.registers.fs(); else return nullptr;
-		case Source::GS:	if constexpr (is_32bit(ContextT::model) && std::is_same_v<IntT, uint16_t>) return &context.registers.gs(); else return nullptr;
+		case Source::FS:	if constexpr (supports_dword && std::is_same_v<IntT, uint16_t>) return &context.registers.fs(); else return nullptr;
+		case Source::GS:	if constexpr (supports_dword && std::is_same_v<IntT, uint16_t>) return &context.registers.gs(); else return nullptr;

 		default: return nullptr;
 	}
 }

 ///Obtains the address described by @c pointer from @c instruction given the registers and memory as described by @c context.
-template <typename IntT, AccessType access, typename InstructionT, typename ContextT>
+template <typename IntT, AccessType access, InstructionType type, typename ContextT>
+requires is_x86_data_type<IntT>
 uint32_t address(
-	InstructionT &instruction,
+	const Instruction<type> &instruction,
 	DataPointer pointer,
 	ContextT &context
 ) {
@@ -149,9 +154,10 @@ uint32_t address(
 }

 // See forward declaration, above, for details.
-template <typename IntT, AccessType access, typename InstructionT, typename ContextT>
+template <typename IntT, AccessType access, InstructionType type, typename ContextT>
+requires is_x86_data_type<IntT>
 typename Accessor<IntT, access>::type resolve(
-	const InstructionT &instruction,
+	const Instruction<type> &instruction,
 	const Source source,
 	const DataPointer pointer,
 	ContextT &context,
@@ -370,10 +370,10 @@ std::string to_hex(IntT value) {

 }

-template <bool is_32bit>
+template <InstructionType type>
 std::string InstructionSet::x86::to_string(
 	DataPointer pointer,
-	Instruction<is_32bit> instruction,
+	Instruction<type> instruction,
 	int offset_length,
 	int immediate_length,
 	DataSize operation_size
@@ -462,9 +462,9 @@ std::string InstructionSet::x86::to_string(
 	return operand;
 };

-template<bool is_32bit>
+template <InstructionType type>
 std::string InstructionSet::x86::to_string(
-	std::pair<int, Instruction<is_32bit>> instruction,
+	std::pair<int, Instruction<type>> instruction,
 	Model model,
 	int offset_length,
 	int immediate_length
@@ -616,7 +616,7 @@ std::string InstructionSet::x86::to_string(
 //);

 template std::string InstructionSet::x86::to_string(
-	std::pair<int, Instruction<false>> instruction,
+	std::pair<int, Instruction<InstructionType::Bits16>> instruction,
 	Model model,
 	int offset_length,
 	int immediate_length
@@ -697,10 +697,10 @@ private:
 	ScaleIndexBase sib_;
 };

-template<bool is_32bit> class Instruction {
+template<InstructionType type> class Instruction {
 public:
-	using DisplacementT = typename std::conditional<is_32bit, int32_t, int16_t>::type;
-	using ImmediateT = typename std::conditional<is_32bit, uint32_t, uint16_t>::type;
+	using DisplacementT = DisplacementT<type>::type;
+	using ImmediateT = ImmediateT<type>::type;
 	using AddressT = ImmediateT;

 	constexpr Instruction() noexcept = default;
@@ -768,7 +768,7 @@ public:
 	/// this allows a denser packing of instructions into containers.
 	constexpr size_t packing_size() const	{
 		return
-			offsetof(Instruction<is_32bit>, extensions_) +
+			offsetof(Instruction<type>, extensions_) +
 			(has_displacement() + has_operand()) * sizeof(ImmediateT);
 	}

@@ -854,7 +854,7 @@ public:
 	}

 	// Standard comparison operator.
-	constexpr bool operator ==(const Instruction<is_32bit> &rhs) const {
+	constexpr bool operator ==(const Instruction<type> &rhs) const {
 		if(	operation_ != rhs.operation_ ||
 			mem_exts_source_ != rhs.mem_exts_source_ ||
 			source_data_dest_sib_ != rhs.source_data_dest_sib_) {
@@ -920,8 +920,8 @@ private:
 	};
 };

-static_assert(sizeof(Instruction<true>) <= 16);
-static_assert(sizeof(Instruction<false>) <= 10);
+static_assert(sizeof(Instruction<InstructionType::Bits32>) <= 16);
+static_assert(sizeof(Instruction<InstructionType::Bits16>) <= 10);

 //
 // Disassembly aids.
@@ -951,10 +951,10 @@ std::string to_string(Source, DataSize);
 ///
 /// See notes below re: @c offset_length and @c immediate_length.
 /// If @c operation_size is the default value of @c ::None, it'll be taken from the @c instruction.
-template <bool is_32bit>
+template <InstructionType type>
 std::string to_string(
 	DataPointer pointer,
-	Instruction<is_32bit> instruction,
+	Instruction<type> instruction,
 	int offset_length,
 	int immediate_length,
 	DataSize operation_size = InstructionSet::x86::DataSize::None
@@ -966,9 +966,9 @@ std::string to_string(
 ///
 /// If @c offset_length is '2' or '4', truncates any printed offset to 2 or 4 digits if it is compatible with being that length.
 /// If @c immediate_length is '2' or '4', truncates any printed immediate value to 2 or 4 digits if it is compatible with being that length.
-template<bool is_32bit>
+template <InstructionType type>
 std::string to_string(
-	std::pair<int, Instruction<is_32bit>> instruction,
+	std::pair<int, Instruction<type>> instruction,
 	Model model,
 	int offset_length = 0,
 	int immediate_length = 0);
@@ -20,7 +20,25 @@ enum class Model {
 	i80386,
 };

-static constexpr bool is_32bit(const Model model) { return model >= Model::i80386; }
+enum class InstructionType {
+	Bits16,
+	Bits32,
+};
+
+template <InstructionType type> struct DisplacementT;
+template<> struct DisplacementT<InstructionType::Bits16> { using type = int16_t; };
+template<> struct DisplacementT<InstructionType::Bits32> { using type = int32_t; };
+
+template <InstructionType type> struct ImmediateT;
+template<> struct ImmediateT<InstructionType::Bits16> { using type = uint16_t; };
+template<> struct ImmediateT<InstructionType::Bits32> { using type = uint32_t; };
+
+template <InstructionType type> using AddressT = ImmediateT<type>;
+
+static constexpr InstructionType instruction_type(const Model model) {
+	return model >= Model::i80386 ? InstructionType::Bits32 : InstructionType::Bits16;
+}
+
 static constexpr bool has_mode(const Model model, const Mode mode) {
 	switch(mode) {
 		case Mode::Real:	return true;
@@ -33,8 +51,4 @@ static constexpr bool uses_8086_exceptions(const Model model) {
 	return model <= Model::i80186;
 }

-
-template <bool is_32bit> struct AddressT { using type = uint16_t; };
-template <> struct AddressT<true> { using type = uint32_t; };
-
 }
@@ -14,30 +14,16 @@

 namespace InstructionSet::x86 {

-template <
-	Model model_,
-	typename FlowControllerT,
-	typename RegistersT,
-	typename MemoryT,
-	typename IOT
-> struct ExecutionContext {
-	FlowControllerT flow_controller;
-	Flags flags;
-	RegistersT registers;
-	MemoryT memory;
-	IOT io;
-	static constexpr Model model = model_;
-};

 /// Performs @c instruction  querying @c registers and/or @c memory as required, using @c io for port input/output,
 /// and providing any flow control effects to @c flow_controller.
 ///
 /// Any change in processor status will be applied to @c status.
 template <
-	typename InstructionT,
+	InstructionType type,
 	typename ContextT
 > void perform(
-	const InstructionT &instruction,
+	const Instruction<type> &instruction,
 	ContextT &context
 );