Refactor: (i) to expose effective address calculation; and (ii) to include address size in Instruction.

2024-11-26 23:52:26 +00:00 · 2022-03-01 09:36:37 -05:00 · 2022-03-01 09:36:37 -05:00 · 2c816db45e
commit 2c816db45e
parent b920507f34
2 changed files with 213 additions and 161 deletions
--- a/InstructionSets/x86/DataPointerResolver.hpp
+++ b/InstructionSets/x86/DataPointerResolver.hpp
@ -39,134 +39,9 @@ enum class Register: uint8_t {
 	None
 };

-/// Reads from or writes to the source or target identified by a DataPointer, relying upon two user-supplied classes:
-///
-/// * a register bank; and
-/// * a memory pool.
-///
-/// The register bank should implement `template<typename DataT, Register> DataT read()` and `template<typename DataT, Register> void write(DataT)`.
-/// Those functions will be called only with registers and data types that are appropriate to the @c model.
-///
-/// The memory pool should implement `template<typename DataT> DataT read(Source segment, uint32_t address)` and
-/// `template<typename DataT> void write(Source segment, uint32_t address, DataT value)`.
-template <Model model, typename RegistersT, typename MemoryT> class DataPointerResolver {
-	public:
-		template <typename DataT> static DataT read(
-			RegistersT &registers,
-			MemoryT &memory,
-			const Instruction<is_32bit(model)> &instruction,
-			DataPointer pointer,
-			typename Instruction<is_32bit(model)>::AddressT memory_mask = ~0) {
-				DataT result;
-				access<true>(registers, memory, instruction, pointer, memory_mask, result);
-				return result;
-			}
+template <typename DataT> constexpr Register register_for_source(Source source) {
+	static_assert(sizeof(DataT) == 4 || sizeof(DataT) == 2 || sizeof(DataT) == 1);

-		template <typename DataT> static void write(
-			RegistersT &registers,
-			MemoryT &memory,
-			const Instruction<is_32bit(model)> &instruction,
-			DataPointer pointer,
-			DataT value,
-			typename Instruction<is_32bit(model)>::AddressT memory_mask = ~0) {
-				access<false>(registers, memory, instruction, pointer, memory_mask, value);
-			}
-
-	private:
-		template <bool is_write, typename DataT> static void access(
-			RegistersT &registers,
-			MemoryT &memory,
-			const Instruction<is_32bit(model)> &instruction,
-			DataPointer pointer,
-			typename Instruction<is_32bit(model)>::AddressT memory_mask,
-			DataT &value) {
-				assert(memory_mask == 0xffff'ffff || memory_mask == 0xffff);
-				const Source source = pointer.source();
-
-#define read_or_write(v, x, is_for_indirection)															\
-	case Source::x:																						\
-		if constexpr (!is_for_indirection && is_write) {												\
-			registers.template write<decltype(v), register_for_source<decltype(v)>(Source::x)>(v);		\
-		} else {																						\
-			v = registers.template read<decltype(v), register_for_source<decltype(v)>(Source::x)>();	\
-		}																								\
-	break;
-
-#define ALLREGS(v)	f(v, eAX); f(v, eCX); f(v, eDX); f(v, eBX); \
-					f(v, eSPorAH); f(v, eBPorCH); f(v, eSIorDH); f(v, eDIorBH); \
-					f(v, ES); f(v, CS); f(v, SS); f(v, DS); f(v, FS); f(v, GS);
-
-			switch(source) {
-				default:
-					if constexpr (!is_write) {
-						value = 0;
-					}
-				return;
-
-#define f(x, y) read_or_write(x, y, false)
-				ALLREGS(value);
-#undef f
-
-				case Source::DirectAddress:
-					if constexpr(is_write) {
-						memory.template write<DataT>(instruction.data_segment(), instruction.displacement(), value);
-					} else {
-						value = memory.template read<DataT>(instruction.data_segment(), instruction.displacement());
-					}
-				break;
-				case Source::Immediate:
-					value = DataT(instruction.operand());
-				break;
-
-				case Source::Indirect: {
-					using AddressT = typename Instruction<is_32bit(model)>::AddressT;
-					AddressT base = 0, index = 0;
-
-#define f(x, y) read_or_write(x, y, true)
-					switch(pointer.base()) {
-						default: break;
-						ALLREGS(base);
-					}
-
-					switch(pointer.index()) {
-						default: break;
-						ALLREGS(index);
-					}
-#undef f
-
-					// Always compute address as 32-bit.
-					// TODO: verify application of memory_mask here.
-					// The point of memory_mask is that 32-bit x86 offers the memory size modifier,
-					// permitting 16-bit addresses to be generated in 32-bit mode and vice versa.
-					// To figure out is at what point in the calculation the 16-bit constraint is
-					// applied when active.
-					uint32_t address = index;
-					if constexpr (model >= Model::i80386) {
-						address <<= pointer.scale();
-					} else {
-						assert(!pointer.scale());
-					}
-
-					address = (address & memory_mask) + (base & memory_mask) + instruction.displacement();
-
-					if constexpr (is_write) {
-						value = memory.template read<DataT>(
-							instruction.data_segment(),
-							address
-						);
-					} else {
-						memory.template write<DataT>(
-							instruction.data_segment(),
-							address,
-							value
-						);
-					}
-				}
-			}
-#undef ALLREGS
-		}
-
-		template <typename DataT> constexpr static Register register_for_source(Source source) {
 	if constexpr (sizeof(DataT) == 4) {
 		switch(source) {
 			case Source::eAX:		return Register::EAX;
@ -220,8 +95,181 @@ template <Model model, typename RegistersT, typename MemoryT> class DataPointerR

 	return Register::None;
 }
+
+/// Reads from or writes to the source or target identified by a DataPointer, relying upon two user-supplied classes:
+///
+/// * a register bank; and
+/// * a memory pool.
+///
+/// The register bank should implement `template<typename DataT, Register> DataT read()` and `template<typename DataT, Register> void write(DataT)`.
+/// Those functions will be called only with registers and data types that are appropriate to the @c model.
+///
+/// The memory pool should implement `template<typename DataT> DataT read(Source segment, uint32_t address)` and
+/// `template<typename DataT> void write(Source segment, uint32_t address, DataT value)`.
+///
+/// All members are static; the class carries no state of its own.
+template <Model model, typename RegistersT, typename MemoryT> class DataPointerResolver {
+	public:
+		/// Reads the data pointed to by @c pointer, referencing @c instruction, @c memory and @c registers as necessary.
+		template <typename DataT> static DataT read(
+			RegistersT &registers,
+			MemoryT &memory,
+			const Instruction<is_32bit(model)> &instruction,
+			DataPointer pointer);
+
+		/// Writes @c value to the data pointed to by @c pointer, referencing @c instruction, @c memory and @c registers as necessary.
+		template <typename DataT> static void write(
+			RegistersT &registers,
+			MemoryT &memory,
+			const Instruction<is_32bit(model)> &instruction,
+			DataPointer pointer,
+			DataT value);
+
+		/// Computes the effective address of @c pointer including any displacement applied by @c instruction.
+		/// @c pointer must be of type Source::Indirect.
+		static uint32_t effective_address(
+			RegistersT &registers,
+			const Instruction<is_32bit(model)> &instruction,
+			DataPointer pointer);
+
+	private:
+		/// Common implementation behind read() and write(); the direction of transfer is fixed at compile
+		/// time by @c is_write, and @c value is either the destination of a read or the source of a write.
+		template <bool is_write, typename DataT> static void access(
+			RegistersT &registers,
+			MemoryT &memory,
+			const Instruction<is_32bit(model)> &instruction,
+			DataPointer pointer,
+			DataT &value);
+};

+
+//
+//	Implementation begins here.
+//
+
+/// Reads the data identified by @c pointer by running access() in its read configuration
+/// and returning whatever it deposits into a local.
+template <Model model, typename RegistersT, typename MemoryT>
+template <typename DataT> DataT DataPointerResolver<model, RegistersT, MemoryT>::read(
+	RegistersT &registers,
+	MemoryT &memory,
+	const Instruction<is_32bit(model)> &instruction,
+	DataPointer pointer) {
+		DataT result;
+		// is_write = false: access() fills @c result (zeroing it for unrecognised sources).
+		access<false>(registers, memory, instruction, pointer, result);
+		return result;
+	}
+
+/// Writes @c value to the location identified by @c pointer by running access() in its
+/// write configuration.
+template <Model model, typename RegistersT, typename MemoryT>
+template <typename DataT> void DataPointerResolver<model, RegistersT, MemoryT>::write(
+	RegistersT &registers,
+	MemoryT &memory,
+	const Instruction<is_32bit(model)> &instruction,
+	DataPointer pointer,
+	DataT value) {
+		// is_write = true: access() stores @c value to the register or memory location.
+		access<true>(registers, memory, instruction, pointer, value);
+	}
+
+// Emits a `case Source::r` that stores @c v into the corresponding register when @c is_write is true,
+// or loads that register into @c v otherwise. Expects a variable named @c registers to be in scope.
+#define rw(v, r, is_write)														\
+	case Source::r: {															\
+		if constexpr (is_write) {												\
+			registers.template write<decltype(v), register_for_source<decltype(v)>(Source::r)>(v);						\
+		} else {																\
+			v = registers.template read<decltype(v), register_for_source<decltype(v)>(Source::r)>();					\
+		}																		\
+	} break;
+
+// Emits an rw() case for every general-purpose and segment register source.
+#define ALLREGS(v, i)	rw(v, eAX, i); 		rw(v, eCX, i); 		\
+						rw(v, eDX, i);		rw(v, eBX, i); 		\
+						rw(v, eSPorAH, i);	rw(v, eBPorCH, i);	\
+						rw(v, eSIorDH, i);	rw(v, eDIorBH, i);	\
+						rw(v, ES, i);		rw(v, CS, i); 		\
+						rw(v, SS, i);		rw(v, DS, i); 		\
+						rw(v, FS, i);		rw(v, GS, i);
+
+/// Computes base + (index << scale) + displacement for @c pointer, reading the base and index
+/// registers from @c registers; a base or index that names no register contributes zero.
+template <Model model, typename RegistersT, typename MemoryT>
+uint32_t DataPointerResolver<model, RegistersT, MemoryT>::effective_address(
+	RegistersT &registers,
+	const Instruction<is_32bit(model)> &instruction,
+	DataPointer pointer) {
+		using AddressT = typename Instruction<is_32bit(model)>::AddressT;
+		AddressT base = 0, index = 0;
+
+		// Both switches only ever read registers, hence is_write = false.
+		switch(pointer.base()) {
+			default: break;
+			ALLREGS(base, false);
+		}
+
+		switch(pointer.index()) {
+			default: break;
+			ALLREGS(index, false);
+		}
+
+		// The address is always computed as a 32-bit quantity.
+		//
+		// TODO: verify where memory_mask should be applied. 32-bit x86 offers an address-size
+		// modifier that permits 16-bit addresses to be generated in 32-bit mode and vice versa;
+		// what remains to be established is the point in the calculation at which the 16-bit
+		// constraint takes effect. As written, the base and the scaled index are masked but
+		// their sum with the displacement is not.
+		uint32_t address = index;
+		if constexpr (model >= Model::i80386) {
+			address <<= pointer.scale();
+		} else {
+			// Models below the 80386 are expected never to supply a scale.
+			assert(!pointer.scale());
+		}
+
+		// Indexed by the instruction's address-size flag: false => 16-bit mask, true => 32-bit mask.
+		constexpr uint32_t memory_masks[] = {0x0000'ffff, 0xffff'ffff};
+		const uint32_t memory_mask = memory_masks[instruction.address_size_is_32()];
+		address = (address & memory_mask) + (base & memory_mask) + instruction.displacement();
+		return address;
+	}
+
+/// Transfers data between @c value and the location identified by @c pointer.
+///
+/// If @c is_write is true then @c value is stored to the register or memory location; otherwise the
+/// register, memory location or immediate operand is loaded into @c value. A read from a source that
+/// is not otherwise handled produces zero; a write to such a source is ignored.
+template <Model model, typename RegistersT, typename MemoryT>
+template <bool is_write, typename DataT> void DataPointerResolver<model, RegistersT, MemoryT>::access(
+	RegistersT &registers,
+	MemoryT &memory,
+	const Instruction<is_32bit(model)> &instruction,
+	DataPointer pointer,
+	DataT &value) {
+		const Source source = pointer.source();
+
+		switch(source) {
+			default:
+				if constexpr (!is_write) {
+					value = 0;
+				}
+			return;
+
+			ALLREGS(value, is_write);
+
+			case Source::DirectAddress:
+				// The instruction's displacement is used directly as the address.
+				if constexpr(is_write) {
+					memory.template write<DataT>(instruction.data_segment(), instruction.displacement(), value);
+				} else {
+					value = memory.template read<DataT>(instruction.data_segment(), instruction.displacement());
+				}
+			break;
+			case Source::Immediate:
+				value = DataT(instruction.operand());
+			break;
+
+			case Source::Indirect: {
+				const auto address = effective_address(registers, instruction, pointer);
+
+				if constexpr (is_write) {
+					memory.template write<DataT>(
+						instruction.data_segment(),
+						address,
+						value
+					);
+				} else {
+					value = memory.template read<DataT>(
+						instruction.data_segment(),
+						address
+					);
+				}
+			}
+		}
+	}
+#undef ALLREGS
+#undef rw
+
 }
 }

--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@ -522,14 +522,18 @@ template<bool is_32bit> class Instruction {
 				8 bits operation;
 				4 bits original instruction size;
 				2 bits data size;
-				3 bits extension flags.
+				1 bit memory size;
+				2 bits extension flags.

 			Extensions (16 or 32 bit, depending on templated size):
-				1) reptition + segment override + lock + memory size toggle (= 7 bits);
+				1) repetition + segment override + lock + original instruction size (= 10 bits);
 				2) displacement;
 				3) immediate operand.

-			Presence or absence of extensions is dictated by the extention flags.
+			Presence or absence of extensions is dictated by:
+				* instruction size = 0 => the repetition, etc extension (including the real extension size); and
+				* the extension flags for displacement and/or immediate.
+
 			Therefore an instruction's footprint is:
 				* 4–8 bytes (16-bit processors);
 				* 4–12 bytes (32-bit processors).
@ -537,9 +541,9 @@ template<bool is_32bit> class Instruction {
 			I'll then implement a collection suited to packing these things based on their
 			packing_size(), and later iterating them.

-			To verify: do the 8086 and 80286 limit instructions to 15 bytes as later members
-			of the family do? If not then consider original instruction size = 0 to imply an
-			extension of one word prior to the other extensions.
+			To verify: the 8086 allows unlimited-length instructions (which I'll probably handle by
+			generating length-15 NOPs and not resetting parser state), the 80386 limits them to
+			15 bytes, but what do the processors in between do?
 		*/

 	private:
@ -570,7 +574,7 @@ template<bool is_32bit> class Instruction {
 		DataPointer source() const		{	return DataPointer(Source(sources_ & 0x3f), sib_);			}
 		DataPointer destination() const	{	return DataPointer(Source((sources_ >> 6) & 0x3f), sib_);		}
 		bool lock() const				{	return sources_ & 0x8000;					}
-		bool address_size() const 		{	return address_size_;						}
+		/// @returns The stored address-size flag; DataPointerResolver treats @c true as selecting 32-bit addressing and @c false as 16-bit.
+		bool address_size_is_32() const {	return address_size_;						}
 		Source data_segment() const		{
 			const auto segment_override = Source((sources_ >> 12) & 7);
 			if(segment_override != Source::None) return segment_override;