From 2d543590dc5f9220cf3deb0d23f59a83a8419158 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 31 Jan 2022 08:14:33 -0500
Subject: [PATCH 001/104] Make the phase name a noun, for better consistency.

---
 InstructionSets/x86/Decoder.cpp                       | 4 ++--
 InstructionSets/x86/Decoder.hpp                       | 2 +-
 OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj | 2 --
 3 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 4414684d9..ec72bbf8f 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -552,12 +552,12 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 			default: assert(false);
 		}
 
-		phase_ = (displacement_size_ + operand_size_) ? Phase::AwaitingDisplacementOrOperand : Phase::ReadyToPost;
+		phase_ = (displacement_size_ + operand_size_) ? Phase::DisplacementOrOperand : Phase::ReadyToPost;
 	}
 
 	// MARK: - Displacement and operand.
 
-	if(phase_ == Phase::AwaitingDisplacementOrOperand && source != end) {
+	if(phase_ == Phase::DisplacementOrOperand && source != end) {
 		const int required_bytes = displacement_size_ + operand_size_;
 
 		const int outstanding_bytes = required_bytes - operand_bytes_;
diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index 5a21878cd..fe2879b22 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -48,7 +48,7 @@ class Decoder {
 			ModRegRM,
 			/// Waits for sufficiently many bytes to pass for the required displacement and operand to be captured.
 			/// Cf. displacement_size_ and operand_size_.
-			AwaitingDisplacementOrOperand,
+			DisplacementOrOperand,
 			/// Forms and returns an Instruction, and resets parsing state.
 			ReadyToPost
 		} phase_ = Phase::Instruction;
diff --git a/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj b/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj
index 7962a7be7..e96009729 100644
--- a/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj	
+++ b/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj	
@@ -2077,7 +2077,6 @@
 		4BE76CF822641ED300ACD6FA /* QLTests.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = QLTests.mm; sourceTree = "<group>"; };
 		4BE845201F2FF7F100A5EA22 /* CRTC6845.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = CRTC6845.hpp; sourceTree = "<group>"; };
 		4BE8EB5425C0E9D40040BC40 /* Disassembler.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Disassembler.hpp; sourceTree = "<group>"; };
-		4BE8EB5525C0EA490040BC40 /* Sizes.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Sizes.hpp; sourceTree = "<group>"; };
 		4BE8EB6425C750B50040BC40 /* DAT.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = DAT.cpp; sourceTree = "<group>"; };
 		4BE8EB6525C750B50040BC40 /* DAT.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = DAT.hpp; sourceTree = "<group>"; };
 		4BE90FFC22D5864800FB464D /* MacintoshVideoTests.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = MacintoshVideoTests.mm; sourceTree = "<group>"; };
@@ -4707,7 +4706,6 @@
 				4BEDA42925B3C26B000C2DBD /* AccessType.hpp */,
 				4BEDA45425B5ECAB000C2DBD /* CachingExecutor.hpp */,
 				4BE8EB5425C0E9D40040BC40 /* Disassembler.hpp */,
-				4BE8EB5525C0EA490040BC40 /* Sizes.hpp */,
 				4BEDA3B625B25563000C2DBD /* README.md */,
 				4BEDA40925B2844B000C2DBD /* M50740 */,
 				4BEDA3B325B25563000C2DBD /* PowerPC */,

From 85bfd2eba390fd930694f670a8a93c4ec6fb9fc2 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 31 Jan 2022 08:22:07 -0500
Subject: [PATCH 002/104] Remove further errant 'Awaiting's.

---
 InstructionSets/x86/Decoder.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index ec72bbf8f..48f9fe77a 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -39,20 +39,20 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 	SetOpSrcDestSize(op, DirectAddress, dest, size);	\
 	source_ = Source::Immediate;						\
 	operand_size_ = size;								\
-	phase_ = Phase::AwaitingDisplacementOrOperand
+	phase_ = Phase::DisplacementOrOperand
 
 /// Handles instructions of the form Ax, jjkk where the latter is implicitly an address.
 #define RegAddr(op, dest, op_size, addr_size)			\
 	SetOpSrcDestSize(op, DirectAddress, dest, op_size);	\
 	operand_size_ = addr_size;							\
-	phase_ = Phase::AwaitingDisplacementOrOperand
+	phase_ = Phase::DisplacementOrOperand
 
 /// Handles instructions of the form jjkk, Ax where the former is implicitly an address.
 #define AddrReg(op, source, op_size, addr_size)				\
 	SetOpSrcDestSize(op, source, DirectAddress, op_size);	\
 	operand_size_ = addr_size;								\
 	destination_ = Source::DirectAddress;					\
-	phase_ = Phase::AwaitingDisplacementOrOperand
+	phase_ = Phase::DisplacementOrOperand
 
 /// Covers both `mem/reg, reg` and `reg, mem/reg`.
 #define MemRegReg(op, format, size)				\
@@ -65,13 +65,13 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 /// Handles JO, JNO, JB, etc — jumps with a single byte displacement.
 #define Jump(op)									\
 	operation_ = Operation::op;						\
-	phase_ = Phase::AwaitingDisplacementOrOperand;	\
+	phase_ = Phase::DisplacementOrOperand;			\
 	displacement_size_ = 1
 
 /// Handles far CALL and far JMP — fixed four byte operand operations.
 #define Far(op)										\
 	operation_ = Operation::op;						\
-	phase_ = Phase::AwaitingDisplacementOrOperand;	\
+	phase_ = Phase::DisplacementOrOperand;			\
 	operand_size_ = 4;								\
 
 	while(phase_ == Phase::Instruction && source != end) {

From a4da1b6eb0292e7a8340a12baae94c207b25e6f8 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 31 Jan 2022 09:11:06 -0500
Subject: [PATCH 003/104] Begins enumerating the 80286 and 80386 instructions.

---
 InstructionSets/x86/Instruction.hpp | 90 +++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)

diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 3723139b6..128792b75 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -23,6 +23,10 @@ namespace x86 {
 enum class Operation: uint8_t {
 	Invalid,
 
+	//
+	// 8086 instructions.
+	//
+
 	/// ASCII adjust after addition; source will be AL and destination will be AX.
 	AAA,
 	/// ASCII adjust before division; destination will be AX and source will be a multiplier.
@@ -192,6 +196,92 @@ enum class Operation: uint8_t {
 
 	/// Load AL with DS:[AL+BX].
 	XLAT,
+
+	// TODO: expand detail on all operations below.
+
+	//
+	// 80286 additions.
+	//
+
+	// TODO: INS, OUTS, PUSHA, POPA,
+
+	/// Checks an array index against bounds.
+	BOUND,
+
+	/// Create stack frame.
+	ENTER,
+	/// Procedure exit.
+	LEAVE,
+
+	/// Adjusts requested privilege level.
+	ARPL,
+	/// Clears the task-switched flag.
+	CLTS,
+	/// Loads access rights.
+	LAR,
+
+	/// Loads the global descriptor table.
+	LGDT,
+	/// Loads the interrupt descriptor table.
+	LIDT,
+	/// Loads the local descriptor table.
+	LLDT,
+	/// Stores the global descriptor table.
+	SGDT,
+	/// Stores the interrupt descriptor table.
+	SIDT,
+	/// Stores the local descriptor table.
+	SLDT,
+
+	/// Verifies a segment for reading.
+	VERR,
+	/// Verifies a segment for writing.
+	VERW,
+
+	/// Loads the machine status word.
+	LMSW,
+	/// Stores the machine status word.
+	SMSW,
+	/// Loads a segment limit
+	LSL,
+	/// Loads the task register.
+	LTR,
+	/// Stores the task register.
+	STR,
+
+	//
+	// 80386 additions.
+	//
+
+	// TODO: CWDE (as distinct from CWD?), MOVSX, MOVZX, SETcc.
+
+	/// Loads a pointer to FS.
+	LFS,
+	/// Loads a pointer to GS.
+	LGS,
+	/// Loads a pointer to SS.
+	LSS,
+
+	/// Shift left double.
+	SHLD,
+	/// Shift right double.
+	SHRD,
+
+	/// Bit scan forwards.
+	BSF,
+	/// Bit scan reverse.
+	BSR,
+	/// Bit test.
+	BT,
+	/// Bit test and complement.
+	BTC,
+	/// Bit test and reset.
+	BTR,
+	/// Bit test and set.
+	BTS,
+
+	/// Convert dword to qword.
+	CDQ,
 };
 
 enum class Size: uint8_t {

From ae21726287315ef2d88a837da83dabb7543e2742 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Tue, 1 Feb 2022 20:38:10 -0500
Subject: [PATCH 004/104] Splits 80186 additions from 80286; fills in a touch
 more.

---
 InstructionSets/x86/Instruction.hpp | 38 +++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 128792b75..135422d2d 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -200,11 +200,9 @@ enum class Operation: uint8_t {
 	// TODO: expand detail on all operations below.
 
 	//
-	// 80286 additions.
+	// 80186 additions.
 	//
 
-	// TODO: INS, OUTS, PUSHA, POPA,
-
 	/// Checks an array index against bounds.
 	BOUND,
 
@@ -213,6 +211,24 @@ enum class Operation: uint8_t {
 	/// Procedure exit.
 	LEAVE,
 
+	/// Inputs a byte from a port, incrementing or decrementing the destination.
+	INSB,
+	/// Inputs a word from a port, incrementing or decrementingthe destination.
+	INSW,
+	/// Outputs a byte to a port, incrementing or decrementing the destination.
+	OUTSB,
+	/// Outputs a word to a port, incrementing or decrementing the destination.
+	OUTSW,
+
+	/// Pushes all general purpose registers to the stack.
+	PUSHA,
+	/// Pops all general purpose registers from the stack.
+	POPA,
+
+	//
+	// 80286 additions.
+	//
+
 	/// Adjusts requested privilege level.
 	ARPL,
 	/// Clears the task-switched flag.
@@ -249,12 +265,13 @@ enum class Operation: uint8_t {
 	/// Stores the task register.
 	STR,
 
+	/// Undocumented (but used); loads all registers, including internal ones.
+	LOADALL,
+
 	//
 	// 80386 additions.
 	//
 
-	// TODO: CWDE (as distinct from CWD?), MOVSX, MOVZX, SETcc.
-
 	/// Loads a pointer to FS.
 	LFS,
 	/// Loads a pointer to GS.
@@ -280,8 +297,19 @@ enum class Operation: uint8_t {
 	/// Bit test and set.
 	BTS,
 
+	/// Compare string double word.
+	CMPSD,
+	/// [Early 80386s only] Insert bit string.
+	IBITS,
+
+	/// Inputs a double word from a port, incrementing or decrementing the destination.
+	INSD,
+
 	/// Convert dword to qword.
 	CDQ,
+	/// Convert word to dword; AX will be expanded to fill EAX.
+	/// Compare and contrast to CWD which would expand AX to DX:AX.
+	CWDE,
 };
 
 enum class Size: uint8_t {

From 7ceb3369ebe406897da0b96b1e16e2ca27f224e5 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Wed, 9 Feb 2022 17:51:48 -0500
Subject: [PATCH 005/104] Attempts decoding of the 80186 set.

---
 InstructionSets/x86/Decoder.cpp     | 59 +++++++++++++++++++++++++----
 InstructionSets/x86/Decoder.hpp     |  5 +++
 InstructionSets/x86/Instruction.hpp | 20 +++++-----
 3 files changed, 66 insertions(+), 18 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 48f9fe77a..380bd8c5a 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -15,7 +15,7 @@
 using namespace InstructionSet::x86;
 
 // Only 8086 is suppoted for now.
-Decoder::Decoder(Model) {}
+Decoder::Decoder(Model model) : model_(model) {}
 
 std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *source, size_t length) {
 	const uint8_t *const end = source + length;
@@ -74,6 +74,13 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 	phase_ = Phase::DisplacementOrOperand;			\
 	operand_size_ = 4;								\
 
+/// Handles ENTER — a fixed three-byte operation.
+#define Displacement16Operand8(op)					\
+	operation_ = Operation::op;						\
+	phase_ = Phase::DisplacementOrOperand;			\
+	displacement_size_ = 2;							\
+	operand_size_ = 1;								\
+
 	while(phase_ == Phase::Instruction && source != end) {
 		// Retain the instruction byte, in case additional decoding is deferred
 		// to the ModRegRM byte.
@@ -81,12 +88,14 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 		++source;
 		++consumed_;
 
+#define undefined()	{	\
+	const auto result = std::make_pair(consumed_, Instruction());	\
+	reset_parsing();	\
+	return result;	\
+}
+
 		switch(instr_) {
-			default: {
-				const auto result = std::make_pair(consumed_, Instruction());
-				reset_parsing();
-				return result;
-			}
+			default: undefined();
 
 #define PartialBlock(start, operation)								\
 	case start + 0x00: MemRegReg(operation, MemReg_Reg, 1);	break;	\
@@ -146,7 +155,34 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 
 #undef RegisterBlock
 
-			// 0x60–0x6f: not used.
+			case 0x60:
+				if(model_ < Model::i80186) undefined();
+				Complete(PUSHA, None, None, 2);
+			break;
+			case 0x61:
+				if(model_ < Model::i80186) undefined();
+				Complete(POPA, None, None, 2);
+			break;
+			case 0x62:
+				if(model_ < Model::i80186) undefined();
+				MemRegReg(BOUND, Reg_MemReg, 2);
+			break;
+			case 0x6c:	// INSB
+				if(model_ < Model::i80186) undefined();
+				Complete(INS, None, None, 1);
+			break;
+			case 0x6d:	// INSW
+				if(model_ < Model::i80186) undefined();
+				Complete(INS, None, None, 2);
+			break;
+			case 0x6e:	// OUTSB
+				if(model_ < Model::i80186) undefined();
+				Complete(OUTS, None, None, 1);
+			break;
+			case 0x6f:	// OUTSW
+				if(model_ < Model::i80186) undefined();
+				Complete(OUTS, None, None, 2);
+			break;
 
 			case 0x70: Jump(JO);	break;
 			case 0x71: Jump(JNO);	break;
@@ -243,6 +279,15 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 			case 0xc6: MemRegReg(MOV, MemRegMOV, 1);	break;
 			case 0xc7: MemRegReg(MOV, MemRegMOV, 2);	break;
 
+			case 0xc8:
+				if(model_ < Model::i80186) undefined();
+				Displacement16Operand8(ENTER);
+			break;
+			case 0xc9:
+				if(model_ < Model::i80186) undefined();
+				Complete(LEAVE, None, None, 0);
+			break;
+
 			case 0xca: RegData(RETF, None, 2);			break;
 			case 0xcb: Complete(RETF, None, None, 4);	break;
 
diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index fe2879b22..4f0cdcc7b 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -19,6 +19,9 @@ namespace x86 {
 
 enum class Model {
 	i8086,
+	i80186,
+	i80286,
+	i80386,
 };
 
 /*!
@@ -40,6 +43,8 @@ class Decoder {
 		std::pair<int, Instruction> decode(const uint8_t *source, size_t length);
 
 	private:
+		const Model model_;
+
 		enum class Phase {
 			/// Captures all prefixes and continues until an instruction byte is encountered.
 			Instruction,
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 135422d2d..2531daf84 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -208,21 +208,19 @@ enum class Operation: uint8_t {
 
 	/// Create stack frame.
 	ENTER,
-	/// Procedure exit.
+	/// Procedure exit; copies BP to SP, then pops a new BP from the stack.
 	LEAVE,
 
-	/// Inputs a byte from a port, incrementing or decrementing the destination.
-	INSB,
-	/// Inputs a word from a port, incrementing or decrementingthe destination.
-	INSW,
-	/// Outputs a byte to a port, incrementing or decrementing the destination.
-	OUTSB,
-	/// Outputs a word to a port, incrementing or decrementing the destination.
-	OUTSW,
+	/// Inputs from a port, incrementing or decrementing the destination.
+	INS,
+	/// Outputs to a port, incrementing or decrementing the destination.
+	OUTS,
 
-	/// Pushes all general purpose registers to the stack.
+	/// Pushes all general purpose registers to the stack, in the order:
+	/// AX, CX, DX, BX, [original] SP, BP, SI, DI.
 	PUSHA,
-	/// Pops all general purpose registers from the stack.
+	/// Pops all general purpose registers from the stack, in the reverse of
+	/// the PUSHA order, i.e. DI, SI, BP, [final] SP, BX, DX, CX, AX.
 	POPA,
 
 	//

From 0bd63cf00f4e3c6cb5fa8d843f89032a5b9bfd8b Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Thu, 10 Feb 2022 09:35:05 -0500
Subject: [PATCH 006/104] Introduces the easy F page (0x0f-prefixed) instructions.

---
 InstructionSets/x86/Decoder.cpp | 45 ++++++++++++++++++++++++++++-----
 InstructionSets/x86/Decoder.hpp |  4 ++-
 2 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 380bd8c5a..bf03357bf 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -81,6 +81,12 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 	displacement_size_ = 2;							\
 	operand_size_ = 1;								\
 
+#define undefined()	{												\
+	const auto result = std::make_pair(consumed_, Instruction());	\
+	reset_parsing();												\
+	return result;													\
+}
+
 	while(phase_ == Phase::Instruction && source != end) {
 		// Retain the instruction byte, in case additional decoding is deferred
 		// to the ModRegRM byte.
@@ -88,12 +94,6 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 		++source;
 		++consumed_;
 
-#define undefined()	{	\
-	const auto result = std::make_pair(consumed_, Instruction());	\
-	reset_parsing();	\
-	return result;	\
-}
-
 		switch(instr_) {
 			default: undefined();
 
@@ -112,6 +112,13 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 			PartialBlock(0x08, OR);						break;
 			case 0x0e: Complete(PUSH, CS, None, 2);		break;
 
+			// The 286 onwards have a further set of instructions
+			// prefixed with $0f.
+			case 0x0f:
+				if(model_ < Model::i80286) undefined();
+				phase_ = Phase::InstructionPageF;
+			break;
+
 			PartialBlock(0x10, ADC);					break;
 			case 0x16: Complete(PUSH, SS, None, 2);		break;
 			case 0x17: Complete(POP, None, SS, 2);		break;
@@ -167,6 +174,10 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 				if(model_ < Model::i80186) undefined();
 				MemRegReg(BOUND, Reg_MemReg, 2);
 			break;
+			case 0x63:
+				if(model_ < Model::i80286) undefined();
+				MemRegReg(ARPL, MemReg_Reg, 2);
+			break;
 			case 0x6c:	// INSB
 				if(model_ < Model::i80186) undefined();
 				Complete(INS, None, None, 1);
@@ -365,6 +376,28 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 		}
 	}
 
+	// MARK: - Additional F page of instructions.
+	if(phase_ == Phase::InstructionPageF && source != end) {
+		// Update the instruction acquired.
+		instr_ = 0x0f00 | *source;
+		++source;
+		++consumed_;
+
+		// NB: to reach here, the instruction set must be at least
+		// that of an 80286.
+		switch(instr_) {
+			default: undefined();
+
+			case 0x02:	MemRegReg(LAR, Reg_MemReg, 2);	break;
+			case 0x03:	MemRegReg(LSL, Reg_MemReg, 2);	break;
+			case 0x06:	Complete(CLTS, None, None, 1);	break;
+		}
+			// TODO: 0x0f 0x00 -> LLDT/SLDT/VERR/VERW/LTR/STR
+			// TODO: 0x0f 0x01 -> LGDT/LIDT/SGDT/SIDT/LMSW/SMSW
+			// TODO: 0x0f 0x05 -> LOADALL
+
+	}
+
 #undef Far
 #undef Jump
 #undef MemRegReg
diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index 4f0cdcc7b..dd0182d36 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -48,6 +48,8 @@ class Decoder {
 		enum class Phase {
 			/// Captures all prefixes and continues until an instruction byte is encountered.
 			Instruction,
+			/// Having encountered a 0x0f first instruction byte, waits for the next byte fully to determine the instruction.
+			InstructionPageF,
 			/// Receives a ModRegRM byte and either populates the source_ and dest_ fields appropriately
 			/// or completes decoding of the instruction, as per the instruction format.
 			ModRegRM,
@@ -119,7 +121,7 @@ class Decoder {
 
 		// Ephemeral decoding state.
 		Operation operation_ = Operation::Invalid;
-		uint8_t instr_ = 0x00;	// TODO: is this desired, versus loading more context into ModRegRMFormat?
+		uint16_t instr_ = 0x0000;	// TODO: is this desired, versus loading more context into ModRegRMFormat?
 		int consumed_ = 0, operand_bytes_ = 0;
 
 		// Source and destination locations.

From cd5ca3f65b64291c105b155237e7213df457e56e Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Thu, 10 Feb 2022 17:13:50 -0500
Subject: [PATCH 007/104] Attempts a full decoding of the 80286 instruction
 set.

---
 InstructionSets/x86/Decoder.cpp | 85 +++++++++++++++++----------------
 InstructionSets/x86/Decoder.hpp | 10 ++++
 2 files changed, 55 insertions(+), 40 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index bf03357bf..918bc78cb 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -388,14 +388,16 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 		switch(instr_) {
 			default: undefined();
 
-			case 0x02:	MemRegReg(LAR, Reg_MemReg, 2);	break;
-			case 0x03:	MemRegReg(LSL, Reg_MemReg, 2);	break;
-			case 0x06:	Complete(CLTS, None, None, 1);	break;
+			case 0x00:	MemRegReg(Invalid, MemRegSLDT_to_VERW, 2);	break;
+			case 0x01:	MemRegReg(Invalid, MemRegSGDT_to_LMSW, 2);	break;
+			case 0x02:	MemRegReg(LAR, Reg_MemReg, 2);				break;
+			case 0x03:	MemRegReg(LSL, Reg_MemReg, 2);				break;
+			case 0x05:
+				if(model_ != Model::i80286) undefined();
+				Complete(LOADALL, None, None, 0);
+			break;
+			case 0x06:	Complete(CLTS, None, None, 1);				break;
 		}
-			// TODO: 0x0f 0x00 -> LLDT/SLDT/VERR/VERW/LTR/STR
-			// TODO: 0x0f 0x01 -> LGDT/LIDT/SGDT/SIDT/LMSW/SMSW
-			// TODO: 0x0f 0x05 -> LOADALL
-
 	}
 
 #undef Far
@@ -456,9 +458,7 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 
 				// LES and LDS accept a memory argument only, not a register.
 				if(operation_ == Operation::LES || operation_ == Operation::LDS) {
-					const auto result = std::make_pair(consumed_, Instruction());
-					reset_parsing();
-					return result;
+					undefined();
 				}
 			break;
 		}
@@ -479,11 +479,7 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 				source_ = destination_ = memreg;
 
 				switch(reg) {
-					default: {
-						const auto result = std::make_pair(consumed_, Instruction());
-						reset_parsing();
-						return result;
-					}
+					default: undefined();
 
 					case 0: 	operation_ = Operation::TEST;	break;
 					case 2: 	operation_ = Operation::NOT;	break;
@@ -504,9 +500,7 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 				};
 
 				if(reg & 4) {
-					const auto result = std::make_pair(consumed_, Instruction());
-					reset_parsing();
-					return result;
+					undefined();
 				}
 
 				destination_ = seg_table[reg];
@@ -516,11 +510,7 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 				destination_ = memreg;
 
 				switch(reg) {
-					default: {
-						const auto result = std::make_pair(consumed_, Instruction());
-						reset_parsing();
-						return result;
-					}
+					default: 	undefined();
 
 					case 0: 	operation_ = Operation::ROL;	break;
 					case 2: 	operation_ = Operation::ROR;	break;
@@ -536,11 +526,7 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 				source_ = destination_ = memreg;
 
 				switch(reg) {
-					default: {
-						const auto result = std::make_pair(consumed_, Instruction());
-						reset_parsing();
-						return result;
-					}
+					default: 	undefined();
 
 					case 0:		operation_ = Operation::INC;	break;
 					case 1:		operation_ = Operation::DEC;	break;
@@ -551,11 +537,7 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 				source_ = destination_ = memreg;
 
 				switch(reg) {
-					default: {
-						const auto result = std::make_pair(consumed_, Instruction());
-						reset_parsing();
-						return result;
-					}
+					default: 	undefined();
 
 					case 0:		operation_ = Operation::INC;	break;
 					case 1:		operation_ = Operation::DEC;	break;
@@ -579,8 +561,7 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 				source_ = destination_ = memreg;
 
 				if(reg != 0) {
-					reset_parsing();
-					return std::make_pair(consumed_, Instruction());
+					undefined();
 				}
 			break;
 
@@ -613,11 +594,7 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 									// the operation requires it.
 
 				switch(reg) {
-					default: {
-						const auto result = std::make_pair(consumed_, Instruction());
-						reset_parsing();
-						return result;
-					}
+					default: undefined();
 
 					case 0: 	operation_ = Operation::ADD;	break;
 					case 2: 	operation_ = Operation::ADC;	break;
@@ -627,6 +604,34 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 				}
 			break;
 
+			case ModRegRMFormat::MemRegSLDT_to_VERW:
+				destination_ = source_ = memreg;
+
+				switch(reg) {
+					default: undefined();
+
+					case 0: 	operation_ = Operation::SLDT;	break;
+					case 1: 	operation_ = Operation::STR;	break;
+					case 2: 	operation_ = Operation::LLDT;	break;
+					case 3: 	operation_ = Operation::LTR;	break;
+					case 4: 	operation_ = Operation::VERR;	break;
+					case 5: 	operation_ = Operation::VERW;	break;
+				}
+			break;
+
+			case ModRegRMFormat::MemRegSGDT_to_LMSW:
+				destination_ = source_ = memreg;
+
+				switch(reg) {
+					default: undefined();
+
+					case 0: 	operation_ = Operation::SGDT;	break;
+					case 2: 	operation_ = Operation::LGDT;	break;
+					case 4: 	operation_ = Operation::SMSW;	break;
+					case 6: 	operation_ = Operation::LMSW;	break;
+				}
+			break;
+
 			default: assert(false);
 		}
 
diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index dd0182d36..fd28eb4bb 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -117,6 +117,16 @@ class Decoder {
 			// 'register' field to pick from ADD/ADC/SBB/SUB/CMP, altering
 			// the source to ::Immediate and setting an appropriate operand size.
 			MemRegADC_to_CMP,
+
+			// Parse for mode and register/memory field, populating both source_
+			// and destination_ fields with the result. Uses the 'register' field
+			// to pick from SLDT/STR/LLDT/LTR/VERR/VERW.
+			MemRegSLDT_to_VERW,
+
+			// Parse for mode and register/memory field, populating both source_
+			// and destination_ fields with the result. Uses the 'register' field
+			// to pick from SGDT/LGDT/SMSW/LMSW.
+			MemRegSGDT_to_LMSW,
 		} modregrm_format_ = ModRegRMFormat::MemReg_Reg;
 
 		// Ephemeral decoding state.

From 12df7112da4df066cc712e48798587464aae256d Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Thu, 17 Feb 2022 11:32:09 -0500
Subject: [PATCH 008/104] Starts adjusting the concept of a `Source`.

---
 InstructionSets/x86/Decoder.cpp     | 101 ++++++++++++++--------------
 InstructionSets/x86/Instruction.hpp |  68 ++++++++++++++++---
 2 files changed, 108 insertions(+), 61 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 918bc78cb..430f55daf 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -102,8 +102,8 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 	case start + 0x01: MemRegReg(operation, MemReg_Reg, 2);	break;	\
 	case start + 0x02: MemRegReg(operation, Reg_MemReg, 1);	break;	\
 	case start + 0x03: MemRegReg(operation, Reg_MemReg, 2);	break;	\
-	case start + 0x04: RegData(operation, AL, 1);			break;	\
-	case start + 0x05: RegData(operation, AX, 2)
+	case start + 0x04: RegData(operation, eAX, 1);			break;	\
+	case start + 0x05: RegData(operation, eAX, 2)
 
 			PartialBlock(0x00, ADD);					break;
 			case 0x06: Complete(PUSH, ES, None, 2);		break;
@@ -137,23 +137,23 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 
 			PartialBlock(0x30, XOR);					break;
 			case 0x36: segment_override_ = Source::SS;	break;
-			case 0x37: Complete(AAA, AL, AX, 1);		break;
+			case 0x37: Complete(AAA, AL, eAX, 2);		break;
 
 			PartialBlock(0x38, CMP);					break;
 			case 0x3e: segment_override_ = Source::DS;	break;
-			case 0x3f: Complete(AAS, AL, AX, 1);		break;
+			case 0x3f: Complete(AAS, AL, eAX, 2);		break;
 
 #undef PartialBlock
 
-#define RegisterBlock(start, operation)							\
-	case start + 0x00: Complete(operation, AX, AX, 2);	break;	\
-	case start + 0x01: Complete(operation, CX, CX, 2);	break;	\
-	case start + 0x02: Complete(operation, DX, DX, 2);	break;	\
-	case start + 0x03: Complete(operation, BX, BX, 2);	break;	\
-	case start + 0x04: Complete(operation, SP, SP, 2);	break;	\
-	case start + 0x05: Complete(operation, BP, BP, 2);	break;	\
-	case start + 0x06: Complete(operation, SI, SI, 2);	break;	\
-	case start + 0x07: Complete(operation, DI, DI, 2)
+#define RegisterBlock(start, operation)								\
+	case start + 0x00: Complete(operation, eAX, eAX, 2);	break;	\
+	case start + 0x01: Complete(operation, eCX, eCX, 2);	break;	\
+	case start + 0x02: Complete(operation, eDX, eDX, 2);	break;	\
+	case start + 0x03: Complete(operation, eBX, eBX, 2);	break;	\
+	case start + 0x04: Complete(operation, eSP, eSP, 2);	break;	\
+	case start + 0x05: Complete(operation, eBP, eBP, 2);	break;	\
+	case start + 0x06: Complete(operation, eSI, eSI, 2);	break;	\
+	case start + 0x07: Complete(operation, eDI, eDI, 2)
 
 			RegisterBlock(0x40, INC);	break;
 			RegisterBlock(0x48, DEC);	break;
@@ -231,16 +231,16 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 			case 0x8f: MemRegReg(POP, MemRegPOP, 2);	break;
 
 			case 0x90: Complete(NOP, None, None, 0);	break;	// Or XCHG AX, AX?
-			case 0x91: Complete(XCHG, AX, CX, 2);		break;
-			case 0x92: Complete(XCHG, AX, DX, 2);		break;
-			case 0x93: Complete(XCHG, AX, BX, 2);		break;
-			case 0x94: Complete(XCHG, AX, SP, 2);		break;
-			case 0x95: Complete(XCHG, AX, BP, 2);		break;
-			case 0x96: Complete(XCHG, AX, SI, 2);		break;
-			case 0x97: Complete(XCHG, AX, DI, 2);		break;
+			case 0x91: Complete(XCHG, eAX, eCX, 2);		break;
+			case 0x92: Complete(XCHG, eAX, eDX, 2);		break;
+			case 0x93: Complete(XCHG, eAX, eBX, 2);		break;
+			case 0x94: Complete(XCHG, eAX, eSP, 2);		break;
+			case 0x95: Complete(XCHG, eAX, eBP, 2);		break;
+			case 0x96: Complete(XCHG, eAX, eSI, 2);		break;
+			case 0x97: Complete(XCHG, eAX, eDI, 2);		break;
 
 			case 0x98: Complete(CBW, AL, AH, 1);		break;
-			case 0x99: Complete(CWD, AX, DX, 2);		break;
+			case 0x99: Complete(CWD, eAX, eDX, 2);		break;
 			case 0x9a: Far(CALLF);						break;
 			case 0x9b: Complete(WAIT, None, None, 0);	break;
 			case 0x9c: Complete(PUSHF, None, None, 2);	break;
@@ -249,16 +249,16 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 			case 0x9f: Complete(LAHF, None, None, 1);	break;
 
 			case 0xa0: RegAddr(MOV, AL, 1, 1);	break;
-			case 0xa1: RegAddr(MOV, AX, 2, 2);	break;
+			case 0xa1: RegAddr(MOV, eAX, 2, 2);	break;
 			case 0xa2: AddrReg(MOV, AL, 1, 1);	break;
-			case 0xa3: AddrReg(MOV, AX, 2, 2);	break;
+			case 0xa3: AddrReg(MOV, eAX, 2, 2);	break;
 
 			case 0xa4: Complete(MOVS, None, None, 1);	break;
 			case 0xa5: Complete(MOVS, None, None, 2);	break;
 			case 0xa6: Complete(CMPS, None, None, 1);	break;
 			case 0xa7: Complete(CMPS, None, None, 2);	break;
 			case 0xa8: RegData(TEST, AL, 1);			break;
-			case 0xa9: RegData(TEST, AX, 2);			break;
+			case 0xa9: RegData(TEST, eAX, 2);			break;
 			case 0xaa: Complete(STOS, None, None, 1);	break;
 			case 0xab: Complete(STOS, None, None, 2);	break;
 			case 0xac: Complete(LODS, None, None, 1);	break;
@@ -266,22 +266,22 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 			case 0xae: Complete(SCAS, None, None, 1);	break;
 			case 0xaf: Complete(SCAS, None, None, 2);	break;
 
-			case 0xb0: RegData(MOV, AL, 1);	break;
-			case 0xb1: RegData(MOV, CL, 1);	break;
-			case 0xb2: RegData(MOV, DL, 1);	break;
-			case 0xb3: RegData(MOV, BL, 1);	break;
-			case 0xb4: RegData(MOV, AH, 1);	break;
-			case 0xb5: RegData(MOV, CH, 1);	break;
-			case 0xb6: RegData(MOV, DH, 1);	break;
-			case 0xb7: RegData(MOV, BH, 1);	break;
-			case 0xb8: RegData(MOV, AX, 2);	break;
-			case 0xb9: RegData(MOV, CX, 2);	break;
-			case 0xba: RegData(MOV, DX, 2);	break;
-			case 0xbb: RegData(MOV, BX, 2);	break;
-			case 0xbc: RegData(MOV, SP, 2);	break;
-			case 0xbd: RegData(MOV, BP, 2);	break;
-			case 0xbe: RegData(MOV, SI, 2);	break;
-			case 0xbf: RegData(MOV, DI, 2);	break;
+			case 0xb0: RegData(MOV, eAX, 1);	break;
+			case 0xb1: RegData(MOV, eCX, 1);	break;
+			case 0xb2: RegData(MOV, eDX, 1);	break;
+			case 0xb3: RegData(MOV, eBX, 1);	break;
+			case 0xb4: RegData(MOV, AH, 1);		break;
+			case 0xb5: RegData(MOV, CH, 1);		break;
+			case 0xb6: RegData(MOV, DH, 1);		break;
+			case 0xb7: RegData(MOV, BH, 1);		break;
+			case 0xb8: RegData(MOV, eAX, 2);	break;
+			case 0xb9: RegData(MOV, eCX, 2);	break;
+			case 0xba: RegData(MOV, eDX, 2);	break;
+			case 0xbb: RegData(MOV, eBX, 2);	break;
+			case 0xbc: RegData(MOV, eSP, 2);	break;
+			case 0xbd: RegData(MOV, eBP, 2);	break;
+			case 0xbe: RegData(MOV, eSI, 2);	break;
+			case 0xbf: RegData(MOV, eDI, 2);	break;
 
 			case 0xc2: RegData(RETN, None, 2);			break;
 			case 0xc3: Complete(RETN, None, None, 2);	break;
@@ -320,8 +320,8 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 				operation_size_ = 1 + (instr_ & 1);
 				source_ = Source::CL;
 			break;
-			case 0xd4: RegData(AAM, AX, 1);				break;
-			case 0xd5: RegData(AAD, AX, 1);				break;
+			case 0xd4: RegData(AAM, eAX, 1);				break;
+			case 0xd5: RegData(AAD, eAX, 1);				break;
 
 			case 0xd7: Complete(XLAT, None, None, 1);	break;
 
@@ -354,6 +354,10 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 			case 0xee: Complete(OUT, AL, DX, 1);	break;
 			case 0xef: Complete(OUT, AX, DX, 2);	break;
 
+			case 0xf0: lock_ = true;					break;
+			case 0xf2: repetition_ = Repetition::RepNE;	break;
+			case 0xf3: repetition_ = Repetition::RepE;	break;
+
 			case 0xf4: Complete(HLT, None, None, 1);				break;
 			case 0xf5: Complete(CMC, None, None, 1);				break;
 			case 0xf6: MemRegReg(Invalid, MemRegTEST_to_IDIV, 1);	break;
@@ -368,11 +372,6 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 
 			case 0xfe: MemRegReg(Invalid, MemRegINC_DEC, 1);		break;
 			case 0xff: MemRegReg(Invalid, MemRegINC_to_PUSH, 1);	break;
-
-			// Other prefix bytes.
-			case 0xf0:	lock_ = true;						break;
-			case 0xf2:	repetition_ = Repetition::RepNE;	break;
-			case 0xf3:	repetition_ = Repetition::RepE;		break;
 		}
 	}
 
@@ -422,11 +421,11 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 		constexpr Source reg_table[3][8] = {
 			{},
 			{
-				Source::AL,	Source::CL,	Source::DL,	Source::BL,
-				Source::AH,	Source::CH,	Source::DH,	Source::BH,
+				Source::AL,		Source::eCX,	Source::eDX,	Source::eBX,
+				Source::AH,		Source::CH,		Source::DH,		Source::BH,
 			}, {
-				Source::AX,	Source::CX,	Source::DX,	Source::BX,
-				Source::SP,	Source::BP,	Source::SI,	Source::DI,
+				Source::eAX,	Source::eCX,	Source::eDX,	Source::eBX,
+				Source::eSP,	Source::eBP,	Source::eSI,	Source::eDI,
 			}
 		};
 		switch(mod) {
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 2531daf84..33b2eb325 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -318,34 +318,82 @@ enum class Size: uint8_t {
 };
 
 enum class Source: uint8_t {
-	None,
-	CS, DS, ES, SS,
+	// These are in SIB order; this matters for packing later on.
+	// Whether each refers to e.g. EAX or AX depends on the
+	// instruction's data size.
+	eAX, eCX, eDX, eBX, eSP, eBP, eSI, eDI,
 
-	AL, AH, AX,
-	BL, BH, BX,
-	CL, CH, CX,
-	DL, DH, DX,
+	// Selectors are provided as a group.
+	CS, DS, ES, SS, FS, GS,
 
-	SI, DI,
-	BP, SP,
+	DirectAddress,
+	Immediate,
+	Indirect,
 
+	// Legacy 8-bit registers that can't be described as e.g. 8-bit eAX,
+	// or where the source is 8-bit but the destination is 16-bit.
+	AL, BL, CL, DL,
+	AH, BH, CH, DH,
+
+	// TODO: can these all be eliminated in favour of eAX,2, etc?
+	AX, BX, CX, DX,
+
+	// TODO: compact and replace with a reference to a SIB.
 	IndBXPlusSI,
 	IndBXPlusDI,
 	IndBPPlusSI,
 	IndBPPlusDI,
 	IndSI,
 	IndDI,
-	DirectAddress,
 	IndBP,
 	IndBX,
 
-	Immediate
+	/// @c None can be treated as a source that produces 0 when encountered;
+	/// it is semantically valid to receive it with that meaning in some contexts —
+	/// e.g. to indicate no index in indirect addressing.
+	None,
 };
 
 enum class Repetition: uint8_t {
 	None, RepE, RepNE
 };
 
+/// Provides a 32-bit-style scale, index and base; to produce the address this represents,
+/// calculate base() + (index() << scale()).
+///
+/// This form of indirect addressing is used to describe both 16- and 32-bit indirect addresses,
+/// even though it is a superset of that supported prior to the 80386.
+class ScaleIndexBase {
+	public:
+		ScaleIndexBase(uint8_t sib) : sib_(sib) {}	// Wraps a raw SIB byte as-is.
+		ScaleIndexBase(int scale, Source index, Source base) : sib_(uint8_t(scale << 6 | (int(index != Source::None ? index : Source::eSP) << 3) | int(base))) {}	// A None index is stored as field value 4 (eSP's slot), which index() reads back as None.
+
+		/// @returns the power of two by which to multiply @c index() before adding it to @c base(); this is the top two bits of the SIB byte.
+		int scale() const {
+			return sib_ >> 6;
+		}
+
+		/// @returns the @c index for this address; this is guaranteed to be one of eAX, eBX, eCX, eDX, None, eBP, eSI or eDI. An index field of 4 decodes as None rather than eSP.
+		Source index() const {
+			constexpr Source sources[] = {
+				Source::eAX, Source::eCX, Source::eDX, Source::eBX, Source::None, Source::eBP, Source::eSI, Source::eDI,
+			};
+			static_assert(sizeof(sources) == 8);
+			return sources[(sib_ >> 3) & 0x7];
+		}
+
+		/// @returns the @c base for this address; this is guaranteed to be one of eAX, eBX, eCX, eDX, eSP, eBP, eSI or eDI.
+		Source base() const {
+			return Source(sib_ & 0x7);
+		}
+
+	private:
+		// Data is stored directly as an 80386 SIB byte: scale in bits 7–6, index in bits 5–3, base in bits 2–0.
+		const uint8_t sib_ = 0;
+};
+static_assert(sizeof(ScaleIndexBase) == 1);
+static_assert(alignof(ScaleIndexBase) == 1);
+
 class Instruction {
 	public:
 		Operation operation = Operation::Invalid;

From c257b91552eb6f4ac3ba34706f338b8a3bd62ba6 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Fri, 18 Feb 2022 16:32:28 -0500
Subject: [PATCH 009/104] Update tests to preference away from [A/B/C/D]L.

---
 .../Clock Signal.xcodeproj/project.pbxproj    | 22 +-------
 .../Mac/Clock SignalTests/x86DecoderTests.mm  | 56 +++++++++----------
 2 files changed, 30 insertions(+), 48 deletions(-)

diff --git a/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj b/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj
index e96009729..99ca08e07 100644
--- a/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj	
+++ b/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj	
@@ -6116,9 +6116,7 @@
 				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
 				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
 				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
-				CODE_SIGN_IDENTITY = "-";
 				CODE_SIGN_STYLE = Automatic;
-				DEVELOPMENT_TEAM = DV3346VVUN;
 				FRAMEWORK_SEARCH_PATHS = (
 					"$(inherited)",
 					"$(USER_LIBRARY_DIR)/Frameworks",
@@ -6141,9 +6139,7 @@
 				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
 				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
 				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
-				CODE_SIGN_IDENTITY = "-";
 				CODE_SIGN_STYLE = Automatic;
-				DEVELOPMENT_TEAM = DV3346VVUN;
 				FRAMEWORK_SEARCH_PATHS = (
 					"$(inherited)",
 					"$(USER_LIBRARY_DIR)/Frameworks",
@@ -6188,9 +6184,9 @@
 				CLANG_WARN_SUSPICIOUS_MOVE = YES;
 				CLANG_WARN_UNREACHABLE_CODE = YES;
 				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
-				CODE_SIGN_IDENTITY = "-";
 				COPY_PHASE_STRIP = NO;
 				DEBUG_INFORMATION_FORMAT = dwarf;
+				DEVELOPMENT_TEAM = DV3346VVUN;
 				ENABLE_STRICT_OBJC_MSGSEND = YES;
 				ENABLE_TESTABILITY = YES;
 				GCC_C_LANGUAGE_STANDARD = gnu99;
@@ -6247,9 +6243,9 @@
 				CLANG_WARN_SUSPICIOUS_MOVE = YES;
 				CLANG_WARN_UNREACHABLE_CODE = YES;
 				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
-				CODE_SIGN_IDENTITY = "Mac Developer";
 				COPY_PHASE_STRIP = NO;
 				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+				DEVELOPMENT_TEAM = DV3346VVUN;
 				ENABLE_NS_ASSERTIONS = NO;
 				ENABLE_STRICT_OBJC_MSGSEND = YES;
 				GCC_C_LANGUAGE_STANDARD = gnu99;
@@ -6284,9 +6280,7 @@
 				CLANG_WARN_SEMICOLON_BEFORE_METHOD_BODY = YES;
 				CLANG_WARN_SUSPICIOUS_IMPLICIT_CONVERSION = YES;
 				CODE_SIGN_ENTITLEMENTS = "Clock Signal/Clock Signal.entitlements";
-				CODE_SIGN_IDENTITY = "-";
 				CODE_SIGN_STYLE = Automatic;
-				DEVELOPMENT_TEAM = DV3346VVUN;
 				ENABLE_APP_SANDBOX = YES;
 				ENABLE_HARDENED_RUNTIME = YES;
 				FRAMEWORK_SEARCH_PATHS = (
@@ -6316,7 +6310,6 @@
 				);
 				PRODUCT_BUNDLE_IDENTIFIER = "TH.Clock-Signal";
 				PRODUCT_NAME = "$(TARGET_NAME)";
-				PROVISIONING_PROFILE_SPECIFIER = "";
 				SWIFT_OBJC_BRIDGING_HEADER = "Clock Signal/ClockSignal-Bridging-Header.h";
 				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
 			};
@@ -6335,9 +6328,7 @@
 				CLANG_WARN_SEMICOLON_BEFORE_METHOD_BODY = YES;
 				CLANG_WARN_SUSPICIOUS_IMPLICIT_CONVERSION = YES;
 				CODE_SIGN_ENTITLEMENTS = "Clock Signal/Clock Signal.entitlements";
-				CODE_SIGN_IDENTITY = "-";
 				CODE_SIGN_STYLE = Automatic;
-				DEVELOPMENT_TEAM = DV3346VVUN;
 				ENABLE_APP_SANDBOX = YES;
 				ENABLE_HARDENED_RUNTIME = YES;
 				FRAMEWORK_SEARCH_PATHS = (
@@ -6369,7 +6360,6 @@
 				);
 				PRODUCT_BUNDLE_IDENTIFIER = "TH.Clock-Signal";
 				PRODUCT_NAME = "$(TARGET_NAME)";
-				PROVISIONING_PROFILE_SPECIFIER = "";
 				SWIFT_OBJC_BRIDGING_HEADER = "Clock Signal/ClockSignal-Bridging-Header.h";
 			};
 			name = Release;
@@ -6380,10 +6370,7 @@
 				BUNDLE_LOADER = "$(TEST_HOST)";
 				CLANG_CXX_LANGUAGE_STANDARD = "c++17";
 				CLANG_ENABLE_MODULES = YES;
-				CODE_SIGN_ENTITLEMENTS = "Clock Signal/Clock Signal.entitlements";
-				CODE_SIGN_IDENTITY = "Apple Development";
 				CODE_SIGN_STYLE = Automatic;
-				DEVELOPMENT_TEAM = "";
 				ENABLE_HARDENED_RUNTIME = NO;
 				INFOPLIST_FILE = "Clock SignalTests/Info.plist";
 				LD_RUNPATH_SEARCH_PATHS = (
@@ -6394,7 +6381,6 @@
 				MACOSX_DEPLOYMENT_TARGET = 11.0;
 				PRODUCT_BUNDLE_IDENTIFIER = "TH.Clock-SignalTests";
 				PRODUCT_NAME = "$(TARGET_NAME)";
-				PROVISIONING_PROFILE_SPECIFIER = "";
 				SWIFT_OBJC_BRIDGING_HEADER = "Clock SignalTests/Bridges/Clock SignalTests-Bridging-Header.h";
 				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
 				SWIFT_VERSION = 5.0;
@@ -6408,10 +6394,7 @@
 				BUNDLE_LOADER = "$(TEST_HOST)";
 				CLANG_CXX_LANGUAGE_STANDARD = "c++17";
 				CLANG_ENABLE_MODULES = YES;
-				CODE_SIGN_ENTITLEMENTS = "Clock Signal/Clock Signal.entitlements";
-				CODE_SIGN_IDENTITY = "Apple Development";
 				CODE_SIGN_STYLE = Automatic;
-				DEVELOPMENT_TEAM = "";
 				ENABLE_HARDENED_RUNTIME = NO;
 				GCC_OPTIMIZATION_LEVEL = 2;
 				INFOPLIST_FILE = "Clock SignalTests/Info.plist";
@@ -6424,7 +6407,6 @@
 				ONLY_ACTIVE_ARCH = YES;
 				PRODUCT_BUNDLE_IDENTIFIER = "TH.Clock-SignalTests";
 				PRODUCT_NAME = "$(TARGET_NAME)";
-				PROVISIONING_PROFILE_SPECIFIER = "";
 				SWIFT_OBJC_BRIDGING_HEADER = "Clock SignalTests/Bridges/Clock SignalTests-Bridging-Header.h";
 				SWIFT_VERSION = 5.0;
 				TEST_HOST = "$(BUILT_PRODUCTS_DIR)/Clock Signal.app/Contents/MacOS/Clock Signal";
diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index 8f5cf18c6..ba1482e95 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -157,9 +157,9 @@ namespace {
 	// jb		0x00000001
 	// dec		%bx
 	// mov		$0x28,%ch
-	[self assert:instructions[0] operation:Operation::SUB size:2 operand:0xea77 destination:Source::AX];
+	[self assert:instructions[0] operation:Operation::SUB size:2 operand:0xea77 destination:Source::eAX];
 	[self assert:instructions[1] operation:Operation::JB displacement:0xfffc];
-	[self assert:instructions[2] operation:Operation::DEC size:2 source:Source::BX destination:Source::BX];
+	[self assert:instructions[2] operation:Operation::DEC size:2 source:Source::eBX destination:Source::eBX];
 	[self assert:instructions[3] operation:Operation::MOV size:1 operand:0x28 destination:Source::CH];
 
 	// ret
@@ -176,10 +176,10 @@ namespace {
 	// out		%ax,(%dx)
 	// jo		0x00000037
 	// xchg		%ax,%sp
-	[self assert:instructions[8] operation:Operation::DEC size:2 source:Source::SI destination:Source::SI];
+	[self assert:instructions[8] operation:Operation::DEC size:2 source:Source::eSI destination:Source::eSI];
 	[self assert:instructions[9] operation:Operation::OUT size:2 source:Source::AX destination:Source::DX];
 	[self assert:instructions[10] operation:Operation::JO displacement:0x20];
-	[self assert:instructions[11] operation:Operation::XCHG size:2 source:Source::AX destination:Source::SP];
+	[self assert:instructions[11] operation:Operation::XCHG size:2 source:Source::eAX destination:Source::eSP];
 
 	// ODA has:
 	// 	c4		(bad)
@@ -191,15 +191,15 @@ namespace {
 	//	c4 d4	(bad)
 	//	93		XCHG AX, BX
 	[self assert:instructions[12] operation:Operation::Invalid];
-	[self assert:instructions[13] operation:Operation::XCHG size:2 source:Source::AX destination:Source::BX];
+	[self assert:instructions[13] operation:Operation::XCHG size:2 source:Source::eAX destination:Source::eBX];
 
 	// inc		%bx
 	// cmp		$0x8e,%al
 	// [[ omitted: push		$0x65 ]]
 	// sbb		0x45(%bx,%si),%bh
 	// adc		%bh,0x3c(%bx)
-	[self assert:instructions[14] operation:Operation::INC size:2 source:Source::BX destination:Source::BX];
-	[self assert:instructions[15] operation:Operation::CMP size:1 operand:0x8e destination:Source::AL];
+	[self assert:instructions[14] operation:Operation::INC size:2 source:Source::eBX destination:Source::eBX];
+	[self assert:instructions[15] operation:Operation::CMP size:1 operand:0x8e destination:Source::eAX];
 	[self assert:instructions[16] operation:Operation::SBB size:1 source:Source::IndBXPlusSI destination:Source::BH displacement:0x45];
 	[self assert:instructions[17] operation:Operation::ADC size:1 source:Source::BH destination:Source::IndBX displacement:0x3c];
 
@@ -207,8 +207,8 @@ namespace {
 	// xor		%sp,0x2c(%si)
 	// out		%ax,$0xc6
 	// jge		0xffffffe0
-	[self assert:instructions[18] operation:Operation::SBB size:2 source:Source::BX destination:Source::IndBPPlusSI displacement:0x16];
-	[self assert:instructions[19] operation:Operation::XOR size:2 source:Source::SP destination:Source::IndSI displacement:0x2c];
+	[self assert:instructions[18] operation:Operation::SBB size:2 source:Source::eBX destination:Source::IndBPPlusSI displacement:0x16];
+	[self assert:instructions[19] operation:Operation::XOR size:2 source:Source::eSP destination:Source::IndSI displacement:0x2c];
 	[self assert:instructions[20] operation:Operation::OUT size:2 source:Source::AX destination:Source::DirectAddress operand:0xc6];
 	[self assert:instructions[21] operation:Operation::JNL displacement:0xffb0];
 
@@ -218,24 +218,24 @@ namespace {
 	// adc		$0x7e,%al
 	// jno		0x0000000b
 	[self assert:instructions[22] operation:Operation::MOV size:1 operand:0x49 destination:Source::CH];
-	[self assert:instructions[23] operation:Operation::MOV size:2 operand:0xcbc0 destination:Source::DX];
-	[self assert:instructions[24] operation:Operation::ADC size:1 operand:0x7e destination:Source::AL];
+	[self assert:instructions[23] operation:Operation::MOV size:2 operand:0xcbc0 destination:Source::eDX];
+	[self assert:instructions[24] operation:Operation::ADC size:1 operand:0x7e destination:Source::eAX];
 	[self assert:instructions[25] operation:Operation::JNO displacement:0xffd0];
 
 	// push		%ax
 	// js		0x0000007b
 	// add		(%di),%bx
 	// in		$0xc9,%ax
-	[self assert:instructions[26] operation:Operation::PUSH size:2 source:Source::AX];
+	[self assert:instructions[26] operation:Operation::PUSH size:2 source:Source::eAX];
 	[self assert:instructions[27] operation:Operation::JS displacement:0x3d];
-	[self assert:instructions[28] operation:Operation::ADD size:2 source:Source::IndDI destination:Source::BX];
+	[self assert:instructions[28] operation:Operation::ADD size:2 source:Source::IndDI destination:Source::eBX];
 	[self assert:instructions[29] operation:Operation::IN size:2 source:Source::DirectAddress destination:Source::AX operand:0xc9];
 
 	// xchg		%ax,%di
 	// ret
 	// fwait
 	// out		%al,$0xd3
-	[self assert:instructions[30] operation:Operation::XCHG size:2 source:Source::AX destination:Source::DI];
+	[self assert:instructions[30] operation:Operation::XCHG size:2 source:Source::eAX destination:Source::eDI];
 	[self assert:instructions[31] operation:Operation::RETN];
 	[self assert:instructions[32] operation:Operation::WAIT];
 	[self assert:instructions[33] operation:Operation::OUT size:1 source:Source::AL destination:Source::DirectAddress operand:0xd3];
@@ -245,10 +245,10 @@ namespace {
 	// dec		%bp
 	// jbe		0xffffffcc
 	// inc		%sp
-	[self assert:instructions[34] operation:Operation::POP size:2 destination:Source::AX];
-	[self assert:instructions[35] operation:Operation::DEC size:2 source:Source::BP destination:Source::BP];
+	[self assert:instructions[34] operation:Operation::POP size:2 destination:Source::eAX];
+	[self assert:instructions[35] operation:Operation::DEC size:2 source:Source::eBP destination:Source::eBP];
 	[self assert:instructions[36] operation:Operation::JBE displacement:0xff80];
-	[self assert:instructions[37] operation:Operation::INC size:2 source:Source::SP destination:Source::SP];
+	[self assert:instructions[37] operation:Operation::INC size:2 source:Source::eSP destination:Source::eSP];
 
 	// (bad)
 	// lahf
@@ -257,7 +257,7 @@ namespace {
 	[self assert:instructions[38] operation:Operation::Invalid];
 	[self assert:instructions[39] operation:Operation::LAHF];
 	[self assert:instructions[40] operation:Operation::MOVS size:2];
-	[self assert:instructions[41] operation:Operation::MOV size:2 operand:0x12a1 destination:Source::BP];
+	[self assert:instructions[41] operation:Operation::MOV size:2 operand:0x12a1 destination:Source::eBP];
 
 	// lds		(%bx,%di),%bp
 	// [[ omitted: leave ]]
@@ -273,9 +273,9 @@ namespace {
 	// cmp		%bx,-0x70(%di)
 	// adc		$0xb8c3,%ax
 	// lods		%ds:(%si),%ax
-	[self assert:instructions[46] operation:Operation::XCHG size:2 source:Source::AX destination:Source::DX];
-	[self assert:instructions[47] operation:Operation::CMP size:2 source:Source::BX destination:Source::IndDI displacement:0xff90];
-	[self assert:instructions[48] operation:Operation::ADC size:2 operand:0xb8c3 destination:Source::AX];
+	[self assert:instructions[46] operation:Operation::XCHG size:2 source:Source::eAX destination:Source::eDX];
+	[self assert:instructions[47] operation:Operation::CMP size:2 source:Source::eBX destination:Source::IndDI displacement:0xff90];
+	[self assert:instructions[48] operation:Operation::ADC size:2 operand:0xb8c3 destination:Source::eAX];
 	[self assert:instructions[49] operation:Operation::LODS size:2];
 
 	// call		0x0000172d
@@ -283,16 +283,16 @@ namespace {
 	// mov		$0x9e,%al
 	// stc
 	[self assert:instructions[50] operation:Operation::CALLD operand:0x16c8];
-	[self assert:instructions[51] operation:Operation::DEC size:2 source:Source::DX destination:Source::DX];
-	[self assert:instructions[52] operation:Operation::MOV size:1 operand:0x9e destination:Source::AL];
+	[self assert:instructions[51] operation:Operation::DEC size:2 source:Source::eDX destination:Source::eDX];
+	[self assert:instructions[52] operation:Operation::MOV size:1 operand:0x9e destination:Source::eAX];
 	[self assert:instructions[53] operation:Operation::STC];
 
 	// mov		$0xea56,%di
 	// dec		%si
 	// std
 	// in		$0x5a,%al
-	[self assert:instructions[54] operation:Operation::MOV size:2 operand:0xea56 destination:Source::DI];
-	[self assert:instructions[55] operation:Operation::DEC size:2 source:Source::SI destination:Source::SI];
+	[self assert:instructions[54] operation:Operation::MOV size:2 operand:0xea56 destination:Source::eDI];
+	[self assert:instructions[55] operation:Operation::DEC size:2 source:Source::eSI destination:Source::eSI];
 	[self assert:instructions[56] operation:Operation::STD];
 	[self assert:instructions[57] operation:Operation::IN size:1 source:Source::DirectAddress destination:Source::AL operand:0x5a];
 
@@ -300,10 +300,10 @@ namespace {
 	// sub		%dl,%dl
 	// negw		0x18(%bx)
 	// xchg		%dl,0x6425(%bx,%si)
-	[self assert:instructions[58] operation:Operation::AND size:2 source:Source::IndBPPlusSI destination:Source::BP displacement:0x5b2c];
-	[self assert:instructions[59] operation:Operation::SUB size:1 source:Source::DL destination:Source::DL];
+	[self assert:instructions[58] operation:Operation::AND size:2 source:Source::IndBPPlusSI destination:Source::eBP displacement:0x5b2c];
+	[self assert:instructions[59] operation:Operation::SUB size:1 source:Source::eDX destination:Source::eDX];
 	[self assert:instructions[60] operation:Operation::NEG size:2 source:Source::IndBX destination:Source::IndBX displacement:0x18];
-	[self assert:instructions[61] operation:Operation::XCHG size:1 source:Source::IndBXPlusSI destination:Source::DL displacement:0x6425];
+	[self assert:instructions[61] operation:Operation::XCHG size:1 source:Source::IndBXPlusSI destination:Source::eDX displacement:0x6425];
 
 	// mov		$0xc3,%bh
 	[self assert:instructions[62] operation:Operation::MOV size:1 operand:0xc3 destination:Source::BH];

From 30b355fd6ffca0f33833feea1b869b11acfda4a5 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Fri, 18 Feb 2022 18:37:47 -0500
Subject: [PATCH 010/104] Chips away further at the legacy register names.

---
 InstructionSets/x86/Decoder.cpp     | 30 ++++++++++++++---------------
 InstructionSets/x86/Instruction.hpp |  5 +++--
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 430f55daf..9cb657453 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -129,19 +129,19 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 
 			PartialBlock(0x20, AND);					break;
 			case 0x26: segment_override_ = Source::ES;	break;
-			case 0x27: Complete(DAA, AL, AL, 1);		break;
+			case 0x27: Complete(DAA, eAX, eAX, 1);		break;
 
 			PartialBlock(0x28, SUB);					break;
 			case 0x2e: segment_override_ = Source::CS;	break;
-			case 0x2f: Complete(DAS, AL, AL, 1);		break;
+			case 0x2f: Complete(DAS, eAX, eAX, 1);		break;
 
 			PartialBlock(0x30, XOR);					break;
 			case 0x36: segment_override_ = Source::SS;	break;
-			case 0x37: Complete(AAA, AL, eAX, 2);		break;
+			case 0x37: Complete(AAA, eAX, eAX, 2);		break;
 
 			PartialBlock(0x38, CMP);					break;
 			case 0x3e: segment_override_ = Source::DS;	break;
-			case 0x3f: Complete(AAS, AL, eAX, 2);		break;
+			case 0x3f: Complete(AAS, eAX, eAX, 2);		break;
 
 #undef PartialBlock
 
@@ -239,7 +239,7 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 			case 0x96: Complete(XCHG, eAX, eSI, 2);		break;
 			case 0x97: Complete(XCHG, eAX, eDI, 2);		break;
 
-			case 0x98: Complete(CBW, AL, AH, 1);		break;
+			case 0x98: Complete(CBW, eAX, AH, 1);		break;
 			case 0x99: Complete(CWD, eAX, eDX, 2);		break;
 			case 0x9a: Far(CALLF);						break;
 			case 0x9b: Complete(WAIT, None, None, 0);	break;
@@ -248,16 +248,16 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 			case 0x9e: Complete(SAHF, None, None, 1);	break;
 			case 0x9f: Complete(LAHF, None, None, 1);	break;
 
-			case 0xa0: RegAddr(MOV, AL, 1, 1);	break;
+			case 0xa0: RegAddr(MOV, eAX, 1, 1);	break;
 			case 0xa1: RegAddr(MOV, eAX, 2, 2);	break;
-			case 0xa2: AddrReg(MOV, AL, 1, 1);	break;
+			case 0xa2: AddrReg(MOV, eAX, 1, 1);	break;
 			case 0xa3: AddrReg(MOV, eAX, 2, 2);	break;
 
 			case 0xa4: Complete(MOVS, None, None, 1);	break;
 			case 0xa5: Complete(MOVS, None, None, 2);	break;
 			case 0xa6: Complete(CMPS, None, None, 1);	break;
 			case 0xa7: Complete(CMPS, None, None, 2);	break;
-			case 0xa8: RegData(TEST, AL, 1);			break;
+			case 0xa8: RegData(TEST, eAX, 1);			break;
 			case 0xa9: RegData(TEST, eAX, 2);			break;
 			case 0xaa: Complete(STOS, None, None, 1);	break;
 			case 0xab: Complete(STOS, None, None, 2);	break;
@@ -339,19 +339,19 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 			case 0xe2: Jump(LOOP);		break;
 			case 0xe3: Jump(JPCX);		break;
 
-			case 0xe4: RegAddr(IN, AL, 1, 1);	break;
-			case 0xe5: RegAddr(IN, AX, 2, 1);	break;
-			case 0xe6: AddrReg(OUT, AL, 1, 1);	break;
-			case 0xe7: AddrReg(OUT, AX, 2, 1);	break;
+			case 0xe4: RegAddr(IN, eAX, 1, 1);	break;
+			case 0xe5: RegAddr(IN, eAX, 2, 1);	break;
+			case 0xe6: AddrReg(OUT, eAX, 1, 1);	break;
+			case 0xe7: AddrReg(OUT, eAX, 2, 1);	break;
 
 			case 0xe8: RegData(CALLD, None, 2);	break;
 			case 0xe9: RegData(JMPN, None, 2);	break;
 			case 0xea: Far(JMPF);				break;
 			case 0xeb: Jump(JMPN);				break;
 
-			case 0xec: Complete(IN, DX, AL, 1);		break;
+			case 0xec: Complete(IN, DX, eAX, 1);	break;
 			case 0xed: Complete(IN, DX, AX, 1);		break;
-			case 0xee: Complete(OUT, AL, DX, 1);	break;
+			case 0xee: Complete(OUT, eAX, DX, 1);	break;
 			case 0xef: Complete(OUT, AX, DX, 2);	break;
 
 			case 0xf0: lock_ = true;					break;
@@ -421,7 +421,7 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 		constexpr Source reg_table[3][8] = {
 			{},
 			{
-				Source::AL,		Source::eCX,	Source::eDX,	Source::eBX,
+				Source::eAX,	Source::eCX,	Source::eDX,	Source::eBX,
 				Source::AH,		Source::CH,		Source::DH,		Source::BH,
 			}, {
 				Source::eAX,	Source::eCX,	Source::eDX,	Source::eBX,
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 33b2eb325..f61f3b7ef 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -332,11 +332,12 @@ enum class Source: uint8_t {
 
 	// Legacy 8-bit registers that can't be described as e.g. 8-bit eAX,
 	// or where the source is 8-bit but the destination is 16-bit.
-	AL, BL, CL, DL,
+	CL,
 	AH, BH, CH, DH,
 
 	// TODO: can these all be eliminated in favour of eAX,2, etc?
-	AX, BX, CX, DX,
+	AX,
+	DX,
 
 	// TODO: compact and replace with a reference to a SIB.
 	IndBXPlusSI,

From 4d2e8cd71d074bdc9534fcb02ef0a532a0d721d4 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sat, 19 Feb 2022 18:00:27 -0500
Subject: [PATCH 011/104] Adds a presently-unreachable step for SIB
 consumption.

---
 InstructionSets/x86/Decoder.cpp     | 8 ++++++++
 InstructionSets/x86/Decoder.hpp     | 5 +++++
 InstructionSets/x86/Instruction.hpp | 2 +-
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 9cb657453..95549f858 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -637,6 +637,14 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 		phase_ = (displacement_size_ + operand_size_) ? Phase::DisplacementOrOperand : Phase::ReadyToPost;
 	}
 
+	// MARK: - ScaleIndexBase
+
+	if(phase_ == Phase::ScaleIndexBase && source != end) {
+		sib_ = *source;
+		++source;
+		++consumed_;
+	}
+
 	// MARK: - Displacement and operand.
 
 	if(phase_ == Phase::DisplacementOrOperand && source != end) {
diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index fd28eb4bb..c508f0805 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -53,6 +53,8 @@ class Decoder {
 			/// Receives a ModRegRM byte and either populates the source_ and dest_ fields appropriately
 			/// or completes decoding of the instruction, as per the instruction format.
 			ModRegRM,
+			/// Awaits an 80386+-style scale-index-base byte ('SIB'), indicating the form of indirect addressing.
+			ScaleIndexBase,
 			/// Waits for sufficiently many bytes to pass for the required displacement and operand to be captured.
 			/// Cf. displacement_size_ and operand_size_.
 			DisplacementOrOperand,
@@ -143,6 +145,9 @@ class Decoder {
 		uint16_t operand_ = 0;
 		uint64_t inward_data_ = 0;
 
+		// Indirection style.
+		uint8_t sib_;
+
 		// Facts about the instruction.
 		int displacement_size_ = 0;		// i.e. size of in-stream displacement, if any.
 		int operand_size_ = 0;			// i.e. size of in-stream operand, if any.
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index f61f3b7ef..792d6fb07 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -319,7 +319,7 @@ enum class Size: uint8_t {
 
 enum class Source: uint8_t {
 	// These are in SIB order; this matters for packing later on.
-	// Whether each refers to e.g. EAX or AX depends on the
+	// Whether each refers to e.g. EAX, AX or AL depends on the
 	// instruction's data size.
 	eAX, eCX, eDX, eBX, eSP, eBP, eSI, eDI,
 

From a5113998e228e38901af82fdb4d4ccb1baec8bf6 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sun, 20 Feb 2022 17:15:01 -0500
Subject: [PATCH 012/104] Accept that IN and OUT are going to have special
 semantics, thereby kill ::AX and ::DX.

---
 InstructionSets/x86/Decoder.cpp                     |  8 ++++----
 InstructionSets/x86/Instruction.hpp                 |  4 ----
 OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm | 10 +++++-----
 3 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 95549f858..6dc0be92b 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -349,10 +349,10 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 			case 0xea: Far(JMPF);				break;
 			case 0xeb: Jump(JMPN);				break;
 
-			case 0xec: Complete(IN, DX, eAX, 1);	break;
-			case 0xed: Complete(IN, DX, AX, 1);		break;
-			case 0xee: Complete(OUT, eAX, DX, 1);	break;
-			case 0xef: Complete(OUT, AX, DX, 2);	break;
+			case 0xec: Complete(IN, eDX, eAX, 1);	break;
+			case 0xed: Complete(IN, eDX, eAX, 2);	break;	// Word-sized, as per 0xef; with ::AX gone the size is all that separates this from 0xec.
+			case 0xee: Complete(OUT, eAX, eDX, 1);	break;
+			case 0xef: Complete(OUT, eAX, eDX, 2);	break;
 
 			case 0xf0: lock_ = true;					break;
 			case 0xf2: repetition_ = Repetition::RepNE;	break;
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 792d6fb07..edc4021be 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -335,10 +335,6 @@ enum class Source: uint8_t {
 	CL,
 	AH, BH, CH, DH,
 
-	// TODO: can these all be eliminated in favour of eAX,2, etc?
-	AX,
-	DX,
-
 	// TODO: compact and replace with a reference to a SIB.
 	IndBXPlusSI,
 	IndBXPlusDI,
diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index ba1482e95..dc9801728 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -177,7 +177,7 @@ namespace {
 	// jo		0x00000037
 	// xchg		%ax,%sp
 	[self assert:instructions[8] operation:Operation::DEC size:2 source:Source::eSI destination:Source::eSI];
-	[self assert:instructions[9] operation:Operation::OUT size:2 source:Source::AX destination:Source::DX];
+	[self assert:instructions[9] operation:Operation::OUT size:2 source:Source::eAX destination:Source::eDX];
 	[self assert:instructions[10] operation:Operation::JO displacement:0x20];
 	[self assert:instructions[11] operation:Operation::XCHG size:2 source:Source::eAX destination:Source::eSP];
 
@@ -209,7 +209,7 @@ namespace {
 	// jge		0xffffffe0
 	[self assert:instructions[18] operation:Operation::SBB size:2 source:Source::eBX destination:Source::IndBPPlusSI displacement:0x16];
 	[self assert:instructions[19] operation:Operation::XOR size:2 source:Source::eSP destination:Source::IndSI displacement:0x2c];
-	[self assert:instructions[20] operation:Operation::OUT size:2 source:Source::AX destination:Source::DirectAddress operand:0xc6];
+	[self assert:instructions[20] operation:Operation::OUT size:2 source:Source::eAX destination:Source::DirectAddress operand:0xc6];
 	[self assert:instructions[21] operation:Operation::JNL displacement:0xffb0];
 
 	// mov		$0x49,%ch
@@ -229,7 +229,7 @@ namespace {
 	[self assert:instructions[26] operation:Operation::PUSH size:2 source:Source::eAX];
 	[self assert:instructions[27] operation:Operation::JS displacement:0x3d];
 	[self assert:instructions[28] operation:Operation::ADD size:2 source:Source::IndDI destination:Source::eBX];
-	[self assert:instructions[29] operation:Operation::IN size:2 source:Source::DirectAddress destination:Source::AX operand:0xc9];
+	[self assert:instructions[29] operation:Operation::IN size:2 source:Source::DirectAddress destination:Source::eAX operand:0xc9];
 
 	// xchg		%ax,%di
 	// ret
@@ -238,7 +238,7 @@ namespace {
 	[self assert:instructions[30] operation:Operation::XCHG size:2 source:Source::eAX destination:Source::eDI];
 	[self assert:instructions[31] operation:Operation::RETN];
 	[self assert:instructions[32] operation:Operation::WAIT];
-	[self assert:instructions[33] operation:Operation::OUT size:1 source:Source::AL destination:Source::DirectAddress operand:0xd3];
+	[self assert:instructions[33] operation:Operation::OUT size:1 source:Source::eAX destination:Source::DirectAddress operand:0xd3];
 
 	// [[ omitted: insb		(%dx),%es:(%di) ]]
 	// pop		%ax
@@ -294,7 +294,7 @@ namespace {
 	[self assert:instructions[54] operation:Operation::MOV size:2 operand:0xea56 destination:Source::eDI];
 	[self assert:instructions[55] operation:Operation::DEC size:2 source:Source::eSI destination:Source::eSI];
 	[self assert:instructions[56] operation:Operation::STD];
-	[self assert:instructions[57] operation:Operation::IN size:1 source:Source::DirectAddress destination:Source::AL operand:0x5a];
+	[self assert:instructions[57] operation:Operation::IN size:1 source:Source::DirectAddress destination:Source::eAX operand:0x5a];
 
 	// and		0x5b2c(%bp,%si),%bp
 	// sub		%dl,%dl

From 75d2d64e7cc0a77aee36c9df95697748924ededb Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sun, 20 Feb 2022 17:52:19 -0500
Subject: [PATCH 013/104] Albeit that it requires nuanced shift/roll semantics,
 eliminate the `CL` constant.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Shifts and rolls are already slightly special semantically in being undefined for counts greater than 8/16/32; some implementations don't even use the entirety of CL, just its low five bits. That makes me feel a little better.

The upside of no ambiguity between eCX size 1 and CL justifies the trade.
---
 InstructionSets/x86/Decoder.cpp     |  2 +-
 InstructionSets/x86/Instruction.hpp | 32 +++++++++++++++--------------
 2 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 6dc0be92b..a08e6fc82 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -318,7 +318,7 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 				phase_ = Phase::ModRegRM;
 				modregrm_format_ = ModRegRMFormat::MemRegROL_to_SAR;
 				operation_size_ = 1 + (instr_ & 1);
-				source_ = Source::CL;
+				source_ = Source::eCX;
 			break;
 			case 0xd4: RegData(AAM, eAX, 1);				break;
 			case 0xd5: RegData(AAD, eAX, 1);				break;
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index edc4021be..aebdfa2b7 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -156,19 +156,19 @@ enum class Operation: uint8_t {
 	PUSH,
 	/// PUSH the flags register to the stack.
 	PUSHF,
-	/// Rotate the destination left through carry the number of bits indicated by source.
+	/// Rotate the destination left through carry the number of bits indicated by source; if the source is a register then implicitly its size is 1.
 	RCL,
-	/// Rotate the destination right through carry the number of bits indicated by source.
+	/// Rotate the destination right through carry the number of bits indicated by source; if the source is a register then implicitly its size is 1.
 	RCR,
-	/// Rotate the destination left the number of bits indicated by source.
+	/// Rotate the destination left the number of bits indicated by source; if the source is a register then implicitly its size is 1.
 	ROL,
-	/// Rotate the destination right the number of bits indicated by source.
+	/// Rotate the destination right the number of bits indicated by source; if the source is a register then implicitly its size is 1.
 	ROR,
-	/// Arithmetic shift left the destination by the number of bits indicated by source.
+	/// Arithmetic shift left the destination by the number of bits indicated by source; if the source is a register then implicitly its size is 1.
 	SAL,
-	/// Arithmetic shift right the destination by the number of bits indicated by source.
+	/// Arithmetic shift right the destination by the number of bits indicated by source; if the source is a register then implicitly its size is 1.
 	SAR,
-	/// Logical shift right the destination by the number of bits indicated by source.
+	/// Logical shift right the destination by the number of bits indicated by source; if the source is a register then implicitly its size is 1.
 	SHR,
 
 	/// Clear carry flag; no source or destination provided.
@@ -323,18 +323,20 @@ enum class Source: uint8_t {
 	// instruction's data size.
 	eAX, eCX, eDX, eBX, eSP, eBP, eSI, eDI,
 
-	// Selectors are provided as a group.
+	// Selectors.
 	CS, DS, ES, SS, FS, GS,
 
-	DirectAddress,
-	Immediate,
-	Indirect,
-
-	// Legacy 8-bit registers that can't be described as e.g. 8-bit eAX,
-	// or where the source is 8-bit but the destination is 16-bit.
-	CL,
+	// Legacy 8-bit registers that can't be described as e.g. 8-bit eAX.
 	AH, BH, CH, DH,
 
+	// Sources that are not a register.
+	/// The address included within this instruction should be used as the source.
+	DirectAddress,
+	/// The immediate value included within this instruction should be used as the source.
+	Immediate,
+	/// The ScaleIndexBase associated with this source should be used.
+	Indirect,
+
 	// TODO: compact and replace with a reference to a SIB.
 	IndBXPlusSI,
 	IndBXPlusDI,

From 63d8a88e2f60a427735588deb9e15628ffe076de Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sun, 20 Feb 2022 17:54:53 -0500
Subject: [PATCH 014/104] Switch to holding the SIB as a typed ScaleIndexBase.

(and permit copy assignment)
---
 InstructionSets/x86/Decoder.hpp     | 2 +-
 InstructionSets/x86/Instruction.hpp | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index c508f0805..59fd09beb 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -146,7 +146,7 @@ class Decoder {
 		uint64_t inward_data_ = 0;
 
 		// Indirection style.
-		uint8_t sib_;
+		ScaleIndexBase sib_;
 
 		// Facts about the instruction.
 		int displacement_size_ = 0;		// i.e. size of in-stream displacement, if any.
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index aebdfa2b7..31e6725be 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -364,6 +364,7 @@ enum class Repetition: uint8_t {
 /// even though it is a superset of that supported prior to the 80386.
 class ScaleIndexBase {
 	public:
+		ScaleIndexBase() {}
 		ScaleIndexBase(uint8_t sib) : sib_(sib) {}
 		ScaleIndexBase(int scale, Source index, Source base) : sib_(uint8_t(scale << 6 | (int(index != Source::None ? index : Source::eSI) << 3) | int(base))) {}
 
@@ -388,7 +389,7 @@ class ScaleIndexBase {
 
 	private:
 		// Data is stored directly as an 80386 SIB byte.
-		const uint8_t sib_ = 0;
+		uint8_t sib_ = 0;
 };
 static_assert(sizeof(ScaleIndexBase) == 1);
 static_assert(alignof(ScaleIndexBase) == 1);

From 546b4edbf1ea457e819584256891db395337e82d Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sun, 20 Feb 2022 19:22:28 -0500
Subject: [PATCH 015/104] Ensure `ScaleIndexBase` can be used `constexpr`; add
 note-to-self on indexing table.

---
 InstructionSets/x86/Decoder.cpp     | 11 +++++++++++
 InstructionSets/x86/Instruction.hpp | 12 ++++++------
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index a08e6fc82..458facbea 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -440,6 +440,17 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 			} break;
 
 			default: {
+				// TODO: switch to this table.
+//				constexpr ScaleIndexBase rm_table[8] = {
+//					ScaleIndexBase(0, Source::eBX, Source::eSI),
+//					ScaleIndexBase(0, Source::eBX, Source::eDI),
+//					ScaleIndexBase(0, Source::eBP, Source::eSI),
+//					ScaleIndexBase(0, Source::eBP, Source::eDI),
+//					ScaleIndexBase(0, Source::None, Source::eSI),
+//					ScaleIndexBase(0, Source::None, Source::eDI),
+//					ScaleIndexBase(0, Source::None, Source::eBP),
+//					ScaleIndexBase(0, Source::None, Source::eBX),
+//				};
 				constexpr Source rm_table[8] = {
 					Source::IndBXPlusSI,	Source::IndBXPlusDI,
 					Source::IndBPPlusSI,	Source::IndBPPlusDI,
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 31e6725be..23259c27d 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -364,17 +364,17 @@ enum class Repetition: uint8_t {
 /// even though it is a superset of that supported prior to the 80386.
 class ScaleIndexBase {
 	public:
-		ScaleIndexBase() {}
-		ScaleIndexBase(uint8_t sib) : sib_(sib) {}
-		ScaleIndexBase(int scale, Source index, Source base) : sib_(uint8_t(scale << 6 | (int(index != Source::None ? index : Source::eSI) << 3) | int(base))) {}
+		constexpr ScaleIndexBase() noexcept {}
+		constexpr ScaleIndexBase(uint8_t sib) noexcept : sib_(sib) {}
+		constexpr ScaleIndexBase(int scale, Source index, Source base) noexcept : sib_(uint8_t(scale << 6 | (int(index != Source::None ? index : Source::eSI) << 3) | int(base))) {}
 
 		/// @returns the power of two by which to multiply @c index() before adding it to @c base().
-		int scale() const {
+		constexpr int scale() const {
 			return sib_ >> 6;
 		}
 
 		/// @returns the @c index for this address; this is guaranteed to be one of eAX, eBX, eCX, eDX, None, eBP, eSI or eDI.
-		Source index() const {
+		constexpr Source index() const {
 			constexpr Source sources[] = {
 				Source::eAX, Source::eCX, Source::eDX, Source::eBX, Source::None, Source::eBP, Source::eSI, Source::eDI,
 			};
@@ -383,7 +383,7 @@ class ScaleIndexBase {
 		}
 
 		/// @returns the @c base for this address; this is guaranteed to be one of eAX, eBX, eCX, eDX, eSP, eBP, eSI or eDI.
-		Source base() const {
+		constexpr Source base() const {
 			return Source(sib_ & 0x7);
 		}
 

From 9e9e160c43489ce57ab06aad33c2ab29dc05be76 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 21 Feb 2022 11:45:46 -0500
Subject: [PATCH 016/104] Eliminate Ind[BXPlusSI/etc] in favour of specifying
 everything via a ScaleIndexBase.

---
 InstructionSets/x86/Decoder.cpp               | 47 +++++---------
 InstructionSets/x86/Decoder.hpp               |  1 +
 InstructionSets/x86/Instruction.hpp           | 64 +++++++++++++------
 .../Mac/Clock SignalTests/x86DecoderTests.mm  | 52 +++++++--------
 4 files changed, 91 insertions(+), 73 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 458facbea..45dd5ef71 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -429,37 +429,23 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 			}
 		};
 		switch(mod) {
-			case 0: {
-				constexpr Source rm_table[8] = {
-					Source::IndBXPlusSI,	Source::IndBXPlusDI,
-					Source::IndBPPlusSI,	Source::IndBPPlusDI,
-					Source::IndSI,			Source::IndDI,
-					Source::DirectAddress,	Source::IndBX,
-				};
-				memreg = rm_table[rm];
-			} break;
-
-			default: {
-				// TODO: switch to this table.
-//				constexpr ScaleIndexBase rm_table[8] = {
-//					ScaleIndexBase(0, Source::eBX, Source::eSI),
-//					ScaleIndexBase(0, Source::eBX, Source::eDI),
-//					ScaleIndexBase(0, Source::eBP, Source::eSI),
-//					ScaleIndexBase(0, Source::eBP, Source::eDI),
-//					ScaleIndexBase(0, Source::None, Source::eSI),
-//					ScaleIndexBase(0, Source::None, Source::eDI),
-//					ScaleIndexBase(0, Source::None, Source::eBP),
-//					ScaleIndexBase(0, Source::None, Source::eBX),
-//				};
-				constexpr Source rm_table[8] = {
-					Source::IndBXPlusSI,	Source::IndBXPlusDI,
-					Source::IndBPPlusSI,	Source::IndBPPlusDI,
-					Source::IndSI,			Source::IndDI,
-					Source::IndBP,			Source::IndBX,
-				};
-				memreg = rm_table[rm];
-
+			default:
 				displacement_size_ = 1 + (mod == 2);
+				[[fallthrough]];
+			case 0: {
+				constexpr ScaleIndexBase rm_table[8] = {
+					ScaleIndexBase(0, Source::eBX, Source::eSI),
+					ScaleIndexBase(0, Source::eBX, Source::eDI),
+					ScaleIndexBase(0, Source::eBP, Source::eSI),
+					ScaleIndexBase(0, Source::eBP, Source::eDI),
+					ScaleIndexBase(0, Source::None, Source::eSI),
+					ScaleIndexBase(0, Source::None, Source::eDI),
+					ScaleIndexBase(0, Source::None, Source::eBP),
+					ScaleIndexBase(0, Source::None, Source::eBX),
+				};
+
+				memreg = Source::Indirect;
+				sib_ = rm_table[rm];
 			} break;
 
 			// Other operand is just a register.
@@ -710,6 +696,7 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 				operation_,
 				source_,
 				destination_,
+				sib_,
 				lock_,
 				segment_override_,
 				repetition_,
diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index 59fd09beb..e2c68a907 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -168,6 +168,7 @@ class Decoder {
 			repetition_ = Repetition::None;
 			phase_ = Phase::Instruction;
 			source_ = destination_ = Source::None;
+			sib_ = 0;
 		}
 };
 
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 23259c27d..897b0a770 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -329,28 +329,21 @@ enum class Source: uint8_t {
 	// Legacy 8-bit registers that can't be described as e.g. 8-bit eAX.
 	AH, BH, CH, DH,
 
-	// Sources that are not a register.
 	/// The address included within this instruction should be used as the source.
 	DirectAddress,
+
 	/// The immediate value included within this instruction should be used as the source.
 	Immediate,
-	/// The ScaleIndexBase associated with this source should be used.
-	Indirect,
-
-	// TODO: compact and replace with a reference to a SIB.
-	IndBXPlusSI,
-	IndBXPlusDI,
-	IndBPPlusSI,
-	IndBPPlusDI,
-	IndSI,
-	IndDI,
-	IndBP,
-	IndBX,
 
 	/// @c None can be treated as a source that produces 0 when encountered;
 	/// it is semantically valid to receive it with that meaning in some contexts —
 	/// e.g. to indicate no index in indirect addressing.
 	None,
+
+	/// The ScaleIndexBase associated with this source should be used.
+	Indirect = 0b11000,
+	// Elsewhere, as an implementation detail, the low three bits of an indirect source
+	// are reused.
 };
 
 enum class Repetition: uint8_t {
@@ -367,6 +360,8 @@ class ScaleIndexBase {
 		constexpr ScaleIndexBase() noexcept {}
 		constexpr ScaleIndexBase(uint8_t sib) noexcept : sib_(sib) {}
 		constexpr ScaleIndexBase(int scale, Source index, Source base) noexcept : sib_(uint8_t(scale << 6 | (int(index != Source::None ? index : Source::eSI) << 3) | int(base))) {}
+		constexpr ScaleIndexBase(Source index, Source base) noexcept : ScaleIndexBase(0, index, base) {}
+		constexpr explicit ScaleIndexBase(Source base) noexcept : ScaleIndexBase(0, Source::None, base) {}
 
 		/// @returns the power of two by which to multiply @c index() before adding it to @c base().
 		constexpr int scale() const {
@@ -387,6 +382,18 @@ class ScaleIndexBase {
 			return Source(sib_ & 0x7);
 		}
 
+		bool operator ==(const ScaleIndexBase &rhs) const {
+			// Permit either exact equality or index and base being equal
+			// but transposed with a scale of 1.
+			return
+				(sib_ == rhs.sib_) ||
+				(
+					!scale() &&	!rhs.scale() &&
+					rhs.index() == base() &&
+					rhs.base() == index()
+				);
+		}
+
 	private:
 		// Data is stored directly as an 80386 SIB byte.
 		uint8_t sib_ = 0;
@@ -394,6 +401,20 @@ class ScaleIndexBase {
 static_assert(sizeof(ScaleIndexBase) == 1);
 static_assert(alignof(ScaleIndexBase) == 1);
 
+// TODO: improve the naming of SourceSIB.
+struct SourceSIB {
+	SourceSIB(Source source) : source(source) {}
+	SourceSIB(ScaleIndexBase sib) : sib(sib) {}
+	SourceSIB(Source source, ScaleIndexBase sib) : source(source), sib(sib) {}
+
+	bool operator ==(const SourceSIB &rhs) const {
+		return source == rhs.source && (source != Source::Indirect || sib == rhs.sib);
+	}
+
+	Source source = Source::Indirect;
+	ScaleIndexBase sib;
+};
+
 class Instruction {
 	public:
 		Operation operation = Operation::Invalid;
@@ -403,7 +424,8 @@ class Instruction {
 				repetition_size_ == rhs.repetition_size_ &&
 				sources_ == rhs.sources_ &&
 				displacement_ == rhs.displacement_ &&
-				operand_ == rhs.operand_;
+				operand_ == rhs.operand_ &&
+				sib_ == rhs.sib_;
 		}
 
 	private:
@@ -421,9 +443,12 @@ class Instruction {
 		int16_t displacement_ = 0;
 		uint16_t operand_ = 0;		// ... or used to store a segment for far operations.
 
+		// Fields yet to be properly incorporated...
+		ScaleIndexBase sib_;
+
 	public:
-		Source source() const			{	return Source(sources_ & 0x3f);				}
-		Source destination() const		{	return Source((sources_ >> 6) & 0x3f);		}
+		SourceSIB  source() const		{	return SourceSIB(Source(sources_ & 0x3f), sib_);			}
+		SourceSIB destination() const	{	return SourceSIB(Source((sources_ >> 6) & 0x3f), sib_);		}
 		bool lock() const				{	return sources_ & 0x8000;					}
 		Source segment_override() const	{	return Source((sources_ >> 12) & 7);		}
 
@@ -441,6 +466,7 @@ class Instruction {
 			Operation operation,
 			Source source,
 			Source destination,
+			ScaleIndexBase sib,
 			bool lock,
 			Source segment_override,
 			Repetition repetition,
@@ -456,10 +482,12 @@ class Instruction {
 					(int(lock) << 15)
 				)),
 				displacement_(displacement),
-				operand_(operand) {}
+				operand_(operand),
+				sib_(sib) {}
 };
 
-static_assert(sizeof(Instruction) <= 8);
+// TODO: repack.
+//static_assert(sizeof(Instruction) <= 8);
 
 }
 }
diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index dc9801728..51617d7b4 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -17,6 +17,8 @@ namespace {
 	using Instruction = InstructionSet::x86::Instruction;
 	using Source = InstructionSet::x86::Source;
 	using Size = InstructionSet::x86::Size;
+	using ScaleIndexBase = InstructionSet::x86::ScaleIndexBase;
+	using SourceSIB = InstructionSet::x86::SourceSIB;
 }
 
 @interface x86DecoderTests : XCTestCase
@@ -42,42 +44,42 @@ namespace {
 	XCTAssertEqual(instruction.operation_size(), InstructionSet::x86::Size(size));
 }
 
-- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size source:(Source)source destination:(Source)destination displacement:(int16_t)displacement {
+- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size source:(SourceSIB)source destination:(SourceSIB)destination displacement:(int16_t)displacement {
 	XCTAssertEqual(instruction.operation, operation);
 	XCTAssertEqual(instruction.operation_size(), InstructionSet::x86::Size(size));
-	XCTAssertEqual(instruction.source(), source);
-	XCTAssertEqual(instruction.destination(), destination);
+	XCTAssert(instruction.source() == source);
+	XCTAssert(instruction.destination() == destination);
 	XCTAssertEqual(instruction.displacement(), displacement);
 }
 
-- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size source:(Source)source destination:(Source)destination displacement:(int16_t)displacement operand:(uint16_t)operand {
+- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size source:(SourceSIB)source destination:(SourceSIB)destination displacement:(int16_t)displacement operand:(uint16_t)operand {
 	[self assert:instruction operation:operation size:size source:source destination:destination displacement:displacement];
 	XCTAssertEqual(instruction.operand(), operand);
 }
 
-- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size source:(Source)source destination:(Source)destination operand:(uint16_t)operand {
+- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size source:(SourceSIB)source destination:(SourceSIB)destination operand:(uint16_t)operand {
 	[self assert:instruction operation:operation size:size source:source destination:destination displacement:0 operand:operand];
 }
 
-- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size source:(Source)source destination:(Source)destination {
+- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size source:(SourceSIB)source destination:(SourceSIB)destination {
 	[self assert:instruction operation:operation size:size source:source destination:destination displacement:0];
 }
 
-- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size source:(Source)source {
+- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size source:(SourceSIB)source {
 	XCTAssertEqual(instruction.operation, operation);
 	XCTAssertEqual(instruction.operation_size(), InstructionSet::x86::Size(size));
-	XCTAssertEqual(instruction.source(), source);
+	XCTAssert(instruction.source() == source);
 }
 
-- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size destination:(Source)destination {
+- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size destination:(SourceSIB)destination {
 	[self assert:instruction operation:operation size:size];
-	XCTAssertEqual(instruction.destination(), destination);
+	XCTAssert(instruction.destination() == destination);
 }
 
-- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size operand:(uint16_t)operand destination:(Source)destination {
+- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size operand:(uint16_t)operand destination:(SourceSIB)destination {
 	[self assert:instruction operation:operation size:size];
-	XCTAssertEqual(instruction.destination(), destination);
-	XCTAssertEqual(instruction.source(), Source::Immediate);
+	XCTAssert(instruction.destination() == destination);
+	XCTAssert(instruction.source() == SourceSIB(Source::Immediate));
 	XCTAssertEqual(instruction.operand(), operand);
 	XCTAssertEqual(instruction.displacement(), 0);
 }
@@ -200,15 +202,15 @@ namespace {
 	// adc		%bh,0x3c(%bx)
 	[self assert:instructions[14] operation:Operation::INC size:2 source:Source::eBX destination:Source::eBX];
 	[self assert:instructions[15] operation:Operation::CMP size:1 operand:0x8e destination:Source::eAX];
-	[self assert:instructions[16] operation:Operation::SBB size:1 source:Source::IndBXPlusSI destination:Source::BH displacement:0x45];
-	[self assert:instructions[17] operation:Operation::ADC size:1 source:Source::BH destination:Source::IndBX displacement:0x3c];
+	[self assert:instructions[16] operation:Operation::SBB size:1 source:ScaleIndexBase(Source::eBX, Source::eSI) destination:Source::BH displacement:0x45];
+	[self assert:instructions[17] operation:Operation::ADC size:1 source:Source::BH destination:ScaleIndexBase(Source::eBX) displacement:0x3c];
 
 	// sbb		%bx,0x16(%bp,%si)
 	// xor		%sp,0x2c(%si)
 	// out		%ax,$0xc6
 	// jge		0xffffffe0
-	[self assert:instructions[18] operation:Operation::SBB size:2 source:Source::eBX destination:Source::IndBPPlusSI displacement:0x16];
-	[self assert:instructions[19] operation:Operation::XOR size:2 source:Source::eSP destination:Source::IndSI displacement:0x2c];
+	[self assert:instructions[18] operation:Operation::SBB size:2 source:Source::eBX destination:ScaleIndexBase(Source::eBP, Source::eSI) displacement:0x16];
+	[self assert:instructions[19] operation:Operation::XOR size:2 source:Source::eSP destination:ScaleIndexBase(Source::eSI) displacement:0x2c];
 	[self assert:instructions[20] operation:Operation::OUT size:2 source:Source::eAX destination:Source::DirectAddress operand:0xc6];
 	[self assert:instructions[21] operation:Operation::JNL displacement:0xffb0];
 
@@ -228,7 +230,7 @@ namespace {
 	// in		$0xc9,%ax
 	[self assert:instructions[26] operation:Operation::PUSH size:2 source:Source::eAX];
 	[self assert:instructions[27] operation:Operation::JS displacement:0x3d];
-	[self assert:instructions[28] operation:Operation::ADD size:2 source:Source::IndDI destination:Source::eBX];
+	[self assert:instructions[28] operation:Operation::ADD size:2 source:ScaleIndexBase(Source::eDI) destination:Source::eBX];
 	[self assert:instructions[29] operation:Operation::IN size:2 source:Source::DirectAddress destination:Source::eAX operand:0xc9];
 
 	// xchg		%ax,%di
@@ -274,7 +276,7 @@ namespace {
 	// adc		$0xb8c3,%ax
 	// lods		%ds:(%si),%ax
 	[self assert:instructions[46] operation:Operation::XCHG size:2 source:Source::eAX destination:Source::eDX];
-	[self assert:instructions[47] operation:Operation::CMP size:2 source:Source::eBX destination:Source::IndDI displacement:0xff90];
+	[self assert:instructions[47] operation:Operation::CMP size:2 source:Source::eBX destination:ScaleIndexBase(Source::eDI) displacement:0xff90];
 	[self assert:instructions[48] operation:Operation::ADC size:2 operand:0xb8c3 destination:Source::eAX];
 	[self assert:instructions[49] operation:Operation::LODS size:2];
 
@@ -300,10 +302,10 @@ namespace {
 	// sub		%dl,%dl
 	// negw		0x18(%bx)
 	// xchg		%dl,0x6425(%bx,%si)
-	[self assert:instructions[58] operation:Operation::AND size:2 source:Source::IndBPPlusSI destination:Source::eBP displacement:0x5b2c];
+	[self assert:instructions[58] operation:Operation::AND size:2 source:ScaleIndexBase(Source::eBP, Source::eSI) destination:Source::eBP displacement:0x5b2c];
 	[self assert:instructions[59] operation:Operation::SUB size:1 source:Source::eDX destination:Source::eDX];
-	[self assert:instructions[60] operation:Operation::NEG size:2 source:Source::IndBX destination:Source::IndBX displacement:0x18];
-	[self assert:instructions[61] operation:Operation::XCHG size:1 source:Source::IndBXPlusSI destination:Source::eDX displacement:0x6425];
+	[self assert:instructions[60] operation:Operation::NEG size:2 source:ScaleIndexBase(Source::eBX) destination:ScaleIndexBase(Source::eBX) displacement:0x18];
+	[self assert:instructions[61] operation:Operation::XCHG size:1 source:ScaleIndexBase(Source::eBX, Source::eSI) destination:Source::eDX displacement:0x6425];
 
 	// mov		$0xc3,%bh
 	[self assert:instructions[62] operation:Operation::MOV size:1 operand:0xc3 destination:Source::BH];
@@ -317,9 +319,9 @@ namespace {
 	}];
 
 	XCTAssertEqual(instructions.size(), 3);
-	[self assert:instructions[0] operation:Operation::ADC size:2 source:Source::Immediate destination:Source::IndBXPlusSI operand:0xff80];
-	[self assert:instructions[1] operation:Operation::CMP size:2 source:Source::Immediate destination:Source::IndBPPlusDI operand:0x4];
-	[self assert:instructions[2] operation:Operation::SUB size:2 source:Source::Immediate destination:Source::IndBX operand:0x9];
+	[self assert:instructions[0] operation:Operation::ADC size:2 source:Source::Immediate destination:ScaleIndexBase(Source::eBX, Source::eSI) operand:0xff80];
+	[self assert:instructions[1] operation:Operation::CMP size:2 source:Source::Immediate destination:ScaleIndexBase(Source::eBP, Source::eDI) operand:0x4];
+	[self assert:instructions[2] operation:Operation::SUB size:2 source:Source::Immediate destination:ScaleIndexBase(Source::eBX) operand:0x9];
 }
 
 - (void)testFar {

From 1934c7faa23cc9ba57ba68db2f355e2c7fcabc04 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 21 Feb 2022 12:21:57 -0500
Subject: [PATCH 017/104] Switch `Decoder` into a template.

---
 InstructionSets/x86/Decoder.cpp               | 36 ++++++++++---------
 InstructionSets/x86/Decoder.hpp               |  6 +---
 .../Mac/Clock SignalTests/x86DecoderTests.mm  |  4 +--
 3 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 45dd5ef71..d4c910903 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -14,10 +14,8 @@
 
 using namespace InstructionSet::x86;
 
-// Only 8086 is suppoted for now.
-Decoder::Decoder(Model model) : model_(model) {}
-
-std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *source, size_t length) {
+template <Model model>
+std::pair<int, InstructionSet::x86::Instruction> Decoder<model>::decode(const uint8_t *source, size_t length) {
 	const uint8_t *const end = source + length;
 
 	// MARK: - Prefixes (if present) and the opcode.
@@ -115,7 +113,7 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 			// The 286 onwards have a further set of instructions
 			// prefixed with $0f.
 			case 0x0f:
-				if(model_ < Model::i80286) undefined();
+				if constexpr (model < Model::i80286) undefined();
 				phase_ = Phase::InstructionPageF;
 			break;
 
@@ -163,35 +161,35 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 #undef RegisterBlock
 
 			case 0x60:
-				if(model_ < Model::i80186) undefined();
+				if constexpr (model < Model::i80186) undefined();
 				Complete(PUSHA, None, None, 2);
 			break;
 			case 0x61:
-				if(model_ < Model::i80186) undefined();
+				if constexpr (model < Model::i80186) undefined();
 				Complete(POPA, None, None, 2);
 			break;
 			case 0x62:
-				if(model_ < Model::i80186) undefined();
+				if constexpr (model < Model::i80186) undefined();
 				MemRegReg(BOUND, Reg_MemReg, 2);
 			break;
 			case 0x63:
-				if(model_ < Model::i80286) undefined();
+				if constexpr (model < Model::i80286) undefined();
 				MemRegReg(ARPL, MemReg_Reg, 2);
 			break;
 			case 0x6c:	// INSB
-				if(model_ < Model::i80186) undefined();
+				if constexpr (model < Model::i80186) undefined();
 				Complete(INS, None, None, 1);
 			break;
 			case 0x6d:	// INSW
-				if(model_ < Model::i80186) undefined();
+				if constexpr (model < Model::i80186) undefined();
 				Complete(INS, None, None, 2);
 			break;
 			case 0x6e:	// OUTSB
-				if(model_ < Model::i80186) undefined();
+				if constexpr (model < Model::i80186) undefined();
 				Complete(OUTS, None, None, 1);
 			break;
 			case 0x6f:	// OUTSW
-				if(model_ < Model::i80186) undefined();
+				if constexpr (model < Model::i80186) undefined();
 				Complete(OUTS, None, None, 2);
 			break;
 
@@ -291,11 +289,11 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 			case 0xc7: MemRegReg(MOV, MemRegMOV, 2);	break;
 
 			case 0xc8:
-				if(model_ < Model::i80186) undefined();
+				if constexpr (model < Model::i80186) undefined();
 				Displacement16Operand8(ENTER);
 			break;
 			case 0xc9:
-				if(model_ < Model::i80186) undefined();
+				if constexpr (model < Model::i80186) undefined();
 				Complete(LEAVE, None, None, 0);
 			break;
 
@@ -392,7 +390,7 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 			case 0x02:	MemRegReg(LAR, Reg_MemReg, 2);				break;
 			case 0x03:	MemRegReg(LSL, Reg_MemReg, 2);				break;
 			case 0x05:
-				if(model_ != Model::i80286) undefined();
+				if constexpr (model != Model::i80286) undefined();
 				Complete(LOADALL, None, None, 0);
 			break;
 			case 0x06:	Complete(CLTS, None, None, 1);				break;
@@ -711,3 +709,9 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder::decode(const uint8_t *
 	// i.e. not done yet.
 	return std::make_pair(0, Instruction());
 }
+
+// Ensure all possible decoders are built.
+template class InstructionSet::x86::Decoder<InstructionSet::x86::Model::i8086>;
+template class InstructionSet::x86::Decoder<InstructionSet::x86::Model::i80186>;
+template class InstructionSet::x86::Decoder<InstructionSet::x86::Model::i80286>;
+template class InstructionSet::x86::Decoder<InstructionSet::x86::Model::i80386>;
diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index e2c68a907..4213c4fd5 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -29,10 +29,8 @@ enum class Model {
 
 	This is an experimental implementation; it has not yet undergone significant testing.
 */
-class Decoder {
+template <Model> class Decoder {
 	public:
-		Decoder(Model model);
-
 		/*!
 			@returns an @c Instruction plus a size; a positive size to indicate successful decoding; a
 				negative size specifies the [negatived] number of further bytes the caller should ideally
@@ -43,8 +41,6 @@ class Decoder {
 		std::pair<int, Instruction> decode(const uint8_t *source, size_t length);
 
 	private:
-		const Model model_;
-
 		enum class Phase {
 			/// Captures all prefixes and continues until an instruction byte is encountered.
 			Instruction,
diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index 51617d7b4..9c2fd60d2 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -105,7 +105,7 @@ namespace {
 
 - (void)decode:(const std::initializer_list<uint8_t> &)stream {
 	// Decode by offering up all data at once.
-	InstructionSet::x86::Decoder decoder(InstructionSet::x86::Model::i8086);
+	InstructionSet::x86::Decoder<InstructionSet::x86::Model::i8086> decoder;
 	instructions.clear();
 	const uint8_t *byte = stream.begin();
 	while(byte != stream.end()) {
@@ -117,7 +117,7 @@ namespace {
 
 	// Grab a byte-at-a-time decoding and check that it matches the previous.
 	{
-		InstructionSet::x86::Decoder decoder(InstructionSet::x86::Model::i8086);
+		InstructionSet::x86::Decoder<InstructionSet::x86::Model::i8086> decoder;
 
 		auto previous_instruction = instructions.begin();
 		for(auto item: stream) {

From 76814588b8ae50f1648449725a353445ab9ce2c7 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 21 Feb 2022 12:36:03 -0500
Subject: [PATCH 018/104] Template `Instruction` on its content size.

---
 InstructionSets/x86/Decoder.cpp                     | 10 +++++-----
 InstructionSets/x86/Decoder.hpp                     |  6 ++++--
 InstructionSets/x86/Instruction.hpp                 |  2 +-
 OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm |  2 +-
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index d4c910903..871040789 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -15,7 +15,7 @@
 using namespace InstructionSet::x86;
 
 template <Model model>
-std::pair<int, InstructionSet::x86::Instruction> Decoder<model>::decode(const uint8_t *source, size_t length) {
+std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(const uint8_t *source, size_t length) {
 	const uint8_t *const end = source + length;
 
 	// MARK: - Prefixes (if present) and the opcode.
@@ -80,7 +80,7 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder<model>::decode(const ui
 	operand_size_ = 1;								\
 
 #define undefined()	{												\
-	const auto result = std::make_pair(consumed_, Instruction());	\
+	const auto result = std::make_pair(consumed_, InstructionT());	\
 	reset_parsing();												\
 	return result;													\
 }
@@ -681,7 +681,7 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder<model>::decode(const ui
 			}
 		} else {
 			// Provide a genuine measure of further bytes required.
-			return std::make_pair(-(outstanding_bytes - bytes_to_consume), Instruction());
+			return std::make_pair(-(outstanding_bytes - bytes_to_consume), InstructionT());
 		}
 	}
 
@@ -690,7 +690,7 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder<model>::decode(const ui
 	if(phase_ == Phase::ReadyToPost) {
 		const auto result = std::make_pair(
 			consumed_,
-			Instruction(
+			InstructionT(
 				operation_,
 				source_,
 				destination_,
@@ -707,7 +707,7 @@ std::pair<int, InstructionSet::x86::Instruction> Decoder<model>::decode(const ui
 	}
 
 	// i.e. not done yet.
-	return std::make_pair(0, Instruction());
+	return std::make_pair(0, InstructionT());
 }
 
 // Ensure all possible decoders are built.
diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index 4213c4fd5..58c03e648 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -29,8 +29,10 @@ enum class Model {
 
 	This is an experimental implementation; it has not yet undergone significant testing.
 */
-template <Model> class Decoder {
+template <Model model> class Decoder {
 	public:
+		using InstructionT = Instruction<model >= Model::i80386>;
+
 		/*!
 			@returns an @c Instruction plus a size; a positive size to indicate successful decoding; a
 				negative size specifies the [negatived] number of further bytes the caller should ideally
@@ -38,7 +40,7 @@ template <Model> class Decoder {
 				instruction in response, and the decoder may still not be able to complete decoding
 				even if given that number of bytes.
 		*/
-		std::pair<int, Instruction> decode(const uint8_t *source, size_t length);
+		std::pair<int, InstructionT> decode(const uint8_t *source, size_t length);
 
 	private:
 		enum class Phase {
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 897b0a770..ffb149a74 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -415,7 +415,7 @@ struct SourceSIB {
 	ScaleIndexBase sib;
 };
 
-class Instruction {
+template<bool is_32bit> class Instruction {
 	public:
 		Operation operation = Operation::Invalid;
 
diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index 9c2fd60d2..1177faae9 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -14,7 +14,7 @@
 
 namespace {
 	using Operation = InstructionSet::x86::Operation;
-	using Instruction = InstructionSet::x86::Instruction;
+	using Instruction = InstructionSet::x86::Instruction<false>;
 	using Source = InstructionSet::x86::Source;
 	using Size = InstructionSet::x86::Size;
 	using ScaleIndexBase = InstructionSet::x86::ScaleIndexBase;

From 159e869fe63996a1bb1ad1996b6f8fc6757fed74 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 21 Feb 2022 15:33:08 -0500
Subject: [PATCH 019/104] Justifies the templatisation.

---
 InstructionSets/x86/Instruction.hpp | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index ffb149a74..b800b0b57 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -10,6 +10,7 @@
 #define InstructionSets_x86_Instruction_h
 
 #include <cstdint>
+#include <type_traits>
 
 namespace InstructionSet {
 namespace x86 {
@@ -428,6 +429,9 @@ template<bool is_32bit> class Instruction {
 				sib_ == rhs.sib_;
 		}
 
+		using DisplacementT = typename std::conditional<is_32bit, int32_t, int16_t>::type;
+		using ImmediateT = typename std::conditional<is_32bit, uint32_t, uint16_t>::type;
+
 	private:
 		// b0, b1: a Repetition;
 		// b2+: operation size.
@@ -440,13 +444,18 @@ template<bool is_32bit> class Instruction {
 		uint16_t sources_ = 0;
 
 		// Unpackable fields.
-		int16_t displacement_ = 0;
-		uint16_t operand_ = 0;		// ... or used to store a segment for far operations.
+		DisplacementT displacement_ = 0;
+		ImmediateT operand_ = 0;		// ... or used to store a segment for far operations.
 
 		// Fields yet to be properly incorporated...
 		ScaleIndexBase sib_;
 
 	public:
+		/// @returns The number of bytes used for meaningful content within this class. A receiver must use at least @c sizeof(Instruction) bytes
+		/// to store an @c Instruction but is permitted to reuse the trailing sizeof(Instruction) - packing_size() for any purpose it likes. Teleologically,
+		/// this allows a denser packing of instructions into containers.
+		size_t packing_size() const		{	return sizeof(*this);	/* TODO */	}
+
 		SourceSIB  source() const		{	return SourceSIB(Source(sources_ & 0x3f), sib_);			}
 		SourceSIB destination() const	{	return SourceSIB(Source((sources_ >> 6) & 0x3f), sib_);		}
 		bool lock() const				{	return sources_ & 0x8000;					}
@@ -455,11 +464,12 @@ template<bool is_32bit> class Instruction {
 		Repetition repetition() const	{	return Repetition(repetition_size_ & 3);	}
 		Size operation_size() const 	{	return Size(repetition_size_ >> 2);			}
 
+		// TODO: confirm whether far call for some reason makes these 32-bit in protected mode.
 		uint16_t segment() const		{	return uint16_t(operand_);					}
 		uint16_t offset() const			{	return uint16_t(displacement_);				}
 
-		int16_t displacement() const	{	return displacement_;						}
-		uint16_t operand() const		{	return operand_;							}
+		DisplacementT displacement() const	{	return displacement_;						}
+		ImmediateT operand() const			{	return operand_;							}
 
 		Instruction() noexcept {}
 		Instruction(
@@ -471,8 +481,8 @@ template<bool is_32bit> class Instruction {
 			Source segment_override,
 			Repetition repetition,
 			Size operation_size,
-			int16_t displacement,
-			uint16_t operand) noexcept :
+			DisplacementT displacement,
+			ImmediateT operand) noexcept :
 				operation(operation),
 				repetition_size_(uint8_t((int(operation_size) << 2) | int(repetition))),
 				sources_(uint16_t(

From b968a662d301b6713a6a23133322600487178040 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 21 Feb 2022 15:48:58 -0500
Subject: [PATCH 020/104] Dump notes on intended Instruction layout, add memory
 size flag.

---
 InstructionSets/x86/Decoder.cpp     |  1 +
 InstructionSets/x86/Decoder.hpp     |  2 ++
 InstructionSets/x86/Instruction.hpp | 35 ++++++++++++++++++++++++++++-
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 871040789..fdfc76d19 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -696,6 +696,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				destination_,
 				sib_,
 				lock_,
+				memory_size_,
 				segment_override_,
 				repetition_,
 				Size(operation_size_),
diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index 58c03e648..d367cf033 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -154,6 +154,7 @@ template <Model model> class Decoder {
 		// Prefix capture fields.
 		Repetition repetition_ = Repetition::None;
 		bool lock_ = false;
+		bool memory_size_ = false;
 		Source segment_override_ = Source::None;
 
 		/// Resets size capture and all fields with default values.
@@ -162,6 +163,7 @@ template <Model model> class Decoder {
 			displacement_size_ = operand_size_ = 0;
 			displacement_ = operand_ = 0;
 			lock_ = false;
+			memory_size_ = false;
 			segment_override_ = Source::None;
 			repetition_ = Repetition::None;
 			phase_ = Phase::Instruction;
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index b800b0b57..46997badb 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -432,6 +432,35 @@ template<bool is_32bit> class Instruction {
 		using DisplacementT = typename std::conditional<is_32bit, int32_t, int16_t>::type;
 		using ImmediateT = typename std::conditional<is_32bit, uint32_t, uint16_t>::type;
 
+		/* Note to self — current thinking is:
+
+			First 32bits:
+				5 bits source;
+				5 bits dest;
+				5 bits partial SIB, combined with three low bits of source or dest if indirect;
+				8 bits operation;
+				4 bits original instruction size;
+				2 bits data size;
+				3 bits extension flags.
+
+			Extensions (16 or 32 bit, depending on templated size):
+				1) repetition + segment override + lock + memory size toggle (= 7 bits);
+				2) displacement;
+				3) immediate operand.
+
+			Presence or absence of extensions is dictated by the extension flags.
+			Therefore an instruction's footprint is:
+				* 4–8 bytes (16-bit processors);
+				* 4–12 bytes (32-bit processors).
+
+			I'll then implement a collection suited to packing these things based on their
+			packing_size(), and later iterating them.
+
+			To verify: do the 8086 and 80286 limit instructions to 15 bytes as later members
+			of the family do? If not then consider original instruction size = 0 to imply an
+			extension of one word prior to the other extensions.
+		*/
+
 	private:
 		// b0, b1: a Repetition;
 		// b2+: operation size.
@@ -449,6 +478,7 @@ template<bool is_32bit> class Instruction {
 
 		// Fields yet to be properly incorporated...
 		ScaleIndexBase sib_;
+		bool memory_size_ = false;
 
 	public:
 		/// @returns The number of bytes used for meaningful content within this class. A receiver must use at least @c sizeof(Instruction) bytes
@@ -459,6 +489,7 @@ template<bool is_32bit> class Instruction {
 		SourceSIB  source() const		{	return SourceSIB(Source(sources_ & 0x3f), sib_);			}
 		SourceSIB destination() const	{	return SourceSIB(Source((sources_ >> 6) & 0x3f), sib_);		}
 		bool lock() const				{	return sources_ & 0x8000;					}
+		bool memory_size() const 		{	return memory_size_;						}
 		Source segment_override() const	{	return Source((sources_ >> 12) & 7);		}
 
 		Repetition repetition() const	{	return Repetition(repetition_size_ & 3);	}
@@ -478,6 +509,7 @@ template<bool is_32bit> class Instruction {
 			Source destination,
 			ScaleIndexBase sib,
 			bool lock,
+			bool memory_size,
 			Source segment_override,
 			Repetition repetition,
 			Size operation_size,
@@ -493,7 +525,8 @@ template<bool is_32bit> class Instruction {
 				)),
 				displacement_(displacement),
 				operand_(operand),
-				sib_(sib) {}
+				sib_(sib),
+				memory_size_(memory_size) {}
 };
 
 // TODO: repack.

From 229af0380c933c66cbddd1f93b4cadc1fdef7161 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 21 Feb 2022 15:52:16 -0500
Subject: [PATCH 021/104] This is normatively called the address size.

---
 InstructionSets/x86/Decoder.cpp     | 2 +-
 InstructionSets/x86/Decoder.hpp     | 4 ++--
 InstructionSets/x86/Instruction.hpp | 8 ++++----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index fdfc76d19..f76ac9343 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -696,7 +696,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				destination_,
 				sib_,
 				lock_,
-				memory_size_,
+				address_size_,
 				segment_override_,
 				repetition_,
 				Size(operation_size_),
diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index d367cf033..6c52e698b 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -154,7 +154,7 @@ template <Model model> class Decoder {
 		// Prefix capture fields.
 		Repetition repetition_ = Repetition::None;
 		bool lock_ = false;
-		bool memory_size_ = false;
+		bool address_size_ = false;
 		Source segment_override_ = Source::None;
 
 		/// Resets size capture and all fields with default values.
@@ -163,7 +163,7 @@ template <Model model> class Decoder {
 			displacement_size_ = operand_size_ = 0;
 			displacement_ = operand_ = 0;
 			lock_ = false;
-			memory_size_ = false;
+			address_size_ = false;
 			segment_override_ = Source::None;
 			repetition_ = Repetition::None;
 			phase_ = Phase::Instruction;
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 46997badb..a94170acc 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -478,7 +478,7 @@ template<bool is_32bit> class Instruction {
 
 		// Fields yet to be properly incorporated...
 		ScaleIndexBase sib_;
-		bool memory_size_ = false;
+		bool address_size_ = false;
 
 	public:
 		/// @returns The number of bytes used for meaningful content within this class. A receiver must use at least @c sizeof(Instruction) bytes
@@ -489,7 +489,7 @@ template<bool is_32bit> class Instruction {
 		SourceSIB  source() const		{	return SourceSIB(Source(sources_ & 0x3f), sib_);			}
 		SourceSIB destination() const	{	return SourceSIB(Source((sources_ >> 6) & 0x3f), sib_);		}
 		bool lock() const				{	return sources_ & 0x8000;					}
-		bool memory_size() const 		{	return memory_size_;						}
+		bool address_size() const 		{	return address_size_;						}
 		Source segment_override() const	{	return Source((sources_ >> 12) & 7);		}
 
 		Repetition repetition() const	{	return Repetition(repetition_size_ & 3);	}
@@ -509,7 +509,7 @@ template<bool is_32bit> class Instruction {
 			Source destination,
 			ScaleIndexBase sib,
 			bool lock,
-			bool memory_size,
+			bool address_size,
 			Source segment_override,
 			Repetition repetition,
 			Size operation_size,
@@ -526,7 +526,7 @@ template<bool is_32bit> class Instruction {
 				displacement_(displacement),
 				operand_(operand),
 				sib_(sib),
-				memory_size_(memory_size) {}
+				address_size_(address_size) {}
 };
 
 // TODO: repack.

From b6183e86ebd88903fc97b1c780383eca0430431b Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 21 Feb 2022 16:06:02 -0500
Subject: [PATCH 022/104] Clarifies model tests by macro; adds the address size
 toggle.

---
 InstructionSets/x86/Decoder.cpp | 35 ++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index f76ac9343..3c05f6e69 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -85,6 +85,9 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 	return result;													\
 }
 
+#define Requires(x)		if constexpr (model != Model::x) undefined();
+#define RequiresMin(x)	if constexpr (model < Model::x) undefined();
+
 	while(phase_ == Phase::Instruction && source != end) {
 		// Retain the instruction byte, in case additional decoding is deferred
 		// to the ModRegRM byte.
@@ -113,7 +116,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			// The 286 onwards have a further set of instructions
 			// prefixed with $0f.
 			case 0x0f:
-				if constexpr (model < Model::i80286) undefined();
+				RequiresMin(i80286);
 				phase_ = Phase::InstructionPageF;
 			break;
 
@@ -161,35 +164,39 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 #undef RegisterBlock
 
 			case 0x60:
-				if constexpr (model < Model::i80186) undefined();
+				RequiresMin(i80186);
 				Complete(PUSHA, None, None, 2);
 			break;
 			case 0x61:
-				if constexpr (model < Model::i80186) undefined();
+				RequiresMin(i80186);
 				Complete(POPA, None, None, 2);
 			break;
 			case 0x62:
-				if constexpr (model < Model::i80186) undefined();
+				RequiresMin(i80186);
 				MemRegReg(BOUND, Reg_MemReg, 2);
 			break;
 			case 0x63:
-				if constexpr (model < Model::i80286) undefined();
+				RequiresMin(i80286);
 				MemRegReg(ARPL, MemReg_Reg, 2);
 			break;
+			case 0x67:
+				RequiresMin(i80386);
+				address_size_ = true;
+			break;
 			case 0x6c:	// INSB
-				if constexpr (model < Model::i80186) undefined();
+				RequiresMin(i80186);
 				Complete(INS, None, None, 1);
 			break;
 			case 0x6d:	// INSW
-				if constexpr (model < Model::i80186) undefined();
+				RequiresMin(i80186);
 				Complete(INS, None, None, 2);
 			break;
 			case 0x6e:	// OUTSB
-				if constexpr (model < Model::i80186) undefined();
+				RequiresMin(i80186);
 				Complete(OUTS, None, None, 1);
 			break;
 			case 0x6f:	// OUTSW
-				if constexpr (model < Model::i80186) undefined();
+				RequiresMin(i80186);
 				Complete(OUTS, None, None, 2);
 			break;
 
@@ -289,11 +296,11 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0xc7: MemRegReg(MOV, MemRegMOV, 2);	break;
 
 			case 0xc8:
-				if constexpr (model < Model::i80186) undefined();
+				RequiresMin(i80186);
 				Displacement16Operand8(ENTER);
 			break;
 			case 0xc9:
-				if constexpr (model < Model::i80186) undefined();
+				RequiresMin(i80186);
 				Complete(LEAVE, None, None, 0);
 			break;
 
@@ -390,13 +397,15 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0x02:	MemRegReg(LAR, Reg_MemReg, 2);				break;
 			case 0x03:	MemRegReg(LSL, Reg_MemReg, 2);				break;
 			case 0x05:
-				if constexpr (model != Model::i80286) undefined();
+				Requires(i80286);
 				Complete(LOADALL, None, None, 0);
 			break;
 			case 0x06:	Complete(CLTS, None, None, 1);				break;
 		}
 	}
 
+#undef Requires
+#undef RequiresMin
 #undef Far
 #undef Jump
 #undef MemRegReg
@@ -632,6 +641,8 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 		phase_ = (displacement_size_ + operand_size_) ? Phase::DisplacementOrOperand : Phase::ReadyToPost;
 	}
 
+#undef undefined
+
 	// MARK: - ScaleIndexBase
 
 	if(phase_ == Phase::ScaleIndexBase && source != end) {

From ecb20cc29b9cb5c420846c56a88d75d6304ec6f3 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 21 Feb 2022 16:09:03 -0500
Subject: [PATCH 023/104] Improve tabbing.

---
 InstructionSets/x86/Decoder.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 3c05f6e69..45879036f 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -325,8 +325,8 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				operation_size_ = 1 + (instr_ & 1);
 				source_ = Source::eCX;
 			break;
-			case 0xd4: RegData(AAM, eAX, 1);				break;
-			case 0xd5: RegData(AAD, eAX, 1);				break;
+			case 0xd4: RegData(AAM, eAX, 1);			break;
+			case 0xd5: RegData(AAD, eAX, 1);			break;
 
 			case 0xd7: Complete(XLAT, None, None, 1);	break;
 
@@ -355,7 +355,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0xeb: Jump(JMPN);				break;
 
 			case 0xec: Complete(IN, eDX, eAX, 1);	break;
-			case 0xed: Complete(IN, eDX, eAX, 1);		break;
+			case 0xed: Complete(IN, eDX, eAX, 1);	break;
 			case 0xee: Complete(OUT, eAX, eDX, 1);	break;
 			case 0xef: Complete(OUT, eAX, eDX, 2);	break;
 

From 95976d8b5820c1b1d1fd53ed2e6d433545c7cab3 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 21 Feb 2022 16:33:58 -0500
Subject: [PATCH 024/104] Add missing #include.

---
 InstructionSets/x86/Instruction.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index a94170acc..4ca1d455f 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -9,6 +9,7 @@
 #ifndef InstructionSets_x86_Instruction_h
 #define InstructionSets_x86_Instruction_h
 
+#include <cstddef>
 #include <cstdint>
 #include <type_traits>
 

From dc37b692cf0470b1829af326dd696bf9af4fe57f Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Wed, 23 Feb 2022 04:33:28 -0500
Subject: [PATCH 025/104] Switch to templated test function.

---
 .../Mac/Clock SignalTests/x86DecoderTests.mm  | 268 ++++++++----------
 1 file changed, 123 insertions(+), 145 deletions(-)

diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index 1177faae9..5a0930bf6 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -9,93 +9,55 @@
 #import <XCTest/XCTest.h>
 
 #include <initializer_list>
+#include <optional>
 #include <vector>
 #include "../../../InstructionSets/x86/Decoder.hpp"
 
 namespace {
-	using Operation = InstructionSet::x86::Operation;
-	using Instruction = InstructionSet::x86::Instruction<false>;
-	using Source = InstructionSet::x86::Source;
-	using Size = InstructionSet::x86::Size;
-	using ScaleIndexBase = InstructionSet::x86::ScaleIndexBase;
-	using SourceSIB = InstructionSet::x86::SourceSIB;
-}
 
-@interface x86DecoderTests : XCTestCase
-@end
-
-/*!
-	Tests 8086 decoding by throwing a bunch of randomly-generated
-	word streams and checking that the result matches what I got from a
-	disassembler elsewhere.
-*/
-@implementation x86DecoderTests {
-	std::vector<Instruction> instructions;
-}
+using Operation = InstructionSet::x86::Operation;
+using Instruction = InstructionSet::x86::Instruction<false>;
+using Model = InstructionSet::x86::Model;
+using Source = InstructionSet::x86::Source;
+using Size = InstructionSet::x86::Size;
+using ScaleIndexBase = InstructionSet::x86::ScaleIndexBase;
+using SourceSIB = InstructionSet::x86::SourceSIB;
 
 // MARK: - Specific instruction asserts.
 
-- (void)assert:(Instruction &)instruction operation:(Operation)operation {
+template <typename InstructionT> void test(const InstructionT &instruction, int size, Operation operation) {
+	XCTAssertEqual(instruction.operation_size(), InstructionSet::x86::Size(size));
 	XCTAssertEqual(instruction.operation, operation);
 }
 
-- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size {
-	XCTAssertEqual(instruction.operation, operation);
-	XCTAssertEqual(instruction.operation_size(), InstructionSet::x86::Size(size));
-}
+template <typename InstructionT> void test(
+	const InstructionT &instruction,
+	int size,
+	Operation operation,
+	SourceSIB source,
+	std::optional<SourceSIB> destination = std::nullopt,
+	std::optional<typename InstructionT::ImmediateT> operand = std::nullopt,
+	std::optional<typename InstructionT::DisplacementT> displacement = std::nullopt) {
 
-- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size source:(SourceSIB)source destination:(SourceSIB)destination displacement:(int16_t)displacement {
-	XCTAssertEqual(instruction.operation, operation);
 	XCTAssertEqual(instruction.operation_size(), InstructionSet::x86::Size(size));
+	XCTAssertEqual(instruction.operation, operation);
 	XCTAssert(instruction.source() == source);
-	XCTAssert(instruction.destination() == destination);
-	XCTAssertEqual(instruction.displacement(), displacement);
+	if(destination) XCTAssert(instruction.destination() == *destination);
+	if(operand)	XCTAssertEqual(instruction.operand(), *operand);
+	if(displacement) XCTAssertEqual(instruction.displacement(), *displacement);
 }
 
-- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size source:(SourceSIB)source destination:(SourceSIB)destination displacement:(int16_t)displacement operand:(uint16_t)operand {
-	[self assert:instruction operation:operation size:size source:source destination:destination displacement:displacement];
-	XCTAssertEqual(instruction.operand(), operand);
-}
-
-- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size source:(SourceSIB)source destination:(SourceSIB)destination operand:(uint16_t)operand {
-	[self assert:instruction operation:operation size:size source:source destination:destination displacement:0 operand:operand];
-}
-
-- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size source:(SourceSIB)source destination:(SourceSIB)destination {
-	[self assert:instruction operation:operation size:size source:source destination:destination displacement:0];
-}
-
-- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size source:(SourceSIB)source {
+template <typename InstructionT> void test(
+	const InstructionT &instruction,
+	Operation operation,
+	std::optional<typename InstructionT::ImmediateT> operand = std::nullopt,
+	std::optional<typename InstructionT::DisplacementT> displacement = std::nullopt) {
 	XCTAssertEqual(instruction.operation, operation);
-	XCTAssertEqual(instruction.operation_size(), InstructionSet::x86::Size(size));
-	XCTAssert(instruction.source() == source);
+	if(operand)	XCTAssertEqual(instruction.operand(), *operand);
+	if(displacement) XCTAssertEqual(instruction.displacement(), *displacement);
 }
 
-- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size destination:(SourceSIB)destination {
-	[self assert:instruction operation:operation size:size];
-	XCTAssert(instruction.destination() == destination);
-}
-
-- (void)assert:(Instruction &)instruction operation:(Operation)operation size:(int)size operand:(uint16_t)operand destination:(SourceSIB)destination {
-	[self assert:instruction operation:operation size:size];
-	XCTAssert(instruction.destination() == destination);
-	XCTAssert(instruction.source() == SourceSIB(Source::Immediate));
-	XCTAssertEqual(instruction.operand(), operand);
-	XCTAssertEqual(instruction.displacement(), 0);
-}
-
-- (void)assert:(Instruction &)instruction operation:(Operation)operation displacement:(int16_t)displacement {
-	XCTAssertEqual(instruction.operation, operation);
-	XCTAssertEqual(instruction.displacement(), displacement);
-}
-
-- (void)assert:(Instruction &)instruction operation:(Operation)operation operand:(uint16_t)operand {
-	XCTAssertEqual(instruction.operation, operation);
-	XCTAssertEqual(instruction.operand(), operand);
-	XCTAssertEqual(instruction.displacement(), 0);
-}
-
-- (void)assert:(Instruction &)instruction operation:(Operation)operation segment:(uint16_t)segment offset:(uint16_t)offset {
+template <typename InstructionT> void test_far(const InstructionT &instruction, Operation operation, uint16_t segment, uint16_t offset) {
 	XCTAssertEqual(instruction.operation, operation);
 	XCTAssertEqual(instruction.segment(), segment);
 	XCTAssertEqual(instruction.offset(), offset);
@@ -103,9 +65,10 @@ namespace {
 
 // MARK: - Decoder
 
-- (void)decode:(const std::initializer_list<uint8_t> &)stream {
+template <Model model> std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(const std::initializer_list<uint8_t> &stream) {
 	// Decode by offering up all data at once.
-	InstructionSet::x86::Decoder<InstructionSet::x86::Model::i8086> decoder;
+	std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> instructions;
+	InstructionSet::x86::Decoder<model> decoder;
 	instructions.clear();
 	const uint8_t *byte = stream.begin();
 	while(byte != stream.end()) {
@@ -117,7 +80,7 @@ namespace {
 
 	// Grab a byte-at-a-time decoding and check that it matches the previous.
 	{
-		InstructionSet::x86::Decoder<InstructionSet::x86::Model::i8086> decoder;
+		InstructionSet::x86::Decoder<model> decoder;
 
 		auto previous_instruction = instructions.begin();
 		for(auto item: stream) {
@@ -128,9 +91,21 @@ namespace {
 			}
 		}
 	}
+
+	return instructions;
 }
 
-// MARK: - Tests
+}
+
+@interface x86DecoderTests : XCTestCase
+@end
+
+/*!
+	Tests 8086 decoding by throwing a bunch of randomly-generated
+	word streams and checking that the result matches what I got from a
+	disassembler elsewhere.
+*/
+@implementation x86DecoderTests
 
 - (void)testSequence1 {
 	// Sequences the Online Disassembler believes to exist but The 8086 Book does not:
@@ -141,7 +116,7 @@ namespace {
 	// 0x6c			insb (%dx), %es:(%di)
 	// 0xc9			leave
 	//
-	[self decode:{
+	const auto instructions = decode<Model::i8086>({
 		0x2d, 0x77, 0xea, 0x72, 0xfc, 0x4b, 0xb5, 0x28, 0xc3, 0xca, 0x26, 0x48, /* 0x65, 0x6d, */ 0x7b, 0x9f,
 		0xc2, 0x65, 0x42, 0x4e, 0xef, 0x70, 0x20, 0x94, 0xc4, 0xd4, 0x93, 0x43, 0x3c, 0x8e, /* 0x6a, 0x65, */
 		0x1a, 0x78, 0x45, 0x10, 0x7f, 0x3c, 0x19, 0x5a, 0x16, 0x31, 0x64, 0x2c, 0xe7, 0xc6, 0x7d, 0xb0,
@@ -150,7 +125,7 @@ namespace {
 		0xbd, 0xa1, 0x12, 0xc5, 0x29, /* 0xc9, */ 0x9e, 0xd8, 0xf3, 0xcf, 0x92, 0x39, 0x5d, 0x90, 0x15, 0xc3,
 		0xb8, 0xad, 0xe8, 0xc8, 0x16, 0x4a, 0xb0, 0x9e, 0xf9, 0xbf, 0x56, 0xea, 0x4e, 0xfd, 0xe4, 0x5a,
 		0x23, 0xaa, 0x2c, 0x5b, 0x2a, 0xd2, 0xf7, 0x5f, 0x18, 0x86, 0x90, 0x25, 0x64, 0xb7, 0xc3
-	}];
+	});
 
 	// 63 instructions are expected.
 	XCTAssertEqual(instructions.size(), 63);
@@ -159,29 +134,29 @@ namespace {
 	// jb		0x00000001
 	// dec		%bx
 	// mov		$0x28,%ch
-	[self assert:instructions[0] operation:Operation::SUB size:2 operand:0xea77 destination:Source::eAX];
-	[self assert:instructions[1] operation:Operation::JB displacement:0xfffc];
-	[self assert:instructions[2] operation:Operation::DEC size:2 source:Source::eBX destination:Source::eBX];
-	[self assert:instructions[3] operation:Operation::MOV size:1 operand:0x28 destination:Source::CH];
+	test(instructions[0], 2, Operation::SUB, Source::Immediate, Source::eAX, 0xea77);
+	test(instructions[1], Operation::JB, std::nullopt, 0xfffc);
+	test(instructions[2], 2, Operation::DEC, Source::eBX, Source::eBX);
+	test(instructions[3], 1, Operation::MOV, Source::Immediate, Source::CH, 0x28);
 
 	// ret
 	// lret		$0x4826
 	// [[ omitted: gs insw (%dx),%es:(%di) ]]
 	// jnp		0xffffffaf
 	// ret		$0x4265
-	[self assert:instructions[4] operation:Operation::RETN];
-	[self assert:instructions[5] operation:Operation::RETF operand:0x4826];
-	[self assert:instructions[6] operation:Operation::JNP displacement:0xff9f];
-	[self assert:instructions[7] operation:Operation::RETN operand:0x4265];
+	test(instructions[4], Operation::RETN);
+	test(instructions[5], Operation::RETF, 0x4826);
+	test(instructions[6], Operation::JNP, std::nullopt, 0xff9f);
+	test(instructions[7], Operation::RETN, 0x4265);
 
 	// dec		%si
 	// out		%ax,(%dx)
 	// jo		0x00000037
 	// xchg		%ax,%sp
-	[self assert:instructions[8] operation:Operation::DEC size:2 source:Source::eSI destination:Source::eSI];
-	[self assert:instructions[9] operation:Operation::OUT size:2 source:Source::eAX destination:Source::eDX];
-	[self assert:instructions[10] operation:Operation::JO displacement:0x20];
-	[self assert:instructions[11] operation:Operation::XCHG size:2 source:Source::eAX destination:Source::eSP];
+	test(instructions[8], 2, Operation::DEC, Source::eSI, Source::eSI);
+	test(instructions[9], 2, Operation::OUT, Source::eAX, Source::eDX);
+	test(instructions[10], Operation::JO, std::nullopt, 0x20);
+	test(instructions[11], 2, Operation::XCHG, Source::eAX, Source::eSP);
 
 	// ODA has:
 	// 	c4		(bad)
@@ -192,145 +167,148 @@ namespace {
 	//
 	//	c4 d4	(bad)
 	//	93		XCHG AX, BX
-	[self assert:instructions[12] operation:Operation::Invalid];
-	[self assert:instructions[13] operation:Operation::XCHG size:2 source:Source::eAX destination:Source::eBX];
+	test(instructions[12], Operation::Invalid);
+	test(instructions[13], 2, Operation::XCHG, Source::eAX, Source::eBX);
 
 	// inc		%bx
 	// cmp		$0x8e,%al
 	// [[ omitted: push		$0x65 ]]
 	// sbb		0x45(%bx,%si),%bh
 	// adc		%bh,0x3c(%bx)
-	[self assert:instructions[14] operation:Operation::INC size:2 source:Source::eBX destination:Source::eBX];
-	[self assert:instructions[15] operation:Operation::CMP size:1 operand:0x8e destination:Source::eAX];
-	[self assert:instructions[16] operation:Operation::SBB size:1 source:ScaleIndexBase(Source::eBX, Source::eSI) destination:Source::BH displacement:0x45];
-	[self assert:instructions[17] operation:Operation::ADC size:1 source:Source::BH destination:ScaleIndexBase(Source::eBX) displacement:0x3c];
+	test(instructions[14], 2, Operation::INC, Source::eBX, Source::eBX);
+	test(instructions[15], 1, Operation::CMP, Source::Immediate, Source::eAX, 0x8e);
+	test(instructions[16], 1, Operation::SBB, ScaleIndexBase(Source::eBX, Source::eSI), Source::BH, std::nullopt, 0x45);
+	test(instructions[17], 1, Operation::ADC, Source::BH, ScaleIndexBase(Source::eBX), std::nullopt, 0x3c);
 
 	// sbb		%bx,0x16(%bp,%si)
 	// xor		%sp,0x2c(%si)
 	// out		%ax,$0xc6
 	// jge		0xffffffe0
-	[self assert:instructions[18] operation:Operation::SBB size:2 source:Source::eBX destination:ScaleIndexBase(Source::eBP, Source::eSI) displacement:0x16];
-	[self assert:instructions[19] operation:Operation::XOR size:2 source:Source::eSP destination:ScaleIndexBase(Source::eSI) displacement:0x2c];
-	[self assert:instructions[20] operation:Operation::OUT size:2 source:Source::eAX destination:Source::DirectAddress operand:0xc6];
-	[self assert:instructions[21] operation:Operation::JNL displacement:0xffb0];
+	test(instructions[18], 2, Operation::SBB, Source::eBX, ScaleIndexBase(Source::eBP, Source::eSI), std::nullopt, 0x16);
+	test(instructions[19], 2, Operation::XOR, Source::eSP, ScaleIndexBase(Source::eSI), std::nullopt, 0x2c);
+	test(instructions[20], 2, Operation::OUT, Source::eAX, Source::DirectAddress, 0xc6);
+	test(instructions[21], Operation::JNL, std::nullopt, 0xffb0);
 
 	// mov		$0x49,%ch
 	// [[ omitted: addr32	popa ]]
 	// mov		$0xcbc0,%dx
 	// adc		$0x7e,%al
 	// jno		0x0000000b
-	[self assert:instructions[22] operation:Operation::MOV size:1 operand:0x49 destination:Source::CH];
-	[self assert:instructions[23] operation:Operation::MOV size:2 operand:0xcbc0 destination:Source::eDX];
-	[self assert:instructions[24] operation:Operation::ADC size:1 operand:0x7e destination:Source::eAX];
-	[self assert:instructions[25] operation:Operation::JNO displacement:0xffd0];
+	test(instructions[22], 1, Operation::MOV, Source::Immediate, Source::CH, 0x49);
+	test(instructions[23], 2, Operation::MOV, Source::Immediate, Source::eDX, 0xcbc0);
+	test(instructions[24], 1, Operation::ADC, Source::Immediate, Source::eAX, 0x7e);
+	test(instructions[25], Operation::JNO, std::nullopt, 0xffd0);
 
 	// push		%ax
 	// js		0x0000007b
 	// add		(%di),%bx
 	// in		$0xc9,%ax
-	[self assert:instructions[26] operation:Operation::PUSH size:2 source:Source::eAX];
-	[self assert:instructions[27] operation:Operation::JS displacement:0x3d];
-	[self assert:instructions[28] operation:Operation::ADD size:2 source:ScaleIndexBase(Source::eDI) destination:Source::eBX];
-	[self assert:instructions[29] operation:Operation::IN size:2 source:Source::DirectAddress destination:Source::eAX operand:0xc9];
+	test(instructions[26], 2, Operation::PUSH, Source::eAX);
+	test(instructions[27], Operation::JS, std::nullopt, 0x3d);
+	test(instructions[28], 2, Operation::ADD, ScaleIndexBase(Source::eDI), Source::eBX);
+	test(instructions[29], 2, Operation::IN, Source::DirectAddress, Source::eAX, 0xc9);
 
 	// xchg		%ax,%di
 	// ret
 	// fwait
 	// out		%al,$0xd3
-	[self assert:instructions[30] operation:Operation::XCHG size:2 source:Source::eAX destination:Source::eDI];
-	[self assert:instructions[31] operation:Operation::RETN];
-	[self assert:instructions[32] operation:Operation::WAIT];
-	[self assert:instructions[33] operation:Operation::OUT size:1 source:Source::eAX destination:Source::DirectAddress operand:0xd3];
+	test(instructions[30], 2, Operation::XCHG, Source::eAX, Source::eDI);
+	test(instructions[31], Operation::RETN);
+	test(instructions[32], Operation::WAIT);
+	test(instructions[33], 1, Operation::OUT, Source::eAX, Source::DirectAddress, 0xd3);
 
 	// [[ omitted: insb		(%dx),%es:(%di) ]]
 	// pop		%ax
 	// dec		%bp
 	// jbe		0xffffffcc
 	// inc		%sp
-	[self assert:instructions[34] operation:Operation::POP size:2 destination:Source::eAX];
-	[self assert:instructions[35] operation:Operation::DEC size:2 source:Source::eBP destination:Source::eBP];
-	[self assert:instructions[36] operation:Operation::JBE displacement:0xff80];
-	[self assert:instructions[37] operation:Operation::INC size:2 source:Source::eSP destination:Source::eSP];
+	test(instructions[34], 2, Operation::POP, Source::eAX);
+	test(instructions[35], 2, Operation::DEC, Source::eBP, Source::eBP);
+	test(instructions[36], Operation::JBE, std::nullopt, 0xff80);
+	test(instructions[37], 2, Operation::INC, Source::eSP, Source::eSP);
 
 	// (bad)
 	// lahf
 	// movsw	%ds:(%si),%es:(%di)
 	// mov		$0x12a1,%bp
-	[self assert:instructions[38] operation:Operation::Invalid];
-	[self assert:instructions[39] operation:Operation::LAHF];
-	[self assert:instructions[40] operation:Operation::MOVS size:2];
-	[self assert:instructions[41] operation:Operation::MOV size:2 operand:0x12a1 destination:Source::eBP];
+	test(instructions[38], Operation::Invalid);
+	test(instructions[39], Operation::LAHF);
+	test(instructions[40], 2, Operation::MOVS); /* Arguments are implicit. */
+	test(instructions[41], 2, Operation::MOV, Source::Immediate, Source::eBP, 0x12a1);
 
 	// lds		(%bx,%di),%bp
 	// [[ omitted: leave ]]
 	// sahf
 	// fdiv		%st(3),%st
 	// iret
-	[self assert:instructions[42] operation:Operation::LDS size:2];
-	[self assert:instructions[43] operation:Operation::SAHF];
-	[self assert:instructions[44] operation:Operation::ESC];
-	[self assert:instructions[45] operation:Operation::IRET];
+	test(instructions[42], 2, Operation::LDS);
+	test(instructions[43], Operation::SAHF);
+	test(instructions[44], Operation::ESC);
+	test(instructions[45], Operation::IRET);
 
 	// xchg		%ax,%dx
 	// cmp		%bx,-0x70(%di)
 	// adc		$0xb8c3,%ax
 	// lods		%ds:(%si),%ax
-	[self assert:instructions[46] operation:Operation::XCHG size:2 source:Source::eAX destination:Source::eDX];
-	[self assert:instructions[47] operation:Operation::CMP size:2 source:Source::eBX destination:ScaleIndexBase(Source::eDI) displacement:0xff90];
-	[self assert:instructions[48] operation:Operation::ADC size:2 operand:0xb8c3 destination:Source::eAX];
-	[self assert:instructions[49] operation:Operation::LODS size:2];
+	test(instructions[46], 2, Operation::XCHG, Source::eAX, Source::eDX);
+	test(instructions[47], 2, Operation::CMP, Source::eBX, ScaleIndexBase(Source::eDI), std::nullopt, 0xff90);
+	test(instructions[48], 2, Operation::ADC, Source::Immediate, Source::eAX, 0xb8c3);
+	test(instructions[49], 2, Operation::LODS);
 
 	// call		0x0000172d
 	// dec		%dx
 	// mov		$0x9e,%al
 	// stc
-	[self assert:instructions[50] operation:Operation::CALLD operand:0x16c8];
-	[self assert:instructions[51] operation:Operation::DEC size:2 source:Source::eDX destination:Source::eDX];
-	[self assert:instructions[52] operation:Operation::MOV size:1 operand:0x9e destination:Source::eAX];
-	[self assert:instructions[53] operation:Operation::STC];
+	test(instructions[50], Operation::CALLD, uint16_t(0x16c8));
+	test(instructions[51], 2, Operation::DEC, Source::eDX, Source::eDX);
+	test(instructions[52], 1, Operation::MOV, Source::Immediate, Source::eAX, 0x9e);
+	test(instructions[53], Operation::STC);
 
 	// mov		$0xea56,%di
 	// dec		%si
 	// std
 	// in		$0x5a,%al
-	[self assert:instructions[54] operation:Operation::MOV size:2 operand:0xea56 destination:Source::eDI];
-	[self assert:instructions[55] operation:Operation::DEC size:2 source:Source::eSI destination:Source::eSI];
-	[self assert:instructions[56] operation:Operation::STD];
-	[self assert:instructions[57] operation:Operation::IN size:1 source:Source::DirectAddress destination:Source::eAX operand:0x5a];
+	test(instructions[54], 2, Operation::MOV, Source::Immediate, Source::eDI, 0xea56);
+	test(instructions[55], 2, Operation::DEC, Source::eSI, Source::eSI);
+	test(instructions[56], Operation::STD);
+	test(instructions[57], 1, Operation::IN, Source::DirectAddress, Source::eAX, 0x5a);
 
 	// and		0x5b2c(%bp,%si),%bp
 	// sub		%dl,%dl
 	// negw		0x18(%bx)
 	// xchg		%dl,0x6425(%bx,%si)
-	[self assert:instructions[58] operation:Operation::AND size:2 source:ScaleIndexBase(Source::eBP, Source::eSI) destination:Source::eBP displacement:0x5b2c];
-	[self assert:instructions[59] operation:Operation::SUB size:1 source:Source::eDX destination:Source::eDX];
-	[self assert:instructions[60] operation:Operation::NEG size:2 source:ScaleIndexBase(Source::eBX) destination:ScaleIndexBase(Source::eBX) displacement:0x18];
-	[self assert:instructions[61] operation:Operation::XCHG size:1 source:ScaleIndexBase(Source::eBX, Source::eSI) destination:Source::eDX displacement:0x6425];
+	test(instructions[58], 2, Operation::AND, ScaleIndexBase(Source::eBP, Source::eSI), Source::eBP, std::nullopt, 0x5b2c);
+	test(instructions[59], 1, Operation::SUB, Source::eDX, Source::eDX);
+	test(instructions[60], 2, Operation::NEG, ScaleIndexBase(Source::eBX), ScaleIndexBase(Source::eBX), std::nullopt, 0x18);
+	test(instructions[61], 1, Operation::XCHG, ScaleIndexBase(Source::eBX, Source::eSI), Source::eDX, std::nullopt, 0x6425);
 
 	// mov		$0xc3,%bh
-	[self assert:instructions[62] operation:Operation::MOV size:1 operand:0xc3 destination:Source::BH];
+	test(instructions[62], 1, Operation::MOV, Source::Immediate, Source::BH, 0xc3);
 }
 
 - (void)test83 {
-	[self decode:{
+	const auto instructions = decode<Model::i8086>({
 		0x83, 0x10, 0x80,	// adcw		$0xff80,(%bx,%si)
 		0x83, 0x3b, 0x04,	// cmpw		$0x4,(%bp,%di)
 		0x83, 0x2f, 0x09,	// subw		$0x9,(%bx)
-	}];
+	});
 
 	XCTAssertEqual(instructions.size(), 3);
-	[self assert:instructions[0] operation:Operation::ADC size:2 source:Source::Immediate destination:ScaleIndexBase(Source::eBX, Source::eSI) operand:0xff80];
-	[self assert:instructions[1] operation:Operation::CMP size:2 source:Source::Immediate destination:ScaleIndexBase(Source::eBP, Source::eDI) operand:0x4];
-	[self assert:instructions[2] operation:Operation::SUB size:2 source:Source::Immediate destination:ScaleIndexBase(Source::eBX) operand:0x9];
+	test(instructions[0], 2, Operation::ADC, Source::Immediate, ScaleIndexBase(Source::eBX, Source::eSI), 0xff80);
+	test(instructions[1], 2, Operation::CMP, Source::Immediate, ScaleIndexBase(Source::eBP, Source::eDI), 0x4);
+	test(instructions[2], 2, Operation::SUB, Source::Immediate, ScaleIndexBase(Source::eBX), 0x9);
 }
 
 - (void)testFar {
-	[self decode:{
+	const auto instructions = decode<Model::i8086>({
 		0x9a, 0x12, 0x34, 0x56, 0x78,	// lcall 0x7856, 0x3412
-	}];
+	});
 
 	XCTAssertEqual(instructions.size(), 1);
-	[self assert:instructions[0] operation:Operation::CALLF segment:0x7856 offset:0x3412];
+	test_far(instructions[0], Operation::CALLF, 0x7856, 0x3412);
+}
+
+- (void)testSequence2 {
 }
 
 @end

From 60bf1ef7ea21d5dcd87dea8b60653039ef059896 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Wed, 23 Feb 2022 08:28:20 -0500
Subject: [PATCH 026/104] Rename SourceSIB to DataPointer, extend to allow for
 an absent base.

---
 InstructionSets/x86/Instruction.hpp           | 65 +++++++++++++++----
 .../Mac/Clock SignalTests/x86DecoderTests.mm  |  5 +-
 2 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 4ca1d455f..7dc11cf70 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -345,8 +345,10 @@ enum class Source: uint8_t {
 	/// The ScaleIndexBase associated with this source should be used.
 	Indirect = 0b11000,
 	// Elsewhere, as an implementation detail, the low three bits of an indirect source
-	// are reused.
+	// are reused; (Indirect-1) is also used as a sentinel value but is not a valid member
+	// of the enum and isn't exposed externally.
 };
+constexpr Source SourceIndirectNoBase = Source(uint8_t(Source::Indirect) - 1);
 
 enum class Repetition: uint8_t {
 	None, RepE, RepNE
@@ -357,6 +359,12 @@ enum class Repetition: uint8_t {
 ///
 /// This form of indirect addressing is used to describe both 16- and 32-bit indirect addresses,
 /// even though it is a superset of that supported prior to the 80386.
+///
+/// This class can represent only exactly what a SIB byte can — a scale of 0 to 3, a base
+/// that is any one of the eight general purpose registers, and an index that is one of the seven
+/// general purpose registers excluding eSP or is ::None.
+///
+/// It cannot natively describe a base of ::None.
 class ScaleIndexBase {
 	public:
 		constexpr ScaleIndexBase() noexcept {}
@@ -403,18 +411,49 @@ class ScaleIndexBase {
 static_assert(sizeof(ScaleIndexBase) == 1);
 static_assert(alignof(ScaleIndexBase) == 1);
 
-// TODO: improve the naming of SourceSIB.
-struct SourceSIB {
-	SourceSIB(Source source) : source(source) {}
-	SourceSIB(ScaleIndexBase sib) : sib(sib) {}
-	SourceSIB(Source source, ScaleIndexBase sib) : source(source), sib(sib) {}
+/// Provides the location of an operand's source or destination.
+///
+/// Callers should use .source() as a first point of entry. If it directly nominates a register
+/// then use the register contents directly. If it indicates ::DirectAddress or ::Immediate
+/// then ask the instruction for the address or immediate value that was provided in
+/// the instruction.
+///
+/// If .source() indicates ::Indirect then use base(), index() and scale() to construct an address.
+///
+/// In all cases, the applicable segment is indicated by the instruction.
+class DataPointer {
+	public:
+		constexpr DataPointer(Source source) noexcept : source_(source) {}
+		constexpr DataPointer(ScaleIndexBase sib) noexcept : sib_(sib) {}
+		constexpr DataPointer(Source source, ScaleIndexBase sib) noexcept : source_(source), sib_(sib) {}
 
-	bool operator ==(const SourceSIB &rhs) const {
-		return source == rhs.source && (source != Source::Indirect || sib == rhs.sib);
-	}
+		constexpr bool operator ==(const DataPointer &rhs) const {
+			// Require a SIB match only if source_ is ::Indirect or ::IndirectNoBase.
+			return source_ == rhs.source_ && (source_ < SourceIndirectNoBase || sib_ == rhs.sib_);
+		}
 
-	Source source = Source::Indirect;
-	ScaleIndexBase sib;
+		// TODO: determine whether conditionals below
+		// have introduced branching.
+
+		constexpr Source source() const {
+			return (source_ >= SourceIndirectNoBase) ? Source::Indirect : source_;
+		}
+
+		constexpr int scale() const {
+			return sib_.scale();
+		}
+
+		constexpr Source index() const {
+			return sib_.index();
+		}
+
+		constexpr Source base() const {
+			return (source_ <= SourceIndirectNoBase) ? Source::None : sib_.base();
+		}
+
+	private:
+		Source source_ = Source::Indirect;
+		ScaleIndexBase sib_;
 };
 
 template<bool is_32bit> class Instruction {
@@ -487,8 +526,8 @@ template<bool is_32bit> class Instruction {
 		/// this allows a denser packing of instructions into containers.
 		size_t packing_size() const		{	return sizeof(*this);	/* TODO */	}
 
-		SourceSIB  source() const		{	return SourceSIB(Source(sources_ & 0x3f), sib_);			}
-		SourceSIB destination() const	{	return SourceSIB(Source((sources_ >> 6) & 0x3f), sib_);		}
+		DataPointer  source() const		{	return DataPointer(Source(sources_ & 0x3f), sib_);			}
+		DataPointer destination() const	{	return DataPointer(Source((sources_ >> 6) & 0x3f), sib_);		}
 		bool lock() const				{	return sources_ & 0x8000;					}
 		bool address_size() const 		{	return address_size_;						}
 		Source segment_override() const	{	return Source((sources_ >> 12) & 7);		}
diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index 5a0930bf6..26c01569e 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -21,7 +21,6 @@ using Model = InstructionSet::x86::Model;
 using Source = InstructionSet::x86::Source;
 using Size = InstructionSet::x86::Size;
 using ScaleIndexBase = InstructionSet::x86::ScaleIndexBase;
-using SourceSIB = InstructionSet::x86::SourceSIB;
 
 // MARK: - Specific instruction asserts.
 
@@ -34,8 +33,8 @@ template <typename InstructionT> void test(
 	const InstructionT &instruction,
 	int size,
 	Operation operation,
-	SourceSIB source,
-	std::optional<SourceSIB> destination = std::nullopt,
+	InstructionSet::x86::DataPointer source,
+	std::optional<InstructionSet::x86::DataPointer> destination = std::nullopt,
 	std::optional<typename InstructionT::ImmediateT> operand = std::nullopt,
 	std::optional<typename InstructionT::DisplacementT> displacement = std::nullopt) {
 

From b8bff0e7f5672a82316c92971756d9e1887aaac4 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Thu, 24 Feb 2022 05:16:15 -0500
Subject: [PATCH 027/104] Double up eSP, eBP, eSI, eDI and AH, CH, DH, BH
 enums, as per Intel's encoding.

---
 InstructionSets/x86/Instruction.hpp | 44 +++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 7dc11cf70..0f27d2356 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -300,7 +300,7 @@ enum class Operation: uint8_t {
 	/// Compare string double word.
 	CMPSD,
 	/// [Early 80386s only] Insert bit string.
-	IBITS,
+	IBTS,
 
 	/// Inputs a double word from a port, incrementing or decrementing the destination.
 	INSD,
@@ -310,6 +310,15 @@ enum class Operation: uint8_t {
 	/// Convert word to dword; AX will be expanded to fill EAX.
 	/// Compare and contrast to CWD which would expand AX to DX:AX.
 	CWDE,
+
+	/// Move with zero extension.
+	MOVZX,
+	/// Move with sign extension.
+	MOVSX,
+
+	IRETD,
+	JECXZ,
+	LODSD,
 };
 
 enum class Size: uint8_t {
@@ -321,18 +330,37 @@ enum class Size: uint8_t {
 
 enum class Source: uint8_t {
 	// These are in SIB order; this matters for packing later on.
-	// Whether each refers to e.g. EAX, AX or AL depends on the
-	// instruction's data size.
-	eAX, eCX, eDX, eBX, eSP, eBP, eSI, eDI,
+
+	/// AL, AX or EAX depending on size.
+	eAX,
+	/// CL, CX or ECX depending on size.
+	eCX,
+	/// DL, DX or EDX depending on size.
+	eDX,
+	/// BL, BX or EBX depending on size.
+	eBX,
+	/// AH if size is 1; SP or ESP otherwise.
+	eSPorAH,
+	/// CH if size is 1; BP or EBP otherwise.
+	eBPorCH,
+	/// DH if size is 1; SI or ESI otherwise.
+	eSIorDH,
+	/// BH if size is 1; DI or EDI otherwise.
+	eDIorBH,
+
+	// Aliases for the dual-purpose enums.
+	eSP = eSPorAH,	AH = eSPorAH,
+	eBP = eBPorCH,	CH = eBPorCH,
+	eSI = eSIorDH,	DH = eSIorDH,
+	eDI = eDIorBH,	BH = eDIorBH,
 
 	// Selectors.
-	CS, DS, ES, SS, FS, GS,
-
-	// Legacy 8-bit registers that can't be described as e.g. 8-bit eAX.
-	AH, BH, CH, DH,
+	ES, CS, SS, DS, FS, GS,
 
 	/// The address included within this instruction should be used as the source.
 	DirectAddress,
+	// TODO: is this better eliminated in favour of an indirect
+	// source with a base() and index() of 0?
 
 	/// The immediate value included within this instruction should be used as the source.
 	Immediate,

From 0d7a7dc7c96f541646792ae03c1c8f73b6e4e4b0 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sun, 27 Feb 2022 11:25:02 -0500
Subject: [PATCH 028/104] Introduce `DataPointerResolver`, to codify the
 meaning of `DataPointer` and validate that enough information is present.

---
 InstructionSets/x86/DataPointerResolver.hpp   | 200 ++++++++++++++++++
 InstructionSets/x86/Decoder.hpp               |   8 +-
 InstructionSets/x86/Instruction.hpp           |  23 +-
 InstructionSets/x86/Model.hpp                 |  27 +++
 .../Clock Signal.xcodeproj/project.pbxproj    |   4 +
 .../Mac/Clock SignalTests/x86DecoderTests.mm  |   1 +
 6 files changed, 248 insertions(+), 15 deletions(-)
 create mode 100644 InstructionSets/x86/DataPointerResolver.hpp
 create mode 100644 InstructionSets/x86/Model.hpp

diff --git a/InstructionSets/x86/DataPointerResolver.hpp b/InstructionSets/x86/DataPointerResolver.hpp
new file mode 100644
index 000000000..8b5080173
--- /dev/null
+++ b/InstructionSets/x86/DataPointerResolver.hpp
@@ -0,0 +1,200 @@
+//
+//  DataPointerResolver.hpp
+//  Clock Signal
+//
+//  Created by Thomas Harte on 24/02/2022.
+//  Copyright © 2022 Thomas Harte. All rights reserved.
+//
+
+#ifndef DataPointerResolver_hpp
+#define DataPointerResolver_hpp
+
+#include "Instruction.hpp"
+#include "Model.hpp"
+
+namespace InstructionSet {
+namespace x86 {
+
+/// Unlike Source, describes only registers, and breaks
+/// them down by conventional name — so AL, AH, AX and EAX are all
+/// listed separately and uniquely, rather than being eAX+size or
+/// eSPorAH with a size of 1.
+enum class Register: uint8_t {
+	AL, AH, AX, EAX,
+	CL, CH, CX, ECX,
+	DL, DH, DX, EDX,
+	BL, BH, BX, EBX,
+	SP, ESP,
+	BP, EBP,
+	SI, ESI,
+	DI, EDI,
+	ES,
+	CS,
+	SS,
+	DS,
+	FS,
+	GS,
+	None
+};
+
+/// Reads from or writes to the source or target identified by a DataPointer, relying upon two user-supplied classes:
+///
+/// * a register bank; and
+/// * a memory pool.
+///
+/// The register bank should implement `template<typename DataT, Register> DataT read()` and `template<typename DataT, Register> void write(DataT)`.
+/// Those functions will be called only with registers and data types that are appropriate to the @c model.
+///
+/// The memory pool should implement `template<typename DataT> DataT read(Source segment, uint32_t address)` and
+/// `template<typename DataT> void write(Source segment, uint32_t address, DataT value)`.
+template <Model model, typename RegistersT, typename MemoryT> class DataPointerResolver {
+	public:
+		template <typename DataT> DataT read(
+			RegistersT &registers,
+			MemoryT &memory,
+			const Instruction<is_32bit(model)> &instruction,
+			DataPointer pointer,
+			decltype(RegistersT::eSP) memory_mask = ~0) {
+				DataT result;
+				access<true>(registers, memory, instruction, pointer, memory_mask, result);
+				return result;
+			}
+
+		template <typename DataT> void write(
+			RegistersT &registers,
+			MemoryT &memory,
+			const Instruction<is_32bit(model)> &instruction,
+			DataPointer pointer,
+			DataT value,
+			decltype(RegistersT::eSP) memory_mask = ~0) {
+				access<false>(registers, memory, instruction, pointer, memory_mask, value);
+			}
+
+	private:
+		template <bool is_write, typename DataT> DataT access(
+			RegistersT &registers,
+			MemoryT &memory,
+			const Instruction<is_32bit(model)> &instruction,
+			DataPointer pointer,
+			decltype(RegistersT::eSP) memory_mask,
+			DataT &value) {
+				const Source source = pointer.source();
+
+#define read_or_write(v, x, allow_write)	\
+	case Source::x:	\
+		if constexpr(allow_write && is_write) {\
+			registers.template write<DataT, register_for_source<DataT>(Source::x)>(v);	\
+		} else {	\
+			value = registers.template read<DataT, register_for_source<DataT>(Source::x)>(); \
+		}
+
+#define ALLREGS(v)	f(v, eAX); f(v, eCX); f(v, eDX); f(v, eBX); \
+					f(v, eSPorAH); f(v, eBPorCH); f(v, eSIorDH); f(v, eDIorBH); \
+					f(v, ES); f(v, CS); f(v, SS); f(v, DS); f(v, FS); f(v, GS);
+
+			switch(source) {
+				default:	return DataT(0);
+#define f(x, y) read_or_write(x, y, true)
+				ALLREGS(value);
+#undef f
+
+				case Source::DirectAddress:
+					if constexpr(is_write) {
+						memory.template write<DataT>(instruction.data_segment(), instruction.displacement(), value);
+					} else {
+						value = memory.template read<DataT>(instruction.data_segment(), instruction.displacement());
+					}
+				break;
+				case Source::Immediate:
+					value = DataT(instruction.operand());
+				break;
+
+				case Source::Indirect: {
+					uint32_t base = 0, index = 0;
+
+#define f(x, y) read_or_write(x, y, false)
+					switch(pointer.base()) {
+						default: break;
+						ALLREGS(base);
+					}
+
+					switch(pointer.index()) {
+						default: break;
+						ALLREGS(index);
+					}
+#undef f
+
+					if constexpr (model >= Model::i80386) {
+						index <<= pointer.scale();
+					}
+
+					// TODO: verify application of memory_mask here.
+					value = memory.template get<DataT>(
+						instruction.data_segment(),
+						(base & memory_mask) + (index & memory_mask)
+					);
+				}
+			}
+#undef ALLREGS
+		}
+
+		template <typename DataT> constexpr static Register register_for_source(Source source) {
+			if constexpr (sizeof(DataT) == 4) {
+				switch(source) {
+					case Source::eAX:		return Register::EAX;
+					case Source::eCX:		return Register::ECX;
+					case Source::eDX:		return Register::EDX;
+					case Source::eBX:		return Register::EBX;
+					case Source::eSPorAH:	return Register::ESP;
+					case Source::eBPorCH:	return Register::EBP;
+					case Source::eSIorDH:	return Register::ESI;
+					case Source::eDIorBH:	return Register::EDI;
+
+					default:				break;
+				}
+			}
+
+			if constexpr (sizeof(DataT) == 2) {
+				switch(source) {
+					case Source::eAX:		return Register::AX;
+					case Source::eCX:		return Register::CX;
+					case Source::eDX:		return Register::DX;
+					case Source::eBX:		return Register::BX;
+					case Source::eSPorAH:	return Register::SP;
+					case Source::eBPorCH:	return Register::BP;
+					case Source::eSIorDH:	return Register::SI;
+					case Source::eDIorBH:	return Register::DI;
+					case Source::ES:		return Register::ES;
+					case Source::CS:		return Register::CS;
+					case Source::SS:		return Register::SS;
+					case Source::DS:		return Register::DS;
+					case Source::FS:		return Register::FS;
+					case Source::GS:		return Register::GS;
+
+					default:				break;
+				}
+			}
+
+			if constexpr (sizeof(DataT) == 1) {
+				switch(source) {
+					case Source::eAX:		return Register::AL;
+					case Source::eCX:		return Register::CL;
+					case Source::eDX:		return Register::DL;
+					case Source::eBX:		return Register::BL;
+					case Source::eSPorAH:	return Register::AH;
+					case Source::eBPorCH:	return Register::CH;
+					case Source::eSIorDH:	return Register::DH;
+					case Source::eDIorBH:	return Register::BH;
+
+					default:				break;
+				}
+			}
+
+			return Register::None;
+		}
+};
+
+}
+}
+
+#endif /* DataPointerResolver_hpp */
diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index 6c52e698b..30d6bd92e 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -10,6 +10,7 @@
 #define InstructionSets_x86_Decoder_hpp
 
 #include "Instruction.hpp"
+#include "Model.hpp"
 
 #include <cstddef>
 #include <utility>
@@ -17,13 +18,6 @@
 namespace InstructionSet {
 namespace x86 {
 
-enum class Model {
-	i8086,
-	i80186,
-	i80286,
-	i80386,
-};
-
 /*!
 	Implements Intel x86 instruction decoding.
 
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 0f27d2356..4baf9e763 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -357,6 +357,12 @@ enum class Source: uint8_t {
 	// Selectors.
 	ES, CS, SS, DS, FS, GS,
 
+	/// @c None can be treated as a source that produces 0 when encountered;
+	/// it is semantically valid to receive it with that meaning in some contexts —
+	/// e.g. to indicate no index in indirect addressing.
+	/// It's listed here in order to allow an [optional] segment override to fit into three bits.
+	None,
+
 	/// The address included within this instruction should be used as the source.
 	DirectAddress,
 	// TODO: is this better eliminated in favour of an indirect
@@ -365,11 +371,6 @@ enum class Source: uint8_t {
 	/// The immediate value included within this instruction should be used as the source.
 	Immediate,
 
-	/// @c None can be treated as a source that produces 0 when encountered;
-	/// it is semantically valid to receive it with that meaning in some contexts —
-	/// e.g. to indicate no index in indirect addressing.
-	None,
-
 	/// The ScaleIndexBase associated with this source should be used.
 	Indirect = 0b11000,
 	// Elsewhere, as an implementation detail, the low three bits of an indirect source
@@ -554,16 +555,22 @@ template<bool is_32bit> class Instruction {
 		/// this allows a denser packing of instructions into containers.
 		size_t packing_size() const		{	return sizeof(*this);	/* TODO */	}
 
-		DataPointer  source() const		{	return DataPointer(Source(sources_ & 0x3f), sib_);			}
+		DataPointer source() const		{	return DataPointer(Source(sources_ & 0x3f), sib_);			}
 		DataPointer destination() const	{	return DataPointer(Source((sources_ >> 6) & 0x3f), sib_);		}
 		bool lock() const				{	return sources_ & 0x8000;					}
 		bool address_size() const 		{	return address_size_;						}
-		Source segment_override() const	{	return Source((sources_ >> 12) & 7);		}
+		Source data_segment() const		{
+			const auto segment_override = Source((sources_ >> 12) & 7);
+			if(segment_override != Source::None) return segment_override;
+
+			// TODO: default source should be SS for anything touching the stack.
+			return Source::DS;
+		}
 
 		Repetition repetition() const	{	return Repetition(repetition_size_ & 3);	}
 		Size operation_size() const 	{	return Size(repetition_size_ >> 2);			}
 
-		// TODO: confirm whether far call for some reason makes thse 32-bit in protected mode.
+		// TODO: confirm whether far call for some reason makes these 32-bit in protected mode.
 		uint16_t segment() const		{	return uint16_t(operand_);					}
 		uint16_t offset() const			{	return uint16_t(displacement_);				}
 
diff --git a/InstructionSets/x86/Model.hpp b/InstructionSets/x86/Model.hpp
new file mode 100644
index 000000000..8f8189556
--- /dev/null
+++ b/InstructionSets/x86/Model.hpp
@@ -0,0 +1,27 @@
+//
+//  Model.hpp
+//  Clock Signal
+//
+//  Created by Thomas Harte on 27/02/2022.
+//  Copyright © 2022 Thomas Harte. All rights reserved.
+//
+
+#ifndef Model_h
+#define Model_h
+
+namespace InstructionSet {
+namespace x86 {
+
+enum class Model {
+	i8086,
+	i80186,
+	i80286,
+	i80386,
+};
+
+#define is_32bit(model) (model >= Model::i80386)
+
+}
+}
+
+#endif /* Model_h */
diff --git a/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj b/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj
index 99ca08e07..5d4d81c10 100644
--- a/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj	
+++ b/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj	
@@ -2074,6 +2074,8 @@
 		4BE3231520532AA7006EF799 /* Target.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Target.hpp; sourceTree = "<group>"; };
 		4BE3231620532BED006EF799 /* Target.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Target.hpp; sourceTree = "<group>"; };
 		4BE34437238389E10058E78F /* AtariSTVideoTests.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = AtariSTVideoTests.mm; sourceTree = "<group>"; };
+		4BE3C69327C793EF000EAD28 /* DataPointerResolver.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = DataPointerResolver.hpp; sourceTree = "<group>"; };
+		4BE3C69527CBC540000EAD28 /* Model.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Model.hpp; sourceTree = "<group>"; };
 		4BE76CF822641ED300ACD6FA /* QLTests.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = QLTests.mm; sourceTree = "<group>"; };
 		4BE845201F2FF7F100A5EA22 /* CRTC6845.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = CRTC6845.hpp; sourceTree = "<group>"; };
 		4BE8EB5425C0E9D40040BC40 /* Disassembler.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Disassembler.hpp; sourceTree = "<group>"; };
@@ -4731,6 +4733,8 @@
 				4BEDA3B925B25563000C2DBD /* Decoder.cpp */,
 				4BEDA3B825B25563000C2DBD /* Decoder.hpp */,
 				4BEDA3DB25B2588F000C2DBD /* Instruction.hpp */,
+				4BE3C69327C793EF000EAD28 /* DataPointerResolver.hpp */,
+				4BE3C69527CBC540000EAD28 /* Model.hpp */,
 			);
 			path = x86;
 			sourceTree = "<group>";
diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index 26c01569e..fe3354026 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -12,6 +12,7 @@
 #include <optional>
 #include <vector>
 #include "../../../InstructionSets/x86/Decoder.hpp"
+#include "../../../InstructionSets/x86/DataPointerResolver.hpp"
 
 namespace {
 

From 27d1df46993ebc4e48c7059f72057dbcdcf7505a Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sun, 27 Feb 2022 18:27:58 -0500
Subject: [PATCH 029/104] Introduce enough of a DataPointerResolver test to
 build but fail.

---
 InstructionSets/x86/DataPointerResolver.hpp   | 24 +++---
 InstructionSets/x86/Instruction.hpp           | 11 +++
 .../Clock Signal.xcodeproj/project.pbxproj    |  6 +-
 .../Clock SignalTests/x86DataPointerTests.mm  | 74 +++++++++++++++++++
 4 files changed, 105 insertions(+), 10 deletions(-)
 create mode 100644 OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm

diff --git a/InstructionSets/x86/DataPointerResolver.hpp b/InstructionSets/x86/DataPointerResolver.hpp
index 8b5080173..be937d1c8 100644
--- a/InstructionSets/x86/DataPointerResolver.hpp
+++ b/InstructionSets/x86/DataPointerResolver.hpp
@@ -49,34 +49,34 @@ enum class Register: uint8_t {
 /// `template<typename DataT> void write(Source segment, uint32_t address, DataT value)`.
 template <Model model, typename RegistersT, typename MemoryT> class DataPointerResolver {
 	public:
-		template <typename DataT> DataT read(
+		template <typename DataT> static DataT read(
 			RegistersT &registers,
 			MemoryT &memory,
 			const Instruction<is_32bit(model)> &instruction,
 			DataPointer pointer,
-			decltype(RegistersT::eSP) memory_mask = ~0) {
+			typename Instruction<is_32bit(model)>::ImmediateT memory_mask = ~0) {
 				DataT result;
 				access<true>(registers, memory, instruction, pointer, memory_mask, result);
 				return result;
 			}
 
-		template <typename DataT> void write(
+		template <typename DataT> static void write(
 			RegistersT &registers,
 			MemoryT &memory,
 			const Instruction<is_32bit(model)> &instruction,
 			DataPointer pointer,
 			DataT value,
-			decltype(RegistersT::eSP) memory_mask = ~0) {
+			typename Instruction<is_32bit(model)>::ImmediateT memory_mask = ~0) {
 				access<false>(registers, memory, instruction, pointer, memory_mask, value);
 			}
 
 	private:
-		template <bool is_write, typename DataT> DataT access(
+		template <bool is_write, typename DataT> static void access(
 			RegistersT &registers,
 			MemoryT &memory,
 			const Instruction<is_32bit(model)> &instruction,
 			DataPointer pointer,
-			decltype(RegistersT::eSP) memory_mask,
+			typename Instruction<is_32bit(model)>::ImmediateT memory_mask,
 			DataT &value) {
 				const Source source = pointer.source();
 
@@ -86,14 +86,20 @@ template <Model model, typename RegistersT, typename MemoryT> class DataPointerR
 			registers.template write<DataT, register_for_source<DataT>(Source::x)>(v);	\
 		} else {	\
 			value = registers.template read<DataT, register_for_source<DataT>(Source::x)>(); \
-		}
+		}	\
+	break;
 
 #define ALLREGS(v)	f(v, eAX); f(v, eCX); f(v, eDX); f(v, eBX); \
 					f(v, eSPorAH); f(v, eBPorCH); f(v, eSIorDH); f(v, eDIorBH); \
 					f(v, ES); f(v, CS); f(v, SS); f(v, DS); f(v, FS); f(v, GS);
 
 			switch(source) {
-				default:	return DataT(0);
+				default:
+					if constexpr (!is_write) {
+						value = 0;
+					}
+				return;
+
 #define f(x, y) read_or_write(x, y, true)
 				ALLREGS(value);
 #undef f
@@ -129,7 +135,7 @@ template <Model model, typename RegistersT, typename MemoryT> class DataPointerR
 					}
 
 					// TODO: verify application of memory_mask here.
-					value = memory.template get<DataT>(
+					value = memory.template read<DataT>(
 						instruction.data_segment(),
 						(base & memory_mask) + (index & memory_mask)
 					);
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 4baf9e763..ef9266d82 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -452,10 +452,21 @@ static_assert(alignof(ScaleIndexBase) == 1);
 /// In all cases, the applicable segment is indicated by the instruction.
 class DataPointer {
 	public:
+		/// Constructs a DataPointer referring to the given source; it shouldn't be ::Indirect.
 		constexpr DataPointer(Source source) noexcept : source_(source) {}
+
+		/// Constructs a DataPointer with a source of ::Indirect and the specified sib.
 		constexpr DataPointer(ScaleIndexBase sib) noexcept : sib_(sib) {}
+
+		/// Constructs a DataPointer with a source and SIB; use the source to indicate
+		/// whether the base field of the SIB is effective.
 		constexpr DataPointer(Source source, ScaleIndexBase sib) noexcept : source_(source), sib_(sib) {}
 
+		/// Constructs an indirect DataPointer referencing the given base, index and scale.
+		constexpr DataPointer(Source base, Source index, int scale) noexcept :
+			source_(base != Source::None ? Source::Indirect : SourceIndirectNoBase),
+			sib_(scale, index, base) {}
+
 		constexpr bool operator ==(const DataPointer &rhs) const {
 			// Require a SIB match only if source_ is ::Indirect or ::IndirectNoBase.
 			return source_ == rhs.source_ && (source_ < SourceIndirectNoBase || sib_ == rhs.sib_);
diff --git a/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj b/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj
index 5d4d81c10..ff3d75cda 100644
--- a/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj	
+++ b/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj	
@@ -981,6 +981,7 @@
 		4BE21219253FCE9C00435408 /* AppleIIgs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4BE21214253FCE9C00435408 /* AppleIIgs.cpp */; };
 		4BE2121A253FCE9C00435408 /* AppleIIgs.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4BE21214253FCE9C00435408 /* AppleIIgs.cpp */; };
 		4BE34438238389E10058E78F /* AtariSTVideoTests.mm in Sources */ = {isa = PBXBuildFile; fileRef = 4BE34437238389E10058E78F /* AtariSTVideoTests.mm */; };
+		4BE3C69727CC32DC000EAD28 /* x86DataPointerTests.mm in Sources */ = {isa = PBXBuildFile; fileRef = 4BE3C69627CC32DC000EAD28 /* x86DataPointerTests.mm */; };
 		4BE76CF922641ED400ACD6FA /* QLTests.mm in Sources */ = {isa = PBXBuildFile; fileRef = 4BE76CF822641ED300ACD6FA /* QLTests.mm */; };
 		4BE8EB6625C750B50040BC40 /* DAT.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4BE8EB6425C750B50040BC40 /* DAT.cpp */; };
 		4BE90FFD22D5864800FB464D /* MacintoshVideoTests.mm in Sources */ = {isa = PBXBuildFile; fileRef = 4BE90FFC22D5864800FB464D /* MacintoshVideoTests.mm */; };
@@ -2076,6 +2077,7 @@
 		4BE34437238389E10058E78F /* AtariSTVideoTests.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = AtariSTVideoTests.mm; sourceTree = "<group>"; };
 		4BE3C69327C793EF000EAD28 /* DataPointerResolver.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = DataPointerResolver.hpp; sourceTree = "<group>"; };
 		4BE3C69527CBC540000EAD28 /* Model.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Model.hpp; sourceTree = "<group>"; };
+		4BE3C69627CC32DC000EAD28 /* x86DataPointerTests.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = x86DataPointerTests.mm; sourceTree = "<group>"; };
 		4BE76CF822641ED300ACD6FA /* QLTests.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = QLTests.mm; sourceTree = "<group>"; };
 		4BE845201F2FF7F100A5EA22 /* CRTC6845.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = CRTC6845.hpp; sourceTree = "<group>"; };
 		4BE8EB5425C0E9D40040BC40 /* Disassembler.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = Disassembler.hpp; sourceTree = "<group>"; };
@@ -4121,7 +4123,6 @@
 			children = (
 				4B85322922778E4200F26553 /* Comparative68000.hpp */,
 				4B90467222C6FA31000E2074 /* TestRunner68000.hpp */,
-				4BF7019F26FFD32300996424 /* AmigaBlitterTests.mm */,
 				4B90467522C6FD6E000E2074 /* 68000ArithmeticTests.mm */,
 				4B9D0C4A22C7D70900DE1AD3 /* 68000BCDTests.mm */,
 				4B90467322C6FADD000E2074 /* 68000BitwiseTests.mm */,
@@ -4130,6 +4131,7 @@
 				4BC5C3DF22C994CC00795658 /* 68000MoveTests.mm */,
 				4B9D0C4E22C7E0CF00DE1AD3 /* 68000RollShiftTests.mm */,
 				4BD388872239E198002D14B5 /* 68000Tests.mm */,
+				4BF7019F26FFD32300996424 /* AmigaBlitterTests.mm */,
 				4B924E981E74D22700B76AF1 /* AtariStaticAnalyserTests.mm */,
 				4BE34437238389E10058E78F /* AtariSTVideoTests.mm */,
 				4BB2A9AE1E13367E001A5C23 /* CRCTests.mm */,
@@ -4150,6 +4152,7 @@
 				4B8DD3672633B2D400B3C866 /* SpectrumVideoContentionTests.mm */,
 				4B2AF8681E513FC20027EE29 /* TIATests.mm */,
 				4B1D08051E0F7A1100763741 /* TimeTests.mm */,
+				4BE3C69627CC32DC000EAD28 /* x86DataPointerTests.mm */,
 				4BEE4BD325A26E2B00011BD2 /* x86DecoderTests.mm */,
 				4BDA8234261E8E000021AA19 /* Z80ContentionTests.mm */,
 				4BB73EB81B587A5100552FC2 /* Info.plist */,
@@ -5919,6 +5922,7 @@
 				4B778F5C23A5F3070000D260 /* MSX.cpp in Sources */,
 				4B778F0323A5EBB00000D260 /* FAT12.cpp in Sources */,
 				4B778F4023A5F1910000D260 /* z8530.cpp in Sources */,
+				4BE3C69727CC32DC000EAD28 /* x86DataPointerTests.mm in Sources */,
 				4B778EFD23A5EB8E0000D260 /* AppleDSK.cpp in Sources */,
 				4B778EFB23A5EB7E0000D260 /* HFE.cpp in Sources */,
 				4BC751B21D157E61006C31D9 /* 6522Tests.swift in Sources */,
diff --git a/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm b/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm
new file mode 100644
index 000000000..74a7f7298
--- /dev/null
+++ b/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm	
@@ -0,0 +1,74 @@
+//
+//  x86DataPointerTests.mm
+//  Clock Signal
+//
+//  Created by Thomas Harte on 27/02/2022.
+//  Copyright 2022 Thomas Harte. All rights reserved.
+//
+
+#import <XCTest/XCTest.h>
+
+#include "../../../InstructionSets/x86/DataPointerResolver.hpp"
+
+using namespace InstructionSet::x86;
+
+@interface x86DataPointerTests : XCTestCase
+@end
+
+@implementation x86DataPointerTests
+
+//- (InstructionSet::x86::Instruction<false>)instruction16WithSourceDataPointer:(DataPointer)pointer {
+//	return x86::Instruction<false>{
+//		InstructionSet::x86::Operation::AAA,
+//		S
+//	};
+//}
+
+
+- (void)testX {
+	const DataPointer pointer(
+		Source::eAX, Source::eDI, 2
+	);
+
+	struct Registers {
+		uint16_t ax = 0x1234, di = 0x00ee;
+
+		template <typename DataT, Register r> void write(DataT) {
+			assert(false);
+		}
+		template <typename DataT, Register r> DataT read() {
+			switch(r) {
+				case Register::AX:	return ax;
+				case Register::DI:	return di;
+				default: return 0;
+			}
+		}
+	} registers;
+
+	struct Memory {
+		template<typename DataT> DataT read(Source segment, uint32_t address) {
+			(void)segment;
+			(void)address;
+			return 0;
+		}
+		template<typename DataT> void write(Source, uint32_t, DataT) {
+			assert(false);
+		}
+
+	} memory;
+
+	const auto instruction = Instruction<false>();/*[self
+		instruction16WithSourceDataPointer:pointer];*/
+
+	const uint8_t value = DataPointerResolver<
+		Model::i8086, Registers, Memory>::read<uint8_t>(
+			registers,
+			memory,
+			instruction,
+			instruction.source()
+		);
+
+	printf("%d\n", value);
+}
+
+@end

From 84ac68a58ba2de15e509bba6602f337132d7085e Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sun, 27 Feb 2022 18:43:00 -0500
Subject: [PATCH 030/104] Fix indirect memory read/write

---
 InstructionSets/x86/DataPointerResolver.hpp   | 22 +++++++++++++++----
 .../Clock SignalTests/x86DataPointerTests.mm  |  5 +++--
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/InstructionSets/x86/DataPointerResolver.hpp b/InstructionSets/x86/DataPointerResolver.hpp
index be937d1c8..8bf2bec13 100644
--- a/InstructionSets/x86/DataPointerResolver.hpp
+++ b/InstructionSets/x86/DataPointerResolver.hpp
@@ -12,6 +12,8 @@
 #include "Instruction.hpp"
 #include "Model.hpp"
 
+#include <cassert>
+
 namespace InstructionSet {
 namespace x86 {
 
@@ -132,13 +134,25 @@ template <Model model, typename RegistersT, typename MemoryT> class DataPointerR
 
 					if constexpr (model >= Model::i80386) {
 						index <<= pointer.scale();
+					} else {
+						assert(!pointer.scale());
 					}
 
 					// TODO: verify application of memory_mask here.
-					value = memory.template read<DataT>(
-						instruction.data_segment(),
-						(base & memory_mask) + (index & memory_mask)
-					);
+					const uint32_t address = (base & memory_mask) + (index & memory_mask);
+
+					if constexpr (is_write) {
+						value = memory.template read<DataT>(
+							instruction.data_segment(),
+							address
+						);
+					} else {
+						memory.template write<DataT>(
+							instruction.data_segment(),
+							address,
+							value
+						);
+					}
 				}
 			}
 #undef ALLREGS
diff --git a/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm b/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm
index 74a7f7298..71b21a717 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm	
@@ -27,7 +27,7 @@ using namespace InstructionSet::x86;
 
 - (void)testX {
 	const DataPointer pointer(
-		Source::eAX, Source::eDI, 2
+		Source::eAX, Source::eDI, 0
 	);
 
 	struct Registers {
@@ -49,6 +49,7 @@ using namespace InstructionSet::x86;
 		template<typename DataT> DataT read(Source segment, uint32_t address) {
 			(void)segment;
 			(void)address;
+			printf("Access at %d\n", address);
 			return 0;
 		}
 		template<typename DataT> void write(Source, uint32_t, DataT) {
@@ -65,7 +66,7 @@ using namespace InstructionSet::x86;
 			registers,
 			memory,
 			instruction,
-			instruction.source()
+			pointer
 		);
 
 	printf("%d\n", value);

From 9f12c009d66b08bd1632a27ac5e761c7439ac1de Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sun, 27 Feb 2022 19:45:03 -0500
Subject: [PATCH 031/104] Correct data size when accessing address registers.

---
 InstructionSets/x86/DataPointerResolver.hpp   | 24 +++++++++++--------
 InstructionSets/x86/Instruction.hpp           |  1 +
 .../Clock SignalTests/x86DataPointerTests.mm  |  2 +-
 3 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/InstructionSets/x86/DataPointerResolver.hpp b/InstructionSets/x86/DataPointerResolver.hpp
index 8bf2bec13..b46da72d4 100644
--- a/InstructionSets/x86/DataPointerResolver.hpp
+++ b/InstructionSets/x86/DataPointerResolver.hpp
@@ -82,13 +82,13 @@ template <Model model, typename RegistersT, typename MemoryT> class DataPointerR
 			DataT &value) {
 				const Source source = pointer.source();
 
-#define read_or_write(v, x, allow_write)	\
-	case Source::x:	\
-		if constexpr(allow_write && is_write) {\
-			registers.template write<DataT, register_for_source<DataT>(Source::x)>(v);	\
-		} else {	\
-			value = registers.template read<DataT, register_for_source<DataT>(Source::x)>(); \
-		}	\
+#define read_or_write(v, x, allow_write)																\
+	case Source::x:																						\
+		if constexpr(allow_write && is_write) {															\
+			registers.template write<decltype(v), register_for_source<decltype(v)>(Source::x)>(v);		\
+		} else {																						\
+			v = registers.template read<decltype(v), register_for_source<decltype(v)>(Source::x)>();	\
+		}																								\
 	break;
 
 #define ALLREGS(v)	f(v, eAX); f(v, eCX); f(v, eDX); f(v, eBX); \
@@ -118,7 +118,8 @@ template <Model model, typename RegistersT, typename MemoryT> class DataPointerR
 				break;
 
 				case Source::Indirect: {
-					uint32_t base = 0, index = 0;
+					using AddressT = typename Instruction<is_32bit(model)>::AddressComponentT;
+					AddressT base = 0, index = 0;
 
 #define f(x, y) read_or_write(x, y, false)
 					switch(pointer.base()) {
@@ -132,14 +133,17 @@ template <Model model, typename RegistersT, typename MemoryT> class DataPointerR
 					}
 #undef f
 
+					// Compute address as 32-bit; its always at least 20 bits
+					// and at most 32.
+					uint32_t address = index;
 					if constexpr (model >= Model::i80386) {
-						index <<= pointer.scale();
+						address <<= pointer.scale();
 					} else {
 						assert(!pointer.scale());
 					}
 
 					// TODO: verify application of memory_mask here.
-					const uint32_t address = (base & memory_mask) + (index & memory_mask);
+					address = (address & memory_mask) + (base & memory_mask);
 
 					if constexpr (is_write) {
 						value = memory.template read<DataT>(
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index ef9266d82..4358c7dbc 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -511,6 +511,7 @@ template<bool is_32bit> class Instruction {
 
 		using DisplacementT = typename std::conditional<is_32bit, int32_t, int16_t>::type;
 		using ImmediateT = typename std::conditional<is_32bit, uint32_t, uint16_t>::type;
+		using AddressComponentT = ImmediateT;
 
 		/* Note to self — current thinking is:
 
diff --git a/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm b/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm
index 71b21a717..27cac21bc 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm	
@@ -49,7 +49,7 @@ using namespace InstructionSet::x86;
 		template<typename DataT> DataT read(Source segment, uint32_t address) {
 			(void)segment;
 			(void)address;
-			printf("Access at %d\n", address);
+			printf("Access at %08x\n", address);
 			return 0;
 		}
 		template<typename DataT> void write(Source, uint32_t, DataT) {

From afbc57cc0cd947a183075fcf148e527c13ea501a Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 28 Feb 2022 09:53:23 -0500
Subject: [PATCH 032/104] Incorporate displacement, switch macro flag.

---
 InstructionSets/x86/DataPointerResolver.hpp   | 19 +++++++++++--------
 .../Clock SignalTests/x86DataPointerTests.mm  | 12 ++++--------
 2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/InstructionSets/x86/DataPointerResolver.hpp b/InstructionSets/x86/DataPointerResolver.hpp
index b46da72d4..e560d6f6b 100644
--- a/InstructionSets/x86/DataPointerResolver.hpp
+++ b/InstructionSets/x86/DataPointerResolver.hpp
@@ -82,9 +82,9 @@ template <Model model, typename RegistersT, typename MemoryT> class DataPointerR
 			DataT &value) {
 				const Source source = pointer.source();
 
-#define read_or_write(v, x, allow_write)																\
+#define read_or_write(v, x, is_for_indirection)															\
 	case Source::x:																						\
-		if constexpr(allow_write && is_write) {															\
+		if constexpr(!is_for_indirection && is_write) {													\
 			registers.template write<decltype(v), register_for_source<decltype(v)>(Source::x)>(v);		\
 		} else {																						\
 			v = registers.template read<decltype(v), register_for_source<decltype(v)>(Source::x)>();	\
@@ -102,7 +102,7 @@ template <Model model, typename RegistersT, typename MemoryT> class DataPointerR
 					}
 				return;
 
-#define f(x, y) read_or_write(x, y, true)
+#define f(x, y) read_or_write(x, y, false)
 				ALLREGS(value);
 #undef f
 
@@ -121,7 +121,7 @@ template <Model model, typename RegistersT, typename MemoryT> class DataPointerR
 					using AddressT = typename Instruction<is_32bit(model)>::AddressComponentT;
 					AddressT base = 0, index = 0;
 
-#define f(x, y) read_or_write(x, y, false)
+#define f(x, y) read_or_write(x, y, true)
 					switch(pointer.base()) {
 						default: break;
 						ALLREGS(base);
@@ -133,8 +133,12 @@ template <Model model, typename RegistersT, typename MemoryT> class DataPointerR
 					}
 #undef f
 
-					// Compute address as 32-bit; its always at least 20 bits
-					// and at most 32.
+					// Always compute address as 32-bit.
+					// TODO: verify application of memory_mask here.
+					// The point of memory_mask is that 32-bit x86 offers the memory size modifier,
+					// permitting 16-bit addresses to be generated in 32-bit mode and vice versa.
+					// To figure out is at what point in the calculation the 16-bit constraint is
+					// applied when active.
 					uint32_t address = index;
 					if constexpr (model >= Model::i80386) {
 						address <<= pointer.scale();
@@ -142,8 +146,7 @@ template <Model model, typename RegistersT, typename MemoryT> class DataPointerR
 						assert(!pointer.scale());
 					}
 
-					// TODO: verify application of memory_mask here.
-					address = (address & memory_mask) + (base & memory_mask);
+					address = (address & memory_mask) + (base & memory_mask) + instruction.displacement();
 
 					if constexpr (is_write) {
 						value = memory.template read<DataT>(
diff --git a/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm b/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm
index 27cac21bc..ed483bf25 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm	
@@ -46,10 +46,8 @@ using namespace InstructionSet::x86;
 	} registers;
 
 	struct Memory {
-		template<typename DataT> DataT read(Source segment, uint32_t address) {
-			(void)segment;
-			(void)address;
-			printf("Access at %08x\n", address);
+		template<typename DataT> DataT read(Source, uint32_t address) {
+			if(address == 01234 + 0x00ee) return 0xff;
 			return 0;
 		}
 		template<typename DataT> void write(Source, uint32_t, DataT) {
@@ -58,9 +56,7 @@ using namespace InstructionSet::x86;
 
 	} memory;
 
-	const auto instruction = Instruction<false>();/*[self
-		instruction16WithSourceDataPointer:pointer];*/
-
+	const auto instruction = Instruction<false>();
 	const uint8_t value = DataPointerResolver<
 		Model::i8086, Registers, Memory>::read<uint8_t>(
 			registers,
@@ -69,7 +65,7 @@ using namespace InstructionSet::x86;
 			pointer
 		);
 
-	printf("%d\n", value);
+	XCTAssertEqual(value, 0xff);
 }
 
 @end

From d8601ef01f70197f8d8eb8fdc61f2a0da30276a1 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 28 Feb 2022 09:54:29 -0500
Subject: [PATCH 033/104] Add missing hex specifier. Test now passes.

---
 OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm b/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm
index ed483bf25..6917bb23a 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm	
@@ -33,9 +33,6 @@ using namespace InstructionSet::x86;
 	struct Registers {
 		uint16_t ax = 0x1234, di = 0x00ee;
 
-		template <typename DataT, Register r> void write(DataT) {
-			assert(false);
-		}
 		template <typename DataT, Register r> DataT read() {
 			switch(r) {
 				case Register::AX:	return ax;
@@ -43,17 +40,19 @@ using namespace InstructionSet::x86;
 				default: return 0;
 			}
 		}
+		template <typename DataT, Register r> void write(DataT) {
+			assert(false);
+		}
 	} registers;
 
 	struct Memory {
 		template<typename DataT> DataT read(Source, uint32_t address) {
-			if(address == 01234 + 0x00ee) return 0xff;
+			if(address == 0x1234 + 0x00ee) return 0xff;
 			return 0;
 		}
 		template<typename DataT> void write(Source, uint32_t, DataT) {
 			assert(false);
 		}
-
 	} memory;
 
 	const auto instruction = Instruction<false>();

From b920507f34952af81f89d34c98582649ce0069fd Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 28 Feb 2022 10:03:58 -0500
Subject: [PATCH 034/104] Double down on `AddressT`, add an `assert` on
 `memory_mask`.

---
 InstructionSets/x86/DataPointerResolver.hpp | 11 ++++++-----
 InstructionSets/x86/Instruction.hpp         |  2 +-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/InstructionSets/x86/DataPointerResolver.hpp b/InstructionSets/x86/DataPointerResolver.hpp
index e560d6f6b..4ad3ea766 100644
--- a/InstructionSets/x86/DataPointerResolver.hpp
+++ b/InstructionSets/x86/DataPointerResolver.hpp
@@ -56,7 +56,7 @@ template <Model model, typename RegistersT, typename MemoryT> class DataPointerR
 			MemoryT &memory,
 			const Instruction<is_32bit(model)> &instruction,
 			DataPointer pointer,
-			typename Instruction<is_32bit(model)>::ImmediateT memory_mask = ~0) {
+			typename Instruction<is_32bit(model)>::AddressT memory_mask = ~0) {
 				DataT result;
 				access<true>(registers, memory, instruction, pointer, memory_mask, result);
 				return result;
@@ -68,7 +68,7 @@ template <Model model, typename RegistersT, typename MemoryT> class DataPointerR
 			const Instruction<is_32bit(model)> &instruction,
 			DataPointer pointer,
 			DataT value,
-			typename Instruction<is_32bit(model)>::ImmediateT memory_mask = ~0) {
+			typename Instruction<is_32bit(model)>::AddressT memory_mask = ~0) {
 				access<false>(registers, memory, instruction, pointer, memory_mask, value);
 			}
 
@@ -78,13 +78,14 @@ template <Model model, typename RegistersT, typename MemoryT> class DataPointerR
 			MemoryT &memory,
 			const Instruction<is_32bit(model)> &instruction,
 			DataPointer pointer,
-			typename Instruction<is_32bit(model)>::ImmediateT memory_mask,
+			typename Instruction<is_32bit(model)>::AddressT memory_mask,
 			DataT &value) {
+				assert(memory_mask == 0xffff'ffff || memory_mask == 0xffff);
 				const Source source = pointer.source();
 
 #define read_or_write(v, x, is_for_indirection)															\
 	case Source::x:																						\
-		if constexpr(!is_for_indirection && is_write) {													\
+		if constexpr (!is_for_indirection && is_write) {												\
 			registers.template write<decltype(v), register_for_source<decltype(v)>(Source::x)>(v);		\
 		} else {																						\
 			v = registers.template read<decltype(v), register_for_source<decltype(v)>(Source::x)>();	\
@@ -118,7 +119,7 @@ template <Model model, typename RegistersT, typename MemoryT> class DataPointerR
 				break;
 
 				case Source::Indirect: {
-					using AddressT = typename Instruction<is_32bit(model)>::AddressComponentT;
+					using AddressT = typename Instruction<is_32bit(model)>::AddressT;
 					AddressT base = 0, index = 0;
 
 #define f(x, y) read_or_write(x, y, true)
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 4358c7dbc..6a6268614 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -511,7 +511,7 @@ template<bool is_32bit> class Instruction {
 
 		using DisplacementT = typename std::conditional<is_32bit, int32_t, int16_t>::type;
 		using ImmediateT = typename std::conditional<is_32bit, uint32_t, uint16_t>::type;
-		using AddressComponentT = ImmediateT;
+		using AddressT = ImmediateT;
 
 		/* Note to self — current thinking is:
 

From 2c816db45e8fe81fd02c385cdd6290a897a68c7f Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Tue, 1 Mar 2022 09:36:37 -0500
Subject: [PATCH 035/104] Refactor: (i) to expose effective address
 calculation; and (ii) to include address size in Instruction.

---
 InstructionSets/x86/DataPointerResolver.hpp | 352 +++++++++++---------
 InstructionSets/x86/Instruction.hpp         |  22 +-
 2 files changed, 213 insertions(+), 161 deletions(-)

diff --git a/InstructionSets/x86/DataPointerResolver.hpp b/InstructionSets/x86/DataPointerResolver.hpp
index 4ad3ea766..5abbacead 100644
--- a/InstructionSets/x86/DataPointerResolver.hpp
+++ b/InstructionSets/x86/DataPointerResolver.hpp
@@ -39,6 +39,63 @@ enum class Register: uint8_t {
 	None
 };
 
+template <typename DataT> constexpr Register register_for_source(Source source) {
+	static_assert(sizeof(DataT) == 4 || sizeof(DataT) == 2 || sizeof(DataT) == 1);
+
+	if constexpr (sizeof(DataT) == 4) {
+		switch(source) {
+			case Source::eAX:		return Register::EAX;
+			case Source::eCX:		return Register::ECX;
+			case Source::eDX:		return Register::EDX;
+			case Source::eBX:		return Register::EBX;
+			case Source::eSPorAH:	return Register::ESP;
+			case Source::eBPorCH:	return Register::EBP;
+			case Source::eSIorDH:	return Register::ESI;
+			case Source::eDIorBH:	return Register::EDI;
+
+			default:				break;
+		}
+	}
+
+	if constexpr (sizeof(DataT) == 2) {
+		switch(source) {
+			case Source::eAX:		return Register::AX;
+			case Source::eCX:		return Register::CX;
+			case Source::eDX:		return Register::DX;
+			case Source::eBX:		return Register::BX;
+			case Source::eSPorAH:	return Register::SP;
+			case Source::eBPorCH:	return Register::BP;
+			case Source::eSIorDH:	return Register::SI;
+			case Source::eDIorBH:	return Register::DI;
+			case Source::ES:		return Register::ES;
+			case Source::CS:		return Register::CS;
+			case Source::SS:		return Register::SS;
+			case Source::DS:		return Register::DS;
+			case Source::FS:		return Register::FS;
+			case Source::GS:		return Register::GS;
+
+			default:				break;
+		}
+	}
+
+	if constexpr (sizeof(DataT) == 1) {
+		switch(source) {
+			case Source::eAX:		return Register::AL;
+			case Source::eCX:		return Register::CL;
+			case Source::eDX:		return Register::DL;
+			case Source::eBX:		return Register::BL;
+			case Source::eSPorAH:	return Register::AH;
+			case Source::eBPorCH:	return Register::CH;
+			case Source::eSIorDH:	return Register::DH;
+			case Source::eDIorBH:	return Register::BH;
+
+			default:				break;
+		}
+	}
+
+	return Register::None;
+}
+
 /// Reads from or writes to the source or target identified by a DataPointer, relying upon two user-supplied classes:
 ///
 /// * a register bank; and
@@ -51,26 +108,28 @@ enum class Register: uint8_t {
 /// `template<typename DataT> void write(Source segment, uint32_t address, DataT value)`.
 template <Model model, typename RegistersT, typename MemoryT> class DataPointerResolver {
 	public:
+	public:
+		/// Reads the data pointed to by @c pointer, referencing @c instruction, @c memory and @c registers as necessary.
 		template <typename DataT> static DataT read(
 			RegistersT &registers,
 			MemoryT &memory,
 			const Instruction<is_32bit(model)> &instruction,
-			DataPointer pointer,
-			typename Instruction<is_32bit(model)>::AddressT memory_mask = ~0) {
-				DataT result;
-				access<true>(registers, memory, instruction, pointer, memory_mask, result);
-				return result;
-			}
+			DataPointer pointer);
 
+		/// Writes @c value to the data pointed to by @c pointer, referencing @c instruction, @c memory and @c registers as necessary.
 		template <typename DataT> static void write(
 			RegistersT &registers,
 			MemoryT &memory,
 			const Instruction<is_32bit(model)> &instruction,
 			DataPointer pointer,
-			DataT value,
-			typename Instruction<is_32bit(model)>::AddressT memory_mask = ~0) {
-				access<false>(registers, memory, instruction, pointer, memory_mask, value);
-			}
+			DataT value);
+
+		/// Computes the effective address of @c pointer including any displacement applied by @c instruction.
+		/// @c pointer must be of type Source::Indirect.
+		static uint32_t effective_address(
+			RegistersT &registers,
+			const Instruction<is_32bit(model)> &instruction,
+			DataPointer pointer);
 
 	private:
 		template <bool is_write, typename DataT> static void access(
@@ -78,150 +137,139 @@ template <Model model, typename RegistersT, typename MemoryT> class DataPointerR
 			MemoryT &memory,
 			const Instruction<is_32bit(model)> &instruction,
 			DataPointer pointer,
-			typename Instruction<is_32bit(model)>::AddressT memory_mask,
-			DataT &value) {
-				assert(memory_mask == 0xffff'ffff || memory_mask == 0xffff);
-				const Source source = pointer.source();
-
-#define read_or_write(v, x, is_for_indirection)															\
-	case Source::x:																						\
-		if constexpr (!is_for_indirection && is_write) {												\
-			registers.template write<decltype(v), register_for_source<decltype(v)>(Source::x)>(v);		\
-		} else {																						\
-			v = registers.template read<decltype(v), register_for_source<decltype(v)>(Source::x)>();	\
-		}																								\
-	break;
-
-#define ALLREGS(v)	f(v, eAX); f(v, eCX); f(v, eDX); f(v, eBX); \
-					f(v, eSPorAH); f(v, eBPorCH); f(v, eSIorDH); f(v, eDIorBH); \
-					f(v, ES); f(v, CS); f(v, SS); f(v, DS); f(v, FS); f(v, GS);
-
-			switch(source) {
-				default:
-					if constexpr (!is_write) {
-						value = 0;
-					}
-				return;
-
-#define f(x, y) read_or_write(x, y, false)
-				ALLREGS(value);
-#undef f
-
-				case Source::DirectAddress:
-					if constexpr(is_write) {
-						memory.template write<DataT>(instruction.data_segment(), instruction.displacement(), value);
-					} else {
-						value = memory.template read<DataT>(instruction.data_segment(), instruction.displacement());
-					}
-				break;
-				case Source::Immediate:
-					value = DataT(instruction.operand());
-				break;
-
-				case Source::Indirect: {
-					using AddressT = typename Instruction<is_32bit(model)>::AddressT;
-					AddressT base = 0, index = 0;
-
-#define f(x, y) read_or_write(x, y, true)
-					switch(pointer.base()) {
-						default: break;
-						ALLREGS(base);
-					}
-
-					switch(pointer.index()) {
-						default: break;
-						ALLREGS(index);
-					}
-#undef f
-
-					// Always compute address as 32-bit.
-					// TODO: verify application of memory_mask here.
-					// The point of memory_mask is that 32-bit x86 offers the memory size modifier,
-					// permitting 16-bit addresses to be generated in 32-bit mode and vice versa.
-					// To figure out is at what point in the calculation the 16-bit constraint is
-					// applied when active.
-					uint32_t address = index;
-					if constexpr (model >= Model::i80386) {
-						address <<= pointer.scale();
-					} else {
-						assert(!pointer.scale());
-					}
-
-					address = (address & memory_mask) + (base & memory_mask) + instruction.displacement();
-
-					if constexpr (is_write) {
-						value = memory.template read<DataT>(
-							instruction.data_segment(),
-							address
-						);
-					} else {
-						memory.template write<DataT>(
-							instruction.data_segment(),
-							address,
-							value
-						);
-					}
-				}
-			}
-#undef ALLREGS
-		}
-
-		template <typename DataT> constexpr static Register register_for_source(Source source) {
-			if constexpr (sizeof(DataT) == 4) {
-				switch(source) {
-					case Source::eAX:		return Register::EAX;
-					case Source::eCX:		return Register::ECX;
-					case Source::eDX:		return Register::EDX;
-					case Source::eBX:		return Register::EBX;
-					case Source::eSPorAH:	return Register::ESP;
-					case Source::eBPorCH:	return Register::EBP;
-					case Source::eSIorDH:	return Register::ESI;
-					case Source::eDIorBH:	return Register::EDI;
-
-					default:				break;
-				}
-			}
-
-			if constexpr (sizeof(DataT) == 2) {
-				switch(source) {
-					case Source::eAX:		return Register::AX;
-					case Source::eCX:		return Register::CX;
-					case Source::eDX:		return Register::DX;
-					case Source::eBX:		return Register::BX;
-					case Source::eSPorAH:	return Register::SP;
-					case Source::eBPorCH:	return Register::BP;
-					case Source::eSIorDH:	return Register::SI;
-					case Source::eDIorBH:	return Register::DI;
-					case Source::ES:		return Register::ES;
-					case Source::CS:		return Register::CS;
-					case Source::SS:		return Register::SS;
-					case Source::DS:		return Register::DS;
-					case Source::FS:		return Register::FS;
-					case Source::GS:		return Register::GS;
-
-					default:				break;
-				}
-			}
-
-			if constexpr (sizeof(DataT) == 1) {
-				switch(source) {
-					case Source::eAX:		return Register::AL;
-					case Source::eCX:		return Register::CL;
-					case Source::eDX:		return Register::DL;
-					case Source::eBX:		return Register::BL;
-					case Source::eSPorAH:	return Register::AH;
-					case Source::eBPorCH:	return Register::CH;
-					case Source::eSIorDH:	return Register::DH;
-					case Source::eDIorBH:	return Register::BH;
-
-					default:				break;
-				}
-			}
-
-			return Register::None;
-		}
+			DataT &value);
 };
 
+
+//
+//	Implementation begins here.
+//
+
+template <Model model, typename RegistersT, typename MemoryT>
+template <typename DataT> DataT DataPointerResolver<model, RegistersT, MemoryT>::read(
+	RegistersT &registers,
+	MemoryT &memory,
+	const Instruction<is_32bit(model)> &instruction,
+	DataPointer pointer) {
+		DataT result;
+		access<true>(registers, memory, instruction, pointer, result);
+		return result;
+	}
+
+template <Model model, typename RegistersT, typename MemoryT>
+template <typename DataT> void DataPointerResolver<model, RegistersT, MemoryT>::write(
+	RegistersT &registers,
+	MemoryT &memory,
+	const Instruction<is_32bit(model)> &instruction,
+	DataPointer pointer,
+	DataT value) {
+		access<false>(registers, memory, instruction, pointer, value);
+	}
+
+#define rw(v, r, is_write)														\
+	case Source::r: {															\
+		if constexpr (is_write) {												\
+			registers.template write<decltype(v), register_for_source<decltype(v)>(Source::r)>(v);						\
+		} else {																\
+			v = registers.template read<decltype(v), register_for_source<decltype(v)>(Source::r)>();					\
+		}																		\
+	} break;
+
+#define ALLREGS(v, i)	rw(v, eAX, i); 		rw(v, eCX, i); 		\
+						rw(v, eDX, i);		rw(v, eBX, i); 		\
+						rw(v, eSPorAH, i);	rw(v, eBPorCH, i);	\
+						rw(v, eSIorDH, i);	rw(v, eDIorBH, i);	\
+						rw(v, ES, i);		rw(v, CS, i); 		\
+						rw(v, SS, i);		rw(v, DS, i); 		\
+						rw(v, FS, i);		rw(v, GS, i);
+
+template <Model model, typename RegistersT, typename MemoryT>
+uint32_t DataPointerResolver<model, RegistersT, MemoryT>::effective_address(
+	RegistersT &registers,
+	const Instruction<is_32bit(model)> &instruction,
+	DataPointer pointer) {
+		using AddressT = typename Instruction<is_32bit(model)>::AddressT;
+		AddressT base = 0, index = 0;
+
+		switch(pointer.base()) {
+			default: break;
+			ALLREGS(base, false);
+		}
+
+		switch(pointer.index()) {
+			default: break;
+			ALLREGS(index, false);
+		}
+
+		// Always compute address as 32-bit.
+		// TODO: verify application of memory_mask around here.
+		// The point of memory_mask is that 32-bit x86 offers the memory size modifier,
+		// permitting 16-bit addresses to be generated in 32-bit mode and vice versa.
+		// To figure out is at what point in the calculation the 16-bit constraint is
+		// applied when active.
+		uint32_t address = index;
+		if constexpr (model >= Model::i80386) {
+			address <<= pointer.scale();
+		} else {
+			assert(!pointer.scale());
+		}
+
+		constexpr uint32_t memory_masks[] = {0x0000'ffff, 0xffff'ffff};
+		const uint32_t memory_mask = memory_masks[instruction.address_size_is_32()];
+		address = (address & memory_mask) + (base & memory_mask) + instruction.displacement();
+		return address;
+	}
+
+template <Model model, typename RegistersT, typename MemoryT>
+template <bool is_write, typename DataT> void DataPointerResolver<model, RegistersT, MemoryT>::access(
+	RegistersT &registers,
+	MemoryT &memory,
+	const Instruction<is_32bit(model)> &instruction,
+	DataPointer pointer,
+	DataT &value) {
+		const Source source = pointer.source();
+
+		switch(source) {
+			default:
+				if constexpr (!is_write) {
+					value = 0;
+				}
+			return;
+
+			ALLREGS(value, is_write);
+
+			case Source::DirectAddress:
+				if constexpr(is_write) {
+					memory.template write<DataT>(instruction.data_segment(), instruction.displacement(), value);
+				} else {
+					value = memory.template read<DataT>(instruction.data_segment(), instruction.displacement());
+				}
+			break;
+			case Source::Immediate:
+				value = DataT(instruction.operand());
+			break;
+
+			case Source::Indirect: {
+				const auto address = effective_address(registers, instruction, pointer);
+
+				if constexpr (is_write) {
+					value = memory.template read<DataT>(
+						instruction.data_segment(),
+						address
+					);
+				} else {
+					memory.template write<DataT>(
+						instruction.data_segment(),
+						address,
+						value
+					);
+				}
+			}
+		}
+	}
+#undef ALLREGS
+#undef read_or_write
+
 }
 }
 
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 6a6268614..80eda82cd 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -522,14 +522,18 @@ template<bool is_32bit> class Instruction {
 				8 bits operation;
 				4 bits original instruction size;
 				2 bits data size;
-				3 bits extension flags.
+				1 bit memory size;
+				2 bits extension flags.
 
 			Extensions (16 or 32 bit, depending on templated size):
-				1) reptition + segment override + lock + memory size toggle (= 7 bits);
+				1) reptition + segment override + lock + original instruction size (= 10 bits);
 				2) displacement;
 				3) immediate operand.
 
-			Presence or absence of extensions is dictated by the extention flags.
+			Presence or absence of extensions is dictated by:
+				* instruction size = 0 => the repetition, etc extension (including the real extension size); and
+				* the extension flags for displacement and/or immediate.
+
 			Therefore an instruction's footprint is:
 				* 4–8 bytes (16-bit processors);
 				* 4–12 bytes (32-bit processors).
@@ -537,9 +541,9 @@ template<bool is_32bit> class Instruction {
 			I'll then implement a collection suited to packing these things based on their
 			packing_size(), and later iterating them.
 
-			To verify: do the 8086 and 80286 limit instructions to 15 bytes as later members
-			of the family do? If not then consider original instruction size = 0 to imply an
-			extension of one word prior to the other extensions.
+			To verify: the 8086 allows unlimited-length instructions (which I'll probably handle by
+			generating length-15 NOPs and not resetting parser state), the 80386 limits them to
+			15 bytes, but what do the processors in between do?
 		*/
 
 	private:
@@ -570,7 +574,7 @@ template<bool is_32bit> class Instruction {
 		DataPointer source() const		{	return DataPointer(Source(sources_ & 0x3f), sib_);			}
 		DataPointer destination() const	{	return DataPointer(Source((sources_ >> 6) & 0x3f), sib_);		}
 		bool lock() const				{	return sources_ & 0x8000;					}
-		bool address_size() const 		{	return address_size_;						}
+		bool address_size_is_32() const {	return address_size_;						}
 		Source data_segment() const		{
 			const auto segment_override = Source((sources_ >> 12) & 7);
 			if(segment_override != Source::None) return segment_override;
@@ -586,8 +590,8 @@ template<bool is_32bit> class Instruction {
 		uint16_t segment() const		{	return uint16_t(operand_);					}
 		uint16_t offset() const			{	return uint16_t(displacement_);				}
 
-		DisplacementT displacement() const	{	return displacement_;						}
-		ImmediateT operand() const			{	return operand_;							}
+		DisplacementT displacement() const	{	return displacement_;					}
+		ImmediateT operand() const			{	return operand_;						}
 
 		Instruction() noexcept {}
 		Instruction(

From 5e7a142ff1d6e8ed8136004ec3932c49c037c659 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Tue, 1 Mar 2022 16:51:54 -0500
Subject: [PATCH 036/104] Fix `is_write` errors, update comment, add additional
 source for asserts.

---
 InstructionSets/x86/DataPointerResolver.hpp   | 96 ++++++++++++-------
 .../Clock SignalTests/x86DataPointerTests.mm  |  1 +
 2 files changed, 64 insertions(+), 33 deletions(-)

diff --git a/InstructionSets/x86/DataPointerResolver.hpp b/InstructionSets/x86/DataPointerResolver.hpp
index 5abbacead..e74744d21 100644
--- a/InstructionSets/x86/DataPointerResolver.hpp
+++ b/InstructionSets/x86/DataPointerResolver.hpp
@@ -22,23 +22,49 @@ namespace x86 {
 /// listed separately and uniquely, rather than being eAX+size or
 /// eSPorAH with a size of 1.
 enum class Register: uint8_t {
-	AL, AH, AX, EAX,
-	CL, CH, CX, ECX,
-	DL, DH, DX, EDX,
-	BL, BH, BX, EBX,
-	SP, ESP,
-	BP, EBP,
-	SI, ESI,
-	DI, EDI,
-	ES,
-	CS,
-	SS,
-	DS,
-	FS,
-	GS,
+	// 8-bit registers.
+	AL, AH,
+	CL, CH,
+	DL, DH,
+	BL, BH,
+
+	// 16-bit registers.
+	AX, CX, DX, BX,
+	SP, BP, SI, DI,
+	ES, CS, SS, DS,
+	FS, GS,
+
+	// 32-bit registers.
+	EAX, ECX, EDX, EBX,
+	ESP, EBP, ESI, EDI,
+
+	//
 	None
 };
 
+/// @returns @c true if @c r is the same size as @c DataT; @c false otherwise.
+/// @discussion Provided primarily to aid in asserts; if the decoder and resolver are both
+/// working then it shouldn't be necessary to test this in register files.
+template <typename DataT> constexpr bool is_sized(Register r) {
+	static_assert(sizeof(DataT) == 4 || sizeof(DataT) == 2 || sizeof(DataT) == 1);
+
+	if constexpr (sizeof(DataT) == 4) {
+		return r >= Register::EAX && r < Register::None;
+	}
+
+	if constexpr (sizeof(DataT) == 2) {
+		return r >= Register::AX && r < Register::EAX;
+	}
+
+	if constexpr (sizeof(DataT) == 1) {
+		return r >= Register::AL && r < Register::AX;
+	}
+
+	return false;
+}
+
+/// @returns the proper @c Register given @c source and data of size @c sizeof(DataT),
+/// or Register::None if no such register exists (e.g. asking for a 32-bit version of CS).
 template <typename DataT> constexpr Register register_for_source(Source source) {
 	static_assert(sizeof(DataT) == 4 || sizeof(DataT) == 2 || sizeof(DataT) == 1);
 
@@ -152,7 +178,7 @@ template <typename DataT> DataT DataPointerResolver<model, RegistersT, MemoryT>:
 	const Instruction<is_32bit(model)> &instruction,
 	DataPointer pointer) {
 		DataT result;
-		access<true>(registers, memory, instruction, pointer, result);
+		access<false>(registers, memory, instruction, pointer, result);
 		return result;
 	}
 
@@ -163,17 +189,18 @@ template <typename DataT> void DataPointerResolver<model, RegistersT, MemoryT>::
 	const Instruction<is_32bit(model)> &instruction,
 	DataPointer pointer,
 	DataT value) {
-		access<false>(registers, memory, instruction, pointer, value);
+		access<true>(registers, memory, instruction, pointer, value);
 	}
 
 #define rw(v, r, is_write)														\
-	case Source::r: {															\
+	case Source::r:																\
+		using VType = typename std::remove_reference<decltype(v)>::type;		\
 		if constexpr (is_write) {												\
-			registers.template write<decltype(v), register_for_source<decltype(v)>(Source::r)>(v);						\
+			registers.template write<VType, register_for_source<VType>(Source::r)>(v);		\
 		} else {																\
-			v = registers.template read<decltype(v), register_for_source<decltype(v)>(Source::r)>();					\
+			 v = registers.template read<VType, register_for_source<VType>(Source::r)>();	\
 		}																		\
-	} break;
+	break;
 
 #define ALLREGS(v, i)	rw(v, eAX, i); 		rw(v, eCX, i); 		\
 						rw(v, eDX, i);		rw(v, eBX, i); 		\
@@ -201,12 +228,6 @@ uint32_t DataPointerResolver<model, RegistersT, MemoryT>::effective_address(
 			ALLREGS(index, false);
 		}
 
-		// Always compute address as 32-bit.
-		// TODO: verify application of memory_mask around here.
-		// The point of memory_mask is that 32-bit x86 offers the memory size modifier,
-		// permitting 16-bit addresses to be generated in 32-bit mode and vice versa.
-		// To figure out is at what point in the calculation the 16-bit constraint is
-		// applied when active.
 		uint32_t address = index;
 		if constexpr (model >= Model::i80386) {
 			address <<= pointer.scale();
@@ -214,6 +235,15 @@ uint32_t DataPointerResolver<model, RegistersT, MemoryT>::effective_address(
 			assert(!pointer.scale());
 		}
 
+		// Always compute address as 32-bit.
+		// TODO: verify use of memory_mask around here.
+		// Also I think possibly an exception is supposed to be generated
+		// if the programmer is in 32-bit mode and has asked for 16-bit
+		// address computation but generated e.g. a 17-bit result. Look into
+		// that when working on execution. For now the goal is merely decoding
+		// and this code exists both to verify the presence of all necessary
+		// fields and to help to explore the best breakdown of storage
+		// within Instruction.
 		constexpr uint32_t memory_masks[] = {0x0000'ffff, 0xffff'ffff};
 		const uint32_t memory_mask = memory_masks[instruction.address_size_is_32()];
 		address = (address & memory_mask) + (base & memory_mask) + instruction.displacement();
@@ -240,7 +270,7 @@ template <bool is_write, typename DataT> void DataPointerResolver<model, Registe
 
 			case Source::DirectAddress:
 				if constexpr(is_write) {
-					memory.template write<DataT>(instruction.data_segment(), instruction.displacement(), value);
+					memory.template write(instruction.data_segment(), instruction.displacement(), value);
 				} else {
 					value = memory.template read<DataT>(instruction.data_segment(), instruction.displacement());
 				}
@@ -253,16 +283,16 @@ template <bool is_write, typename DataT> void DataPointerResolver<model, Registe
 				const auto address = effective_address(registers, instruction, pointer);
 
 				if constexpr (is_write) {
-					value = memory.template read<DataT>(
-						instruction.data_segment(),
-						address
-					);
-				} else {
-					memory.template write<DataT>(
+					memory.template write(
 						instruction.data_segment(),
 						address,
 						value
 					);
+				} else {
+					value = memory.template read<DataT>(
+						instruction.data_segment(),
+						address
+					);
 				}
 			}
 		}
diff --git a/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm b/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm
index 6917bb23a..efa133f17 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm	
@@ -34,6 +34,7 @@ using namespace InstructionSet::x86;
 		uint16_t ax = 0x1234, di = 0x00ee;
 
 		template <typename DataT, Register r> DataT read() {
+			assert(is_sized<DataT>(r));
 			switch(r) {
 				case Register::AX:	return ax;
 				case Register::DI:	return di;

From 8ee62b4789069dbd5ab2695dd6c573d662cdd79b Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Tue, 1 Mar 2022 17:29:26 -0500
Subject: [PATCH 037/104] Simplify address size semantics.

Since it'll no longer be a mode-dependent toggle, but a fully-retained value.
---
 InstructionSets/x86/DataPointerResolver.hpp   |  2 +-
 InstructionSets/x86/Decoder.cpp               | 10 +++----
 InstructionSets/x86/Decoder.hpp               |  4 +--
 InstructionSets/x86/Instruction.hpp           | 28 ++++++++++++-------
 .../Mac/Clock SignalTests/x86DecoderTests.mm  |  6 ++--
 5 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/InstructionSets/x86/DataPointerResolver.hpp b/InstructionSets/x86/DataPointerResolver.hpp
index e74744d21..c4f95c055 100644
--- a/InstructionSets/x86/DataPointerResolver.hpp
+++ b/InstructionSets/x86/DataPointerResolver.hpp
@@ -245,7 +245,7 @@ uint32_t DataPointerResolver<model, RegistersT, MemoryT>::effective_address(
 		// fields and to help to explore the best breakdown of storage
 		// within Instruction.
 		constexpr uint32_t memory_masks[] = {0x0000'ffff, 0xffff'ffff};
-		const uint32_t memory_mask = memory_masks[instruction.address_size_is_32()];
+		const uint32_t memory_mask = memory_masks[int(instruction.address_size())];
 		address = (address & memory_mask) + (base & memory_mask) + instruction.displacement();
 		return address;
 	}
diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 45879036f..0dcfdf5b8 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -179,10 +179,10 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				RequiresMin(i80286);
 				MemRegReg(ARPL, MemReg_Reg, 2);
 			break;
-			case 0x67:
-				RequiresMin(i80386);
-				address_size_ = true;
-			break;
+//			case 0x67:
+//				RequiresMin(i80386);
+//				address_size_ = true;
+//			break;
 			case 0x6c:	// INSB
 				RequiresMin(i80186);
 				Complete(INS, None, None, 1);
@@ -710,7 +710,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				address_size_,
 				segment_override_,
 				repetition_,
-				Size(operation_size_),
+				DataSize(operation_size_),
 				displacement_,
 				operand_)
 		);
diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index 30d6bd92e..43b6b9b51 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -148,7 +148,7 @@ template <Model model> class Decoder {
 		// Prefix capture fields.
 		Repetition repetition_ = Repetition::None;
 		bool lock_ = false;
-		bool address_size_ = false;
+		AddressSize address_size_ = AddressSize::b16;
 		Source segment_override_ = Source::None;
 
 		/// Resets size capture and all fields with default values.
@@ -157,7 +157,7 @@ template <Model model> class Decoder {
 			displacement_size_ = operand_size_ = 0;
 			displacement_ = operand_ = 0;
 			lock_ = false;
-			address_size_ = false;
+			address_size_ = AddressSize::b16;
 			segment_override_ = Source::None;
 			repetition_ = Repetition::None;
 			phase_ = Phase::Instruction;
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 80eda82cd..1240756a6 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -321,11 +321,16 @@ enum class Operation: uint8_t {
 	LODSD,
 };
 
-enum class Size: uint8_t {
+enum class DataSize: uint8_t {
 	Implied = 0,
 	Byte = 1,
 	Word = 2,
-	DWord = 4,
+	DWord = 3,
+};
+
+enum class AddressSize: uint8_t {
+	b16 = 0,
+	b32 = 1,
 };
 
 enum class Source: uint8_t {
@@ -563,7 +568,7 @@ template<bool is_32bit> class Instruction {
 
 		// Fields yet to be properly incorporated...
 		ScaleIndexBase sib_;
-		bool address_size_ = false;
+		AddressSize address_size_ = AddressSize::b16;
 
 	public:
 		/// @returns The number of bytes used for meaningful content within this class. A receiver must use at least @c sizeof(Instruction) bytes
@@ -572,10 +577,13 @@ template<bool is_32bit> class Instruction {
 		size_t packing_size() const		{	return sizeof(*this);	/* TODO */	}
 
 		DataPointer source() const		{	return DataPointer(Source(sources_ & 0x3f), sib_);			}
-		DataPointer destination() const	{	return DataPointer(Source((sources_ >> 6) & 0x3f), sib_);		}
-		bool lock() const				{	return sources_ & 0x8000;					}
-		bool address_size_is_32() const {	return address_size_;						}
-		Source data_segment() const		{
+		DataPointer destination() const	{	return DataPointer(Source((sources_ >> 6) & 0x3f), sib_);	}
+		bool lock() const				{	return sources_ & 0x8000;									}
+
+		AddressSize address_size() const {
+			return AddressSize(address_size_);
+		}
+		Source data_segment() const {
 			const auto segment_override = Source((sources_ >> 12) & 7);
 			if(segment_override != Source::None) return segment_override;
 
@@ -584,7 +592,7 @@ template<bool is_32bit> class Instruction {
 		}
 
 		Repetition repetition() const	{	return Repetition(repetition_size_ & 3);	}
-		Size operation_size() const 	{	return Size(repetition_size_ >> 2);			}
+		DataSize operation_size() const {	return DataSize(repetition_size_ >> 2);		}
 
 		// TODO: confirm whether far call for some reason makes these 32-bit in protected mode.
 		uint16_t segment() const		{	return uint16_t(operand_);					}
@@ -600,10 +608,10 @@ template<bool is_32bit> class Instruction {
 			Source destination,
 			ScaleIndexBase sib,
 			bool lock,
-			bool address_size,
+			AddressSize address_size,
 			Source segment_override,
 			Repetition repetition,
-			Size operation_size,
+			DataSize operation_size,
 			DisplacementT displacement,
 			ImmediateT operand) noexcept :
 				operation(operation),
diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index fe3354026..cc91cb5ed 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -20,13 +20,13 @@ using Operation = InstructionSet::x86::Operation;
 using Instruction = InstructionSet::x86::Instruction<false>;
 using Model = InstructionSet::x86::Model;
 using Source = InstructionSet::x86::Source;
-using Size = InstructionSet::x86::Size;
+using Size = InstructionSet::x86::DataSize;
 using ScaleIndexBase = InstructionSet::x86::ScaleIndexBase;
 
 // MARK: - Specific instruction asserts.
 
 template <typename InstructionT> void test(const InstructionT &instruction, int size, Operation operation) {
-	XCTAssertEqual(instruction.operation_size(), InstructionSet::x86::Size(size));
+	XCTAssertEqual(instruction.operation_size(), InstructionSet::x86::DataSize(size));
 	XCTAssertEqual(instruction.operation, operation);
 }
 
@@ -39,7 +39,7 @@ template <typename InstructionT> void test(
 	std::optional<typename InstructionT::ImmediateT> operand = std::nullopt,
 	std::optional<typename InstructionT::DisplacementT> displacement = std::nullopt) {
 
-	XCTAssertEqual(instruction.operation_size(), InstructionSet::x86::Size(size));
+	XCTAssertEqual(instruction.operation_size(), InstructionSet::x86::DataSize(size));
 	XCTAssertEqual(instruction.operation, operation);
 	XCTAssert(instruction.source() == source);
 	if(destination) XCTAssert(instruction.destination() == *destination);

From d1148c4cabb3ead21b0322c4341b72464fcca81a Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Tue, 1 Mar 2022 17:30:41 -0500
Subject: [PATCH 038/104] Switch to constexpr function, for guaranteed
 semantics.

---
 InstructionSets/x86/Model.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/InstructionSets/x86/Model.hpp b/InstructionSets/x86/Model.hpp
index 8f8189556..304214475 100644
--- a/InstructionSets/x86/Model.hpp
+++ b/InstructionSets/x86/Model.hpp
@@ -19,7 +19,7 @@ enum class Model {
 	i80386,
 };
 
-#define is_32bit(model) (model >= Model::i80386)
+static constexpr bool is_32bit(Model model) { return model >= Model::i80386; }
 
 }
 }

From 4b4135e35a054c744a6e0b504dc1dab3fabb2d66 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Tue, 1 Mar 2022 18:23:24 -0500
Subject: [PATCH 039/104] Correct #undef.

---
 InstructionSets/x86/DataPointerResolver.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/InstructionSets/x86/DataPointerResolver.hpp b/InstructionSets/x86/DataPointerResolver.hpp
index c4f95c055..c6754ee08 100644
--- a/InstructionSets/x86/DataPointerResolver.hpp
+++ b/InstructionSets/x86/DataPointerResolver.hpp
@@ -298,7 +298,7 @@ template <bool is_write, typename DataT> void DataPointerResolver<model, Registe
 		}
 	}
 #undef ALLREGS
-#undef read_or_write
+#undef rw
 
 }
 }

From 8080d1d961bf7b1abcdc519988d49a45b78c76ba Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Tue, 1 Mar 2022 20:22:43 -0500
Subject: [PATCH 040/104] Extend test case slightly.

---
 .../Clock SignalTests/x86DataPointerTests.mm  | 60 +++++++++++++++----
 1 file changed, 50 insertions(+), 10 deletions(-)

diff --git a/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm b/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm
index efa133f17..e9b01f5e9 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm	
@@ -9,6 +9,7 @@
 #import <XCTest/XCTest.h>
 
 #include "../../../InstructionSets/x86/DataPointerResolver.hpp"
+#include <map>
 
 using namespace InstructionSet::x86;
 
@@ -25,47 +26,86 @@ using namespace InstructionSet::x86;
 //}
 
 
-- (void)testX {
-	const DataPointer pointer(
+- (void)test16bitSize1 {
+	const DataPointer indirectPointer(
 		Source::eAX, Source::eDI, 0
 	);
+	const DataPointer registerPointer(
+		Source::eBX
+	);
 
 	struct Registers {
 		uint16_t ax = 0x1234, di = 0x00ee;
+		uint8_t bl = 0xaa;
 
 		template <typename DataT, Register r> DataT read() {
 			assert(is_sized<DataT>(r));
 			switch(r) {
 				case Register::AX:	return ax;
+				case Register::BL:	return bl;
 				case Register::DI:	return di;
 				default: return 0;
 			}
 		}
-		template <typename DataT, Register r> void write(DataT) {
-			assert(false);
+		template <typename DataT, Register r> void write(DataT value) {
+			assert(is_sized<DataT>(r));
+			switch(r) {
+				case Register::BL:	bl = value;	break;
+				default: assert(false);
+			}
 		}
 	} registers;
 
 	struct Memory {
+		std::map<uint32_t, uint8_t> data;
+
 		template<typename DataT> DataT read(Source, uint32_t address) {
 			if(address == 0x1234 + 0x00ee) return 0xff;
 			return 0;
 		}
-		template<typename DataT> void write(Source, uint32_t, DataT) {
-			assert(false);
+		template<typename DataT> void write(Source, uint32_t address, DataT value) {
+			data[address] = value;
 		}
 	} memory;
 
+	// TODO: construct this more formally; the code below just assumes size = 1, which is not a contractual guarantee.
 	const auto instruction = Instruction<false>();
-	const uint8_t value = DataPointerResolver<
-		Model::i8086, Registers, Memory>::read<uint8_t>(
+
+	using Resolver = DataPointerResolver<Model::i8086, Registers, Memory>;
+	const uint8_t memoryValue = Resolver::read<uint8_t>(
 			registers,
 			memory,
 			instruction,
-			pointer
+			indirectPointer
+		);
+	registers.ax = 0x0100;
+	Resolver::write<uint8_t>(
+			registers,
+			memory,
+			instruction,
+			indirectPointer,
+			0xef
 		);
 
-	XCTAssertEqual(value, 0xff);
+	XCTAssertEqual(memoryValue, 0xff);
+	XCTAssertEqual(memory.data[0x01ee], 0xef);
+
+	const uint8_t registerValue = Resolver::read<uint8_t>(
+			registers,
+			memory,
+			instruction,
+			registerPointer
+		);
+	Resolver::write<uint8_t>(
+			registers,
+			memory,
+			instruction,
+			registerPointer,
+			0x93
+		);
+
+	XCTAssertEqual(registerValue, 0xaa);
+	XCTAssertEqual(registers.bl, 0x93);
 }
 
 @end

From 8e3ae2c78fc582893d16618b6ed28845e8667982 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Wed, 2 Mar 2022 20:00:21 -0500
Subject: [PATCH 041/104] Add opcode map as documentation.

---
 .../x86/Documentation/80386 opcode map.html   | 951 ++++++++++++++++++
 1 file changed, 951 insertions(+)
 create mode 100644 InstructionSets/x86/Documentation/80386 opcode map.html

diff --git a/InstructionSets/x86/Documentation/80386 opcode map.html b/InstructionSets/x86/Documentation/80386 opcode map.html
new file mode 100644
index 000000000..3bd234a0e
--- /dev/null
+++ b/InstructionSets/x86/Documentation/80386 opcode map.html	
@@ -0,0 +1,951 @@
+<!DOCTYPE html>
+<html>
+	<head>
+		<meta charset="UTF-8">
+		<title>80386 Opcode Map</title>
+		<style>
+			table, table th, table td {
+				border: 1px solid;
+				border-collapse: collapse;
+				text-align: center;
+			}
+
+			.codetable, .codetable th, .codetable td {
+				border: 0px;
+				border-collapse: collapse;
+				padding-right: 1em;
+				text-align: left;
+				vertical-align: top;
+			}
+			
+			.optable th, .optable td {
+				width: 5em;
+			}
+			.optable tr:nth-child(even) {
+				border-top: 3px solid;
+			}
+
+			.grouptable, .grouptable th, .grouptable td {
+				border-bottom: 3px solid;
+			}
+			.grouptable th, .grouptable td {
+				width: 4em;
+			}
+			
+			.skiprow {
+				background-color: darkgray;				
+			}
+		</style>
+	</head>
+	<body>
+		<h1>Codes for Addressing Method</h1>
+		
+		<table class="codetable">
+			<tr>
+				<td>A</td>
+				<td>Direct address; the instruction has no MODRM field; the address of the operand is encoded in the instruction; no base register, index register, or scaling factor can be applied; e.g., far JMP (EA).</td>
+			</tr>
+			<tr>
+				<td>C</td>
+				<td>The reg field of the MODRM field selects a control register; e.g., MOV (0F20, 0F22).</td>
+			</tr>
+			<tr>
+				<td>D</td>
+				<td>The reg field of the MODRM field selects a debug register; e.g., MOV (0F21, 0F23).</td>
+			</tr>
+			<tr>
+				<td>E</td>
+				<td>A MODRM field follows the opcode and specifies the operand. The operand is either a general register or a memory address. If it is a memory address, the address is computed from a segment register and any of the following values: a base register, an index register, a scaling factor, a displacement.</td>
+			</tr>
+			<tr>
+				<td>F</td>
+				<td>Flags register</td>
+			</tr>
+			<tr>
+				<td>G</td>
+				<td>The reg field of the MODRM field selects a general register; e.g., ADD (00).</td>
+			</tr>
+			<tr>
+				<td>I</td>
+				<td>Immediate data. The value of the operand is encoded in subsequent bytes of the instruction.</td>
+			</tr>
+			<tr>
+				<td>J</td>
+				<td>The instruction contains a relative offset to be added to the instruction-pointer register; e.g., JMP short, LOOP.</td>
+			</tr>
+			<tr>
+				<td>M</td>
+				<td>The MODRM field may refer only to memory; e.g., BOUND, LES, LDS, LSS, LFS, LGS.</td>
+			</tr>
+			<tr>
+				<td>O</td>
+				<td>The instruction has no MODRM field; the offset of the operand is coded as a word or dword (depending on address size attribute) in the instruction. No base register, index register, or scaling factor can be applied; e.g., MOV (A0–A3).</td>
+			</tr>
+			<tr>
+				<td>R</td>
+				<td>The mod field of the MODRM field may refer only to a general register; e.g., MOV (0F20–0F24, 0F26).</td>
+			</tr>
+			<tr>
+				<td>S</td>
+				<td>The reg field of the MODRM field selects a segment register; e.g., MOV (8C, 8E).</td>
+			</tr>
+			<tr>
+				<td>T</td>
+				<td>The reg field of the MODRM field selects a test register; e.g., MOV (0F24, 0F26).</td>
+			</tr>
+			<tr>
+				<td>X</td>
+				<td>Memory addressed by DS:SI; e.g., MOVS, CMPS, OUTS, LODS, SCAS.</td>
+			</tr>
+			<tr>
+				<td>Y</td>
+				<td>Memory addressed by ES:DI; e.g., MOVS, CMPS, INS, STOS.</td>
+			</tr>
+		</table>
+		
+		<h1>Codes for Operand Type</h1>
+		
+		<table class="codetable">
+			<tr>
+				<td>a</td>
+				<td>Two one-word operands in memory or two dword operands in memory, depending on operand size attribute (used only by BOUND).</td>
+			</tr>
+			<tr>
+				<td>b</td>
+				<td>Byte (regardless of operand size attribute).</td>
+			</tr>
+			<tr>
+				<td>c</td>
+				<td>Byte or word, depending on operand size attribute.</td>
+			</tr>
+			<tr>
+				<td>d</td>
+				<td>Dword (regardless of operand size attribute).</td>
+			</tr>
+			<tr>
+				<td>p</td>
+				<td>32-bit or 48-bit pointer, depending on operand size attribute.</td>
+			</tr>
+			<tr>
+				<td>s</td>
+				<td>Six-byte pseudo-descriptor.</td>
+			</tr>
+			<tr>
+				<td>v</td>
+				<td>Word or dword, depending on operand size attribute.</td>
+			</tr>
+			<tr>
+				<td>w</td>
+				<td>Word (regardless of operand size attribute).</td>
+			</tr>
+		</table>
+		
+		<h1>Register Codes</h1>
+		
+		When an operand is a specific register encoded in the opcode, the register is identified by its name; e.g., AX, CL, or ESI. The name of the register indicates whether the register is 32, 16, or 8 bits wide. A register identifier of the form eXX is used when the width of the register depends on the operand size attribute. For example, eAX indicates that the AX register is used when the operand size attribute is 16, and the EAX register is used when the operand size attribute is 32.
+
+		<h1>One-byte 80386 Opcode Map</h1>
+		<table class="optable">
+			<tr>
+				<th></th>
+				<th>0</th>
+				<th>1</th>
+				<th>2</th>
+				<th>3</th>
+				<th>4</th>
+				<th>5</th>
+				<th>6</th>
+				<th>7</th>
+				<th>8</th>
+				<th>9</th>
+				<th>A</th>
+				<th>B</th>
+				<th>C</th>
+				<th>D</th>
+				<th>E</th>
+				<th>F</th>
+			</tr>
+			<tr>
+				<th rowspan=2>0</th>
+
+				<td colspan=6>ADD</td>
+				<td rowspan=2>PUSH ES</td>
+				<td rowspan=2>POP ES</td>
+				<td colspan=6>OR</td>
+				<td rowspan=2>PUSH CS</td>
+				<td rowspan=2>2-byte escape codes</td>
+			</tr>
+			<tr>
+				<!-- ADD -->
+				<td>Eb, Gb</td>
+				<td>Ev, Gv</td>
+				<td>Gb, Eb</td>
+				<td>Gv, Ev</td>
+				<td>AL, Ib</td>
+				<td>eAX, Iv</td>
+
+				<!-- OR -->
+				<td>Eb, Gb</td>
+				<td>Ev, Gv</td>
+				<td>Gb, Eb</td>
+				<td>Gv, Ev</td>
+				<td>AL, Ib</td>
+				<td>eAX, Iv</td>
+			</tr>
+			<tr>
+				<th rowspan=2>1</th>
+
+				<td colspan=6>ADC</td>
+				<td rowspan=2>PUSH SS</td>
+				<td rowspan=2>POP SS</td>
+				<td colspan=6>SBB</td>
+				<td rowspan=2>PUSH DS</td>
+				<td rowspan=2>POP DS</td>
+			</tr>
+			<tr>
+				<!-- ADC -->
+				<td>Eb, Gb</td>
+				<td>Ev, Gv</td>
+				<td>Gb, Eb</td>
+				<td>Gv, Ev</td>
+				<td>AL, Ib</td>
+				<td>eAX, Iv</td>
+
+				<!-- SBB -->
+				<td>Eb, Gb</td>
+				<td>Ev, Gv</td>
+				<td>Gb, Eb</td>
+				<td>Gv, Ev</td>
+				<td>AL, Ib</td>
+				<td>eAX, Iv</td>
+			</tr>
+			<tr>
+				<th rowspan=2>2</th>
+
+				<td colspan=6>AND</td>
+				<td rowspan=2>SEG =ES</td>
+				<td rowspan=2>DAA</td>
+				<td colspan=6>SUB</td>
+				<td rowspan=2>SEG =CS</td>
+				<td rowspan=2>DAS</td>
+			</tr>
+			<tr>
+				<!-- AND -->
+				<td>Eb, Gb</td>
+				<td>Ev, Gv</td>
+				<td>Gb, Eb</td>
+				<td>Gv, Ev</td>
+				<td>AL, Ib</td>
+				<td>eAX, Iv</td>
+
+				<!-- SUB -->
+				<td>Eb, Gb</td>
+				<td>Ev, Gv</td>
+				<td>Gb, Eb</td>
+				<td>Gv, Ev</td>
+				<td>AL, Ib</td>
+				<td>eAX, Iv</td>
+			</tr>
+			<tr>
+				<th rowspan=2>3</th>
+
+				<td colspan=6>XOR</td>
+				<td rowspan=2>SEG =SS</td>
+				<td rowspan=2>AAA</td>
+				<td colspan=6>CMP</td>
+				<td rowspan=2>SEG =DS</td>
+				<td rowspan=2>AAS</td>
+			</tr>
+			<tr>
+				<!-- XOR -->
+				<td>Eb, Gb</td>
+				<td>Ev, Gv</td>
+				<td>Gb, Eb</td>
+				<td>Gv, Ev</td>
+				<td>AL, Ib</td>
+				<td>eAX, Iv</td>
+
+				<!-- CMP -->
+				<td>Eb, Gb</td>
+				<td>Ev, Gv</td>
+				<td>Gb, Eb</td>
+				<td>Gv, Ev</td>
+				<td>AL, Ib</td>
+				<td>eAX, Iv</td>
+			</tr>
+			<tr>
+				<th rowspan=2>4</th>
+
+				<td colspan=8>INC general register</td>
+				<td colspan=8>DEC general register</td>
+			</tr>
+			<tr>
+				<!-- INC general register -->
+				<td>eAX</td>
+				<td>eCX</td>
+				<td>eDX</td>
+				<td>eBX</td>
+				<td>eSP</td>
+				<td>eBP</td>
+				<td>eSI</td>
+				<td>eDI</td>
+
+				<!-- DEC general register -->
+				<td>eAX</td>
+				<td>eCX</td>
+				<td>eDX</td>
+				<td>eBX</td>
+				<td>eSP</td>
+				<td>eBP</td>
+				<td>eSI</td>
+				<td>eDI</td>
+			</tr>
+			<tr>
+				<th rowspan=2>5</th>
+
+				<td colspan=8>PUSH general register</td>
+				<td colspan=8>POP general register</td>
+			</tr>
+			<tr>
+				<!-- PUSH general register -->
+				<td>eAX</td>
+				<td>eCX</td>
+				<td>eDX</td>
+				<td>eBX</td>
+				<td>eSP</td>
+				<td>eBP</td>
+				<td>eSI</td>
+				<td>eDI</td>
+
+				<!-- POP general register -->
+				<td>eAX</td>
+				<td>eCX</td>
+				<td>eDX</td>
+				<td>eBX</td>
+				<td>eSP</td>
+				<td>eBP</td>
+				<td>eSI</td>
+				<td>eDI</td>
+			</tr>
+			<tr>
+				<th rowspan=2>6</th>
+
+				<td rowspan=2>PUSHA</td>
+				<td rowspan=2>POPA</td>
+				<td rowspan=2>BOUND Gv, Ma</td>
+				<td rowspan=2>ARPL Ew, Rw</td>
+				<td rowspan=2>SEG =FS</td>
+				<td rowspan=2>SEG =GS</td>
+				<td rowspan=2>Operand Size</td>
+				<td rowspan=2>Address Size</td>
+				<td rowspan=2>PUSH Iv</td>
+				<td rowspan=2>IMUL GvEvIv</td>
+				<td rowspan=2>PUSH Ib</td>
+				<td rowspan=2>IMUL GvEvIb</td>
+				<td rowspan=2>INSB Yb, DX</td>
+				<td rowspan=2>INSW/D Yv, DX</td>
+				<td rowspan=2>OUTSB DX, Xb</td>
+				<td rowspan=2>OUTSW/D DX, Xv</td>
+			</tr>
+			<tr></tr>
+			<tr>
+				<th rowspan=2>7</th>
+
+				<td colspan=16>Short-displacement jump on condition (Jb)</td>
+			</tr>
+			<tr>
+				<!-- Short-displacement jump on condition (Jb) -->
+				<td>JO</td>
+				<td>JNO</td>
+				<td>JB</td>
+				<td>JNB</td>
+				<td>JZ</td>
+				<td>JNZ</td>
+				<td>JBE</td>
+				<td>JNBE</td>
+				<td>JS</td>
+				<td>JNS</td>
+				<td>JP</td>
+				<td>JNP</td>
+				<td>JL</td>
+				<td>JNL</td>
+				<td>JLE</td>
+				<td>JNLE</td>
+			</tr>
+			<tr>
+				<th rowspan=2>8</th>
+
+				<td colspan=2>Immediate Grp1</td>
+				<td rowspan=2></td>
+				<td rowspan=2>Grp1 Ev, Ib</td>
+				<td colspan=2>TEST</td>
+				<td colspan=2>XCHG</td>
+				<td colspan=4>MOV</td>
+				<td rowspan=2>MOV Ew, Sw</td>
+				<td rowspan=2>LEA Gv, M</td>
+				<td rowspan=2>MOV Sw, Ew</td>
+				<td rowspan=2>POP Ev</td>
+			</tr>
+			<tr>
+				<!-- Immediate Grp1 -->
+				<td>Eb, Ib</td>
+				<td>Ev, Iv</td>
+
+				<!-- TEST -->
+				<td>Eb, Gb</td>
+				<td>Ev, Gv</td>
+				
+				<!-- XCHG -->
+				<td>Eb, Gb</td>
+				<td>Ev, Gv</td>
+				
+				<!-- MOV -->
+				<td>Eb, Gb</td>
+				<td>Ev, Gv</td>
+				<td>Gb, Eb</td>
+				<td>Gv, Ev</td>
+			</tr>
+			<tr>
+				<th rowspan=2>9</th>
+
+				<td rowspan=2>NOP</td>
+				<td colspan=7>XCHG word or double-word register with eAX</td>
+				<td rowspan=2>CBW</td>
+				<td rowspan=2>CWD</td>
+				<td rowspan=2>CALL Ap</td>
+				<td rowspan=2>WAIT</td>
+				<td rowspan=2>PUSHF Fv</td>
+				<td rowspan=2>POPF Fv</td>
+				<td rowspan=2>SAHF</td>
+				<td rowspan=2>LAHF</td>
+			</tr>
+			<tr>
+				<!-- XCHG -->
+				<td>eCX</td>
+				<td>eDX</td>
+				<td>eBX</td>
+				<td>eSP</td>
+				<td>eBP</td>
+				<td>eSI</td>
+				<td>eDI</td>
+			</tr>
+			<tr>
+				<th rowspan=2>A</th>
+
+				<td colspan=4>MOV</td>
+				<td rowspan=2>MOVSB Xb, Yv</td>
+				<td rowspan=2>MOVSW/D Xv, Yv</td>
+				<td rowspan=2>CMPSB Xb, Yb</td>
+				<td rowspan=2>CMPSW/D Xv, Yv</td>
+				<td colspan=2>TEST</td>
+				<td rowspan=2>STOSB Yb, AL</td>
+				<td rowspan=2>STOSW/D Yv, eAX</td>
+				<td rowspan=2>LODSB AL, Xb</td>
+				<td rowspan=2>LODSW/D eAX, Xv</td>
+				<td rowspan=2>SCASB AL, Xb</td>
+				<td rowspan=2>SCASW/D eAX, Xv</td>
+			</tr>
+			<tr>
+				<!-- MOV -->
+				<td>AL, Ob</td>
+				<td>eAX, Ov</td>
+				<td>Ob, AL</td>
+				<td>Ov, eAX</td>
+
+				<!-- TEST -->
+				<td>AL, Ib</td>
+				<td>eAX, Iv</td>
+			</tr>
+			<tr>
+				<th rowspan=2>B</th>
+
+				<td colspan=8>MOV immediate byte into byte register</td>
+				<td colspan=8>MOV immediate word or double into word or double register</td>
+			</tr>
+			<tr>
+				<td>AL</td>
+				<td>CL</td>
+				<td>DL</td>
+				<td>BL</td>
+				<td>AH</td>
+				<td>CH</td>
+				<td>DH</td>
+				<td>BH</td>
+				
+				<td>eAX</td>
+				<td>eCX</td>
+				<td>eDX</td>
+				<td>eBX</td>
+				<td>eSP</td>
+				<td>eBP</td>
+				<td>eSI</td>
+				<td>eDI</td>
+			</tr>
+			<tr>
+				<th rowspan=2>C</th>
+
+				<td colspan=2>Shift Grp2</td>
+				<td colspan=2>RET near</td>
+				<td rowspan=2>LES Gv, Mp</td>
+				<td rowspan=2>LDS Gv, Mp</td>
+				<td colspan=2>MOV</td>
+				<td rowspan=2>ENTER</td>
+				<td rowspan=2>LEAVE</td>
+				<td colspan=2>RET far</td>
+				<td rowspan=2>INT 3</td>
+				<td rowspan=2>INT Ib</td>
+				<td rowspan=2>INTO</td>
+				<td rowspan=2>IRET</td>
+			</tr>
+			<tr>
+				<td>Eb, Ib</td>
+				<td>Ev, Iv</td>
+				<td>Iw</td>
+				<td></td>
+				<td>Eb, Ib</td>
+				<td>Ev, Iv</td>
+				<td>Iw</td>
+				<td></td>
+			</tr>
+			<tr>
+				<th rowspan=2>D</th>
+
+				<td colspan=4>Shift Grp2</td>
+				<td rowspan=2>AAM</td>
+				<td rowspan=2>AAD</td>
+				<td rowspan=2></td>
+				<td rowspan=2>XLAT</td>
+				<td colspan=8 rowspan=2>ESC (Escape to coprocessor instruction set)</td>
+			</tr>
+			<tr>
+				<td>Eb, 1</td>
+				<td>Ev, 1</td>
+				<td>Eb, CL</td>
+				<td>Ev, CL</td>
+			</tr>
+			<tr>
+				<th rowspan=2>E</th>
+
+				<td rowspan=2>LOOPNE Jb</td>
+				<td rowspan=2>LOOPE Jb</td>
+				<td rowspan=2>LOOP Jb</td>
+				<td rowspan=2>JCXZ Jb</td>
+				<td colspan=2>IN</td>
+				<td colspan=2>OUT</td>
+				<td rowspan=2>CALL Av</td>
+				<td colspan=3>JMP</td>
+				<td colspan=2>IN</td>
+				<td colspan=2>OUT</td>
+			</tr>
+			<tr>
+				<!-- IN -->
+				<td>AL, Ib</td>
+				<td>eAX, Ib</td>
+
+				<!-- OUT -->
+				<td>Ib, AL</td>
+				<td>Ib, eAX</td>
+
+				<!-- JMP -->
+				<td>Jv</td>
+				<td>Ap</td>
+				<td>Jb</td>
+
+				<!-- IN -->
+				<td>AL, DX</td>
+				<td>eAX, DX</td>
+
+				<!-- OUT -->
+				<td>DX, AL</td>
+				<td>DX, eAX</td>
+			</tr>
+			<tr>
+				<th rowspan=2>F</th>
+				
+				<td rowspan=2>LOCK</td>
+				<td rowspan=2></td>
+				<td rowspan=2>REPNE</td>
+				<td rowspan=2>REP / REPE</td>
+				<td rowspan=2>HLT</td>
+				<td rowspan=2>CMC</td>
+				<td colspan=2>Unary Grp3</td>
+				<td rowspan=2>CLC</td>
+				<td rowspan=2>STC</td>
+				<td rowspan=2>CLI</td>
+				<td rowspan=2>STI</td>
+				<td rowspan=2>CLD</td>
+				<td rowspan=2>STD</td>
+				<td rowspan=2>INC/DEC Grp4</td>
+				<td rowspan=2>Indirect Grp5</td>
+			</tr>
+			<tr>
+				<!-- Unary Grp3 -->
+				<td>Eb</td>
+				<td>Ev</td>
+			</tr>
+		</table>
+
+		<h1>Two-Byte 80386 Opcode Map (First byte is 0FH)</h1>
+		<table class="optable">
+			<tr>
+				<th></th>
+				<th>0</th>
+				<th>1</th>
+				<th>2</th>
+				<th>3</th>
+				<th>4</th>
+				<th>5</th>
+				<th>6</th>
+				<th>7</th>
+				<th>8</th>
+				<th>9</th>
+				<th>A</th>
+				<th>B</th>
+				<th>C</th>
+				<th>D</th>
+				<th>E</th>
+				<th>F</th>
+			</tr>
+			<tr>
+				<th rowspan=2>0</th>
+				
+				<td rowspan=2>Grp6</td>
+				<td rowspan=2>Grp7</td>
+				<td rowspan=2>LAR Gv, Ew</td>
+				<td rowspan=2>LSL Gv, Ew</td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2>CLTS</td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+			</tr>
+			<tr></tr>
+			<tr>
+				<th rowspan=2>1</th>
+				
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+			</tr>
+			<tr></tr>
+			<tr>
+				<th rowspan=2>2</th>
+				
+				<td rowspan=2>MOV Cr, Rd</td>
+				<td rowspan=2>MOV Dd, Rd</td>
+				<td rowspan=2>MOV Rd, Cd</td>
+				<td rowspan=2>MOV Rd, Dd</td>
+				<td rowspan=2>MOV Td, Rd</td>
+				<td rowspan=2></td>
+				<td rowspan=2>MOV Rd, Td</td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+			</tr>
+			<tr></tr>
+			<tr class="skiprow">
+				<th rowspan=2>≈</th>
+				
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+			</tr>
+			<tr></tr>
+			<tr>
+				<th rowspan=2>8</th>
+				
+				<td colspan=16>Long-displacement jump on condition (Jv)</td>
+			</tr>
+			<tr>
+				<!-- Long-displacement jump on condition (Jv) -->
+				<td>JO</td>
+				<td>JNO</td>
+				<td>JB</td>
+				<td>JNB</td>
+				<td>JZ</td>
+				<td>JNZ</td>
+				<td>JBE</td>
+				<td>JNBE</td>
+				<td>JS</td>
+				<td>JNS</td>
+				<td>JP</td>
+				<td>JNP</td>
+				<td>JL</td>
+				<td>JNL</td>
+				<td>JLE</td>
+				<td>JNLE</td>
+			</tr>
+			<tr>
+				<th rowspan=2>9</th>
+				
+				<td colspan=8>Byte set on condition (Eb)</td>
+				<td rowspan=2>SETS</td>
+				<td rowspan=2>SETNS</td>
+				<td rowspan=2>SETP</td>
+				<td rowspan=2>SETNP</td>
+				<td rowspan=2>SETL</td>
+				<td rowspan=2>SETNL</td>
+				<td rowspan=2>SETLE</td>
+				<td rowspan=2>SETNLE</td>
+			</tr>
+			<tr>
+				<!-- Byte set on condition (Eb) -->
+				<td>SETO</td>
+				<td>SETNO</td>
+				<td>SETB</td>
+				<td>SETNB</td>
+				<td>SETZ</td>
+				<td>SETNZ</td>
+				<td>SETBE</td>
+				<td>SETNBE</td>
+			</tr>
+			<tr>
+				<th rowspan=2>A</th>
+				
+				<td rowspan=2>PUSH FS</td>
+				<td rowspan=2>POP FS</td>
+				<td rowspan=2></td>
+				<td rowspan=2>BT Ev, Gv</td>
+				<td rowspan=2>SHLD EvGvIb</td>
+				<td rowspan=2>SHLD EvGvCL</td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2>PUSH GS</td>
+				<td rowspan=2>POP GS</td>
+				<td rowspan=2></td>
+				<td rowspan=2>BTS Ev, Gv</td>
+				<td rowspan=2>SHRD EvGvIb</td>
+				<td rowspan=2>SHRD EvGvCL</td>
+				<td rowspan=2></td>
+				<td rowspan=2>IMUL Gv, Ev</td>
+			</tr>
+			<tr></tr>
+			<tr>
+				<th rowspan=2>B</th>
+				
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2>LSS Mp</td>
+				<td rowspan=2>BTR Ev, Gv</td>
+				<td rowspan=2>LFS Mp</td>
+				<td rowspan=2>LGS Mp</td>
+				<td colspan=2>MOVZX</td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2>Grp8 Ev, Ib</td>
+				<td rowspan=2>BTC Ev, Gv</td>
+				<td rowspan=2>BSF Gv, Ev</td>
+				<td rowspan=2>BSR Gv, Ev</td>
+				<td colspan=2>MOVSX</td>
+			</tr>
+			<tr>
+				<!-- MOVZX -->
+				<td>Gv, Eb</td>
+				<td>Gv, Ew</td>
+
+				<!-- MOVSX -->
+				<td>Gv, Eb</td>
+				<td>Gv, Ew</td>
+			</tr>
+			<tr class="skiprow">
+				<th rowspan=2>≈</th>
+				
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+				<td rowspan=2>≈</td>
+			</tr>
+			<tr></tr>
+			<tr>
+				<th rowspan=2>F</th>
+				
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+				<td rowspan=2></td>
+			</tr>
+			<tr></tr>
+		</table>
+		<h1>Opcodes Determined by Bits 5, 4, 3 of MODRM Field</h1>
+		<table>
+			<tr>
+				<td>mod</td>
+				<td>nnn</td>
+				<td>R/M</td>
+			</tr>
+		</table>
+		<br />
+		<table class="grouptable">
+			<tr>
+				<th></th>
+				<th>000</th>
+				<th>001</th>
+				<th>010</th>
+				<th>011</th>
+				<th>100</th>
+				<th>101</th>
+				<th>110</th>
+				<th>111</th>
+			</tr>
+			<tr>
+				<th>Group 1</th>
+				
+				<td>ADD</td>
+				<td>OR</td>
+				<td>ADC</td>
+				<td>SBB</td>
+				<td>AND</td>
+				<td>SUB</td>
+				<td>XOR</td>
+				<td>CMP</td>
+			</tr>
+			<tr>
+				<th>Group 2</th>
+				
+				<td>ROL</td>
+				<td>ROR</td>
+				<td>RCL</td>
+				<td>RCR</td>
+				<td>SHL</td>
+				<td>SHR</td>
+				<td></td>
+				<td>SAR</td>
+			</tr>
+			<tr>
+				<th>Group 3</th>
+				
+				<td>TEST Ib/Iv</td>
+				<td></td>
+				<td>NOT</td>
+				<td>NEG</td>
+				<td>MUL AL/eAX</td>
+				<td>IMUL AL/eAX</td>
+				<td>DIV AL/eAX</td>
+				<td>IDIV AL/eAX</td>
+			</tr>
+			<tr>
+				<th>Group 4</th>
+				
+				<td>INC Eb</td>
+				<td>DEC Eb</td>
+				<td></td>
+				<td></td>
+				<td></td>
+				<td></td>
+				<td></td>
+				<td></td>
+			</tr>
+			<tr>
+				<th>Group 5</th>
+				
+				<td>INC Ev</td>
+				<td>DEC Ev</td>
+				<td>CALL Ev</td>
+				<td>CALL Ep</td>
+				<td>JMP Ev</td>
+				<td>JMP Ep</td>
+				<td>PUSH Ev</td>
+				<td></td>
+			</tr>
+			<tr>
+				<th>Group 6</th>
+				
+				<td>SLDT Ew</td>
+				<td>STR Ew</td>
+				<td>LLDT Ew</td>
+				<td>LTR Ew</td>
+				<td>VERR Ew</td>
+				<td>VERW Ew</td>
+				<td></td>
+				<td></td>
+			</tr>
+			<tr>
+				<th>Group 7</th>
+
+				<td>SGDT Ms</td>
+				<td>SIDT Ms</td>
+				<td>LGDT Ms</td>
+				<td>LIDT Ms</td>
+				<td>SMSW Ew</td>
+				<td></td>
+				<td>LMSW Ew</td>
+				<td></td>
+			</tr>
+			<tr>
+				<th>Group 8</th>
+				
+				<td></td>
+				<td></td>
+				<td></td>
+				<td></td>
+				<td>BT</td>
+				<td>BTS</td>
+				<td>BTR</td>
+				<td>BTC</td>
+			</tr>
+		</table>
+	</body>
+</html>
\ No newline at end of file

From 11bb594fa2adad4fe5716df760a309a9d8ef9339 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Wed, 2 Mar 2022 20:23:35 -0500
Subject: [PATCH 042/104] Sets up [ignored] memory and data size prefixes.

---
 InstructionSets/x86/Decoder.cpp | 27 +++++++++++++++++++++++----
 InstructionSets/x86/Decoder.hpp | 15 +++++++++++++--
 2 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 0dcfdf5b8..a3f442ca3 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -179,10 +179,14 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				RequiresMin(i80286);
 				MemRegReg(ARPL, MemReg_Reg, 2);
 			break;
-//			case 0x67:
-//				RequiresMin(i80386);
-//				address_size_ = true;
-//			break;
+			case 0x66:
+				RequiresMin(i80386);
+				data_size_ = DataSize(int(default_data_size_) ^ int(DataSize::Word) ^ int(DataSize::DWord));
+			break;
+			case 0x67:
+				RequiresMin(i80386);
+				address_size_ = AddressSize(int(default_address_size_) ^ int(AddressSize::b16) ^ int(AddressSize::b32));
+			break;
 			case 0x6c:	// INSB
 				RequiresMin(i80186);
 				Complete(INS, None, None, 1);
@@ -722,6 +726,21 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 	return std::make_pair(0, InstructionT());
 }
 
+template <Model model> void Decoder<model>::set_32bit_protected_mode(bool enabled) {
+	if constexpr (!is_32bit(model)) {
+		assert(!enabled);
+		return;
+	}
+
+	if(enabled) {
+		default_address_size_ = address_size_ = AddressSize::b32;
+		default_data_size_ = data_size_ = DataSize::DWord;
+	} else {
+		default_address_size_ = address_size_ = AddressSize::b16;
+		default_data_size_ = data_size_ = DataSize::Word;
+	}
+}
+
 // Ensure all possible decoders are built.
 template class InstructionSet::x86::Decoder<InstructionSet::x86::Model::i8086>;
 template class InstructionSet::x86::Decoder<InstructionSet::x86::Model::i80186>;
diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index 43b6b9b51..7ce295f93 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -36,6 +36,11 @@ template <Model model> class Decoder {
 		*/
 		std::pair<int, InstructionT> decode(const uint8_t *source, size_t length);
 
+		/*!
+			Enables or disables 32-bit protected mode. Meaningful only if the @c Model supports it.
+		*/
+		void set_32bit_protected_mode(bool);
+
 	private:
 		enum class Phase {
 			/// Captures all prefixes and continues until an instruction byte is encountered.
@@ -148,16 +153,22 @@ template <Model model> class Decoder {
 		// Prefix capture fields.
 		Repetition repetition_ = Repetition::None;
 		bool lock_ = false;
-		AddressSize address_size_ = AddressSize::b16;
 		Source segment_override_ = Source::None;
 
+		// 32-bit/16-bit selection.
+		AddressSize default_address_size_ = AddressSize::b16;
+		DataSize default_data_size_ = DataSize::Word;
+		AddressSize address_size_ = AddressSize::b16;
+		DataSize data_size_ = DataSize::Word;
+
 		/// Resets size capture and all fields with default values.
 		void reset_parsing() {
 			consumed_ = operand_bytes_ = 0;
 			displacement_size_ = operand_size_ = 0;
 			displacement_ = operand_ = 0;
 			lock_ = false;
-			address_size_ = AddressSize::b16;
+			address_size_ = default_address_size_;
+			data_size_ = default_data_size_;
 			segment_override_ = Source::None;
 			repetition_ = Repetition::None;
 			phase_ = Phase::Instruction;

From dfb312fee68534341bfdc31e84381dc8bd0913d8 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sat, 5 Mar 2022 11:56:08 -0500
Subject: [PATCH 043/104] Make column and row meanings overt.

---
 .../x86/Documentation/80386 opcode map.html   | 152 +++++++-----------
 1 file changed, 59 insertions(+), 93 deletions(-)

diff --git a/InstructionSets/x86/Documentation/80386 opcode map.html b/InstructionSets/x86/Documentation/80386 opcode map.html
index 3bd234a0e..fde2485ec 100644
--- a/InstructionSets/x86/Documentation/80386 opcode map.html	
+++ b/InstructionSets/x86/Documentation/80386 opcode map.html	
@@ -148,25 +148,25 @@
 		<table class="optable">
 			<tr>
 				<th></th>
-				<th>0</th>
-				<th>1</th>
-				<th>2</th>
-				<th>3</th>
-				<th>4</th>
-				<th>5</th>
-				<th>6</th>
-				<th>7</th>
-				<th>8</th>
-				<th>9</th>
-				<th>A</th>
-				<th>B</th>
-				<th>C</th>
-				<th>D</th>
-				<th>E</th>
-				<th>F</th>
+				<th>x0</th>
+				<th>x1</th>
+				<th>x2</th>
+				<th>x3</th>
+				<th>x4</th>
+				<th>x5</th>
+				<th>x6</th>
+				<th>x7</th>
+				<th>x8</th>
+				<th>x9</th>
+				<th>xA</th>
+				<th>xB</th>
+				<th>xC</th>
+				<th>xD</th>
+				<th>xE</th>
+				<th>xF</th>
 			</tr>
 			<tr>
-				<th rowspan=2>0</th>
+				<th rowspan=2>0x</th>
 
 				<td colspan=6>ADD</td>
 				<td rowspan=2>PUSH ES</td>
@@ -193,7 +193,7 @@
 				<td>eAX, Iv</td>
 			</tr>
 			<tr>
-				<th rowspan=2>1</th>
+				<th rowspan=2>1x</th>
 
 				<td colspan=6>ADC</td>
 				<td rowspan=2>PUSH SS</td>
@@ -220,7 +220,7 @@
 				<td>eAX, Iv</td>
 			</tr>
 			<tr>
-				<th rowspan=2>2</th>
+				<th rowspan=2>2x</th>
 
 				<td colspan=6>AND</td>
 				<td rowspan=2>SEG =ES</td>
@@ -247,7 +247,7 @@
 				<td>eAX, Iv</td>
 			</tr>
 			<tr>
-				<th rowspan=2>3</th>
+				<th rowspan=2>3x</th>
 
 				<td colspan=6>XOR</td>
 				<td rowspan=2>SEG =SS</td>
@@ -274,7 +274,7 @@
 				<td>eAX, Iv</td>
 			</tr>
 			<tr>
-				<th rowspan=2>4</th>
+				<th rowspan=2>4x</th>
 
 				<td colspan=8>INC general register</td>
 				<td colspan=8>DEC general register</td>
@@ -301,7 +301,7 @@
 				<td>eDI</td>
 			</tr>
 			<tr>
-				<th rowspan=2>5</th>
+				<th rowspan=2>5x</th>
 
 				<td colspan=8>PUSH general register</td>
 				<td colspan=8>POP general register</td>
@@ -328,7 +328,7 @@
 				<td>eDI</td>
 			</tr>
 			<tr>
-				<th rowspan=2>6</th>
+				<th rowspan=2>6x</th>
 
 				<td rowspan=2>PUSHA</td>
 				<td rowspan=2>POPA</td>
@@ -349,7 +349,7 @@
 			</tr>
 			<tr></tr>
 			<tr>
-				<th rowspan=2>7</th>
+				<th rowspan=2>7x</th>
 
 				<td colspan=16>Short-displacement jump on condition (Jb)</td>
 			</tr>
@@ -373,7 +373,7 @@
 				<td>JNLE</td>
 			</tr>
 			<tr>
-				<th rowspan=2>8</th>
+				<th rowspan=2>8x</th>
 
 				<td colspan=2>Immediate Grp1</td>
 				<td rowspan=2></td>
@@ -406,7 +406,7 @@
 				<td>Gv, Ev</td>
 			</tr>
 			<tr>
-				<th rowspan=2>9</th>
+				<th rowspan=2>9x</th>
 
 				<td rowspan=2>NOP</td>
 				<td colspan=7>XCHG word or double-word register with eAX</td>
@@ -430,7 +430,7 @@
 				<td>eDI</td>
 			</tr>
 			<tr>
-				<th rowspan=2>A</th>
+				<th rowspan=2>Ax</th>
 
 				<td colspan=4>MOV</td>
 				<td rowspan=2>MOVSB Xb, Yv</td>
@@ -457,7 +457,7 @@
 				<td>eAX, Iv</td>
 			</tr>
 			<tr>
-				<th rowspan=2>B</th>
+				<th rowspan=2>Bx</th>
 
 				<td colspan=8>MOV immediate byte into byte register</td>
 				<td colspan=8>MOV immediate word or double into word or double register</td>
@@ -482,7 +482,7 @@
 				<td>eDI</td>
 			</tr>
 			<tr>
-				<th rowspan=2>C</th>
+				<th rowspan=2>Cx</th>
 
 				<td colspan=2>Shift Grp2</td>
 				<td colspan=2>RET near</td>
@@ -508,7 +508,7 @@
 				<td></td>
 			</tr>
 			<tr>
-				<th rowspan=2>D</th>
+				<th rowspan=2>Dx</th>
 
 				<td colspan=4>Shift Grp2</td>
 				<td rowspan=2>AAM</td>
@@ -524,7 +524,7 @@
 				<td>Ev, CL</td>
 			</tr>
 			<tr>
-				<th rowspan=2>E</th>
+				<th rowspan=2>Ex</th>
 
 				<td rowspan=2>LOOPNE Jb</td>
 				<td rowspan=2>LOOPE Jb</td>
@@ -560,7 +560,7 @@
 				<td>DX, eAX</td>
 			</tr>
 			<tr>
-				<th rowspan=2>F</th>
+				<th rowspan=2>Fx</th>
 				
 				<td rowspan=2>LOCK</td>
 				<td rowspan=2></td>
@@ -589,25 +589,25 @@
 		<table class="optable">
 			<tr>
 				<th></th>
-				<th>0</th>
-				<th>1</th>
-				<th>2</th>
-				<th>3</th>
-				<th>4</th>
-				<th>5</th>
-				<th>6</th>
-				<th>7</th>
-				<th>8</th>
-				<th>9</th>
-				<th>A</th>
-				<th>B</th>
-				<th>C</th>
-				<th>D</th>
-				<th>E</th>
-				<th>F</th>
+				<th>x0</th>
+				<th>x1</th>
+				<th>x2</th>
+				<th>x3</th>
+				<th>x4</th>
+				<th>x5</th>
+				<th>x6</th>
+				<th>x7</th>
+				<th>x8</th>
+				<th>x9</th>
+				<th>xA</th>
+				<th>xB</th>
+				<th>xC</th>
+				<th>xD</th>
+				<th>xE</th>
+				<th>xF</th>
 			</tr>
 			<tr>
-				<th rowspan=2>0</th>
+				<th rowspan=2>0x</th>
 				
 				<td rowspan=2>Grp6</td>
 				<td rowspan=2>Grp7</td>
@@ -628,7 +628,7 @@
 			</tr>
 			<tr></tr>
 			<tr>
-				<th rowspan=2>1</th>
+				<th rowspan=2>1x</th>
 				
 				<td rowspan=2></td>
 				<td rowspan=2></td>
@@ -649,7 +649,7 @@
 			</tr>
 			<tr></tr>
 			<tr>
-				<th rowspan=2>2</th>
+				<th rowspan=2>2x</th>
 				
 				<td rowspan=2>MOV Cr, Rd</td>
 				<td rowspan=2>MOV Dd, Rd</td>
@@ -670,28 +670,11 @@
 			</tr>
 			<tr></tr>
 			<tr class="skiprow">
-				<th rowspan=2>≈</th>
-				
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
+				<th rowspan=2 colspan=17>≈</th>
 			</tr>
 			<tr></tr>
 			<tr>
-				<th rowspan=2>8</th>
+				<th rowspan=2>8x</th>
 				
 				<td colspan=16>Long-displacement jump on condition (Jv)</td>
 			</tr>
@@ -715,7 +698,7 @@
 				<td>JNLE</td>
 			</tr>
 			<tr>
-				<th rowspan=2>9</th>
+				<th rowspan=2>9x</th>
 				
 				<td colspan=8>Byte set on condition (Eb)</td>
 				<td rowspan=2>SETS</td>
@@ -739,7 +722,7 @@
 				<td>SETNBE</td>
 			</tr>
 			<tr>
-				<th rowspan=2>A</th>
+				<th rowspan=2>Ax</th>
 				
 				<td rowspan=2>PUSH FS</td>
 				<td rowspan=2>POP FS</td>
@@ -760,7 +743,7 @@
 			</tr>
 			<tr></tr>
 			<tr>
-				<th rowspan=2>B</th>
+				<th rowspan=2>Bx</th>
 				
 				<td rowspan=2></td>
 				<td rowspan=2></td>
@@ -787,28 +770,11 @@
 				<td>Gv, Ew</td>
 			</tr>
 			<tr class="skiprow">
-				<th rowspan=2>≈</th>
-				
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
-				<td rowspan=2>≈</td>
+				<th rowspan=2 colspan=17>≈</th>
 			</tr>
 			<tr></tr>
 			<tr>
-				<th rowspan=2>F</th>
+				<th rowspan=2>Fx</th>
 				
 				<td rowspan=2></td>
 				<td rowspan=2></td>
@@ -948,4 +914,4 @@
 			</tr>
 		</table>
 	</body>
-</html>
\ No newline at end of file
+</html>

From 8a0902a83ba4afc62018acd1de1de2415aae1a2d Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sat, 5 Mar 2022 13:52:07 -0500
Subject: [PATCH 044/104] Adapts existing opcodes for 32-bit parsing.

---
 InstructionSets/x86/Decoder.cpp               | 492 +++++++++---------
 InstructionSets/x86/Decoder.hpp               |  22 +-
 InstructionSets/x86/Instruction.hpp           |  24 +-
 .../Mac/Clock SignalTests/x86DecoderTests.mm  | 106 ++--
 4 files changed, 332 insertions(+), 312 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index a3f442ca3..616602e33 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -57,27 +57,28 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 	operation_ = Operation::op;					\
 	phase_ = Phase::ModRegRM;					\
 	modregrm_format_ = ModRegRMFormat::format;	\
-	operand_size_ = 0;							\
+	operand_size_ = DataSize::None;				\
 	operation_size_ = size
 
 /// Handles JO, JNO, JB, etc — jumps with a single byte displacement.
-#define Jump(op)									\
+#define Jump(op, size)								\
 	operation_ = Operation::op;						\
 	phase_ = Phase::DisplacementOrOperand;			\
-	displacement_size_ = 1
+	displacement_size_ = size
 
 /// Handles far CALL and far JMP — fixed four byte operand operations.
 #define Far(op)										\
 	operation_ = Operation::op;						\
 	phase_ = Phase::DisplacementOrOperand;			\
-	operand_size_ = 4;								\
+	operand_size_ = data_size_;						\
+	displacement_size_ = DataSize::Word
 
 /// Handles ENTER — a fixed three-byte operation.
 #define Displacement16Operand8(op)					\
 	operation_ = Operation::op;						\
 	phase_ = Phase::DisplacementOrOperand;			\
-	displacement_size_ = 2;							\
-	operand_size_ = 1;								\
+	displacement_size_ = DataSize::Word;			\
+	operand_size_ = DataSize::Byte
 
 #define undefined()	{												\
 	const auto result = std::make_pair(consumed_, InstructionT());	\
@@ -89,29 +90,27 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 #define RequiresMin(x)	if constexpr (model < Model::x) undefined();
 
 	while(phase_ == Phase::Instruction && source != end) {
-		// Retain the instruction byte, in case additional decoding is deferred
-		// to the ModRegRM byte.
-		instr_ = *source;
+		const uint8_t instr = *source;
 		++source;
 		++consumed_;
 
-		switch(instr_) {
+		switch(instr) {
 			default: undefined();
 
-#define PartialBlock(start, operation)								\
-	case start + 0x00: MemRegReg(operation, MemReg_Reg, 1);	break;	\
-	case start + 0x01: MemRegReg(operation, MemReg_Reg, 2);	break;	\
-	case start + 0x02: MemRegReg(operation, Reg_MemReg, 1);	break;	\
-	case start + 0x03: MemRegReg(operation, Reg_MemReg, 2);	break;	\
-	case start + 0x04: RegData(operation, eAX, 1);			break;	\
-	case start + 0x05: RegData(operation, eAX, 2)
+#define PartialBlock(start, operation)												\
+	case start + 0x00: MemRegReg(operation, MemReg_Reg, DataSize::Byte);	break;	\
+	case start + 0x01: MemRegReg(operation, MemReg_Reg, data_size_);		break;	\
+	case start + 0x02: MemRegReg(operation, Reg_MemReg, DataSize::Byte);	break;	\
+	case start + 0x03: MemRegReg(operation, Reg_MemReg, data_size_);		break;	\
+	case start + 0x04: RegData(operation, eAX, DataSize::Byte);				break;	\
+	case start + 0x05: RegData(operation, eAX, data_size_)
 
-			PartialBlock(0x00, ADD);					break;
-			case 0x06: Complete(PUSH, ES, None, 2);		break;
-			case 0x07: Complete(POP, None, ES, 2);		break;
+			PartialBlock(0x00, ADD);							break;
+			case 0x06: Complete(PUSH, ES, None, data_size_);	break;
+			case 0x07: Complete(POP, None, ES, data_size_);		break;
 
-			PartialBlock(0x08, OR);						break;
-			case 0x0e: Complete(PUSH, CS, None, 2);		break;
+			PartialBlock(0x08, OR);								break;
+			case 0x0e: Complete(PUSH, CS, None, data_size_);	break;
 
 			// The 286 onwards have a further set of instructions
 			// prefixed with $0f.
@@ -120,41 +119,41 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				phase_ = Phase::InstructionPageF;
 			break;
 
-			PartialBlock(0x10, ADC);					break;
-			case 0x16: Complete(PUSH, SS, None, 2);		break;
-			case 0x17: Complete(POP, None, SS, 2);		break;
+			PartialBlock(0x10, ADC);								break;
+			case 0x16: Complete(PUSH, SS, None, DataSize::Word);	break;
+			case 0x17: Complete(POP, None, SS, DataSize::Word);		break;
 
-			PartialBlock(0x18, SBB);					break;
-			case 0x1e: Complete(PUSH, DS, None, 2);		break;
-			case 0x1f: Complete(POP, None, DS, 2);		break;
+			PartialBlock(0x18, SBB);								break;
+			case 0x1e: Complete(PUSH, DS, None, DataSize::Word);	break;
+			case 0x1f: Complete(POP, None, DS, DataSize::Word);		break;
 
-			PartialBlock(0x20, AND);					break;
-			case 0x26: segment_override_ = Source::ES;	break;
-			case 0x27: Complete(DAA, eAX, eAX, 1);		break;
+			PartialBlock(0x20, AND);								break;
+			case 0x26: segment_override_ = Source::ES;				break;
+			case 0x27: Complete(DAA, eAX, eAX, DataSize::Byte);		break;
 
-			PartialBlock(0x28, SUB);					break;
-			case 0x2e: segment_override_ = Source::CS;	break;
-			case 0x2f: Complete(DAS, eAX, eAX, 1);		break;
+			PartialBlock(0x28, SUB);								break;
+			case 0x2e: segment_override_ = Source::CS;				break;
+			case 0x2f: Complete(DAS, eAX, eAX, DataSize::Byte);		break;
 
-			PartialBlock(0x30, XOR);					break;
-			case 0x36: segment_override_ = Source::SS;	break;
-			case 0x37: Complete(AAA, eAX, eAX, 2);		break;
+			PartialBlock(0x30, XOR);								break;
+			case 0x36: segment_override_ = Source::SS;				break;
+			case 0x37: Complete(AAA, eAX, eAX, DataSize::Word);		break;
 
-			PartialBlock(0x38, CMP);					break;
-			case 0x3e: segment_override_ = Source::DS;	break;
-			case 0x3f: Complete(AAS, eAX, eAX, 2);		break;
+			PartialBlock(0x38, CMP);								break;
+			case 0x3e: segment_override_ = Source::DS;				break;
+			case 0x3f: Complete(AAS, eAX, eAX, DataSize::Word);		break;
 
 #undef PartialBlock
 
-#define RegisterBlock(start, operation)								\
-	case start + 0x00: Complete(operation, eAX, eAX, 2);	break;	\
-	case start + 0x01: Complete(operation, eCX, eCX, 2);	break;	\
-	case start + 0x02: Complete(operation, eDX, eDX, 2);	break;	\
-	case start + 0x03: Complete(operation, eBX, eBX, 2);	break;	\
-	case start + 0x04: Complete(operation, eSP, eSP, 2);	break;	\
-	case start + 0x05: Complete(operation, eBP, eBP, 2);	break;	\
-	case start + 0x06: Complete(operation, eSI, eSI, 2);	break;	\
-	case start + 0x07: Complete(operation, eDI, eDI, 2)
+#define RegisterBlock(start, operation)										\
+	case start + 0x00: Complete(operation, eAX, eAX, data_size_);	break;	\
+	case start + 0x01: Complete(operation, eCX, eCX, data_size_);	break;	\
+	case start + 0x02: Complete(operation, eDX, eDX, data_size_);	break;	\
+	case start + 0x03: Complete(operation, eBX, eBX, data_size_);	break;	\
+	case start + 0x04: Complete(operation, eSP, eSP, data_size_);	break;	\
+	case start + 0x05: Complete(operation, eBP, eBP, data_size_);	break;	\
+	case start + 0x06: Complete(operation, eSI, eSI, data_size_);	break;	\
+	case start + 0x07: Complete(operation, eDI, eDI, data_size_)
 
 			RegisterBlock(0x40, INC);	break;
 			RegisterBlock(0x48, DEC);	break;
@@ -165,19 +164,19 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 
 			case 0x60:
 				RequiresMin(i80186);
-				Complete(PUSHA, None, None, 2);
+				Complete(PUSHA, None, None, data_size_);
 			break;
 			case 0x61:
 				RequiresMin(i80186);
-				Complete(POPA, None, None, 2);
+				Complete(POPA, None, None, data_size_);
 			break;
 			case 0x62:
 				RequiresMin(i80186);
-				MemRegReg(BOUND, Reg_MemReg, 2);
+				MemRegReg(BOUND, Reg_MemReg, data_size_);
 			break;
 			case 0x63:
 				RequiresMin(i80286);
-				MemRegReg(ARPL, MemReg_Reg, 2);
+				MemRegReg(ARPL, MemReg_Reg, DataSize::Word);
 			break;
 			case 0x66:
 				RequiresMin(i80386);
@@ -189,115 +188,121 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			break;
 			case 0x6c:	// INSB
 				RequiresMin(i80186);
-				Complete(INS, None, None, 1);
+				Complete(INS, None, None, DataSize::Byte);
 			break;
 			case 0x6d:	// INSW
 				RequiresMin(i80186);
-				Complete(INS, None, None, 2);
+				Complete(INS, None, None, data_size_);
 			break;
 			case 0x6e:	// OUTSB
 				RequiresMin(i80186);
-				Complete(OUTS, None, None, 1);
+				Complete(OUTS, None, None, DataSize::Byte);
 			break;
 			case 0x6f:	// OUTSW
 				RequiresMin(i80186);
-				Complete(OUTS, None, None, 2);
+				Complete(OUTS, None, None, data_size_);
 			break;
 
-			case 0x70: Jump(JO);	break;
-			case 0x71: Jump(JNO);	break;
-			case 0x72: Jump(JB);	break;
-			case 0x73: Jump(JNB);	break;
-			case 0x74: Jump(JE);	break;
-			case 0x75: Jump(JNE);	break;
-			case 0x76: Jump(JBE);	break;
-			case 0x77: Jump(JNBE);	break;
-			case 0x78: Jump(JS);	break;
-			case 0x79: Jump(JNS);	break;
-			case 0x7a: Jump(JP);	break;
-			case 0x7b: Jump(JNP);	break;
-			case 0x7c: Jump(JL);	break;
-			case 0x7d: Jump(JNL);	break;
-			case 0x7e: Jump(JLE);	break;
-			case 0x7f: Jump(JNLE);	break;
+			case 0x70: Jump(JO, DataSize::Byte);	break;
+			case 0x71: Jump(JNO, DataSize::Byte);	break;
+			case 0x72: Jump(JB, DataSize::Byte);	break;
+			case 0x73: Jump(JNB, DataSize::Byte);	break;
+			case 0x74: Jump(JE, DataSize::Byte);	break;
+			case 0x75: Jump(JNE, DataSize::Byte);	break;
+			case 0x76: Jump(JBE, DataSize::Byte);	break;
+			case 0x77: Jump(JNBE, DataSize::Byte);	break;
+			case 0x78: Jump(JS, DataSize::Byte);	break;
+			case 0x79: Jump(JNS, DataSize::Byte);	break;
+			case 0x7a: Jump(JP, DataSize::Byte);	break;
+			case 0x7b: Jump(JNP, DataSize::Byte);	break;
+			case 0x7c: Jump(JL, DataSize::Byte);	break;
+			case 0x7d: Jump(JNL, DataSize::Byte);	break;
+			case 0x7e: Jump(JLE, DataSize::Byte);	break;
+			case 0x7f: Jump(JNLE, DataSize::Byte);	break;
 
-			case 0x80: MemRegReg(Invalid, MemRegADD_to_CMP, 1);	break;
-			case 0x81: MemRegReg(Invalid, MemRegADD_to_CMP, 2);	break;
-			case 0x82: MemRegReg(Invalid, MemRegADC_to_CMP, 1);	break;
-			case 0x83: MemRegReg(Invalid, MemRegADC_to_CMP, 2);	break;
+			case 0x80: MemRegReg(Invalid, MemRegADD_to_CMP, DataSize::Byte);	break;
+			case 0x81: MemRegReg(Invalid, MemRegADD_to_CMP, data_size_);		break;
+			case 0x82:
+				MemRegReg(Invalid, MemRegADC_to_CMP, DataSize::Byte);
+				sign_extend_ = true;
+			break;
+			case 0x83:
+				MemRegReg(Invalid, MemRegADC_to_CMP, data_size_);
+				sign_extend_ = true;
+			break;
 
-			case 0x84: MemRegReg(TEST, MemReg_Reg, 1);	break;
-			case 0x85: MemRegReg(TEST, MemReg_Reg, 2);	break;
-			case 0x86: MemRegReg(XCHG, Reg_MemReg, 1);	break;
-			case 0x87: MemRegReg(XCHG, Reg_MemReg, 2);	break;
-			case 0x88: MemRegReg(MOV, MemReg_Reg, 1);	break;
-			case 0x89: MemRegReg(MOV, MemReg_Reg, 2);	break;
-			case 0x8a: MemRegReg(MOV, Reg_MemReg, 1);	break;
-			case 0x8b: MemRegReg(MOV, Reg_MemReg, 2);	break;
+			case 0x84: MemRegReg(TEST, MemReg_Reg, DataSize::Byte);	break;
+			case 0x85: MemRegReg(TEST, MemReg_Reg, data_size_);		break;
+			case 0x86: MemRegReg(XCHG, Reg_MemReg, DataSize::Byte);	break;
+			case 0x87: MemRegReg(XCHG, Reg_MemReg, data_size_);		break;
+			case 0x88: MemRegReg(MOV, MemReg_Reg, DataSize::Byte);	break;
+			case 0x89: MemRegReg(MOV, MemReg_Reg, data_size_);		break;
+			case 0x8a: MemRegReg(MOV, Reg_MemReg, DataSize::Byte);	break;
+			case 0x8b: MemRegReg(MOV, Reg_MemReg, data_size_);		break;
 			// 0x8c: not used.
-			case 0x8d: MemRegReg(LEA, Reg_MemReg, 2);	break;
-			case 0x8e: MemRegReg(MOV, SegReg, 2);		break;
-			case 0x8f: MemRegReg(POP, MemRegPOP, 2);	break;
+			case 0x8d: MemRegReg(LEA, Reg_MemReg, data_size_);		break;
+			case 0x8e: MemRegReg(MOV, SegReg, data_size_);			break;
+			case 0x8f: MemRegReg(POP, MemRegPOP, data_size_);		break;
 
-			case 0x90: Complete(NOP, None, None, 0);	break;	// Or XCHG AX, AX?
-			case 0x91: Complete(XCHG, eAX, eCX, 2);		break;
-			case 0x92: Complete(XCHG, eAX, eDX, 2);		break;
-			case 0x93: Complete(XCHG, eAX, eBX, 2);		break;
-			case 0x94: Complete(XCHG, eAX, eSP, 2);		break;
-			case 0x95: Complete(XCHG, eAX, eBP, 2);		break;
-			case 0x96: Complete(XCHG, eAX, eSI, 2);		break;
-			case 0x97: Complete(XCHG, eAX, eDI, 2);		break;
+			case 0x90: Complete(NOP, None, None, DataSize::None);	break;	// Or XCHG AX, AX?
+			case 0x91: Complete(XCHG, eAX, eCX, data_size_);		break;
+			case 0x92: Complete(XCHG, eAX, eDX, data_size_);		break;
+			case 0x93: Complete(XCHG, eAX, eBX, data_size_);		break;
+			case 0x94: Complete(XCHG, eAX, eSP, data_size_);		break;
+			case 0x95: Complete(XCHG, eAX, eBP, data_size_);		break;
+			case 0x96: Complete(XCHG, eAX, eSI, data_size_);		break;
+			case 0x97: Complete(XCHG, eAX, eDI, data_size_);		break;
 
-			case 0x98: Complete(CBW, eAX, AH, 1);		break;
-			case 0x99: Complete(CWD, eAX, eDX, 2);		break;
-			case 0x9a: Far(CALLF);						break;
-			case 0x9b: Complete(WAIT, None, None, 0);	break;
-			case 0x9c: Complete(PUSHF, None, None, 2);	break;
-			case 0x9d: Complete(POPF, None, None, 2);	break;
-			case 0x9e: Complete(SAHF, None, None, 1);	break;
-			case 0x9f: Complete(LAHF, None, None, 1);	break;
+			case 0x98: Complete(CBW, eAX, AH, DataSize::Byte);		break;
+			case 0x99: Complete(CWD, eAX, eDX, data_size_);			break;
+			case 0x9a: Far(CALLF);									break;
+			case 0x9b: Complete(WAIT, None, None, DataSize::None);	break;
+			case 0x9c: Complete(PUSHF, None, None, data_size_);		break;
+			case 0x9d: Complete(POPF, None, None, data_size_);		break;
+			case 0x9e: Complete(SAHF, None, None, DataSize::Byte);	break;
+			case 0x9f: Complete(LAHF, None, None, DataSize::Byte);	break;
 
-			case 0xa0: RegAddr(MOV, eAX, 1, 1);	break;
-			case 0xa1: RegAddr(MOV, eAX, 2, 2);	break;
-			case 0xa2: AddrReg(MOV, eAX, 1, 1);	break;
-			case 0xa3: AddrReg(MOV, eAX, 2, 2);	break;
+			case 0xa0: RegAddr(MOV, eAX, DataSize::Byte, DataSize::Byte);	break;
+			case 0xa1: RegAddr(MOV, eAX, data_size_, data_size_);			break;
+			case 0xa2: AddrReg(MOV, eAX, DataSize::Byte, DataSize::Byte);	break;
+			case 0xa3: AddrReg(MOV, eAX, data_size_, data_size_);			break;
 
-			case 0xa4: Complete(MOVS, None, None, 1);	break;
-			case 0xa5: Complete(MOVS, None, None, 2);	break;
-			case 0xa6: Complete(CMPS, None, None, 1);	break;
-			case 0xa7: Complete(CMPS, None, None, 2);	break;
-			case 0xa8: RegData(TEST, eAX, 1);			break;
-			case 0xa9: RegData(TEST, eAX, 2);			break;
-			case 0xaa: Complete(STOS, None, None, 1);	break;
-			case 0xab: Complete(STOS, None, None, 2);	break;
-			case 0xac: Complete(LODS, None, None, 1);	break;
-			case 0xad: Complete(LODS, None, None, 2);	break;
-			case 0xae: Complete(SCAS, None, None, 1);	break;
-			case 0xaf: Complete(SCAS, None, None, 2);	break;
+			case 0xa4: Complete(MOVS, None, None, DataSize::Byte);	break;
+			case 0xa5: Complete(MOVS, None, None, data_size_);		break;
+			case 0xa6: Complete(CMPS, None, None, DataSize::Byte);	break;
+			case 0xa7: Complete(CMPS, None, None, data_size_);		break;
+			case 0xa8: RegData(TEST, eAX, DataSize::Byte);			break;
+			case 0xa9: RegData(TEST, eAX, data_size_);				break;
+			case 0xaa: Complete(STOS, None, None, DataSize::Byte);	break;
+			case 0xab: Complete(STOS, None, None, data_size_);		break;
+			case 0xac: Complete(LODS, None, None, DataSize::Byte);	break;
+			case 0xad: Complete(LODS, None, None, data_size_);		break;
+			case 0xae: Complete(SCAS, None, None, DataSize::Byte);	break;
+			case 0xaf: Complete(SCAS, None, None, data_size_);		break;
 
-			case 0xb0: RegData(MOV, eAX, 1);	break;
-			case 0xb1: RegData(MOV, eCX, 1);	break;
-			case 0xb2: RegData(MOV, eDX, 1);	break;
-			case 0xb3: RegData(MOV, eBX, 1);	break;
-			case 0xb4: RegData(MOV, AH, 1);		break;
-			case 0xb5: RegData(MOV, CH, 1);		break;
-			case 0xb6: RegData(MOV, DH, 1);		break;
-			case 0xb7: RegData(MOV, BH, 1);		break;
-			case 0xb8: RegData(MOV, eAX, 2);	break;
-			case 0xb9: RegData(MOV, eCX, 2);	break;
-			case 0xba: RegData(MOV, eDX, 2);	break;
-			case 0xbb: RegData(MOV, eBX, 2);	break;
-			case 0xbc: RegData(MOV, eSP, 2);	break;
-			case 0xbd: RegData(MOV, eBP, 2);	break;
-			case 0xbe: RegData(MOV, eSI, 2);	break;
-			case 0xbf: RegData(MOV, eDI, 2);	break;
+			case 0xb0: RegData(MOV, eAX, DataSize::Byte);	break;
+			case 0xb1: RegData(MOV, eCX, DataSize::Byte);	break;
+			case 0xb2: RegData(MOV, eDX, DataSize::Byte);	break;
+			case 0xb3: RegData(MOV, eBX, DataSize::Byte);	break;
+			case 0xb4: RegData(MOV, AH, DataSize::Byte);	break;
+			case 0xb5: RegData(MOV, CH, DataSize::Byte);	break;
+			case 0xb6: RegData(MOV, DH, DataSize::Byte);	break;
+			case 0xb7: RegData(MOV, BH, DataSize::Byte);	break;
+			case 0xb8: RegData(MOV, eAX, data_size_);		break;
+			case 0xb9: RegData(MOV, eCX, data_size_);		break;
+			case 0xba: RegData(MOV, eDX, data_size_);		break;
+			case 0xbb: RegData(MOV, eBX, data_size_);		break;
+			case 0xbc: RegData(MOV, eSP, data_size_);		break;
+			case 0xbd: RegData(MOV, eBP, data_size_);		break;
+			case 0xbe: RegData(MOV, eSI, data_size_);		break;
+			case 0xbf: RegData(MOV, eDI, data_size_);		break;
 
-			case 0xc2: RegData(RETN, None, 2);			break;
-			case 0xc3: Complete(RETN, None, None, 2);	break;
-			case 0xc4: MemRegReg(LES, Reg_MemReg, 2);	break;
-			case 0xc5: MemRegReg(LDS, Reg_MemReg, 2);	break;
-			case 0xc6: MemRegReg(MOV, MemRegMOV, 1);	break;
-			case 0xc7: MemRegReg(MOV, MemRegMOV, 2);	break;
+			case 0xc2: RegData(RETN, None, data_size_);				break;
+			case 0xc3: Complete(RETN, None, None, DataSize::None);	break;
+			case 0xc4: MemRegReg(LES, Reg_MemReg, data_size_);		break;
+			case 0xc5: MemRegReg(LDS, Reg_MemReg, data_size_);		break;
+			case 0xc6: MemRegReg(MOV, MemRegMOV, DataSize::Byte);	break;
+			case 0xc7: MemRegReg(MOV, MemRegMOV, data_size_);		break;
 
 			case 0xc8:
 				RequiresMin(i80186);
@@ -305,106 +310,108 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			break;
 			case 0xc9:
 				RequiresMin(i80186);
-				Complete(LEAVE, None, None, 0);
+				Complete(LEAVE, None, None, DataSize::None);
 			break;
 
-			case 0xca: RegData(RETF, None, 2);			break;
-			case 0xcb: Complete(RETF, None, None, 4);	break;
+			case 0xca: RegData(RETF, None, data_size_);				break;
+			case 0xcb: Complete(RETF, None, None, DataSize::DWord);	break;
 
-			case 0xcc: Complete(INT3, None, None, 0);	break;
-			case 0xcd: RegData(INT, None, 1);			break;
-			case 0xce: Complete(INTO, None, None, 0);	break;
-			case 0xcf: Complete(IRET, None, None, 0);	break;
+			case 0xcc: Complete(INT3, None, None, DataSize::None);	break;
+			case 0xcd: RegData(INT, None, DataSize::Byte);			break;
+			case 0xce: Complete(INTO, None, None, DataSize::None);	break;
+			case 0xcf: Complete(IRET, None, None, DataSize::None);	break;
 
-			case 0xd0: case 0xd1:
+			case 0xd0: case 0xd1: {
+				const DataSize sizes[] = {DataSize::Byte, data_size_};
 				phase_ = Phase::ModRegRM;
 				modregrm_format_ = ModRegRMFormat::MemRegROL_to_SAR;
-				operation_size_ = 1 + (instr_ & 1);
+				operation_size_ = sizes[instr & 1];
 				source_ = Source::Immediate;
 				operand_ = 1;
-			break;
-			case 0xd2: case 0xd3:
+			} break;
+			case 0xd2: case 0xd3: {
+				const DataSize sizes[] = {DataSize::Byte, data_size_};
 				phase_ = Phase::ModRegRM;
 				modregrm_format_ = ModRegRMFormat::MemRegROL_to_SAR;
-				operation_size_ = 1 + (instr_ & 1);
+				operation_size_ = sizes[instr & 1];
 				source_ = Source::eCX;
-			break;
-			case 0xd4: RegData(AAM, eAX, 1);			break;
-			case 0xd5: RegData(AAD, eAX, 1);			break;
+			} break;
+			case 0xd4: RegData(AAM, eAX, DataSize::Byte);			break;
+			case 0xd5: RegData(AAD, eAX, DataSize::Byte);			break;
 
-			case 0xd7: Complete(XLAT, None, None, 1);	break;
+			case 0xd7: Complete(XLAT, None, None, DataSize::Byte);	break;
 
-			case 0xd8: MemRegReg(ESC, MemReg_Reg, 0);	break;
-			case 0xd9: MemRegReg(ESC, MemReg_Reg, 0);	break;
-			case 0xda: MemRegReg(ESC, MemReg_Reg, 0);	break;
-			case 0xdb: MemRegReg(ESC, MemReg_Reg, 0);	break;
-			case 0xdc: MemRegReg(ESC, MemReg_Reg, 0);	break;
-			case 0xdd: MemRegReg(ESC, MemReg_Reg, 0);	break;
-			case 0xde: MemRegReg(ESC, MemReg_Reg, 0);	break;
-			case 0xdf: MemRegReg(ESC, MemReg_Reg, 0);	break;
+			case 0xd8: MemRegReg(ESC, MemReg_Reg, DataSize::None);	break;
+			case 0xd9: MemRegReg(ESC, MemReg_Reg, DataSize::None);	break;
+			case 0xda: MemRegReg(ESC, MemReg_Reg, DataSize::None);	break;
+			case 0xdb: MemRegReg(ESC, MemReg_Reg, DataSize::None);	break;
+			case 0xdc: MemRegReg(ESC, MemReg_Reg, DataSize::None);	break;
+			case 0xdd: MemRegReg(ESC, MemReg_Reg, DataSize::None);	break;
+			case 0xde: MemRegReg(ESC, MemReg_Reg, DataSize::None);	break;
+			case 0xdf: MemRegReg(ESC, MemReg_Reg, DataSize::None);	break;
 
-			case 0xe0: Jump(LOOPNE);	break;
-			case 0xe1: Jump(LOOPE);		break;
-			case 0xe2: Jump(LOOP);		break;
-			case 0xe3: Jump(JPCX);		break;
+			case 0xe0: Jump(LOOPNE, DataSize::Byte);	break;
+			case 0xe1: Jump(LOOPE, DataSize::Byte);		break;
+			case 0xe2: Jump(LOOP, DataSize::Byte);		break;
+			case 0xe3: Jump(JPCX, DataSize::Byte);		break;
 
-			case 0xe4: RegAddr(IN, eAX, 1, 1);	break;
-			case 0xe5: RegAddr(IN, eAX, 2, 1);	break;
-			case 0xe6: AddrReg(OUT, eAX, 1, 1);	break;
-			case 0xe7: AddrReg(OUT, eAX, 2, 1);	break;
+			case 0xe4: RegAddr(IN, eAX, DataSize::Byte, DataSize::Byte);	break;
+			case 0xe5: RegAddr(IN, eAX, data_size_, DataSize::Byte);		break;
+			case 0xe6: AddrReg(OUT, eAX, DataSize::Byte, DataSize::Byte);	break;
+			case 0xe7: AddrReg(OUT, eAX, data_size_, DataSize::Byte);		break;
 
-			case 0xe8: RegData(CALLD, None, 2);	break;
-			case 0xe9: RegData(JMPN, None, 2);	break;
-			case 0xea: Far(JMPF);				break;
-			case 0xeb: Jump(JMPN);				break;
+			case 0xe8: RegData(CALLD, None, data_size_);	break;
+			case 0xe9: RegData(JMPN, None, data_size_);		break;
+			case 0xea: Far(JMPF);							break;
+			case 0xeb: Jump(JMPN, DataSize::Byte);			break;
 
-			case 0xec: Complete(IN, eDX, eAX, 1);	break;
-			case 0xed: Complete(IN, eDX, eAX, 1);	break;
-			case 0xee: Complete(OUT, eAX, eDX, 1);	break;
-			case 0xef: Complete(OUT, eAX, eDX, 2);	break;
+			case 0xec: Complete(IN, eDX, eAX, DataSize::Byte);	break;
+			case 0xed: Complete(IN, eDX, eAX, data_size_);		break;
+			case 0xee: Complete(OUT, eAX, eDX, DataSize::Byte);	break;
+			case 0xef: Complete(OUT, eAX, eDX, data_size_);		break;
 
 			case 0xf0: lock_ = true;					break;
 			case 0xf2: repetition_ = Repetition::RepNE;	break;
 			case 0xf3: repetition_ = Repetition::RepE;	break;
 
-			case 0xf4: Complete(HLT, None, None, 1);				break;
-			case 0xf5: Complete(CMC, None, None, 1);				break;
-			case 0xf6: MemRegReg(Invalid, MemRegTEST_to_IDIV, 1);	break;
-			case 0xf7: MemRegReg(Invalid, MemRegTEST_to_IDIV, 2);	break;
+			case 0xf4: Complete(HLT, None, None, DataSize::None);				break;
+			case 0xf5: Complete(CMC, None, None, DataSize::None);				break;
+			case 0xf6: MemRegReg(Invalid, MemRegTEST_to_IDIV, DataSize::Byte);	break;
+			case 0xf7: MemRegReg(Invalid, MemRegTEST_to_IDIV, data_size_);		break;
 
-			case 0xf8: Complete(CLC, None, None, 1);	break;
-			case 0xf9: Complete(STC, None, None, 1);	break;
-			case 0xfa: Complete(CLI, None, None, 1);	break;
-			case 0xfb: Complete(STI, None, None, 1);	break;
-			case 0xfc: Complete(CLD, None, None, 1);	break;
-			case 0xfd: Complete(STD, None, None, 1);	break;
+			case 0xf8: Complete(CLC, None, None, DataSize::None);	break;
+			case 0xf9: Complete(STC, None, None, DataSize::None);	break;
+			case 0xfa: Complete(CLI, None, None, DataSize::None);	break;
+			case 0xfb: Complete(STI, None, None, DataSize::None);	break;
+			case 0xfc: Complete(CLD, None, None, DataSize::None);	break;
+			case 0xfd: Complete(STD, None, None, DataSize::None);	break;
 
-			case 0xfe: MemRegReg(Invalid, MemRegINC_DEC, 1);		break;
-			case 0xff: MemRegReg(Invalid, MemRegINC_to_PUSH, 1);	break;
+			case 0xfe: MemRegReg(Invalid, MemRegINC_DEC, DataSize::Byte);	break;
+			case 0xff: MemRegReg(Invalid, MemRegINC_to_PUSH, data_size_);	break;
 		}
 	}
 
 	// MARK: - Additional F page of instructions.
 	if(phase_ == Phase::InstructionPageF && source != end) {
 		// Update the instruction acquired.
-		instr_ = 0x0f00 | *source;
+		const uint8_t instr = *source;
 		++source;
 		++consumed_;
 
 		// NB: to reach here, the instruction set must be at least
 		// that of an 80286.
-		switch(instr_) {
+		switch(instr) {
 			default: undefined();
 
-			case 0x00:	MemRegReg(Invalid, MemRegSLDT_to_VERW, 2);	break;
-			case 0x01:	MemRegReg(Invalid, MemRegSGDT_to_LMSW, 2);	break;
-			case 0x02:	MemRegReg(LAR, Reg_MemReg, 2);				break;
-			case 0x03:	MemRegReg(LSL, Reg_MemReg, 2);				break;
+			case 0x00:	MemRegReg(Invalid, MemRegSLDT_to_VERW, data_size_);	break;
+			case 0x01:	MemRegReg(Invalid, MemRegSGDT_to_LMSW, data_size_);	break;
+			case 0x02:	MemRegReg(LAR, Reg_MemReg, data_size_);				break;
+			case 0x03:	MemRegReg(LSL, Reg_MemReg, data_size_);				break;
 			case 0x05:
 				Requires(i80286);
-				Complete(LOADALL, None, None, 0);
+				Complete(LOADALL, None, None, DataSize::None);
 			break;
-			case 0x06:	Complete(CLTS, None, None, 1);				break;
+			case 0x06:	Complete(CLTS, None, None, DataSize::Byte);			break;
 		}
 	}
 
@@ -429,19 +436,15 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 		++consumed_;
 
 		Source memreg;
-		constexpr Source reg_table[3][8] = {
-			{},
-			{
-				Source::eAX,	Source::eCX,	Source::eDX,	Source::eBX,
-				Source::AH,		Source::CH,		Source::DH,		Source::BH,
-			}, {
-				Source::eAX,	Source::eCX,	Source::eDX,	Source::eBX,
-				Source::eSP,	Source::eBP,	Source::eSI,	Source::eDI,
-			}
+		constexpr Source reg_table[8] = {
+			Source::eAX,		Source::eCX,		Source::eDX,		Source::eBX,
+			Source::eSPorAH,	Source::eBPorCH,	Source::eSIorDH,	Source::eDIorBH,
 		};
 		switch(mod) {
-			default:
-				displacement_size_ = 1 + (mod == 2);
+			default: {
+				const DataSize sizes[] = {DataSize::Byte, data_size_};
+				displacement_size_ = sizes[mod == 2];
+			}
 				[[fallthrough]];
 			case 0: {
 				constexpr ScaleIndexBase rm_table[8] = {
@@ -461,7 +464,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 
 			// Other operand is just a register.
 			case 3:
-				memreg = reg_table[operation_size_][rm];
+				memreg = reg_table[rm];
 
 				// LES and LDS accept a memory argument only, not a register.
 				if(operation_ == Operation::LES || operation_ == Operation::LDS) {
@@ -475,9 +478,9 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case ModRegRMFormat::MemReg_Reg: {
 				if(modregrm_format_ == ModRegRMFormat::Reg_MemReg) {
 					source_ = memreg;
-					destination_ = reg_table[operation_size_][reg];
+					destination_ = reg_table[reg];
 				} else {
-					source_ = reg_table[operation_size_][reg];
+					source_ = reg_table[reg];
 					destination_ = memreg;
 				}
 			} break;
@@ -551,13 +554,13 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 					case 2:		operation_ = Operation::CALLN;	break;
 					case 3:
 						operation_ = Operation::CALLF;
-						operand_size_ = 4;
+						operand_size_ = DataSize::DWord;
 						source_ = Source::Immediate;
 					break;
 					case 4:		operation_ = Operation::JMPN;	break;
 					case 5:
 						operation_ = Operation::JMPF;
-						operand_size_ = 4;
+						operand_size_ = DataSize::DWord;
 						source_ = Source::Immediate;
 					break;
 					case 6:	operation_ = Operation::PUSH;		break;
@@ -597,8 +600,8 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case ModRegRMFormat::MemRegADC_to_CMP:
 				destination_ = memreg;
 				source_ = Source::Immediate;
-				operand_size_ = 1;	// ... and always 1; it'll be sign extended if
-									// the operation requires it.
+				operand_size_ = DataSize::Byte;	// ... and always a byte; it'll be sign extended if
+												// the operation requires it.
 
 				switch(reg) {
 					default: undefined();
@@ -642,7 +645,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			default: assert(false);
 		}
 
-		phase_ = (displacement_size_ + operand_size_) ? Phase::DisplacementOrOperand : Phase::ReadyToPost;
+		phase_ = (displacement_size_ != DataSize::None || operand_size_ != DataSize::None) ? Phase::DisplacementOrOperand : Phase::ReadyToPost;
 	}
 
 #undef undefined
@@ -658,15 +661,15 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 	// MARK: - Displacement and operand.
 
 	if(phase_ == Phase::DisplacementOrOperand && source != end) {
-		const int required_bytes = displacement_size_ + operand_size_;
+		const auto required_bytes = int(byte_size(displacement_size_) + byte_size(operand_size_));
 
 		const int outstanding_bytes = required_bytes - operand_bytes_;
 		const int bytes_to_consume = std::min(int(end - source), outstanding_bytes);
 
-		// TODO: I can surely do better than this?
 		for(int c = 0; c < bytes_to_consume; c++) {
-			inward_data_ = (inward_data_ >> 8) | (uint64_t(source[0]) << 56);
+			inward_data_ |= decltype(inward_data_)(source[0]) << next_inward_data_shift_;
 			++source;
+			next_inward_data_shift_ += 8;
 		}
 
 		consumed_ += bytes_to_consume;
@@ -675,25 +678,28 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 		if(bytes_to_consume == outstanding_bytes) {
 			phase_ = Phase::ReadyToPost;
 
-			switch(operand_size_) {
-				default:	operand_ = 0;										break;
-				case 1:
-					operand_ = inward_data_ >> 56; inward_data_ <<= 8;
-
-					// Sign extend if a single byte operand is feeding a two-byte instruction.
-					if(operation_size_ == 2 && operation_ != Operation::IN && operation_ != Operation::OUT) {
-						operand_ |= (operand_ & 0x80) ? 0xff00 : 0x0000;
-					}
-				break;
-				case 4:		displacement_size_ = 2;								[[fallthrough]];
-				case 2:		operand_ = inward_data_ >> 48; inward_data_ <<= 16;	break;
-				break;
-			}
 			switch(displacement_size_) {
-				default:	displacement_ = 0;									break;
-				case 1:		displacement_ = int8_t(inward_data_ >> 56);			break;
-				case 2:		displacement_ = int16_t(inward_data_ >> 48);		break;
+				case DataSize::None:	displacement_ = 0;						break;
+				case DataSize::Byte:	displacement_ = int8_t(inward_data_);	break;
+				case DataSize::Word:	displacement_ = int16_t(inward_data_);	break;
+				case DataSize::DWord:	displacement_ = int32_t(inward_data_);	break;
 			}
+			inward_data_ >>= bit_size(displacement_size_);
+
+			// Use inequality of sizes as a test for necessary sign extension.
+			if(operand_size_ == data_size_ || !sign_extend_) {
+				operand_ = decltype(operand_)(inward_data_);
+			} else {
+				switch(operand_size_) {
+					case DataSize::None:	operand_ = 0;											break;
+					case DataSize::Byte:	operand_ = decltype(operand_)(int8_t(inward_data_));	break;
+					case DataSize::Word:	operand_ = decltype(operand_)(int16_t(inward_data_));	break;
+					case DataSize::DWord:	operand_ = decltype(operand_)(int32_t(inward_data_));	break;
+				}
+			}
+
+			// TODO: split differently for far jumps/etc. But that information is
+			// no longer retained now that it's not implied by a DWord-sized operand.
 		} else {
 			// Provide a genuine measure of further bytes required.
 			return std::make_pair(-(outstanding_bytes - bytes_to_consume), InstructionT());
@@ -715,8 +721,8 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				segment_override_,
 				repetition_,
 				DataSize(operation_size_),
-				displacement_,
-				operand_)
+				static_cast<typename InstructionT::DisplacementT>(displacement_),
+				static_cast<typename InstructionT::ImmediateT>(operand_))
 		);
 		reset_parsing();
 		return result;
diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index 7ce295f93..b50f7acd3 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -130,7 +130,6 @@ template <Model model> class Decoder {
 
 		// Ephemeral decoding state.
 		Operation operation_ = Operation::Invalid;
-		uint16_t instr_ = 0x0000;	// TODO: is this desired, versus loading more context into ModRegRMFormat?
 		int consumed_ = 0, operand_bytes_ = 0;
 
 		// Source and destination locations.
@@ -138,17 +137,21 @@ template <Model model> class Decoder {
 		Source destination_ = Source::None;
 
 		// Immediate fields.
-		int16_t displacement_ = 0;
-		uint16_t operand_ = 0;
+		int32_t displacement_ = 0;
+		uint32_t operand_ = 0;
 		uint64_t inward_data_ = 0;
+		int next_inward_data_shift_ = 0;
 
 		// Indirection style.
 		ScaleIndexBase sib_;
 
 		// Facts about the instruction.
-		int displacement_size_ = 0;		// i.e. size of in-stream displacement, if any.
-		int operand_size_ = 0;			// i.e. size of in-stream operand, if any.
-		int operation_size_ = 0;		// i.e. size of data manipulated by the operation.
+		DataSize displacement_size_ = DataSize::None;	// i.e. size of in-stream displacement, if any.
+		DataSize operand_size_ = DataSize::None;		// i.e. size of in-stream operand, if any.
+		DataSize operation_size_ = DataSize::None;		// i.e. size of data manipulated by the operation.
+
+		bool sign_extend_ = false;						// If set then sign extend the operand up to the operation size;
+														// otherwise it'll be zero-padded.
 
 		// Prefix capture fields.
 		Repetition repetition_ = Repetition::None;
@@ -164,7 +167,7 @@ template <Model model> class Decoder {
 		/// Resets size capture and all fields with default values.
 		void reset_parsing() {
 			consumed_ = operand_bytes_ = 0;
-			displacement_size_ = operand_size_ = 0;
+			displacement_size_ = operand_size_ = operation_size_ = DataSize::None;
 			displacement_ = operand_ = 0;
 			lock_ = false;
 			address_size_ = default_address_size_;
@@ -173,7 +176,10 @@ template <Model model> class Decoder {
 			repetition_ = Repetition::None;
 			phase_ = Phase::Instruction;
 			source_ = destination_ = Source::None;
-			sib_ = 0;
+			sib_ = ScaleIndexBase();
+			next_inward_data_shift_ = 0;
+			inward_data_ = 0;
+			sign_extend_ = false;
 		}
 };
 
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 1240756a6..85832be60 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -322,17 +322,33 @@ enum class Operation: uint8_t {
 };
 
 enum class DataSize: uint8_t {
-	Implied = 0,
-	Byte = 1,
-	Word = 2,
-	DWord = 3,
+	Byte = 0,
+	Word = 1,
+	DWord = 2,
+	None = 3,
 };
 
+constexpr int byte_size(DataSize size) {
+	return (1 << int(size)) & 7;
+}
+
+constexpr int bit_size(DataSize size) {
+	return (8 << int(size)) & 0x3f;
+}
+
 enum class AddressSize: uint8_t {
 	b16 = 0,
 	b32 = 1,
 };
 
+constexpr int byte_size(AddressSize size) {
+	return 2 << int(size);
+}
+
+constexpr int bit_size(AddressSize size) {
+	return 16 << int(size);
+}
+
 enum class Source: uint8_t {
 	// These are in SIB order; this matters for packing later on.
 
diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index cc91cb5ed..a9ef904df 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -14,25 +14,20 @@
 #include "../../../InstructionSets/x86/Decoder.hpp"
 #include "../../../InstructionSets/x86/DataPointerResolver.hpp"
 
-namespace {
+using namespace InstructionSet::x86;
 
-using Operation = InstructionSet::x86::Operation;
-using Instruction = InstructionSet::x86::Instruction<false>;
-using Model = InstructionSet::x86::Model;
-using Source = InstructionSet::x86::Source;
-using Size = InstructionSet::x86::DataSize;
-using ScaleIndexBase = InstructionSet::x86::ScaleIndexBase;
+namespace {
 
 // MARK: - Specific instruction asserts.
 
-template <typename InstructionT> void test(const InstructionT &instruction, int size, Operation operation) {
+template <typename InstructionT> void test(const InstructionT &instruction, DataSize size, Operation operation) {
 	XCTAssertEqual(instruction.operation_size(), InstructionSet::x86::DataSize(size));
 	XCTAssertEqual(instruction.operation, operation);
 }
 
 template <typename InstructionT> void test(
 	const InstructionT &instruction,
-	int size,
+	DataSize size,
 	Operation operation,
 	InstructionSet::x86::DataPointer source,
 	std::optional<InstructionSet::x86::DataPointer> destination = std::nullopt,
@@ -134,10 +129,10 @@ template <Model model> std::vector<typename InstructionSet::x86::Decoder<model>:
 	// jb		0x00000001
 	// dec		%bx
 	// mov		$0x28,%ch
-	test(instructions[0], 2, Operation::SUB, Source::Immediate, Source::eAX, 0xea77);
+	test(instructions[0], DataSize::Word, Operation::SUB, Source::Immediate, Source::eAX, 0xea77);
 	test(instructions[1], Operation::JB, std::nullopt, 0xfffc);
-	test(instructions[2], 2, Operation::DEC, Source::eBX, Source::eBX);
-	test(instructions[3], 1, Operation::MOV, Source::Immediate, Source::CH, 0x28);
+	test(instructions[2], DataSize::Word, Operation::DEC, Source::eBX, Source::eBX);
+	test(instructions[3], DataSize::Byte, Operation::MOV, Source::Immediate, Source::CH, 0x28);
 
 	// ret
 	// lret		$0x4826
@@ -153,10 +148,10 @@ template <Model model> std::vector<typename InstructionSet::x86::Decoder<model>:
 	// out		%ax,(%dx)
 	// jo		0x00000037
 	// xchg		%ax,%sp
-	test(instructions[8], 2, Operation::DEC, Source::eSI, Source::eSI);
-	test(instructions[9], 2, Operation::OUT, Source::eAX, Source::eDX);
+	test(instructions[8], DataSize::Word, Operation::DEC, Source::eSI, Source::eSI);
+	test(instructions[9], DataSize::Word, Operation::OUT, Source::eAX, Source::eDX);
 	test(instructions[10], Operation::JO, std::nullopt, 0x20);
-	test(instructions[11], 2, Operation::XCHG, Source::eAX, Source::eSP);
+	test(instructions[11], DataSize::Word, Operation::XCHG, Source::eAX, Source::eSP);
 
 	// ODA has:
 	// 	c4		(bad)
@@ -168,25 +163,25 @@ template <Model model> std::vector<typename InstructionSet::x86::Decoder<model>:
 	//	c4 d4	(bad)
 	//	93		XCHG AX, BX
 	test(instructions[12], Operation::Invalid);
-	test(instructions[13], 2, Operation::XCHG, Source::eAX, Source::eBX);
+	test(instructions[13], DataSize::Word, Operation::XCHG, Source::eAX, Source::eBX);
 
 	// inc		%bx
 	// cmp		$0x8e,%al
 	// [[ omitted: push		$0x65 ]]
 	// sbb		0x45(%bx,%si),%bh
 	// adc		%bh,0x3c(%bx)
-	test(instructions[14], 2, Operation::INC, Source::eBX, Source::eBX);
-	test(instructions[15], 1, Operation::CMP, Source::Immediate, Source::eAX, 0x8e);
-	test(instructions[16], 1, Operation::SBB, ScaleIndexBase(Source::eBX, Source::eSI), Source::BH, std::nullopt, 0x45);
-	test(instructions[17], 1, Operation::ADC, Source::BH, ScaleIndexBase(Source::eBX), std::nullopt, 0x3c);
+	test(instructions[14], DataSize::Word, Operation::INC, Source::eBX, Source::eBX);
+	test(instructions[15], DataSize::Byte, Operation::CMP, Source::Immediate, Source::eAX, 0x8e);
+	test(instructions[16], DataSize::Byte, Operation::SBB, ScaleIndexBase(Source::eBX, Source::eSI), Source::BH, std::nullopt, 0x45);
+	test(instructions[17], DataSize::Byte, Operation::ADC, Source::BH, ScaleIndexBase(Source::eBX), std::nullopt, 0x3c);
 
 	// sbb		%bx,0x16(%bp,%si)
 	// xor		%sp,0x2c(%si)
 	// out		%ax,$0xc6
 	// jge		0xffffffe0
-	test(instructions[18], 2, Operation::SBB, Source::eBX, ScaleIndexBase(Source::eBP, Source::eSI), std::nullopt, 0x16);
-	test(instructions[19], 2, Operation::XOR, Source::eSP, ScaleIndexBase(Source::eSI), std::nullopt, 0x2c);
-	test(instructions[20], 2, Operation::OUT, Source::eAX, Source::DirectAddress, 0xc6);
+	test(instructions[18], DataSize::Word, Operation::SBB, Source::eBX, ScaleIndexBase(Source::eBP, Source::eSI), std::nullopt, 0x16);
+	test(instructions[19], DataSize::Word, Operation::XOR, Source::eSP, ScaleIndexBase(Source::eSI), std::nullopt, 0x2c);
+	test(instructions[20], DataSize::Word, Operation::OUT, Source::eAX, Source::DirectAddress, 0xc6);
 	test(instructions[21], Operation::JNL, std::nullopt, 0xffb0);
 
 	// mov		$0x49,%ch
@@ -194,38 +189,38 @@ template <Model model> std::vector<typename InstructionSet::x86::Decoder<model>:
 	// mov		$0xcbc0,%dx
 	// adc		$0x7e,%al
 	// jno		0x0000000b
-	test(instructions[22], 1, Operation::MOV, Source::Immediate, Source::CH, 0x49);
-	test(instructions[23], 2, Operation::MOV, Source::Immediate, Source::eDX, 0xcbc0);
-	test(instructions[24], 1, Operation::ADC, Source::Immediate, Source::eAX, 0x7e);
+	test(instructions[22], DataSize::Byte, Operation::MOV, Source::Immediate, Source::CH, 0x49);
+	test(instructions[23], DataSize::Word, Operation::MOV, Source::Immediate, Source::eDX, 0xcbc0);
+	test(instructions[24], DataSize::Byte, Operation::ADC, Source::Immediate, Source::eAX, 0x7e);
 	test(instructions[25], Operation::JNO, std::nullopt, 0xffd0);
 
 	// push		%ax
 	// js		0x0000007b
 	// add		(%di),%bx
 	// in		$0xc9,%ax
-	test(instructions[26], 2, Operation::PUSH, Source::eAX);
+	test(instructions[26], DataSize::Word, Operation::PUSH, Source::eAX);
 	test(instructions[27], Operation::JS, std::nullopt, 0x3d);
-	test(instructions[28], 2, Operation::ADD, ScaleIndexBase(Source::eDI), Source::eBX);
-	test(instructions[29], 2, Operation::IN, Source::DirectAddress, Source::eAX, 0xc9);
+	test(instructions[28], DataSize::Word, Operation::ADD, ScaleIndexBase(Source::eDI), Source::eBX);
+	test(instructions[29], DataSize::Word, Operation::IN, Source::DirectAddress, Source::eAX, 0xc9);
 
 	// xchg		%ax,%di
 	// ret
 	// fwait
 	// out		%al,$0xd3
-	test(instructions[30], 2, Operation::XCHG, Source::eAX, Source::eDI);
+	test(instructions[30], DataSize::Word, Operation::XCHG, Source::eAX, Source::eDI);
 	test(instructions[31], Operation::RETN);
 	test(instructions[32], Operation::WAIT);
-	test(instructions[33], 1, Operation::OUT, Source::eAX, Source::DirectAddress, 0xd3);
+	test(instructions[33], DataSize::Byte, Operation::OUT, Source::eAX, Source::DirectAddress, 0xd3);
 
 	// [[ omitted: insb		(%dx),%es:(%di) ]]
 	// pop		%ax
 	// dec		%bp
 	// jbe		0xffffffcc
 	// inc		%sp
-	test(instructions[34], 2, Operation::POP, Source::eAX);
-	test(instructions[35], 2, Operation::DEC, Source::eBP, Source::eBP);
+	test(instructions[34], DataSize::Word, Operation::POP, Source::eAX);
+	test(instructions[35], DataSize::Word, Operation::DEC, Source::eBP, Source::eBP);
 	test(instructions[36], Operation::JBE, std::nullopt, 0xff80);
-	test(instructions[37], 2, Operation::INC, Source::eSP, Source::eSP);
+	test(instructions[37], DataSize::Word, Operation::INC, Source::eSP, Source::eSP);
 
 	// (bad)
 	// lahf
@@ -233,15 +228,15 @@ template <Model model> std::vector<typename InstructionSet::x86::Decoder<model>:
 	// mov		$0x12a1,%bp
 	test(instructions[38], Operation::Invalid);
 	test(instructions[39], Operation::LAHF);
-	test(instructions[40], 2, Operation::MOVS); /* Arguments are implicit. */
-	test(instructions[41], 2, Operation::MOV, Source::Immediate, Source::eBP, 0x12a1);
+	test(instructions[40], DataSize::Word, Operation::MOVS); // Arguments are implicit.
+	test(instructions[41], DataSize::Word, Operation::MOV, Source::Immediate, Source::eBP, 0x12a1);
 
 	// lds		(%bx,%di),%bp
 	// [[ omitted: leave ]]
 	// sahf
 	// fdiv		%st(3),%st
 	// iret
-	test(instructions[42], 2, Operation::LDS);
+	test(instructions[42], DataSize::Word, Operation::LDS);
 	test(instructions[43], Operation::SAHF);
 	test(instructions[44], Operation::ESC);
 	test(instructions[45], Operation::IRET);
@@ -250,40 +245,40 @@ template <Model model> std::vector<typename InstructionSet::x86::Decoder<model>:
 	// cmp		%bx,-0x70(%di)
 	// adc		$0xb8c3,%ax
 	// lods		%ds:(%si),%ax
-	test(instructions[46], 2, Operation::XCHG, Source::eAX, Source::eDX);
-	test(instructions[47], 2, Operation::CMP, Source::eBX, ScaleIndexBase(Source::eDI), std::nullopt, 0xff90);
-	test(instructions[48], 2, Operation::ADC, Source::Immediate, Source::eAX, 0xb8c3);
-	test(instructions[49], 2, Operation::LODS);
+	test(instructions[46], DataSize::Word, Operation::XCHG, Source::eAX, Source::eDX);
+	test(instructions[47], DataSize::Word, Operation::CMP, Source::eBX, ScaleIndexBase(Source::eDI), std::nullopt, 0xff90);
+	test(instructions[48], DataSize::Word, Operation::ADC, Source::Immediate, Source::eAX, 0xb8c3);
+	test(instructions[49], DataSize::Word, Operation::LODS);
 
 	// call		0x0000172d
 	// dec		%dx
 	// mov		$0x9e,%al
 	// stc
 	test(instructions[50], Operation::CALLD, uint16_t(0x16c8));
-	test(instructions[51], 2, Operation::DEC, Source::eDX, Source::eDX);
-	test(instructions[52], 1, Operation::MOV, Source::Immediate, Source::eAX, 0x9e);
+	test(instructions[51], DataSize::Word, Operation::DEC, Source::eDX, Source::eDX);
+	test(instructions[52], DataSize::Byte, Operation::MOV, Source::Immediate, Source::eAX, 0x9e);
 	test(instructions[53], Operation::STC);
 
 	// mov		$0xea56,%di
 	// dec		%si
 	// std
 	// in		$0x5a,%al
-	test(instructions[54], 2, Operation::MOV, Source::Immediate, Source::eDI, 0xea56);
-	test(instructions[55], 2, Operation::DEC, Source::eSI, Source::eSI);
+	test(instructions[54], DataSize::Word, Operation::MOV, Source::Immediate, Source::eDI, 0xea56);
+	test(instructions[55], DataSize::Word, Operation::DEC, Source::eSI, Source::eSI);
 	test(instructions[56], Operation::STD);
-	test(instructions[57], 1, Operation::IN, Source::DirectAddress, Source::eAX, 0x5a);
+	test(instructions[57], DataSize::Byte, Operation::IN, Source::DirectAddress, Source::eAX, 0x5a);
 
 	// and		0x5b2c(%bp,%si),%bp
 	// sub		%dl,%dl
 	// negw		0x18(%bx)
 	// xchg		%dl,0x6425(%bx,%si)
-	test(instructions[58], 2, Operation::AND, ScaleIndexBase(Source::eBP, Source::eSI), Source::eBP, std::nullopt, 0x5b2c);
-	test(instructions[59], 1, Operation::SUB, Source::eDX, Source::eDX);
-	test(instructions[60], 2, Operation::NEG, ScaleIndexBase(Source::eBX), ScaleIndexBase(Source::eBX), std::nullopt, 0x18);
-	test(instructions[61], 1, Operation::XCHG, ScaleIndexBase(Source::eBX, Source::eSI), Source::eDX, std::nullopt, 0x6425);
+	test(instructions[58], DataSize::Word, Operation::AND, ScaleIndexBase(Source::eBP, Source::eSI), Source::eBP, std::nullopt, 0x5b2c);
+	test(instructions[59], DataSize::Byte, Operation::SUB, Source::eDX, Source::eDX);
+	test(instructions[60], DataSize::Word, Operation::NEG, ScaleIndexBase(Source::eBX), ScaleIndexBase(Source::eBX), std::nullopt, 0x18);
+	test(instructions[61], DataSize::Byte, Operation::XCHG, ScaleIndexBase(Source::eBX, Source::eSI), Source::eDX, std::nullopt, 0x6425);
 
 	// mov		$0xc3,%bh
-	test(instructions[62], 1, Operation::MOV, Source::Immediate, Source::BH, 0xc3);
+	test(instructions[62], DataSize::Byte, Operation::MOV, Source::Immediate, Source::BH, 0xc3);
 }
 
 - (void)test83 {
@@ -294,9 +289,9 @@ template <Model model> std::vector<typename InstructionSet::x86::Decoder<model>:
 	});
 
 	XCTAssertEqual(instructions.size(), 3);
-	test(instructions[0], 2, Operation::ADC, Source::Immediate, ScaleIndexBase(Source::eBX, Source::eSI), 0xff80);
-	test(instructions[1], 2, Operation::CMP, Source::Immediate, ScaleIndexBase(Source::eBP, Source::eDI), 0x4);
-	test(instructions[2], 2, Operation::SUB, Source::Immediate, ScaleIndexBase(Source::eBX), 0x9);
+	test(instructions[0], DataSize::Word, Operation::ADC, Source::Immediate, ScaleIndexBase(Source::eBX, Source::eSI), 0xff80);
+	test(instructions[1], DataSize::Word, Operation::CMP, Source::Immediate, ScaleIndexBase(Source::eBP, Source::eDI), 0x4);
+	test(instructions[2], DataSize::Word, Operation::SUB, Source::Immediate, ScaleIndexBase(Source::eBX), 0x9);
 }
 
 - (void)testFar {
@@ -308,7 +303,4 @@ template <Model model> std::vector<typename InstructionSet::x86::Decoder<model>:
 	test_far(instructions[0], Operation::CALLF, 0x7856, 0x3412);
 }
 
-- (void)testSequence2 {
-}
-
 @end

From 1afcbba218c0e5d205bd69e72615b9d11a16b633 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sat, 5 Mar 2022 16:44:26 -0500
Subject: [PATCH 045/104] Clarify sign extension availability.

---
 InstructionSets/x86/Decoder.cpp | 36 ++++--------------
 InstructionSets/x86/Decoder.hpp | 65 +++++++++++++++++++++++++--------
 2 files changed, 57 insertions(+), 44 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 616602e33..b641a4edc 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -220,16 +220,10 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0x7e: Jump(JLE, DataSize::Byte);	break;
 			case 0x7f: Jump(JNLE, DataSize::Byte);	break;
 
-			case 0x80: MemRegReg(Invalid, MemRegADD_to_CMP, DataSize::Byte);	break;
-			case 0x81: MemRegReg(Invalid, MemRegADD_to_CMP, data_size_);		break;
-			case 0x82:
-				MemRegReg(Invalid, MemRegADC_to_CMP, DataSize::Byte);
-				sign_extend_ = true;
-			break;
-			case 0x83:
-				MemRegReg(Invalid, MemRegADC_to_CMP, data_size_);
-				sign_extend_ = true;
-			break;
+			case 0x80: MemRegReg(Invalid, MemRegADD_to_CMP, DataSize::Byte);			break;
+			case 0x81: MemRegReg(Invalid, MemRegADD_to_CMP, data_size_);				break;
+			case 0x82: MemRegReg(Invalid, MemRegADD_to_CMP_SignExtend, DataSize::Byte);	break;
+			case 0x83: MemRegReg(Invalid, MemRegADD_to_CMP_SignExtend, data_size_);		break;
 
 			case 0x84: MemRegReg(TEST, MemReg_Reg, DataSize::Byte);	break;
 			case 0x85: MemRegReg(TEST, MemReg_Reg, data_size_);		break;
@@ -582,8 +576,11 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			break;
 
 			case ModRegRMFormat::MemRegADD_to_CMP:
+			case ModRegRMFormat::MemRegADD_to_CMP_SignExtend:
+				source_ = Source::Immediate;
 				destination_ = memreg;
-				operand_size_ = operation_size_;
+				operand_size_ = (modregrm_format_ == ModRegRMFormat::MemRegADD_to_CMP_SignExtend) ? DataSize::Byte : operation_size_;
+				sign_extend_ = true;	// Will be effective only if modregrm_format_ == ModRegRMFormat::MemRegADD_to_CMP_SignExtend.
 
 				switch(reg) {
 					default:	operation_ = Operation::ADD;	break;
@@ -597,23 +594,6 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				}
 			break;
 
-			case ModRegRMFormat::MemRegADC_to_CMP:
-				destination_ = memreg;
-				source_ = Source::Immediate;
-				operand_size_ = DataSize::Byte;	// ... and always a byte; it'll be sign extended if
-												// the operation requires it.
-
-				switch(reg) {
-					default: undefined();
-
-					case 0: 	operation_ = Operation::ADD;	break;
-					case 2: 	operation_ = Operation::ADC;	break;
-					case 3: 	operation_ = Operation::SBB;	break;
-					case 5: 	operation_ = Operation::SUB;	break;
-					case 7: 	operation_ = Operation::CMP;	break;
-				}
-			break;
-
 			case ModRegRMFormat::MemRegSLDT_to_VERW:
 				destination_ = source_ = memreg;
 
diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index b50f7acd3..387bb3e05 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -69,11 +69,6 @@ template <Model model> class Decoder {
 			MemReg_Reg,
 			Reg_MemReg,
 
-			// Parse for mode and register/memory fields, populating both
-			// source_ and destination_ fields with the result. Use the 'register'
-			// field to pick an operation from the TEST/NOT/NEG/MUL/IMUL/DIV/IDIV group.
-			MemRegTEST_to_IDIV,
-
 			// Parse for mode and register/memory fields, populating both
 			// source_ and destination_ fields with the result. Use the 'register'
 			// field to check for the POP operation.
@@ -85,9 +80,13 @@ template <Model model> class Decoder {
 			MemRegMOV,
 
 			// Parse for mode and register/memory fields, populating the
-			// destination_ field with the result. Use the 'register' field
-			// to pick an operation from the ROL/ROR/RCL/RCR/SAL/SHR/SAR group.
-			MemRegROL_to_SAR,
+			// source_ field with the result. Fills destination_ with a segment
+			// register based on the reg field.
+			SegReg,
+
+			//
+			//	'Group 1'
+			//
 
 			// Parse for mode and register/memory fields, populating the
 			// destination_ field with the result. Use the 'register' field
@@ -95,37 +94,71 @@ template <Model model> class Decoder {
 			// waits for an operand equal to the operation size.
 			MemRegADD_to_CMP,
 
+			// Acts exactly as MemRegADD_to_CMP but the operand is fixed in size
+			// at a single byte, which is sign extended to the operation size.
+			MemRegADD_to_CMP_SignExtend,
+
+			//
+			//	'Group 2'
+			//
+
 			// Parse for mode and register/memory fields, populating the
-			// source_ field with the result. Fills destination_ with a segment
-			// register based on the reg field.
-			SegReg,
+			// destination_ field with the result. Use the 'register' field
+			// to pick an operation from the ROL/ROR/RCL/RCR/SAL/SHR/SAR group.
+			MemRegROL_to_SAR,
+
+			//
+			//	'Group 3'
+			//
+
+			// Parse for mode and register/memory fields, populating both
+			// source_ and destination_ fields with the result. Use the 'register'
+			// field to pick an operation from the TEST/NOT/NEG/MUL/IMUL/DIV/IDIV group.
+			MemRegTEST_to_IDIV,
+
+			//
+			//	'Group 4'
+			//
 
 			// Parse for mode and register/memory fields, populating the
 			// source_ and destination_ fields with the result. Uses the
 			// 'register' field to pick INC or DEC.
 			MemRegINC_DEC,
 
+			//
+			//	'Group 5'
+			//
+
 			// Parse for mode and register/memory fields, populating the
 			// source_ and destination_ fields with the result. Uses the
 			// 'register' field to pick from INC/DEC/CALL/JMP/PUSH, altering
 			// the source to ::Immediate and setting an operand size if necessary.
 			MemRegINC_to_PUSH,
 
-			// Parse for mode and register/memory fields, populating the
-			// source_ and destination_ fields with the result. Uses the
-			// 'register' field to pick from ADD/ADC/SBB/SUB/CMP, altering
-			// the source to ::Immediate and setting an appropriate operand size.
-			MemRegADC_to_CMP,
+			//
+			//	'Group 6'
+			//
 
 			// Parse for mode and register/memory field, populating both source_
 			// and destination_ fields with the result. Uses the 'register' field
 			// to pick from SLDT/STR/LLDT/LTR/VERR/VERW.
 			MemRegSLDT_to_VERW,
 
+			//
+			//	'Group 7'
+			//
+
 			// Parse for mode and register/memory field, populating both source_
 			// and destination_ fields with the result. Uses the 'register' field
 			// to pick from SGDT/LGDT/SMSW/LMSW.
 			MemRegSGDT_to_LMSW,
+
+			//
+			//	'Group 8'
+			//
+
+			// TODO.
+			MemRegBT_to_BTC,
 		} modregrm_format_ = ModRegRMFormat::MemReg_Reg;
 
 		// Ephemeral decoding state.

From eb180656bb10bc32ce0964649a37b4f2c9a70abd Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sat, 5 Mar 2022 17:00:48 -0500
Subject: [PATCH 046/104] Fix $8e data size, add $8c.

---
 InstructionSets/x86/Decoder.cpp | 43 +++++++++++++++++++++++----------
 InstructionSets/x86/Decoder.hpp |  5 ++--
 2 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index b641a4edc..5f1913e8d 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -233,9 +233,12 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0x89: MemRegReg(MOV, MemReg_Reg, data_size_);		break;
 			case 0x8a: MemRegReg(MOV, Reg_MemReg, DataSize::Byte);	break;
 			case 0x8b: MemRegReg(MOV, Reg_MemReg, data_size_);		break;
-			// 0x8c: not used.
+			case 0x8c:
+				RequiresMin(i80286);	// TODO: or is this 80386?
+				MemRegReg(MOV, MemReg_Seg, DataSize::Word);
+			break;
 			case 0x8d: MemRegReg(LEA, Reg_MemReg, data_size_);		break;
-			case 0x8e: MemRegReg(MOV, SegReg, data_size_);			break;
+			case 0x8e: MemRegReg(MOV, Seg_MemReg, DataSize::Word);	break;
 			case 0x8f: MemRegReg(POP, MemRegPOP, data_size_);		break;
 
 			case 0x90: Complete(NOP, None, None, DataSize::None);	break;	// Or XCHG AX, AX?
@@ -430,10 +433,15 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 		++consumed_;
 
 		Source memreg;
+
+		// TODO: can I just eliminate these lookup tables given the deliberate ordering within Source?
 		constexpr Source reg_table[8] = {
 			Source::eAX,		Source::eCX,		Source::eDX,		Source::eBX,
 			Source::eSPorAH,	Source::eBPorCH,	Source::eSIorDH,	Source::eDIorBH,
 		};
+		constexpr Source seg_table[6] = {
+			Source::ES,	Source::CS,	Source::SS,	Source::DS,	Source::FS,	Source::GS
+		};
 		switch(mod) {
 			default: {
 				const DataSize sizes[] = {DataSize::Byte, data_size_};
@@ -495,20 +503,29 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				}
 			break;
 
-			case ModRegRMFormat::SegReg: {
-				source_ = memreg;
-
-				constexpr Source seg_table[4] = {
-					Source::ES,	Source::CS,
-					Source::SS,	Source::DS,
-				};
-
-				if(reg & 4) {
+			case ModRegRMFormat::Seg_MemReg:
+			case ModRegRMFormat::MemReg_Seg:
+				// The 16-bit chips have four segment registers;
+				// the 80386 onwards has six.
+				if(!is_32bit(model) && reg > 3) {
+					undefined();
+				} else if(reg > 5) {
 					undefined();
 				}
 
-				destination_ = seg_table[reg];
-			} break;
+				if(modregrm_format_ == ModRegRMFormat::Seg_MemReg) {
+					source_ = memreg;
+					destination_ = seg_table[reg];
+
+					// 80286 and later disallow MOV to CS.
+					if(model >= Model::i80286 && destination_ == Source::CS) {
+						undefined();
+					}
+				} else {
+					source_ = seg_table[reg];
+					destination_ = memreg;
+				}
+			break;
 
 			case ModRegRMFormat::MemRegROL_to_SAR:
 				destination_ = memreg;
diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index 387bb3e05..041937fb6 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -65,7 +65,7 @@ template <Model model> class Decoder {
 		/// are packaged into an Instruction.
 		enum class ModRegRMFormat: uint8_t {
 			// Parse the ModRegRM for mode, register and register/memory fields
-			// and populate the source_ and destination_ fields appropriate.
+			// and populate the source_ and destination_ fields appropriately.
 			MemReg_Reg,
 			Reg_MemReg,
 
@@ -82,7 +82,8 @@ template <Model model> class Decoder {
 			// Parse for mode and register/memory fields, populating the
 			// source_ field with the result. Fills destination_ with a segment
 			// register based on the reg field.
-			SegReg,
+			Seg_MemReg,
+			MemReg_Seg,
 
 			//
 			//	'Group 1'

From 3a8eb4a4f05688b5fc9438b3b4aa87f98a7fbec5 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sat, 5 Mar 2022 17:03:46 -0500
Subject: [PATCH 047/104] Add 80386 segment overrides.

---
 InstructionSets/x86/Decoder.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 5f1913e8d..e22857dc0 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -178,6 +178,14 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				RequiresMin(i80286);
 				MemRegReg(ARPL, MemReg_Reg, DataSize::Word);
 			break;
+			case 0x64:
+				RequiresMin(i80386);
+				segment_override_ = Source::FS;
+			break;
+			case 0x65:
+				RequiresMin(i80386);
+				segment_override_ = Source::GS;
+			break;
 			case 0x66:
 				RequiresMin(i80386);
 				data_size_ = DataSize(int(default_data_size_) ^ int(DataSize::Word) ^ int(DataSize::DWord));

From 65f578fe614b216c1065c2732f6e827ef61a7dc7 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sat, 5 Mar 2022 17:16:13 -0500
Subject: [PATCH 048/104] Add notes on all missing opcodes.

---
 InstructionSets/x86/Decoder.cpp | 47 ++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index e22857dc0..b60f806c6 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -194,6 +194,10 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				RequiresMin(i80386);
 				address_size_ = AddressSize(int(default_address_size_) ^ int(AddressSize::b16) ^ int(AddressSize::b32));
 			break;
+			// 0x68: PUSH Iv
+			// 0x69: IMUL GvEvIv
+			// 0x6a: PUSH Ib
+			// 0x6b: IMUL GvEvIb
 			case 0x6c:	// INSB
 				RequiresMin(i80186);
 				Complete(INS, None, None, DataSize::Byte);
@@ -302,6 +306,9 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0xbe: RegData(MOV, eSI, data_size_);		break;
 			case 0xbf: RegData(MOV, eDI, data_size_);		break;
 
+			// 0xc0: shift group 2, Eb, Ib
+			// 0xc1: shift group 2, Ev, Iv
+
 			case 0xc2: RegData(RETN, None, data_size_);				break;
 			case 0xc3: Complete(RETN, None, None, DataSize::None);	break;
 			case 0xc4: MemRegReg(LES, Reg_MemReg, data_size_);		break;
@@ -343,7 +350,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			} break;
 			case 0xd4: RegData(AAM, eAX, DataSize::Byte);			break;
 			case 0xd5: RegData(AAD, eAX, DataSize::Byte);			break;
-
+			// Unused: 0xd6.
 			case 0xd7: Complete(XLAT, None, None, DataSize::Byte);	break;
 
 			case 0xd8: MemRegReg(ESC, MemReg_Reg, DataSize::None);	break;
@@ -376,6 +383,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0xef: Complete(OUT, eAX, eDX, data_size_);		break;
 
 			case 0xf0: lock_ = true;					break;
+			// Unused: 0xf1
 			case 0xf2: repetition_ = Repetition::RepNE;	break;
 			case 0xf3: repetition_ = Repetition::RepE;	break;
 
@@ -417,6 +425,43 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				Complete(LOADALL, None, None, DataSize::None);
 			break;
 			case 0x06:	Complete(CLTS, None, None, DataSize::Byte);			break;
+
+			// 0x20: MOV Cr, Rd
+			// 0x21: MOV Dd, Rd
+			// 0x22: MOV Rd, Cd
+			// 0x23: MOV Rd, Dd
+			// 0x24: MOV Td, Rd
+			// 0x26: MOV Rd, Td
+
+			// 0x8x: long-displacement jumps: JO, JNO, JB, JNB, JZ, JNZ, JBE, JNBE, JS, JNS, JP, JNP, JL, JNL, JLE, JNLE
+
+			// [0x90, 0x97]: byte set on condition Eb: SETO, SETNO, SETB, SETNB, SETZ, SETNZ, SETBE, SETNBE
+			// [0x98, 0x9f]: SETS, SETNS, SETP, SETNP, SETL, SETNL, SETLE, SETNLE
+
+			// 0xa0: PUSH FS
+			// 0xa1: POP FS
+			// 0xa3: BT Ev, Gv
+			// 0xa4: SHLD EvGvIb
+			// 0xa5: SHLD EvGcCL
+			// 0xa8: PUSH GS
+			// 0xa9: POP GS
+			// 0xab: BTS Ev, Gv
+			// 0xac: SHRD EvGvIb
+			// 0xad: SHRD EvGvCL
+			// 0xaf: IMUL Gv, Ev
+
+			// 0xb2: LSS Mp
+			// 0xb3: BTR Ev, Gv
+			// 0xb4: LFS Mp
+			// 0xb5: LGS Mp
+			// 0xb6: MOVZX Gv, Eb
+			// 0xb7: MOVZX Gv, Ew
+			// 0xba: Grp8 Ev, Ib
+			// 0xbb: BTC Ev, Gv
+			// 0xbc: BSF Gv, Ev
+			// 0xbd: BSR Gv, Ev
+			// 0xbe: MOVSX Gv, Eb
+			// 0xbf: MOVSX Gv, Ew
 		}
 	}
 

From 6432521b9db2ed33ed85b062c7e0a4fb4789db1e Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sat, 5 Mar 2022 17:16:32 -0500
Subject: [PATCH 049/104] Correct two references to JP that should be JL.

---
 InstructionSets/x86/Documentation/80386 opcode map.html | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/InstructionSets/x86/Documentation/80386 opcode map.html b/InstructionSets/x86/Documentation/80386 opcode map.html
index fde2485ec..e1dcbe5bc 100644
--- a/InstructionSets/x86/Documentation/80386 opcode map.html	
+++ b/InstructionSets/x86/Documentation/80386 opcode map.html	
@@ -367,7 +367,7 @@
 				<td>JNS</td>
 				<td>JP</td>
 				<td>JNP</td>
-				<td>JP</td>
+				<td>JL</td>
 				<td>JNL</td>
 				<td>JLE</td>
 				<td>JNLE</td>
@@ -692,7 +692,7 @@
 				<td>JNS</td>
 				<td>JP</td>
 				<td>JNP</td>
-				<td>JP</td>
+				<td>JL</td>
 				<td>JNL</td>
 				<td>JLE</td>
 				<td>JNLE</td>

From 7a791117677ed6ff0f6d35dcb04416231485f586 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sat, 5 Mar 2022 17:32:21 -0500
Subject: [PATCH 050/104] Add the easiest 80386 extensions: PUSH/POP FS/GS and
 longer conditional jumps.

---
 InstructionSets/x86/Decoder.cpp | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index b60f806c6..cb2e31e96 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -433,18 +433,33 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			// 0x24: MOV Td, Rd
 			// 0x26: MOV Rd, Td
 
-			// 0x8x: long-displacement jumps: JO, JNO, JB, JNB, JZ, JNZ, JBE, JNBE, JS, JNS, JP, JNP, JL, JNL, JLE, JNLE
+			case 0x70: RequiresMin(i80386);	Jump(JO, data_size_);	break;
+			case 0x71: RequiresMin(i80386);	Jump(JNO, data_size_);	break;
+			case 0x72: RequiresMin(i80386);	Jump(JB, data_size_);	break;
+			case 0x73: RequiresMin(i80386);	Jump(JNB, data_size_);	break;
+			case 0x74: RequiresMin(i80386);	Jump(JE, data_size_);	break;
+			case 0x75: RequiresMin(i80386);	Jump(JNE, data_size_);	break;
+			case 0x76: RequiresMin(i80386);	Jump(JBE, data_size_);	break;
+			case 0x77: RequiresMin(i80386);	Jump(JNBE, data_size_);	break;
+			case 0x78: RequiresMin(i80386);	Jump(JS, data_size_);	break;
+			case 0x79: RequiresMin(i80386);	Jump(JNS, data_size_);	break;
+			case 0x7a: RequiresMin(i80386);	Jump(JP, data_size_);	break;
+			case 0x7b: RequiresMin(i80386);	Jump(JNP, data_size_);	break;
+			case 0x7c: RequiresMin(i80386);	Jump(JL, data_size_);	break;
+			case 0x7d: RequiresMin(i80386);	Jump(JNL, data_size_);	break;
+			case 0x7e: RequiresMin(i80386);	Jump(JLE, data_size_);	break;
+			case 0x7f: RequiresMin(i80386);	Jump(JNLE, data_size_);	break;
 
 			// [0x90, 0x97]: byte set on condition Eb: SETO, SETNO, SETB, SETNB, SETZ, SETNZ, SETBE, SETNBE
 			// [0x98, 0x9f]: SETS, SETNS, SETP, SETNP, SETL, SETNL, SETLE, SETNLE
 
-			// 0xa0: PUSH FS
-			// 0xa1: POP FS
+			case 0xa0:	RequiresMin(i80386);	Complete(PUSH, FS, None, data_size_);	break;
+			case 0xa1:	RequiresMin(i80386);	Complete(POP, FS, None, data_size_);	break;
 			// 0xa3: BT Ev, Gv
 			// 0xa4: SHLD EvGvIb
 			// 0xa5: SHLD EvGcCL
-			// 0xa8: PUSH GS
-			// 0xa9: POP GS
+			case 0xa8:	RequiresMin(i80386);	Complete(PUSH, GS, None, data_size_);	break;
+			case 0xa9:	RequiresMin(i80386);	Complete(POP, GS, None, data_size_);	break;
 			// 0xab: BTS Ev, Gv
 			// 0xac: SHRD EvGvIb
 			// 0xad: SHRD EvGvCL

From ede61ae13068f65ee4b46b72ccaee3d70f28891e Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sat, 5 Mar 2022 17:48:01 -0500
Subject: [PATCH 051/104] Flag up TODOs, for easier in-editor navigation.

---
 InstructionSets/x86/Decoder.cpp | 68 +++++++++++++++++----------------
 1 file changed, 35 insertions(+), 33 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index cb2e31e96..311e2fddb 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -194,10 +194,10 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				RequiresMin(i80386);
 				address_size_ = AddressSize(int(default_address_size_) ^ int(AddressSize::b16) ^ int(AddressSize::b32));
 			break;
-			// 0x68: PUSH Iv
-			// 0x69: PUSH GvEvIv
-			// 0x6a: PUSH Ib
-			// 0x6b: IMUL GvEvIv
+			// TODO: 0x68: PUSH Iv
+			// TODO: 0x69: PUSH GvEvIv
+			// TODO: 0x6a: PUSH Ib
+			// TODO: 0x6b: IMUL GvEvIv
 			case 0x6c:	// INSB
 				RequiresMin(i80186);
 				Complete(INS, None, None, DataSize::Byte);
@@ -306,8 +306,8 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0xbe: RegData(MOV, eSI, data_size_);		break;
 			case 0xbf: RegData(MOV, eDI, data_size_);		break;
 
-			// 0xc0: shift group 2, Eb, Ib
-			// 0xc1: shift group 2, Ev, Iv
+			// TODO: 0xc0: shift group 2, Eb, Ib
+			// TODO: 0xc1: shift group 2, Ev, Iv
 
 			case 0xc2: RegData(RETN, None, data_size_);				break;
 			case 0xc3: Complete(RETN, None, None, DataSize::None);	break;
@@ -426,12 +426,12 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			break;
 			case 0x06:	Complete(CLTS, None, None, DataSize::Byte);			break;
 
-			// 0x20: MOV Cr, Rd
-			// 0x21: MOV Dd, Rd
-			// 0x22: MOV Rd, Cd
-			// 0x23: MOV Rd, Dd
-			// 0x24: MOV Td, Rd
-			// 0x26: MOV Rd, Td
+			// TODO: 0x20: MOV Cr, Rd
+			// TODO: 0x21: MOV Dd, Rd
+			// TODO: 0x22: MOV Rd, Cd
+			// TODO: 0x23: MOV Rd, Dd
+			// TODO: 0x24: MOV Td, Rd
+			// TODO: 0x26: MOV Rd, Td
 
 			case 0x70: RequiresMin(i80386);	Jump(JO, data_size_);	break;
 			case 0x71: RequiresMin(i80386);	Jump(JNO, data_size_);	break;
@@ -450,33 +450,33 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0x7e: RequiresMin(i80386);	Jump(JLE, data_size_);	break;
 			case 0x7f: RequiresMin(i80386);	Jump(JNLE, data_size_);	break;
 
-			// [0x90, 0x97]: byte set on condition Eb: SETO, SETNO, SETB, SETNB, SETZ, SETNZ, SETBE, SETNBE
-			// [0x98, 0x9f]: SETS, SETNS, SETP, SETNP, SETL, SETNL, SETLE, SETNLE
+			// TODO: [0x90, 0x97]: byte set on condition Eb: SETO, SETNO, SETB, SETNB, SETZ, SETNZ, SETBE, SETNBE
+			// TODO: [0x98, 0x9f]: SETS, SETNS, SETP, SETNP, SETL, SETNL, SETLE, SETNLE
 
 			case 0xa0:	RequiresMin(i80386);	Complete(PUSH, FS, None, data_size_);	break;
 			case 0xa1:	RequiresMin(i80386);	Complete(POP, FS, None, data_size_);	break;
-			// 0xa3: BT Ev, Gv
-			// 0xa4: SHLD EvGvIb
-			// 0xa5: SHLD EvGcCL
+			// TODO: 0xa3: BT Ev, Gv
+			// TODO: 0xa4: SHLD EvGvIb
+			// TODO: 0xa5: SHLD EvGcCL
 			case 0xa8:	RequiresMin(i80386);	Complete(PUSH, GS, None, data_size_);	break;
 			case 0xa9:	RequiresMin(i80386);	Complete(POP, GS, None, data_size_);	break;
-			// 0xab: BTS Ev, Gv
-			// 0xac: SHRD EvGvIb
-			// 0xad: SHRD EvGvCL
-			// 0xaf: IMUL Gv, Ev
+			// TODO: 0xab: BTS Ev, Gv
+			// TODO: 0xac: SHRD EvGvIb
+			// TODO: 0xad: SHRD EvGvCL
+			// TODO: 0xaf: IMUL Gv, Ev
 
-			// 0xb2: LSS Mp
-			// 0xb3: BTR Ev, Gv
-			// 0xb4: LFS Mp
-			// 0xb5: LGS Mp
-			// 0xb6: MOVZX Gv, Eb
-			// 0xb7: MOVZX Gv, Ew
-			// 0xba: Grp8 Ev, Ib
-			// 0xbb: BTC Ev, Gv
-			// 0xbc: BSF Gv, Ev
-			// 0xbd: BSR Gv, Ev
-			// 0xbe: MOVSX Gv, Eb
-			// 0xbf: MOVSX Gv, Ew
+			// TODO: 0xb2: LSS Mp
+			// TODO: 0xb3: BTR Ev, Gv
+			// TODO: 0xb4: LFS Mp
+			// TODO: 0xb5: LGS Mp
+			// TODO: 0xb6: MOVZX Gv, Eb
+			// TODO: 0xb7: MOVZX Gv, Ew
+			// TODO: 0xba: Grp8 Ev, Ib
+			// TODO: 0xbb: BTC Ev, Gv
+			// TODO: 0xbc: BSF Gv, Ev
+			// TODO: 0xbd: BSR Gv, Ev
+			// TODO: 0xbe: MOVSX Gv, Eb
+			// TODO: 0xbf: MOVSX Gv, Ew
 		}
 	}
 
@@ -502,6 +502,8 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 
 		Source memreg;
 
+		// TODO: the below currently has no way to segue into fetching a SIB.
+
 		// TODO: can I just eliminate these lookup tables given the deliberate ordering within Source?
 		constexpr Source reg_table[8] = {
 			Source::eAX,		Source::eCX,		Source::eDX,		Source::eBX,

From 49b5889d9eb495bb3be8ee3f900f706a71464817 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sun, 6 Mar 2022 09:24:59 -0500
Subject: [PATCH 052/104] 0x8c is available on the 8086.

---
 InstructionSets/x86/Decoder.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 311e2fddb..542a4f697 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -245,10 +245,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0x89: MemRegReg(MOV, MemReg_Reg, data_size_);		break;
 			case 0x8a: MemRegReg(MOV, Reg_MemReg, DataSize::Byte);	break;
 			case 0x8b: MemRegReg(MOV, Reg_MemReg, data_size_);		break;
-			case 0x8c:
-				RequiresMin(i80286);	// TODO: or is this 80386?
-				MemRegReg(MOV, MemReg_Seg, DataSize::Word);
-			break;
+			case 0x8c: MemRegReg(MOV, MemReg_Seg, DataSize::Word);	break;
 			case 0x8d: MemRegReg(LEA, Reg_MemReg, data_size_);		break;
 			case 0x8e: MemRegReg(MOV, Seg_MemReg, DataSize::Word);	break;
 			case 0x8f: MemRegReg(POP, MemRegPOP, data_size_);		break;

From 91a6bf671d624b8302798c652eeca81912c1ba50 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sun, 6 Mar 2022 09:28:43 -0500
Subject: [PATCH 053/104] Also 'easy': LSS, LFS, LGS.

Though perhaps I'm off on LES and LDS?
---
 InstructionSets/x86/Decoder.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 542a4f697..03a82a915 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -450,22 +450,22 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			// TODO: [0x90, 0x97]: byte set on condition Eb: SETO, SETNO, SETB, SETNB, SETZ, SETNZ, SETBE, SETNBE
 			// TODO: [0x98, 0x9f]: SETS, SETNS, SETP, SETNP, SETL, SETNL, SETLE, SETNLE
 
-			case 0xa0:	RequiresMin(i80386);	Complete(PUSH, FS, None, data_size_);	break;
-			case 0xa1:	RequiresMin(i80386);	Complete(POP, FS, None, data_size_);	break;
+			case 0xa0: RequiresMin(i80386);	Complete(PUSH, FS, None, data_size_);	break;
+			case 0xa1: RequiresMin(i80386);	Complete(POP, FS, None, data_size_);	break;
 			// TODO: 0xa3: BT Ev, Gv
 			// TODO: 0xa4: SHLD EvGvIb
 			// TODO: 0xa5: SHLD EvGcCL
-			case 0xa8:	RequiresMin(i80386);	Complete(PUSH, GS, None, data_size_);	break;
-			case 0xa9:	RequiresMin(i80386);	Complete(POP, GS, None, data_size_);	break;
+			case 0xa8: RequiresMin(i80386);	Complete(PUSH, GS, None, data_size_);	break;
+			case 0xa9: RequiresMin(i80386);	Complete(POP, GS, None, data_size_);	break;
 			// TODO: 0xab: BTS Ev, Gv
 			// TODO: 0xac: SHRD EvGvIb
 			// TODO: 0xad: SHRD EvGvCL
 			// TODO: 0xaf: IMUL Gv, Ev
 
-			// TODO: 0xb2: LSS Mp
+			case 0xb2: RequiresMin(i80386);	MemRegReg(LSS, Reg_MemReg, data_size_);	break;
 			// TODO: 0xb3: BTR Ev, Gv
-			// TODO: 0xb4: LFS Mp
-			// TODO: 0xb5: LGS Mp
+			case 0xb4: RequiresMin(i80386);	MemRegReg(LFS, Reg_MemReg, data_size_);	break;
+			case 0xb5: RequiresMin(i80386);	MemRegReg(LGS, Reg_MemReg, data_size_);	break;
 			// TODO: 0xb6: MOVZX Gv, Eb
 			// TODO: 0xb7: MOVZX Gv, Ew
 			// TODO: 0xba: Grp8 Ev, Ib

From e7aaf4dd2edb7044d0846ff38ed3cfc99e03218d Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sun, 6 Mar 2022 12:10:25 -0500
Subject: [PATCH 054/104] Add LDS, LES, LSS test.

---
 .../Mac/Clock SignalTests/x86DecoderTests.mm  | 23 ++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index a9ef904df..1ef7c754f 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -60,10 +60,12 @@ template <typename InstructionT> void test_far(const InstructionT &instruction,
 
 // MARK: - Decoder
 
-template <Model model> std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(const std::initializer_list<uint8_t> &stream) {
+template <Model model>
+std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(const std::initializer_list<uint8_t> &stream, bool set_32_bit = false) {
 	// Decode by offering up all data at once.
 	std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> instructions;
 	InstructionSet::x86::Decoder<model> decoder;
+	decoder.set_32bit_protected_mode(set_32_bit);
 	instructions.clear();
 	const uint8_t *byte = stream.begin();
 	while(byte != stream.end()) {
@@ -76,6 +78,7 @@ template <Model model> std::vector<typename InstructionSet::x86::Decoder<model>:
 	// Grab a byte-at-a-time decoding and check that it matches the previous.
 	{
 		InstructionSet::x86::Decoder<model> decoder;
+		decoder.set_32bit_protected_mode(set_32_bit);
 
 		auto previous_instruction = instructions.begin();
 		for(auto item: stream) {
@@ -303,4 +306,22 @@ template <Model model> std::vector<typename InstructionSet::x86::Decoder<model>:
 	test_far(instructions[0], Operation::CALLF, 0x7856, 0x3412);
 }
 
+- (void)testLDSLESEtc {
+	auto run_test = [](bool is_32, DataSize size) {
+		const auto instructions = decode<Model::i80386>({
+			0xc5, 0x33,			// lds (%bp, %di), %si
+			0xc4, 0x17,			// les (%bx), %dx
+			0x0f, 0xb2, 0x17,	// lss edx, (edi)
+		}, is_32);
+
+		XCTAssertEqual(instructions.size(), 3);
+		test(instructions[0], size, Operation::LDS, ScaleIndexBase(Source::eBP, Source::eDI), Source::eSI);
+		test(instructions[1], size, Operation::LES, ScaleIndexBase(Source::eBX), Source::eDX);
+		test(instructions[2], size, Operation::LSS, ScaleIndexBase(Source::eBX), Source::eDX);
+	};
+
+	run_test(false, DataSize::Word);
+	run_test(true, DataSize::DWord);
+}
+
 @end

From 4479be4fd0f9579cdfedd02338fee220d531c8ba Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sun, 6 Mar 2022 14:28:41 -0500
Subject: [PATCH 055/104] Add the two immediate PUSHes.

---
 InstructionSets/x86/Decoder.cpp | 100 ++++++++++++++++++--------------
 1 file changed, 57 insertions(+), 43 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 03a82a915..10b6b9113 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -60,12 +60,19 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 	operand_size_ = DataSize::None;				\
 	operation_size_ = size
 
-/// Handles JO, JNO, JB, etc — jumps with a single byte displacement.
-#define Jump(op, size)								\
+/// Handles JO, JNO, JB, etc — anything with only a displacement.
+#define Displacement(op, size)						\
 	operation_ = Operation::op;						\
 	phase_ = Phase::DisplacementOrOperand;			\
 	displacement_size_ = size
 
+/// Handles PUSH [immediate], etc — anything with only an immediate operand.
+#define Immediate(op, size)							\
+	operation_ = Operation::op;						\
+	source_ = Source::Immediate;					\
+	phase_ = Phase::DisplacementOrOperand;			\
+	operand_size_ = size
+
 /// Handles far CALL and far JMP — fixed four byte operand operations.
 #define Far(op)										\
 	operation_ = Operation::op;						\
@@ -194,9 +201,15 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				RequiresMin(i80386);
 				address_size_ = AddressSize(int(default_address_size_) ^ int(AddressSize::b16) ^ int(AddressSize::b32));
 			break;
-			// TODO: 0x68: PUSH Iv
-			// TODO: 0x69: PUSH GvEvIv
-			// TODO: 0x6a: PUSH Ib
+			case 0x68:
+				RequiresMin(i80286);
+				Immediate(PUSH, data_size_);
+			break;
+			// TODO: 0x69: IMUL GvEvIv
+			case 0x6a:
+				RequiresMin(i80286);
+				Immediate(PUSH, DataSize::Byte);
+			break;
 			// TODO: 0x6b: IMUL GvEvIv
 			case 0x6c:	// INSB
 				RequiresMin(i80186);
@@ -215,22 +228,22 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				Complete(OUTS, None, None, data_size_);
 			break;
 
-			case 0x70: Jump(JO, DataSize::Byte);	break;
-			case 0x71: Jump(JNO, DataSize::Byte);	break;
-			case 0x72: Jump(JB, DataSize::Byte);	break;
-			case 0x73: Jump(JNB, DataSize::Byte);	break;
-			case 0x74: Jump(JE, DataSize::Byte);	break;
-			case 0x75: Jump(JNE, DataSize::Byte);	break;
-			case 0x76: Jump(JBE, DataSize::Byte);	break;
-			case 0x77: Jump(JNBE, DataSize::Byte);	break;
-			case 0x78: Jump(JS, DataSize::Byte);	break;
-			case 0x79: Jump(JNS, DataSize::Byte);	break;
-			case 0x7a: Jump(JP, DataSize::Byte);	break;
-			case 0x7b: Jump(JNP, DataSize::Byte);	break;
-			case 0x7c: Jump(JL, DataSize::Byte);	break;
-			case 0x7d: Jump(JNL, DataSize::Byte);	break;
-			case 0x7e: Jump(JLE, DataSize::Byte);	break;
-			case 0x7f: Jump(JNLE, DataSize::Byte);	break;
+			case 0x70: Displacement(JO, DataSize::Byte);	break;
+			case 0x71: Displacement(JNO, DataSize::Byte);	break;
+			case 0x72: Displacement(JB, DataSize::Byte);	break;
+			case 0x73: Displacement(JNB, DataSize::Byte);	break;
+			case 0x74: Displacement(JE, DataSize::Byte);	break;
+			case 0x75: Displacement(JNE, DataSize::Byte);	break;
+			case 0x76: Displacement(JBE, DataSize::Byte);	break;
+			case 0x77: Displacement(JNBE, DataSize::Byte);	break;
+			case 0x78: Displacement(JS, DataSize::Byte);	break;
+			case 0x79: Displacement(JNS, DataSize::Byte);	break;
+			case 0x7a: Displacement(JP, DataSize::Byte);	break;
+			case 0x7b: Displacement(JNP, DataSize::Byte);	break;
+			case 0x7c: Displacement(JL, DataSize::Byte);	break;
+			case 0x7d: Displacement(JNL, DataSize::Byte);	break;
+			case 0x7e: Displacement(JLE, DataSize::Byte);	break;
+			case 0x7f: Displacement(JNLE, DataSize::Byte);	break;
 
 			case 0x80: MemRegReg(Invalid, MemRegADD_to_CMP, DataSize::Byte);			break;
 			case 0x81: MemRegReg(Invalid, MemRegADD_to_CMP, data_size_);				break;
@@ -359,10 +372,10 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0xde: MemRegReg(ESC, MemReg_Reg, DataSize::None);	break;
 			case 0xdf: MemRegReg(ESC, MemReg_Reg, DataSize::None);	break;
 
-			case 0xe0: Jump(LOOPNE, DataSize::Byte);	break;
-			case 0xe1: Jump(LOOPE, DataSize::Byte);		break;
-			case 0xe2: Jump(LOOP, DataSize::Byte);		break;
-			case 0xe3: Jump(JPCX, DataSize::Byte);		break;
+			case 0xe0: Displacement(LOOPNE, DataSize::Byte);	break;
+			case 0xe1: Displacement(LOOPE, DataSize::Byte);		break;
+			case 0xe2: Displacement(LOOP, DataSize::Byte);		break;
+			case 0xe3: Displacement(JPCX, DataSize::Byte);		break;
 
 			case 0xe4: RegAddr(IN, eAX, DataSize::Byte, DataSize::Byte);	break;
 			case 0xe5: RegAddr(IN, eAX, data_size_, DataSize::Byte);		break;
@@ -372,7 +385,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0xe8: RegData(CALLD, None, data_size_);	break;
 			case 0xe9: RegData(JMPN, None, data_size_);		break;
 			case 0xea: Far(JMPF);							break;
-			case 0xeb: Jump(JMPN, DataSize::Byte);			break;
+			case 0xeb: Displacement(JMPN, DataSize::Byte);	break;
 
 			case 0xec: Complete(IN, eDX, eAX, DataSize::Byte);	break;
 			case 0xed: Complete(IN, eDX, eAX, data_size_);		break;
@@ -430,22 +443,22 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			// TODO: 0x24: MOV Td, Rd
 			// TODO: 0x26: MOV Rd, Td
 
-			case 0x70: RequiresMin(i80386);	Jump(JO, data_size_);	break;
-			case 0x71: RequiresMin(i80386);	Jump(JNO, data_size_);	break;
-			case 0x72: RequiresMin(i80386);	Jump(JB, data_size_);	break;
-			case 0x73: RequiresMin(i80386);	Jump(JNB, data_size_);	break;
-			case 0x74: RequiresMin(i80386);	Jump(JE, data_size_);	break;
-			case 0x75: RequiresMin(i80386);	Jump(JNE, data_size_);	break;
-			case 0x76: RequiresMin(i80386);	Jump(JBE, data_size_);	break;
-			case 0x77: RequiresMin(i80386);	Jump(JNBE, data_size_);	break;
-			case 0x78: RequiresMin(i80386);	Jump(JS, data_size_);	break;
-			case 0x79: RequiresMin(i80386);	Jump(JNS, data_size_);	break;
-			case 0x7a: RequiresMin(i80386);	Jump(JP, data_size_);	break;
-			case 0x7b: RequiresMin(i80386);	Jump(JNP, data_size_);	break;
-			case 0x7c: RequiresMin(i80386);	Jump(JL, data_size_);	break;
-			case 0x7d: RequiresMin(i80386);	Jump(JNL, data_size_);	break;
-			case 0x7e: RequiresMin(i80386);	Jump(JLE, data_size_);	break;
-			case 0x7f: RequiresMin(i80386);	Jump(JNLE, data_size_);	break;
+			case 0x70: RequiresMin(i80386);	Displacement(JO, data_size_);	break;
+			case 0x71: RequiresMin(i80386);	Displacement(JNO, data_size_);	break;
+			case 0x72: RequiresMin(i80386);	Displacement(JB, data_size_);	break;
+			case 0x73: RequiresMin(i80386);	Displacement(JNB, data_size_);	break;
+			case 0x74: RequiresMin(i80386);	Displacement(JE, data_size_);	break;
+			case 0x75: RequiresMin(i80386);	Displacement(JNE, data_size_);	break;
+			case 0x76: RequiresMin(i80386);	Displacement(JBE, data_size_);	break;
+			case 0x77: RequiresMin(i80386);	Displacement(JNBE, data_size_);	break;
+			case 0x78: RequiresMin(i80386);	Displacement(JS, data_size_);	break;
+			case 0x79: RequiresMin(i80386);	Displacement(JNS, data_size_);	break;
+			case 0x7a: RequiresMin(i80386);	Displacement(JP, data_size_);	break;
+			case 0x7b: RequiresMin(i80386);	Displacement(JNP, data_size_);	break;
+			case 0x7c: RequiresMin(i80386);	Displacement(JL, data_size_);	break;
+			case 0x7d: RequiresMin(i80386);	Displacement(JNL, data_size_);	break;
+			case 0x7e: RequiresMin(i80386);	Displacement(JLE, data_size_);	break;
+			case 0x7f: RequiresMin(i80386);	Displacement(JNLE, data_size_);	break;
 
 			// TODO: [0x90, 0x97]: byte set on condition Eb: SETO, SETNO, SETB, SETNB, SETZ, SETNZ, SETBE, SETNBE
 			// TODO: [0x98, 0x9f]: SETS, SETNS, SETP, SETNP, SETL, SETNL, SETLE, SETNLE
@@ -480,7 +493,8 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 #undef Requires
 #undef RequiresMin
 #undef Far
-#undef Jump
+#undef Immediate
+#undef Displacement
 #undef MemRegReg
 #undef AddrReg
 #undef RegAddr

From 1ea9d3faf8fec46422e345cc9c433d859413fe68 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 7 Mar 2022 09:05:22 -0500
Subject: [PATCH 056/104] Introduce additional forms of IMUL.

---
 InstructionSets/x86/Decoder.cpp     | 20 ++++++++++++++++----
 InstructionSets/x86/Instruction.hpp | 10 ++++++++--
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 10b6b9113..c1617a841 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -205,12 +205,21 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				RequiresMin(i80286);
 				Immediate(PUSH, data_size_);
 			break;
-			// TODO: 0x69: IMUL GvEvIv
+			case 0x69:
+				RequiresMin(i80286);
+				MemRegReg(IMUL_3, Reg_MemReg, data_size_);
+				operand_size_ = data_size_;
+			break;
 			case 0x6a:
 				RequiresMin(i80286);
 				Immediate(PUSH, DataSize::Byte);
 			break;
-			// TODO: 0x6b: IMUL GvEvIv
+			case 0x6b:
+				RequiresMin(i80286);
+				MemRegReg(IMUL_3, Reg_MemReg, data_size_);
+				operand_size_ = DataSize::Byte;
+				sign_extend_ = true;
+			break;
 			case 0x6c:	// INSB
 				RequiresMin(i80186);
 				Complete(INS, None, None, DataSize::Byte);
@@ -473,7 +482,10 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			// TODO: 0xab: BTS Ev, Gv
 			// TODO: 0xac: SHRD EvGvIb
 			// TODO: 0xad: SHRD EvGvCL
-			// TODO: 0xaf: IMUL Gv, Ev
+			case 0xaf:
+				RequiresMin(i80386);
+				MemRegReg(IMUL_2, Reg_MemReg, data_size_);
+			break;
 
 			case 0xb2: RequiresMin(i80386);	MemRegReg(LSS, Reg_MemReg, data_size_);	break;
 			// TODO: 0xb3: BTR Ev, Gv
@@ -578,7 +590,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 					case 2: 	operation_ = Operation::NOT;	break;
 					case 3: 	operation_ = Operation::NEG;	break;
 					case 4: 	operation_ = Operation::MUL;	break;
-					case 5: 	operation_ = Operation::IMUL;	break;
+					case 5: 	operation_ = Operation::IMUL_1;	break;
 					case 6: 	operation_ = Operation::DIV;	break;
 					case 7: 	operation_ = Operation::IDIV;	break;
 				}
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 85832be60..618545ab7 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -65,8 +65,8 @@ enum class Operation: uint8_t {
 	SUB,
 	/// Unsigned multiply; multiplies the source value by AX or AL, storing the result in DX:AX or AX.
 	MUL,
-	/// Signed multiply; multiplies the source value by AX or AL, storing the result in DX:AX or AX.
-	IMUL,
+	/// Single operand signed multiply; multiplies the source value by AX or AL, storing the result in DX:AX or AX.
+	IMUL_1,
 	/// Unsigned divide; divide the source value by AX or AL, storing the quotient in AL and the remainder in AH.
 	DIV,
 	/// Signed divide; divide the source value by AX or AL, storing the quotient in AL and the remainder in AH.
@@ -265,6 +265,9 @@ enum class Operation: uint8_t {
 	/// Stores the task register.
 	STR,
 
+	/// Three-operand form of IMUL; multiply the immediate by the source and write to the destination.
+	IMUL_3,
+
 	/// Undocumented (but used); loads all registers, including internal ones.
 	LOADALL,
 
@@ -316,6 +319,9 @@ enum class Operation: uint8_t {
 	/// Move with sign extension.
 	MOVSX,
 
+	/// Two-operand form of IMUL; multiply the source by the destination and write to the destination.
+	IMUL_2,
+
 	IRETD,
 	JECXZ,
 	LODSD,

From bbb3168bae5581f7ad919ed1f2a206141b23a60a Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 7 Mar 2022 09:18:59 -0500
Subject: [PATCH 057/104] Add the missing shift group segues at 0xc0 and 0xc1.

---
 InstructionSets/x86/Decoder.cpp | 37 ++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index c1617a841..b22edc312 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -87,6 +87,14 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 	displacement_size_ = DataSize::Word;			\
 	operand_size_ = DataSize::Byte
 
+/// Sets up the operation size, oncoming phase and modregrm format for a member of the shift group (i.e. 'group 2').
+#define ShiftGroup() {										\
+	const DataSize sizes[] = {DataSize::Byte, data_size_};	\
+	phase_ = Phase::ModRegRM;								\
+	modregrm_format_ = ModRegRMFormat::MemRegROL_to_SAR;	\
+	operation_size_ = sizes[instr & 1];						\
+}
+
 #define undefined()	{												\
 	const auto result = std::make_pair(consumed_, InstructionT());	\
 	reset_parsing();												\
@@ -325,9 +333,12 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0xbe: RegData(MOV, eSI, data_size_);		break;
 			case 0xbf: RegData(MOV, eDI, data_size_);		break;
 
-			// TODO: 0xc0: shift group 2, Eb, Ib
-			// TODO: 0xc1: shift group 2, Ev, Iv
-
+			case 0xc0: case 0xc1:
+				RequiresMin(i80186);
+				ShiftGroup();
+				source_ = Source::Immediate;
+				operand_size_ = operation_size_;
+			break;
 			case 0xc2: RegData(RETN, None, data_size_);				break;
 			case 0xc3: Complete(RETN, None, None, DataSize::None);	break;
 			case 0xc4: MemRegReg(LES, Reg_MemReg, data_size_);		break;
@@ -352,21 +363,15 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0xce: Complete(INTO, None, None, DataSize::None);	break;
 			case 0xcf: Complete(IRET, None, None, DataSize::None);	break;
 
-			case 0xd0: case 0xd1: {
-				const DataSize sizes[] = {DataSize::Byte, data_size_};
-				phase_ = Phase::ModRegRM;
-				modregrm_format_ = ModRegRMFormat::MemRegROL_to_SAR;
-				operation_size_ = sizes[instr & 1];
+			case 0xd0: case 0xd1:
+				ShiftGroup();
 				source_ = Source::Immediate;
 				operand_ = 1;
-			} break;
-			case 0xd2: case 0xd3: {
-				const DataSize sizes[] = {DataSize::Byte, data_size_};
-				phase_ = Phase::ModRegRM;
-				modregrm_format_ = ModRegRMFormat::MemRegROL_to_SAR;
-				operation_size_ = sizes[instr & 1];
+			break;
+			case 0xd2: case 0xd3:
+				ShiftGroup();
 				source_ = Source::eCX;
-			} break;
+			break;
 			case 0xd4: RegData(AAM, eAX, DataSize::Byte);			break;
 			case 0xd5: RegData(AAD, eAX, DataSize::Byte);			break;
 			// Unused: 0xd6.
@@ -504,6 +509,8 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 
 #undef Requires
 #undef RequiresMin
+#undef ShiftGroup
+#undef Displacement16Operand8
 #undef Far
 #undef Immediate
 #undef Displacement

From 35a66c03c24664d229bbd20fc3ed67539806a022 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 7 Mar 2022 10:32:34 -0500
Subject: [PATCH 058/104] Add the SETs.

---
 InstructionSets/x86/Decoder.cpp               | 59 +++++++++++++------
 InstructionSets/x86/Decoder.hpp               |  6 +-
 .../x86/Documentation/80386 opcode map.html   | 18 +++---
 InstructionSets/x86/Instruction.hpp           |  3 +
 4 files changed, 56 insertions(+), 30 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index b22edc312..b0ca271cf 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -267,18 +267,18 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0x82: MemRegReg(Invalid, MemRegADD_to_CMP_SignExtend, DataSize::Byte);	break;
 			case 0x83: MemRegReg(Invalid, MemRegADD_to_CMP_SignExtend, data_size_);		break;
 
-			case 0x84: MemRegReg(TEST, MemReg_Reg, DataSize::Byte);	break;
-			case 0x85: MemRegReg(TEST, MemReg_Reg, data_size_);		break;
-			case 0x86: MemRegReg(XCHG, Reg_MemReg, DataSize::Byte);	break;
-			case 0x87: MemRegReg(XCHG, Reg_MemReg, data_size_);		break;
-			case 0x88: MemRegReg(MOV, MemReg_Reg, DataSize::Byte);	break;
-			case 0x89: MemRegReg(MOV, MemReg_Reg, data_size_);		break;
-			case 0x8a: MemRegReg(MOV, Reg_MemReg, DataSize::Byte);	break;
-			case 0x8b: MemRegReg(MOV, Reg_MemReg, data_size_);		break;
-			case 0x8c: MemRegReg(MOV, MemReg_Seg, DataSize::Word);	break;
-			case 0x8d: MemRegReg(LEA, Reg_MemReg, data_size_);		break;
-			case 0x8e: MemRegReg(MOV, Seg_MemReg, DataSize::Word);	break;
-			case 0x8f: MemRegReg(POP, MemRegPOP, data_size_);		break;
+			case 0x84: MemRegReg(TEST, MemReg_Reg, DataSize::Byte);		break;
+			case 0x85: MemRegReg(TEST, MemReg_Reg, data_size_);			break;
+			case 0x86: MemRegReg(XCHG, Reg_MemReg, DataSize::Byte);		break;
+			case 0x87: MemRegReg(XCHG, Reg_MemReg, data_size_);			break;
+			case 0x88: MemRegReg(MOV, MemReg_Reg, DataSize::Byte);		break;
+			case 0x89: MemRegReg(MOV, MemReg_Reg, data_size_);			break;
+			case 0x8a: MemRegReg(MOV, Reg_MemReg, DataSize::Byte);		break;
+			case 0x8b: MemRegReg(MOV, Reg_MemReg, data_size_);			break;
+			case 0x8c: MemRegReg(MOV, MemReg_Seg, DataSize::Word);		break;
+			case 0x8d: MemRegReg(LEA, Reg_MemReg, data_size_);			break;
+			case 0x8e: MemRegReg(MOV, Seg_MemReg, DataSize::Word);		break;
+			case 0x8f: MemRegReg(POP, MemRegSingleOperand, data_size_);	break;
 
 			case 0x90: Complete(NOP, None, None, DataSize::None);	break;	// Or XCHG AX, AX?
 			case 0x91: Complete(XCHG, eAX, eCX, data_size_);		break;
@@ -474,8 +474,28 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0x7e: RequiresMin(i80386);	Displacement(JLE, data_size_);	break;
 			case 0x7f: RequiresMin(i80386);	Displacement(JNLE, data_size_);	break;
 
-			// TODO: [0x90, 0x97]: byte set on condition Eb: SETO, SETNO, SETB, SETNB, SETZ, SETNZ, SETBE, SETNBE
-			// TODO: [0x98, 0x9f]: SETS, SETNS, SETP, SETNP, SETL, SETNL, SETLE, SETNLE
+#define Set(x)												\
+	RequiresMin(i80386);									\
+	MemRegReg(SET##x, MemRegSingleOperand, DataSize::Byte);
+
+			case 0x90: Set(O);		break;
+			case 0x91: Set(NO);		break;
+			case 0x92: Set(B);		break;
+			case 0x93: Set(NB);		break;
+			case 0x94: Set(Z);		break;
+			case 0x95: Set(NZ);		break;
+			case 0x96: Set(BE);		break;
+			case 0x97: Set(NBE);	break;
+			case 0x98: Set(S);		break;
+			case 0x99: Set(NS);		break;
+			case 0x9a: Set(P);		break;
+			case 0x9b: Set(NP);		break;
+			case 0x9c: Set(L);		break;
+			case 0x9d: Set(NL);		break;
+			case 0x9e: Set(LE);		break;
+			case 0x9f: Set(NLE);	break;
+
+#undef Set
 
 			case 0xa0: RequiresMin(i80386);	Complete(PUSH, FS, None, data_size_);	break;
 			case 0xa1: RequiresMin(i80386);	Complete(POP, FS, None, data_size_);	break;
@@ -568,8 +588,13 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 3:
 				memreg = reg_table[rm];
 
-				// LES and LDS accept a memory argument only, not a register.
-				if(operation_ == Operation::LES || operation_ == Operation::LDS) {
+				// LES, LDS, etc accept a memory argument only, not a register.
+				if(
+					operation_ == Operation::LES ||
+					operation_ == Operation::LDS ||
+					operation_ == Operation::LGS ||
+					operation_ == Operation::LSS ||
+					operation_ == Operation::LFS) {
 					undefined();
 				}
 			break;
@@ -678,7 +703,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				}
 			break;
 
-			case ModRegRMFormat::MemRegPOP:
+			case ModRegRMFormat::MemRegSingleOperand:
 				source_ = destination_ = memreg;
 
 				if(reg != 0) {
diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index 041937fb6..6f0c4a6aa 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -70,13 +70,11 @@ template <Model model> class Decoder {
 			Reg_MemReg,
 
 			// Parse for mode and register/memory fields, populating both
-			// source_ and destination_ fields with the result. Use the 'register'
-			// field to check for the POP operation.
-			MemRegPOP,
+			// source_ and destination_ fields with the single register/memory result.
+			MemRegSingleOperand,
 
 			// Parse for mode and register/memory fields, populating both
 			// the destination_ field with the result and setting source_ to Immediate.
-			// Use the 'register' field to check for the MOV operation.
 			MemRegMOV,
 
 			// Parse for mode and register/memory fields, populating the
diff --git a/InstructionSets/x86/Documentation/80386 opcode map.html b/InstructionSets/x86/Documentation/80386 opcode map.html
index e1dcbe5bc..9b9b05252 100644
--- a/InstructionSets/x86/Documentation/80386 opcode map.html	
+++ b/InstructionSets/x86/Documentation/80386 opcode map.html	
@@ -700,15 +700,7 @@
 			<tr>
 				<th rowspan=2>9x</th>
 				
-				<td colspan=8>Byte set on condition (Eb)</td>
-				<td rowspan=2>SETS</td>
-				<td rowspan=2>SETNS</td>
-				<td rowspan=2>SETP</td>
-				<td rowspan=2>SETNP</td>
-				<td rowspan=2>SETL</td>
-				<td rowspan=2>SETNL</td>
-				<td rowspan=2>SETLE</td>
-				<td rowspan=2>SETNLE</td>
+				<td colspan=16>Byte set on condition (Eb)</td>
 			</tr>
 			<tr>
 				<!-- Byte set on condition (Eb) -->
@@ -720,6 +712,14 @@
 				<td>SETNZ</td>
 				<td>SETBE</td>
 				<td>SETNBE</td>
+				<td>SETS</td>
+				<td>SETNS</td>
+				<td>SETP</td>
+				<td>SETNP</td>
+				<td>SETL</td>
+				<td>SETNL</td>
+				<td>SETLE</td>
+				<td>SETNLE</td>
 			</tr>
 			<tr>
 				<th rowspan=2>Ax</th>
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 618545ab7..ba55df6fe 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -325,6 +325,9 @@ enum class Operation: uint8_t {
 	IRETD,
 	JECXZ,
 	LODSD,
+
+	SETO, SETNO,	SETB, SETNB,	SETZ, SETNZ,	SETBE, SETNBE,
+	SETS, SETNS,	SETP, SETNP,	SETL, SETNL,	SETLE, SETNLE,
 };
 
 enum class DataSize: uint8_t {

From 7313c89dec566e44f2fec365651be5d652b64229 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 7 Mar 2022 16:23:25 -0500
Subject: [PATCH 059/104] Add BT, BTS, BTR, BTC, BSF, BSR.

---
 InstructionSets/x86/Decoder.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index b0ca271cf..c79efe8c2 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -499,12 +499,12 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 
 			case 0xa0: RequiresMin(i80386);	Complete(PUSH, FS, None, data_size_);	break;
 			case 0xa1: RequiresMin(i80386);	Complete(POP, FS, None, data_size_);	break;
-			// TODO: 0xa3: BT Ev, Gv
+			case 0xa3: RequiresMin(i80386);	MemRegReg(BT, MemReg_Reg, data_size_);	break;
 			// TODO: 0xa4: SHLD EvGvIb
 			// TODO: 0xa5: SHLD EvGcCL
 			case 0xa8: RequiresMin(i80386);	Complete(PUSH, GS, None, data_size_);	break;
 			case 0xa9: RequiresMin(i80386);	Complete(POP, GS, None, data_size_);	break;
-			// TODO: 0xab: BTS Ev, Gv
+			case 0xab: RequiresMin(i80386);	MemRegReg(BTS, MemReg_Reg, data_size_);	break;
 			// TODO: 0xac: SHRD EvGvIb
 			// TODO: 0xad: SHRD EvGvCL
 			case 0xaf:
@@ -513,15 +513,15 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			break;
 
 			case 0xb2: RequiresMin(i80386);	MemRegReg(LSS, Reg_MemReg, data_size_);	break;
-			// TODO: 0xb3: BTR Ev, Gv
+			case 0xb3: RequiresMin(i80386);	MemRegReg(BTR, MemReg_Reg, data_size_);	break;
 			case 0xb4: RequiresMin(i80386);	MemRegReg(LFS, Reg_MemReg, data_size_);	break;
 			case 0xb5: RequiresMin(i80386);	MemRegReg(LGS, Reg_MemReg, data_size_);	break;
 			// TODO: 0xb6: MOVZX Gv, Eb
 			// TODO: 0xb7: MOVZX Gv, Ew
 			// TODO: 0xba: Grp8 Ev, Ib
-			// TODO: 0xbb: BTC Ev, Gv
-			// TODO: 0xbc: BSF Gv, Ev
-			// TODO: 0xbd: BSR Gv, Ev
+			case 0xbb: RequiresMin(i80386);	MemRegReg(BTC, MemReg_Reg, data_size_);	break;
+			case 0xbc: RequiresMin(i80386);	MemRegReg(BSF, MemReg_Reg, data_size_);	break;
+			case 0xbd: RequiresMin(i80386);	MemRegReg(BSR, MemReg_Reg, data_size_);	break;
 			// TODO: 0xbe: MOVSX Gv, Eb
 			// TODO: 0xbf: MOVSX Gv, Ew
 		}

From 7ea84d9a4e146d537708e45ef9863b3b8d27ede7 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 7 Mar 2022 16:25:44 -0500
Subject: [PATCH 060/104] Add MOVZX, MOVSX.

---
 InstructionSets/x86/Decoder.cpp | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index c79efe8c2..e16668563 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -516,14 +516,26 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0xb3: RequiresMin(i80386);	MemRegReg(BTR, MemReg_Reg, data_size_);	break;
 			case 0xb4: RequiresMin(i80386);	MemRegReg(LFS, Reg_MemReg, data_size_);	break;
 			case 0xb5: RequiresMin(i80386);	MemRegReg(LGS, Reg_MemReg, data_size_);	break;
-			// TODO: 0xb6: MOVZX Gv, Eb
-			// TODO: 0xb7: MOVZX Gv, Ew
+			case 0xb6:
+				RequiresMin(i80386);
+				MemRegReg(MOVZX, Reg_MemReg, DataSize::Byte);
+			break;
+			case 0xb7:
+				RequiresMin(i80386);
+				MemRegReg(MOVZX, Reg_MemReg, DataSize::Word);
+			break;
 			// TODO: 0xba: Grp8 Ev, Ib
 			case 0xbb: RequiresMin(i80386);	MemRegReg(BTC, MemReg_Reg, data_size_);	break;
 			case 0xbc: RequiresMin(i80386);	MemRegReg(BSF, MemReg_Reg, data_size_);	break;
 			case 0xbd: RequiresMin(i80386);	MemRegReg(BSR, MemReg_Reg, data_size_);	break;
-			// TODO: 0xbe: MOVSX Gv, Eb
-			// TODO: 0xbf: MOVSX Gv, Ew
+			case 0xbe:
+				RequiresMin(i80386);
+				MemRegReg(MOVSX, Reg_MemReg, DataSize::Byte);
+			break;
+			case 0xbf:
+				RequiresMin(i80386);
+				MemRegReg(MOVSX, Reg_MemReg, DataSize::Word);
+			break;
 		}
 	}
 

From 0e16e7935e1f0e79525d8d40f396b27a9f3d3bd6 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 7 Mar 2022 16:26:17 -0500
Subject: [PATCH 061/104] Correct double reference to Group 6.

---
 InstructionSets/x86/Documentation/80386 opcode map.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/InstructionSets/x86/Documentation/80386 opcode map.html b/InstructionSets/x86/Documentation/80386 opcode map.html
index 9b9b05252..f7e2481d8 100644
--- a/InstructionSets/x86/Documentation/80386 opcode map.html	
+++ b/InstructionSets/x86/Documentation/80386 opcode map.html	
@@ -901,7 +901,7 @@
 				<td></td>
 			</tr>
 			<tr>
-				<th>Group 6</th>
+				<th>Group 8</th>
 				
 				<td></td>
 				<td></td>

From 8e669a32a3f76069416a002d9f622d63552cfe70 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 7 Mar 2022 16:34:56 -0500
Subject: [PATCH 062/104] Take a stab at group 8.

---
 InstructionSets/x86/Decoder.cpp | 23 +++++++++++++++++++----
 InstructionSets/x86/Decoder.hpp |  3 ++-
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index e16668563..521f1c4dd 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -524,10 +524,10 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				RequiresMin(i80386);
 				MemRegReg(MOVZX, Reg_MemReg, DataSize::Word);
 			break;
-			// TODO: 0xba: Grp8 Ev, Ib
-			case 0xbb: RequiresMin(i80386);	MemRegReg(BTC, MemReg_Reg, data_size_);	break;
-			case 0xbc: RequiresMin(i80386);	MemRegReg(BSF, MemReg_Reg, data_size_);	break;
-			case 0xbd: RequiresMin(i80386);	MemRegReg(BSR, MemReg_Reg, data_size_);	break;
+			case 0xba: RequiresMin(i80386);	MemRegReg(Invalid, MemRegBT_to_BTC, data_size_);	break;
+			case 0xbb: RequiresMin(i80386);	MemRegReg(BTC, MemReg_Reg, data_size_);				break;
+			case 0xbc: RequiresMin(i80386);	MemRegReg(BSF, MemReg_Reg, data_size_);				break;
+			case 0xbd: RequiresMin(i80386);	MemRegReg(BSR, MemReg_Reg, data_size_);				break;
 			case 0xbe:
 				RequiresMin(i80386);
 				MemRegReg(MOVSX, Reg_MemReg, DataSize::Byte);
@@ -776,6 +776,21 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				}
 			break;
 
+			case ModRegRMFormat::MemRegBT_to_BTC:
+				destination_ = memreg;
+				source_ = Source::Immediate;
+				operand_size_ = DataSize::Byte;
+
+				switch(reg) {
+					default:	undefined();
+
+					case 4:		operation_ = Operation::BT;		break;
+					case 5:		operation_ = Operation::BTS;	break;
+					case 6:		operation_ = Operation::BTR;	break;
+					case 7:		operation_ = Operation::BTC;	break;
+				}
+			break;
+
 			default: assert(false);
 		}
 
diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index 6f0c4a6aa..6c837df2a 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -156,7 +156,8 @@ template <Model model> class Decoder {
 			//	'Group 8'
 			//
 
-			// TODO.
+			// Parse for mode and register/memory field, populating destination,
+			// and prepare to read a single byte as source.
 			MemRegBT_to_BTC,
 		} modregrm_format_ = ModRegRMFormat::MemReg_Reg;
 

From f0b4971c7b1a805f29814097e8dcfd14e18f0abe Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 7 Mar 2022 16:39:02 -0500
Subject: [PATCH 063/104] Correct SHLD format.

---
 InstructionSets/x86/Documentation/80386 opcode map.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/InstructionSets/x86/Documentation/80386 opcode map.html b/InstructionSets/x86/Documentation/80386 opcode map.html
index f7e2481d8..3517de47c 100644
--- a/InstructionSets/x86/Documentation/80386 opcode map.html	
+++ b/InstructionSets/x86/Documentation/80386 opcode map.html	
@@ -729,7 +729,7 @@
 				<td rowspan=2></td>
 				<td rowspan=2>BT Ev, Gv</td>
 				<td rowspan=2>SHLD EvGvIb</td>
-				<td rowspan=2>SHLD EvGcCL</td>
+				<td rowspan=2>SHLD EvGvCL</td>
 				<td rowspan=2></td>
 				<td rowspan=2></td>
 				<td rowspan=2>PUSH GS</td>

From 41a104cc10aaf574b7cf03e84fc47bf941bc95ca Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Mon, 7 Mar 2022 17:04:05 -0500
Subject: [PATCH 064/104] Adds special test/control/debug MOVs.

This'll do; it's not ideal but avoids bloating up the `Source` enum.
---
 InstructionSets/x86/Decoder.cpp               | 55 ++++++++++++++-----
 .../x86/Documentation/80386 opcode map.html   |  2 +-
 InstructionSets/x86/Instruction.hpp           | 14 +++--
 3 files changed, 51 insertions(+), 20 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 521f1c4dd..a316786d5 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -450,12 +450,30 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			break;
 			case 0x06:	Complete(CLTS, None, None, DataSize::Byte);			break;
 
-			// TODO: 0x20: MOV Cr, Rd
-			// TODO: 0x21: MOV Dd, Rd
-			// TODO: 0x22: MOV Rd, Cd
-			// TODO: 0x23: MOV Rd, Dd
-			// TODO: 0x24: MOV Td, Rd
-			// TODO: 0x26: MOV Rd, Td
+			case 0x20:
+				RequiresMin(i80386);
+				MemRegReg(MOVfromCr, Reg_MemReg, DataSize::DWord);
+			break;
+			case 0x21:
+				RequiresMin(i80386);
+				MemRegReg(MOVfromDr, Reg_MemReg, DataSize::DWord);
+			break;
+			case 0x22:
+				RequiresMin(i80386);
+				MemRegReg(MOVtoCr, Reg_MemReg, DataSize::DWord);
+			break;
+			case 0x23:
+				RequiresMin(i80386);
+				MemRegReg(MOVtoDr, Reg_MemReg, DataSize::DWord);
+			break;
+			case 0x24:
+				RequiresMin(i80386);
+				MemRegReg(MOVfromTr, Reg_MemReg, DataSize::DWord);
+			break;
+			case 0x26:
+				RequiresMin(i80386);
+				MemRegReg(MOVtoTr, Reg_MemReg, DataSize::DWord);
+			break;
 
 			case 0x70: RequiresMin(i80386);	Displacement(JO, data_size_);	break;
 			case 0x71: RequiresMin(i80386);	Displacement(JNO, data_size_);	break;
@@ -500,13 +518,27 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0xa0: RequiresMin(i80386);	Complete(PUSH, FS, None, data_size_);	break;
 			case 0xa1: RequiresMin(i80386);	Complete(POP, FS, None, data_size_);	break;
 			case 0xa3: RequiresMin(i80386);	MemRegReg(BT, MemReg_Reg, data_size_);	break;
-			// TODO: 0xa4: SHLD EvGvIb
-			// TODO: 0xa5: SHLD EvGcCL
+			case 0xa4:
+				RequiresMin(i80386);
+				MemRegReg(SHLDimm, Reg_MemReg, data_size_);
+				operand_size_ = DataSize::Byte;
+			break;
+			case 0xa5:
+				RequiresMin(i80386);
+				MemRegReg(SHLDCL, MemReg_Reg, data_size_);
+			break;
 			case 0xa8: RequiresMin(i80386);	Complete(PUSH, GS, None, data_size_);	break;
 			case 0xa9: RequiresMin(i80386);	Complete(POP, GS, None, data_size_);	break;
 			case 0xab: RequiresMin(i80386);	MemRegReg(BTS, MemReg_Reg, data_size_);	break;
-			// TODO: 0xac: SHRD EvGvIb
-			// TODO: 0xad: SHRD EvGvCL
+			case 0xac:
+				RequiresMin(i80386);
+				MemRegReg(SHRDimm, Reg_MemReg, data_size_);
+				operand_size_ = DataSize::Byte;
+			break;
+			case 0xad:
+				RequiresMin(i80386);
+				MemRegReg(SHRDCL, MemReg_Reg, data_size_);
+			break;
 			case 0xaf:
 				RequiresMin(i80386);
 				MemRegReg(IMUL_2, Reg_MemReg, data_size_);
@@ -846,9 +878,6 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 					case DataSize::DWord:	operand_ = decltype(operand_)(int32_t(inward_data_));	break;
 				}
 			}
-
-			// TODO: split differently for far jumps/etc. But that information is
-			// no longer retained now that it's not implied by a DWord-sized operand.
 		} else {
 			// Provide a genuine measure of further bytes required.
 			return std::make_pair(-(outstanding_bytes - bytes_to_consume), InstructionT());
diff --git a/InstructionSets/x86/Documentation/80386 opcode map.html b/InstructionSets/x86/Documentation/80386 opcode map.html
index 3517de47c..ce5c62204 100644
--- a/InstructionSets/x86/Documentation/80386 opcode map.html	
+++ b/InstructionSets/x86/Documentation/80386 opcode map.html	
@@ -651,7 +651,7 @@
 			<tr>
 				<th rowspan=2>2x</th>
 				
-				<td rowspan=2>MOV Cr, Rd</td>
+				<td rowspan=2>MOV Cd, Rd</td>
 				<td rowspan=2>MOV Dd, Rd</td>
 				<td rowspan=2>MOV Rd, Cd</td>
 				<td rowspan=2>MOV Rd, Dd</td>
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index ba55df6fe..702341fa3 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -283,9 +283,11 @@ enum class Operation: uint8_t {
 	LSS,
 
 	/// Shift left double.
-	SHLD,
+	SHLDimm,
+	SHLDCL,
 	/// Shift right double.
-	SHRD,
+	SHRDimm,
+	SHRDCL,
 
 	/// Bit scan forwards.
 	BSF,
@@ -322,12 +324,12 @@ enum class Operation: uint8_t {
 	/// Two-operand form of IMUL; multiply the source by the destination and write to the destination.
 	IMUL_2,
 
-	IRETD,
-	JECXZ,
-	LODSD,
-
 	SETO, SETNO,	SETB, SETNB,	SETZ, SETNZ,	SETBE, SETNBE,
 	SETS, SETNS,	SETP, SETNP,	SETL, SETNL,	SETLE, SETNLE,
+
+	MOVtoCr, MOVfromCr,
+	MOVtoDr, MOVfromDr,
+	MOVtoTr, MOVfromTr,
 };
 
 enum class DataSize: uint8_t {

From a954f236429abc6a22b16bf72a54e44e80242dbb Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Tue, 8 Mar 2022 14:39:49 -0500
Subject: [PATCH 065/104] Attempt 32-bit modregrm + SIB parsing.

---
 InstructionSets/x86/Decoder.cpp               | 68 ++++++++++++-------
 InstructionSets/x86/Decoder.hpp               |  1 +
 .../Mac/Clock SignalTests/x86DecoderTests.mm  | 18 +++--
 3 files changed, 55 insertions(+), 32 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index a316786d5..432ec8507 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -591,13 +591,12 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 		const uint8_t mod = *source >> 6;		// i.e. mode.
 		const uint8_t reg = (*source >> 3) & 7;	// i.e. register.
 		const uint8_t rm = *source & 7;			// i.e. register/memory.
+		bool expects_sib = false;
 		++source;
 		++consumed_;
 
 		Source memreg;
 
-		// TODO: the below currently has no way to segue into fetching a SIB.
-
 		// TODO: can I just eliminate these lookup tables given the deliberate ordering within Source?
 		constexpr Source reg_table[8] = {
 			Source::eAX,		Source::eCX,		Source::eDX,		Source::eBX,
@@ -606,13 +605,39 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 		constexpr Source seg_table[6] = {
 			Source::ES,	Source::CS,	Source::SS,	Source::DS,	Source::FS,	Source::GS
 		};
-		switch(mod) {
-			default: {
-				const DataSize sizes[] = {DataSize::Byte, data_size_};
-				displacement_size_ = sizes[mod == 2];
+
+		// Mode 3 is the same regardless of 16/32-bit mode. So deal with that up front.
+		if(mod == 3) {
+			// Other operand is just a register.
+			memreg = reg_table[rm];
+
+			// LES, LDS, etc accept a memory argument only, not a register.
+			if(
+				operation_ == Operation::LES ||
+				operation_ == Operation::LDS ||
+				operation_ == Operation::LGS ||
+				operation_ == Operation::LSS ||
+				operation_ == Operation::LFS) {
+				undefined();
 			}
-				[[fallthrough]];
-			case 0: {
+		} else {
+			const DataSize sizes[] = {
+				DataSize::None,
+				DataSize::Byte,
+				address_size_ == AddressSize::b16 ? DataSize::Word : DataSize::DWord
+			};
+			displacement_size_ = sizes[mod];
+			memreg = Source::Indirect;
+
+			if(allow_sib_) {
+				// 32-bit decoding: the range of potential indirections is expanded,
+				// and may segue into obtaining a SIB.
+				sib_ = ScaleIndexBase(0, Source::None, reg_table[rm]);
+				expects_sib = rm == 4;	// Indirect via eSP isn't directly supported; it's the
+										// escape indicator for reading a SIB.
+			} else {
+				// Classic 16-bit decoding: mode picks a displacement size,
+				// and a few fixed index+base pairs are defined.
 				constexpr ScaleIndexBase rm_table[8] = {
 					ScaleIndexBase(0, Source::eBX, Source::eSI),
 					ScaleIndexBase(0, Source::eBX, Source::eDI),
@@ -624,24 +649,8 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 					ScaleIndexBase(0, Source::None, Source::eBX),
 				};
 
-				memreg = Source::Indirect;
 				sib_ = rm_table[rm];
-			} break;
-
-			// Other operand is just a register.
-			case 3:
-				memreg = reg_table[rm];
-
-				// LES, LDS, etc accept a memory argument only, not a register.
-				if(
-					operation_ == Operation::LES ||
-					operation_ == Operation::LDS ||
-					operation_ == Operation::LGS ||
-					operation_ == Operation::LSS ||
-					operation_ == Operation::LFS) {
-					undefined();
-				}
-			break;
+			}
 		}
 
 		switch(modregrm_format_) {
@@ -826,7 +835,11 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			default: assert(false);
 		}
 
-		phase_ = (displacement_size_ != DataSize::None || operand_size_ != DataSize::None) ? Phase::DisplacementOrOperand : Phase::ReadyToPost;
+		if(expects_sib && (source_ == Source::Indirect | destination_ == Source::Indirect)) {
+			phase_ = Phase::ScaleIndexBase;
+		} else {
+			phase_ = (displacement_size_ != DataSize::None || operand_size_ != DataSize::None) ? Phase::DisplacementOrOperand : Phase::ReadyToPost;
+		}
 	}
 
 #undef undefined
@@ -837,6 +850,8 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 		sib_ = *source;
 		++source;
 		++consumed_;
+
+		phase_ = (displacement_size_ != DataSize::None || operand_size_ != DataSize::None) ? Phase::DisplacementOrOperand : Phase::ReadyToPost;
 	}
 
 	// MARK: - Displacement and operand.
@@ -916,6 +931,7 @@ template <Model model> void Decoder<model>::set_32bit_protected_mode(bool enable
 		return;
 	}
 
+	allow_sib_ = enabled;
 	if(enabled) {
 		default_address_size_ = address_size_ = AddressSize::b32;
 		default_data_size_ = data_size_ = DataSize::DWord;
diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index 6c837df2a..7c411423e 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -196,6 +196,7 @@ template <Model model> class Decoder {
 		DataSize default_data_size_ = DataSize::Word;
 		AddressSize address_size_ = AddressSize::b16;
 		DataSize data_size_ = DataSize::Word;
+		bool allow_sib_ = false;
 
 		/// Resets size capture and all fields with default values.
 		void reset_parsing() {
diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index 1ef7c754f..206070194 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -309,15 +309,21 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 - (void)testLDSLESEtc {
 	auto run_test = [](bool is_32, DataSize size) {
 		const auto instructions = decode<Model::i80386>({
-			0xc5, 0x33,			// lds (%bp, %di), %si
-			0xc4, 0x17,			// les (%bx), %dx
-			0x0f, 0xb2, 0x17,	// lss edx, (edi)
+			0xc5, 0x33,			// 16-bit: lds si, (bp, di);	32-bit: lds esi, (ebx)
+			0xc4, 0x17,			// 16-bit: les dx, (bx);		32-bit: les edx, (edi)
+			0x0f, 0xb2, 0x17,	// 16-bit: lss dx, (bx);		32-bit: lss edx, (edi)
 		}, is_32);
 
 		XCTAssertEqual(instructions.size(), 3);
-		test(instructions[0], size, Operation::LDS, ScaleIndexBase(Source::eBP, Source::eDI), Source::eSI);
-		test(instructions[1], size, Operation::LES, ScaleIndexBase(Source::eBX), Source::eDX);
-		test(instructions[2], size, Operation::LSS, ScaleIndexBase(Source::eBX), Source::eDX);
+		if(is_32) {
+			test(instructions[0], size, Operation::LDS, ScaleIndexBase(Source::eBX), Source::eSI);
+			test(instructions[1], size, Operation::LES, ScaleIndexBase(Source::eDI), Source::eDX);
+			test(instructions[2], size, Operation::LSS, ScaleIndexBase(Source::eDI), Source::eDX);
+		} else {
+			test(instructions[0], size, Operation::LDS, ScaleIndexBase(Source::eBP, Source::eDI), Source::eSI);
+			test(instructions[1], size, Operation::LES, ScaleIndexBase(Source::eBX), Source::eDX);
+			test(instructions[2], size, Operation::LSS, ScaleIndexBase(Source::eBX), Source::eDX);
+		}
 	};
 
 	run_test(false, DataSize::Word);

From 0cbb481fa41c47f1bec95fe96fb734ec11e38ae1 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Tue, 8 Mar 2022 14:56:27 -0500
Subject: [PATCH 066/104] Add a formal SIB test.

---
 OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index 206070194..4cc5bdbbe 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -330,4 +330,16 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	run_test(true, DataSize::DWord);
 }
 
+- (void)testSIB {
+	const auto instructions = decode<Model::i80386>({
+		// add edx, -0x7d(ebp + eax*2)
+		0x01, 0x54, 0x45, 0x83
+	}, true);
+
+	XCTAssertEqual(instructions.size(), 1);
+	test(instructions[0], DataSize::DWord, Operation::ADD, Source::eDX, ScaleIndexBase(1, Source::eAX, Source::eBP), 0x00, -125);
+	// Noting that a multiplier of 2 is a scale of 1,
+	// since the scale is in log2.
+}
+
 @end

From 926a37359107e4146f92be485e771934304b9866 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Tue, 8 Mar 2022 15:03:37 -0500
Subject: [PATCH 067/104] Extend SIB test, correct decoder.

---
 InstructionSets/x86/Decoder.cpp                     |  3 +--
 InstructionSets/x86/Decoder.hpp                     |  1 -
 OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm | 11 +++++++----
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 432ec8507..19b75c4b7 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -629,7 +629,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			displacement_size_ = sizes[mod];
 			memreg = Source::Indirect;
 
-			if(allow_sib_) {
+			if(address_size_ == AddressSize::b32) {
 				// 32-bit decoding: the range of potential indirections is expanded,
 				// and may segue into obtaining a SIB.
 				sib_ = ScaleIndexBase(0, Source::None, reg_table[rm]);
@@ -931,7 +931,6 @@ template <Model model> void Decoder<model>::set_32bit_protected_mode(bool enable
 		return;
 	}
 
-	allow_sib_ = enabled;
 	if(enabled) {
 		default_address_size_ = address_size_ = AddressSize::b32;
 		default_data_size_ = data_size_ = DataSize::DWord;
diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index 7c411423e..6c837df2a 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -196,7 +196,6 @@ template <Model model> class Decoder {
 		DataSize default_data_size_ = DataSize::Word;
 		AddressSize address_size_ = AddressSize::b16;
 		DataSize data_size_ = DataSize::Word;
-		bool allow_sib_ = false;
 
 		/// Resets size capture and all fields with default values.
 		void reset_parsing() {
diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index 4cc5bdbbe..78f0689f1 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -333,13 +333,16 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 - (void)testSIB {
 	const auto instructions = decode<Model::i80386>({
 		// add edx, -0x7d(ebp + eax*2)
-		0x01, 0x54, 0x45, 0x83
+		0x01, 0x54, 0x45, 0x83,
+
+		// add edx, -0x80(si)
+		0x67, 0x01, 0x54, 0x80,
 	}, true);
 
-	XCTAssertEqual(instructions.size(), 1);
+	XCTAssertEqual(instructions.size(), 2);
 	test(instructions[0], DataSize::DWord, Operation::ADD, Source::eDX, ScaleIndexBase(1, Source::eAX, Source::eBP), 0x00, -125);
-	// Noting that a multiplier of 2 is a scale of 1,
-	// since the scale is in log2.
+	test(instructions[1], DataSize::DWord, Operation::ADD, Source::eDX, ScaleIndexBase(Source::eSI), 0x00, -128);
+	XCTAssertEqual(instructions[1].address_size(), AddressSize::b16);
 }
 
 @end

From 21d4838322acc887b59a4764792fe0294a6e6c66 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Tue, 8 Mar 2022 17:08:21 -0500
Subject: [PATCH 068/104] Fix current implementation of `data_segment`.

As far as it goes.
---
 InstructionSets/x86/Instruction.hpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 702341fa3..4af89c411 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -611,7 +611,10 @@ template<bool is_32bit> class Instruction {
 			return AddressSize(address_size_);
 		}
 		Source data_segment() const {
-			const auto segment_override = Source((sources_ >> 12) & 7);
+			const auto segment_override = Source(
+				int(Source::ES) +
+				((sources_ >> 12) & 7)
+			);
 			if(segment_override != Source::None) return segment_override;
 
 			// TODO: default source should be SS for anything touching the stack.
@@ -646,7 +649,7 @@ template<bool is_32bit> class Instruction {
 				sources_(uint16_t(
 					int(source) |
 					(int(destination) << 6) |
-					(int(segment_override) << 12) |
+					((int(segment_override) & 7) << 12) |
 					(int(lock) << 15)
 				)),
 				displacement_(displacement),

From ebed4cd728e69ce627440178617e16225457a085 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Tue, 8 Mar 2022 19:57:10 -0500
Subject: [PATCH 069/104] Introduce failing 32-bit parsing test.

---
 .../Mac/Clock SignalTests/x86DecoderTests.mm  | 91 +++++++++++++++++++
 1 file changed, 91 insertions(+)

diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index 78f0689f1..3a8375356 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -345,4 +345,95 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	XCTAssertEqual(instructions[1].address_size(), AddressSize::b16);
 }
 
+- (void)test32bitSequence {
+	const auto instructions = decode<Model::i80386>({
+		0x2e, 0x42, 0x0c, 0x09, 0x81, 0x47, 0xbe, 0xa9, 0x3a, 0x68, 0x9f, 0xf0, 0x7a, 0xe2, 0x3e, 0xb4,
+		0xc1, 0x1f, 0xaa, 0x60, 0xb4, 0xe1, 0x91, 0xdc, 0xf6, 0x62, 0x90, 0x90, 0xdf, 0xcd, 0xf9, 0x0f,
+		0xbb, 0x71, 0x4b, 0x58, 0x55, 0x38, 0x2c, 0xf9, 0x50, 0xfe, 0xce, 0xe0, 0xc1, 0xda, 0x83, 0x8c,
+		0x19, 0x0c, 0x9b, 0x89, 0x13, 0x34, 0x45, 0xc5, 0x11, 0xa2, 0xd3, 0xa6, 0xdb, 0xe4, 0x1f, 0xa5,
+		0x79, 0xf3, 0x7d, 0x1c, 0xb8, 0xda, 0x6b, 0x76, 0x8a, 0x79, 0x28, 0x52, 0xcd, 0xc4, 0xe9, 0xba,
+		0x11, 0xcf, 0x29, 0x09, 0x46, 0x1a, 0xc0, 0x5d, 0x88, 0x34, 0xa5, 0x83, 0xe2, 0xd0, 0xf5, 0x44,
+		0x9d, 0xa5, 0xc1, 0x5e, 0x4f, 0x07, 0x51, 0xd4, 0xed, 0xb0, 0x69, 0xd7, 0x00, 0xc5, 0x51, 0xfb,
+		0x68, 0x85, 0x3a, 0x8b, 0x69, 0x28, 0x0c, 0xec, 0xb1, 0xb7, 0x3b, 0x8d, 0x5f, 0x44, 0x87, 0x2c,
+		0xe3, 0x02, 0x9e, 0x74, 0x6e, 0x1b, 0x8f, 0x4d, 0xc5, 0x33, 0x04, 0x9f, 0xac, 0xc0, 0xc9, 0x60,
+		0x9a, 0x8a, 0xf5, 0xd0, 0x97, 0x1b, 0xe2, 0x64, 0x60, 0xb0, 0xcf, 0xe3, 0x37,
+	}, true);
+
+	XCTAssertEqual(instructions.size(), 64);
+
+	//cs inc edx
+	//or     al,0x9
+	//add    DWORD PTR [edi-0x42],0x9f683aa9
+	//lock jp 0xfffffff0	(from 0000000e)
+	test(instructions[0], DataSize::DWord, Operation::INC, Source::eDX);
+	XCTAssertEqual(instructions[0].data_segment(), Source::CS);
+	test(instructions[1], DataSize::Byte, Operation::OR, Source::Immediate, Source::eAX, 0x9);
+	test(instructions[2], DataSize::DWord, Operation::ADD, Source::Immediate, ScaleIndexBase(Source::eDI), 0x9f683aa9, -0x42);
+	test(instructions[3], Operation::JP, 0, -30);
+	XCTAssert(instructions[3].lock());
+
+	//ds mov ah,0xc1
+	//pop    ds
+	//stos   BYTE PTR es:[edi],al
+	//pusha
+	//mov    ah,0xe1
+	//xchg   ecx,eax
+	//fdivr  st(6),st
+	//bound  edx,QWORD PTR [eax-0x6322070]
+	//btc    DWORD PTR [ecx+0x4b],esi
+	//pop    eax
+	//push   ebp
+	//cmp    BYTE PTR [ecx+edi*8],ch
+	//push   eax
+	//dec    dh
+	//loopne 0xffffffee
+	//fiadd  DWORD PTR [ebx-0x64f3e674]
+	//mov    DWORD PTR [ebx],edx
+	//xor    al,0x45
+	//lds    edx,FWORD PTR [ecx]
+
+	// Note to self: disassembly currently diverges at or immediately after this MOV:
+	//mov    ds:0xe4dba6d3,al
+	//pop    ds
+	//movs   DWORD PTR es:[edi],DWORD PTR ds:[esi]
+	//jns    0x00000035
+	//jge    0x00000060
+	//mov    eax,0x8a766bda
+	//jns    0x00000073
+	//push   edx
+	//int    0xc4
+	//jmp    0x29cf120d
+	//or     DWORD PTR [esi+0x1a],eax
+	//rcr    BYTE PTR [ebp-0x78],0x34
+	//movs   DWORD PTR es:[edi],DWORD PTR ds:[esi]
+	//and    edx,0xffffffd0
+	//cmc
+	//inc    esp
+	//popf
+	//movs   DWORD PTR es:[edi],DWORD PTR ds:[esi]
+	//rcr    DWORD PTR [esi+0x4f],0x7
+	//push   ecx
+	//aam    0xed
+	//mov    al,0x69
+	//xlat   BYTE PTR ds:[ebx]
+	//add    ch,al
+	//push   ecx
+	//sti
+	//push   0x698b3a85
+	//sub    BYTE PTR [esp+ebp*8],cl
+	//mov    cl,0xb7
+	//cmp    ecx,DWORD PTR [ebp+0x2c87445f]
+	//jecxz  0x00000084
+	//sahf
+	//je     0x000000f3
+	//sbb    ecx,DWORD PTR [edi+0x433c54d]
+	//lahf
+	//lods   al,BYTE PTR ds:[esi]
+	//ror    cl,0x60
+	//call   0xe21b:0x97d0f58a
+	//fs pusha
+	//mov    al,0xcf
+	//jecxz  0x000000d4
+}
+
 @end

From a125bc724287cc32e89b1efcb6b55a1101403438 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Tue, 8 Mar 2022 20:16:19 -0500
Subject: [PATCH 070/104] Fill in more of `test32bitSequence`.

---
 .../Mac/Clock SignalTests/x86DecoderTests.mm  | 25 ++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index 3a8375356..1333761fe 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -376,18 +376,41 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	//pop    ds
 	//stos   BYTE PTR es:[edi],al
 	//pusha
+	test(instructions[4], DataSize::Byte, Operation::MOV, Source::Immediate, Source::AH, 0xc1);
+	XCTAssertEqual(instructions[4].data_segment(), Source::DS);
+	test(instructions[5], DataSize::Word, Operation::POP, Source::None, Source::DS);
+	test(instructions[6], DataSize::Byte, Operation::STOS);
+	test(instructions[7], Operation::PUSHA);
+
 	//mov    ah,0xe1
 	//xchg   ecx,eax
 	//fdivr  st(6),st
 	//bound  edx,QWORD PTR [eax-0x6322070]
+	test(instructions[8], DataSize::Byte, Operation::MOV, Source::Immediate, Source::AH, 0xe1);
+	test(instructions[9], DataSize::DWord, Operation::XCHG, Source::eAX, Source::eCX);
+	test(instructions[10], DataSize::None, Operation::ESC);
+	test(instructions[11], DataSize::DWord, Operation::BOUND, ScaleIndexBase(Source::eAX), Source::eDX, 0, -0x6322070);
+
 	//btc    DWORD PTR [ecx+0x4b],esi
 	//pop    eax
 	//push   ebp
 	//cmp    BYTE PTR [ecx+edi*8],ch
+	test(instructions[12], DataSize::DWord, Operation::BTC, Source::eSI, ScaleIndexBase(Source::eCX), 0, 0x4b);
+	test(instructions[13], DataSize::DWord, Operation::POP, Source::eAX, Source::eAX);
+	test(instructions[14], DataSize::DWord, Operation::PUSH, Source::eBP);
+	test(instructions[15], DataSize::Byte, Operation::CMP, Source::CH, ScaleIndexBase(3, Source::eDI, Source::eCX));
+
+	// Possibly TODO: pick a lane on whether PUSH/POP duplicate source and destination.
+	// It doesn't really matter outside of these tests though.
+
 	//push   eax
 	//dec    dh
-	//loopne 0xffffffee
+	//loopne 0xffffffee (from 0x2d)
 	//fiadd  DWORD PTR [ebx-0x64f3e674]
+	test(instructions[16], DataSize::DWord, Operation::PUSH, Source::eAX);
+	test(instructions[17], DataSize::Byte, Operation::DEC, Source::DH);
+	test(instructions[18], Operation::LOOPNE, 0, -18);
+
 	//mov    DWORD PTR [ebx],edx
 	//xor    al,0x45
 	//lds    edx,FWORD PTR [ecx]

From de79acc790bfd03f75a1eab0879f541dc5300de5 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Wed, 9 Mar 2022 08:38:34 -0500
Subject: [PATCH 071/104] Fix RegAddr/AddrReg and group 2 decoding.

---
 InstructionSets/x86/Decoder.cpp               | 32 ++++++++---------
 InstructionSets/x86/Instruction.hpp           |  4 +++
 .../Mac/Clock SignalTests/x86DecoderTests.mm  | 35 ++++++++++++++-----
 3 files changed, 47 insertions(+), 24 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 19b75c4b7..3a18a11e5 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -42,13 +42,13 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 /// Handles instructions of the form Ax, jjkk where the latter is implicitly an address.
 #define RegAddr(op, dest, op_size, addr_size)			\
 	SetOpSrcDestSize(op, DirectAddress, dest, op_size);	\
-	operand_size_ = addr_size;							\
+	operand_size_ = data_size(addr_size);				\
 	phase_ = Phase::DisplacementOrOperand
 
 /// Handles instructions of the form jjkk, Ax where the former is implicitly an address.
 #define AddrReg(op, source, op_size, addr_size)				\
 	SetOpSrcDestSize(op, source, DirectAddress, op_size);	\
-	operand_size_ = addr_size;								\
+	operand_size_ = data_size(addr_size);					\
 	destination_ = Source::DirectAddress;					\
 	phase_ = Phase::DisplacementOrOperand
 
@@ -298,10 +298,10 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0x9e: Complete(SAHF, None, None, DataSize::Byte);	break;
 			case 0x9f: Complete(LAHF, None, None, DataSize::Byte);	break;
 
-			case 0xa0: RegAddr(MOV, eAX, DataSize::Byte, DataSize::Byte);	break;
-			case 0xa1: RegAddr(MOV, eAX, data_size_, data_size_);			break;
-			case 0xa2: AddrReg(MOV, eAX, DataSize::Byte, DataSize::Byte);	break;
-			case 0xa3: AddrReg(MOV, eAX, data_size_, data_size_);			break;
+			case 0xa0: RegAddr(MOV, eAX, DataSize::Byte, address_size_);	break;
+			case 0xa1: RegAddr(MOV, eAX, data_size_, address_size_);		break;
+			case 0xa2: AddrReg(MOV, eAX, DataSize::Byte, address_size_);	break;
+			case 0xa3: AddrReg(MOV, eAX, data_size_, address_size_);		break;
 
 			case 0xa4: Complete(MOVS, None, None, DataSize::Byte);	break;
 			case 0xa5: Complete(MOVS, None, None, data_size_);		break;
@@ -391,10 +391,10 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0xe2: Displacement(LOOP, DataSize::Byte);		break;
 			case 0xe3: Displacement(JPCX, DataSize::Byte);		break;
 
-			case 0xe4: RegAddr(IN, eAX, DataSize::Byte, DataSize::Byte);	break;
-			case 0xe5: RegAddr(IN, eAX, data_size_, DataSize::Byte);		break;
-			case 0xe6: AddrReg(OUT, eAX, DataSize::Byte, DataSize::Byte);	break;
-			case 0xe7: AddrReg(OUT, eAX, data_size_, DataSize::Byte);		break;
+			case 0xe4: RegAddr(IN, eAX, DataSize::Byte, address_size_);		break;
+			case 0xe5: RegAddr(IN, eAX, data_size_, address_size_);			break;
+			case 0xe6: AddrReg(OUT, eAX, DataSize::Byte, address_size_);	break;
+			case 0xe7: AddrReg(OUT, eAX, data_size_, address_size_);		break;
 
 			case 0xe8: RegData(CALLD, None, data_size_);	break;
 			case 0xe9: RegData(JMPN, None, data_size_);		break;
@@ -624,7 +624,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			const DataSize sizes[] = {
 				DataSize::None,
 				DataSize::Byte,
-				address_size_ == AddressSize::b16 ? DataSize::Word : DataSize::DWord
+				data_size(address_size_)
 			};
 			displacement_size_ = sizes[mod];
 			memreg = Source::Indirect;
@@ -712,11 +712,11 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 					default: 	undefined();
 
 					case 0: 	operation_ = Operation::ROL;	break;
-					case 2: 	operation_ = Operation::ROR;	break;
-					case 3: 	operation_ = Operation::RCL;	break;
-					case 4: 	operation_ = Operation::RCR;	break;
-					case 5: 	operation_ = Operation::SAL;	break;
-					case 6: 	operation_ = Operation::SHR;	break;
+					case 1: 	operation_ = Operation::ROR;	break;
+					case 2: 	operation_ = Operation::RCL;	break;
+					case 3: 	operation_ = Operation::RCR;	break;
+					case 4: 	operation_ = Operation::SAL;	break;
+					case 5: 	operation_ = Operation::SHR;	break;
 					case 7: 	operation_ = Operation::SAR;	break;
 				}
 			break;
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 4af89c411..d5afd1ec9 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -352,6 +352,10 @@ enum class AddressSize: uint8_t {
 	b32 = 1,
 };
 
+constexpr DataSize data_size(AddressSize size) {
+	return DataSize(int(size) + 1);
+}
+
 constexpr int byte_size(AddressSize size) {
 	return 2 << int(size);
 }
diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index 1333761fe..f89bdb0fa 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -105,7 +105,7 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 */
 @implementation x86DecoderTests
 
-- (void)testSequence1 {
+- (void)test16BitSequence {
 	// Sequences the Online Disassembler believes to exist but The 8086 Book does not:
 	//
 	// 0x6a 0x65	push $65
@@ -220,7 +220,7 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	// dec		%bp
 	// jbe		0xffffffcc
 	// inc		%sp
-	test(instructions[34], DataSize::Word, Operation::POP, Source::eAX);
+	test(instructions[34], DataSize::Word, Operation::POP, Source::eAX, Source::eAX);
 	test(instructions[35], DataSize::Word, Operation::DEC, Source::eBP, Source::eBP);
 	test(instructions[36], Operation::JBE, std::nullopt, 0xff80);
 	test(instructions[37], DataSize::Word, Operation::INC, Source::eSP, Source::eSP);
@@ -409,31 +409,50 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	//fiadd  DWORD PTR [ebx-0x64f3e674]
 	test(instructions[16], DataSize::DWord, Operation::PUSH, Source::eAX);
 	test(instructions[17], DataSize::Byte, Operation::DEC, Source::DH);
-	test(instructions[18], Operation::LOOPNE, 0, -18);
+	test(instructions[18], Operation::LOOPNE, 0, -63);
+	test(instructions[19], Operation::ESC);
 
 	//mov    DWORD PTR [ebx],edx
 	//xor    al,0x45
 	//lds    edx,FWORD PTR [ecx]
-
-	// Note to self: disassembly currently diverges at or immediately after this MOV:
 	//mov    ds:0xe4dba6d3,al
+	test(instructions[20], DataSize::DWord, Operation::MOV, Source::eDX, ScaleIndexBase(Source::eBX));
+	test(instructions[21], DataSize::Byte, Operation::XOR, Source::Immediate, Source::eAX, 0x45);
+	test(instructions[22], DataSize::DWord, Operation::LDS, ScaleIndexBase(Source::eCX), Source::eDX);
+	test(instructions[23], DataSize::Byte, Operation::MOV, Source::eAX, Source::DirectAddress, 0xe4dba6d3);
+	XCTAssertEqual(instructions[23].data_segment(), Source::DS);
+
 	//pop    ds
 	//movs   DWORD PTR es:[edi],DWORD PTR ds:[esi]
-	//jns    0x00000035
-	//jge    0x00000060
+	//jns    0x00000035 (from 0x42)
+	//jge    0x00000060 (from 0x44)
+	test(instructions[24], DataSize::Word, Operation::POP, Source::DS, Source::DS);
+	test(instructions[25], DataSize::DWord, Operation::MOVS);
+	test(instructions[26], Operation::JNS, 0, -0xd);
+	test(instructions[27], Operation::JNL, 0, 0x1c);
+
 	//mov    eax,0x8a766bda
-	//jns    0x00000073
+	//jns    0x00000073 (from 0x4b)
 	//push   edx
 	//int    0xc4
+	test(instructions[28], DataSize::DWord, Operation::MOV, Source::Immediate, Source::eAX, 0x8a766bda);
+	test(instructions[29], Operation::JNS, 0, 0x28);
+	test(instructions[30], DataSize::DWord, Operation::PUSH, Source::eDX);
+	test(instructions[31], Operation::INT, 0xc4);
+
 	//jmp    0x29cf120d
 	//or     DWORD PTR [esi+0x1a],eax
 	//rcr    BYTE PTR [ebp-0x78],0x34
 	//movs   DWORD PTR es:[edi],DWORD PTR ds:[esi]
+	test(instructions[32], Operation::JMPN, 0x29cf120d);
+	test(instructions[33], Operation::OR, DataSize::DWord, Source::eAX, ScaleIndexBase(Source::eSI), 0, 0x1a);
+
 	//and    edx,0xffffffd0
 	//cmc
 	//inc    esp
 	//popf
 	//movs   DWORD PTR es:[edi],DWORD PTR ds:[esi]
+	// Note to self: divergance at or just after here.
 	//rcr    DWORD PTR [esi+0x4f],0x7
 	//push   ecx
 	//aam    0xed

From 081a2acd61381a1baa05d5521a62a2717c7111b7 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Wed, 9 Mar 2022 09:33:25 -0500
Subject: [PATCH 072/104] Fix shift group operand size.

---
 InstructionSets/x86/Decoder.cpp               |  2 +-
 .../Mac/Clock SignalTests/x86DecoderTests.mm  | 20 ++++++++++++++-----
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 3a18a11e5..ade1820c0 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -337,7 +337,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				RequiresMin(i80186);
 				ShiftGroup();
 				source_ = Source::Immediate;
-				operand_size_ = operation_size_;
+				operand_size_ = DataSize::Byte;
 			break;
 			case 0xc2: RegData(RETN, None, data_size_);				break;
 			case 0xc3: Complete(RETN, None, None, DataSize::None);	break;
diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index f89bdb0fa..6f2698ae9 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -426,7 +426,7 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	//movs   DWORD PTR es:[edi],DWORD PTR ds:[esi]
 	//jns    0x00000035 (from 0x42)
 	//jge    0x00000060 (from 0x44)
-	test(instructions[24], DataSize::Word, Operation::POP, Source::DS, Source::DS);
+	test(instructions[24], DataSize::Word, Operation::POP, Source::None, Source::DS);
 	test(instructions[25], DataSize::DWord, Operation::MOVS);
 	test(instructions[26], Operation::JNS, 0, -0xd);
 	test(instructions[27], Operation::JNL, 0, 0x1c);
@@ -440,22 +440,32 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	test(instructions[30], DataSize::DWord, Operation::PUSH, Source::eDX);
 	test(instructions[31], Operation::INT, 0xc4);
 
-	//jmp    0x29cf120d
+	//jmp    0x29cf120d (from 0x53)
 	//or     DWORD PTR [esi+0x1a],eax
 	//rcr    BYTE PTR [ebp-0x78],0x34
 	//movs   DWORD PTR es:[edi],DWORD PTR ds:[esi]
-	test(instructions[32], Operation::JMPN, 0x29cf120d);
-	test(instructions[33], Operation::OR, DataSize::DWord, Source::eAX, ScaleIndexBase(Source::eSI), 0, 0x1a);
+	test(instructions[32], Operation::JMPN, 0x29cf120d - 0x53);
+//	XCTAssertEqual(instructions[32].source(), Source::None);
+	test(instructions[33], DataSize::DWord, Operation::OR, Source::eAX, ScaleIndexBase(Source::eSI), 0, 0x1a);
+	test(instructions[34], DataSize::Byte, Operation::RCR, Source::Immediate, ScaleIndexBase(Source::eBP), 0x34, -0x78);
+	test(instructions[35], DataSize::DWord, Operation::MOVS);
 
 	//and    edx,0xffffffd0
 	//cmc
 	//inc    esp
 	//popf
+	test(instructions[36], DataSize::DWord, Operation::AND, Source::Immediate, Source::eDX);
+	test(instructions[37], DataSize::None, Operation::CMC);
+	test(instructions[38], DataSize::DWord, Operation::INC, Source::eSP);
+	test(instructions[39], DataSize::DWord, Operation::POPF);
+
 	//movs   DWORD PTR es:[edi],DWORD PTR ds:[esi]
-	// Note to self: divergance at or just after here.
 	//rcr    DWORD PTR [esi+0x4f],0x7
 	//push   ecx
 	//aam    0xed
+	test(instructions[40], DataSize::DWord, Operation::MOVS);
+	test(instructions[41], DataSize::DWord, Operation::RCR, Source::Immediate, ScaleIndexBase(Source::eSI), 0x07, 0x4f);
+
 	//mov    al,0x69
 	//xlat   BYTE PTR ds:[ebx]
 	//add    ch,al

From 67b2e40fae4808caf5e01899cad0038d0f66ef2f Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Wed, 9 Mar 2022 10:51:16 -0500
Subject: [PATCH 073/104] Fixed: INs and OUTs remain single byte.

---
 InstructionSets/x86/Decoder.cpp               | 20 +++++++++----------
 .../Mac/Clock SignalTests/x86DecoderTests.mm  |  6 ++++++
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index ade1820c0..607f6bdd2 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -42,13 +42,13 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 /// Handles instructions of the form Ax, jjkk where the latter is implicitly an address.
 #define RegAddr(op, dest, op_size, addr_size)			\
 	SetOpSrcDestSize(op, DirectAddress, dest, op_size);	\
-	operand_size_ = data_size(addr_size);				\
+	operand_size_ = addr_size;							\
 	phase_ = Phase::DisplacementOrOperand
 
 /// Handles instructions of the form jjkk, Ax where the former is implicitly an address.
 #define AddrReg(op, source, op_size, addr_size)				\
 	SetOpSrcDestSize(op, source, DirectAddress, op_size);	\
-	operand_size_ = data_size(addr_size);					\
+	operand_size_ = addr_size;								\
 	destination_ = Source::DirectAddress;					\
 	phase_ = Phase::DisplacementOrOperand
 
@@ -298,10 +298,10 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0x9e: Complete(SAHF, None, None, DataSize::Byte);	break;
 			case 0x9f: Complete(LAHF, None, None, DataSize::Byte);	break;
 
-			case 0xa0: RegAddr(MOV, eAX, DataSize::Byte, address_size_);	break;
-			case 0xa1: RegAddr(MOV, eAX, data_size_, address_size_);		break;
-			case 0xa2: AddrReg(MOV, eAX, DataSize::Byte, address_size_);	break;
-			case 0xa3: AddrReg(MOV, eAX, data_size_, address_size_);		break;
+			case 0xa0: RegAddr(MOV, eAX, DataSize::Byte, data_size(address_size_));	break;
+			case 0xa1: RegAddr(MOV, eAX, data_size_, data_size(address_size_));		break;
+			case 0xa2: AddrReg(MOV, eAX, DataSize::Byte, data_size(address_size_));	break;
+			case 0xa3: AddrReg(MOV, eAX, data_size_, data_size(address_size_));		break;
 
 			case 0xa4: Complete(MOVS, None, None, DataSize::Byte);	break;
 			case 0xa5: Complete(MOVS, None, None, data_size_);		break;
@@ -391,10 +391,10 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0xe2: Displacement(LOOP, DataSize::Byte);		break;
 			case 0xe3: Displacement(JPCX, DataSize::Byte);		break;
 
-			case 0xe4: RegAddr(IN, eAX, DataSize::Byte, address_size_);		break;
-			case 0xe5: RegAddr(IN, eAX, data_size_, address_size_);			break;
-			case 0xe6: AddrReg(OUT, eAX, DataSize::Byte, address_size_);	break;
-			case 0xe7: AddrReg(OUT, eAX, data_size_, address_size_);		break;
+			case 0xe4: RegAddr(IN, eAX, DataSize::Byte, DataSize::Byte);	break;
+			case 0xe5: RegAddr(IN, eAX, data_size_, DataSize::Byte);		break;
+			case 0xe6: AddrReg(OUT, eAX, DataSize::Byte, DataSize::Byte);	break;
+			case 0xe7: AddrReg(OUT, eAX, data_size_, DataSize::Byte);		break;
 
 			case 0xe8: RegData(CALLD, None, data_size_);	break;
 			case 0xe9: RegData(JMPN, None, data_size_);		break;
diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index 6f2698ae9..02e45b5e5 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -465,11 +465,17 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	//aam    0xed
 	test(instructions[40], DataSize::DWord, Operation::MOVS);
 	test(instructions[41], DataSize::DWord, Operation::RCR, Source::Immediate, ScaleIndexBase(Source::eSI), 0x07, 0x4f);
+	test(instructions[42], DataSize::DWord, Operation::PUSH, Source::eCX);
+	test(instructions[43], Operation::AAM, 0xed);
 
 	//mov    al,0x69
 	//xlat   BYTE PTR ds:[ebx]
 	//add    ch,al
 	//push   ecx
+	test(instructions[44], DataSize::Byte, Operation::MOV, Source::Immediate, Source::eAX, 0x69);
+
+	test(instructions[47], DataSize::DWord, Operation::PUSH, Source::eCX);
+
 	//sti
 	//push   0x698b3a85
 	//sub    BYTE PTR [esp+ebp*8],cl

From f96c05193234adb2a0bd09270e723ed0cd99a004 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Wed, 9 Mar 2022 14:24:57 -0500
Subject: [PATCH 074/104] Record PUSH immediate operation size.

---
 InstructionSets/x86/Decoder.cpp                     | 1 +
 OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm | 8 +++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 607f6bdd2..1736c0d8b 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -212,6 +212,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0x68:
 				RequiresMin(i80286);
 				Immediate(PUSH, data_size_);
+				operation_size_ = data_size_;
 			break;
 			case 0x69:
 				RequiresMin(i80286);
diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index 02e45b5e5..0438c0b20 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -473,13 +473,19 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	//add    ch,al
 	//push   ecx
 	test(instructions[44], DataSize::Byte, Operation::MOV, Source::Immediate, Source::eAX, 0x69);
-
+	test(instructions[45], Operation::XLAT);
+	test(instructions[46], DataSize::Byte, Operation::ADD, Source::eAX, Source::CH);
 	test(instructions[47], DataSize::DWord, Operation::PUSH, Source::eCX);
 
 	//sti
 	//push   0x698b3a85
 	//sub    BYTE PTR [esp+ebp*8],cl
 	//mov    cl,0xb7
+	test(instructions[48], Operation::STI);
+	test(instructions[49], DataSize::DWord, Operation::PUSH, Source::Immediate, Source::None, 0x698b3a85);
+	test(instructions[50], DataSize::Byte, Operation::SUB, Source::eCX, ScaleIndexBase(3, Source::eBP, Source::eSP));
+	test(instructions[51], DataSize::Byte, Operation::MOV, Source::Immediate, Source::eCX, 0xb7);
+
 	//cmp    ecx,DWORD PTR [ebp+0x2c87445f]
 	//jecxz  0x00000084
 	//sahf

From acd9df674581faf54879d79fbea61525d5398e28 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Wed, 9 Mar 2022 15:23:43 -0500
Subject: [PATCH 075/104] Fix segment/offset sizes for far calls.

---
 InstructionSets/x86/Decoder.cpp               |  4 +--
 InstructionSets/x86/Instruction.hpp           |  2 +-
 .../Mac/Clock SignalTests/x86DecoderTests.mm  | 26 ++++++++++++++++---
 3 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 1736c0d8b..bf46d5180 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -77,8 +77,8 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 #define Far(op)										\
 	operation_ = Operation::op;						\
 	phase_ = Phase::DisplacementOrOperand;			\
-	operand_size_ = data_size_;						\
-	displacement_size_ = DataSize::Word
+	operand_size_ = DataSize::Word;					\
+	displacement_size_ = data_size(address_size_)
 
 /// Handles ENTER — a fixed three-byte operation.
 #define Displacement16Operand8(op)					\
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index d5afd1ec9..7eec36268 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -630,7 +630,7 @@ template<bool is_32bit> class Instruction {
 
 		// TODO: confirm whether far call for some reason makes these 32-bit in protected mode.
 		uint16_t segment() const		{	return uint16_t(operand_);					}
-		uint16_t offset() const			{	return uint16_t(displacement_);				}
+		DisplacementT offset() const	{	return displacement_;						}
 
 		DisplacementT displacement() const	{	return displacement_;					}
 		ImmediateT operand() const			{	return operand_;						}
diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index 0438c0b20..297aaf3cf 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -52,7 +52,11 @@ template <typename InstructionT> void test(
 	if(displacement) XCTAssertEqual(instruction.displacement(), *displacement);
 }
 
-template <typename InstructionT> void test_far(const InstructionT &instruction, Operation operation, uint16_t segment, uint16_t offset) {
+template <typename InstructionT> void test_far(
+	const InstructionT &instruction,
+	Operation operation,
+	uint16_t segment,
+	typename InstructionT::DisplacementT offset) {
 	XCTAssertEqual(instruction.operation, operation);
 	XCTAssertEqual(instruction.segment(), segment);
 	XCTAssertEqual(instruction.offset(), offset);
@@ -487,17 +491,31 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	test(instructions[51], DataSize::Byte, Operation::MOV, Source::Immediate, Source::eCX, 0xb7);
 
 	//cmp    ecx,DWORD PTR [ebp+0x2c87445f]
-	//jecxz  0x00000084
+	//jecxz  0x00000084	(from 0x82)
 	//sahf
-	//je     0x000000f3
+	//je     0x000000f3	(from 0x85)
+	test(instructions[52], DataSize::DWord, Operation::CMP, ScaleIndexBase(Source::eBP), Source::eCX, 0, 0x2c87445f);
+	test(instructions[53], Operation::JPCX, 0, 0x02);
+	test(instructions[54], Operation::SAHF);
+	test(instructions[55], Operation::JE, 0, 0x6e);
+
 	//sbb    ecx,DWORD PTR [edi+0x433c54d]
 	//lahf
 	//lods   al,BYTE PTR ds:[esi]
 	//ror    cl,0x60
+	test(instructions[56], DataSize::DWord, Operation::SBB, ScaleIndexBase(Source::eDI), Source::eCX, 0, 0x433c54d);
+	test(instructions[57], Operation::LAHF);
+	test(instructions[58], Operation::LODS);
+	test(instructions[59], DataSize::Byte, Operation::ROR, Source::Immediate, Source::eCX, 0x60);
+
 	//call   0xe21b:0x97d0f58a
 	//fs pusha
 	//mov    al,0xcf
-	//jecxz  0x000000d4
+	//jecxz  0x000000d4	(from 0x9d)
+	test_far(instructions[60], Operation::CALLF, 0xe21b, 0x97d0f58a);
+	test(instructions[61], Operation::PUSHA);
+	test(instructions[62], DataSize::Byte, Operation::MOV, Source::Immediate, Source::eAX, 0xcf);
+	test(instructions[63], Operation::JPCX, 0, 0xd4 - 0x9d);
 }
 
 @end

From 9f2d18b7bae82da3ffdc0c3836fb2e97f0e7a8cf Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Wed, 9 Mar 2022 15:25:46 -0500
Subject: [PATCH 076/104] Improve comment formatting.

---
 .../Mac/Clock SignalTests/x86DecoderTests.mm  | 128 +++++++++---------
 1 file changed, 64 insertions(+), 64 deletions(-)

diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index 297aaf3cf..c188c75af 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -365,10 +365,10 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 
 	XCTAssertEqual(instructions.size(), 64);
 
-	//cs inc edx
-	//or     al,0x9
-	//add    DWORD PTR [edi-0x42],0x9f683aa9
-	//lock jp 0xfffffff0	(from 0000000e)
+	// cs inc edx
+	// or     al,0x9
+	// add    DWORD PTR [edi-0x42],0x9f683aa9
+	// lock jp 0xfffffff0	(from 0000000e)
 	test(instructions[0], DataSize::DWord, Operation::INC, Source::eDX);
 	XCTAssertEqual(instructions[0].data_segment(), Source::CS);
 	test(instructions[1], DataSize::Byte, Operation::OR, Source::Immediate, Source::eAX, 0x9);
@@ -376,29 +376,29 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	test(instructions[3], Operation::JP, 0, -30);
 	XCTAssert(instructions[3].lock());
 
-	//ds mov ah,0xc1
-	//pop    ds
-	//stos   BYTE PTR es:[edi],al
-	//pusha
+	// ds mov ah,0xc1
+	// pop    ds
+	// stos   BYTE PTR es:[edi],al
+	// pusha
 	test(instructions[4], DataSize::Byte, Operation::MOV, Source::Immediate, Source::AH, 0xc1);
 	XCTAssertEqual(instructions[4].data_segment(), Source::DS);
 	test(instructions[5], DataSize::Word, Operation::POP, Source::None, Source::DS);
 	test(instructions[6], DataSize::Byte, Operation::STOS);
 	test(instructions[7], Operation::PUSHA);
 
-	//mov    ah,0xe1
-	//xchg   ecx,eax
-	//fdivr  st(6),st
-	//bound  edx,QWORD PTR [eax-0x6322070]
+	// mov    ah,0xe1
+	// xchg   ecx,eax
+	// fdivr  st(6),st
+	// bound  edx,QWORD PTR [eax-0x6322070]
 	test(instructions[8], DataSize::Byte, Operation::MOV, Source::Immediate, Source::AH, 0xe1);
 	test(instructions[9], DataSize::DWord, Operation::XCHG, Source::eAX, Source::eCX);
 	test(instructions[10], DataSize::None, Operation::ESC);
 	test(instructions[11], DataSize::DWord, Operation::BOUND, ScaleIndexBase(Source::eAX), Source::eDX, 0, -0x6322070);
 
-	//btc    DWORD PTR [ecx+0x4b],esi
-	//pop    eax
-	//push   ebp
-	//cmp    BYTE PTR [ecx+edi*8],ch
+	// btc    DWORD PTR [ecx+0x4b],esi
+	// pop    eax
+	// push   ebp
+	// cmp    BYTE PTR [ecx+edi*8],ch
 	test(instructions[12], DataSize::DWord, Operation::BTC, Source::eSI, ScaleIndexBase(Source::eCX), 0, 0x4b);
 	test(instructions[13], DataSize::DWord, Operation::POP, Source::eAX, Source::eAX);
 	test(instructions[14], DataSize::DWord, Operation::PUSH, Source::eBP);
@@ -407,111 +407,111 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	// Possibly TODO: pick a lane on whether PUSH/POP duplicate source and destination.
 	// It doesn't really matter outside of these tests though.
 
-	//push   eax
-	//dec    dh
-	//loopne 0xffffffee (from 0x2d)
-	//fiadd  DWORD PTR [ebx-0x64f3e674]
+	// push   eax
+	// dec    dh
+	// loopne 0xffffffee (from 0x2d)
+	// fiadd  DWORD PTR [ebx-0x64f3e674]
 	test(instructions[16], DataSize::DWord, Operation::PUSH, Source::eAX);
 	test(instructions[17], DataSize::Byte, Operation::DEC, Source::DH);
 	test(instructions[18], Operation::LOOPNE, 0, -63);
 	test(instructions[19], Operation::ESC);
 
-	//mov    DWORD PTR [ebx],edx
-	//xor    al,0x45
-	//lds    edx,FWORD PTR [ecx]
-	//mov    ds:0xe4dba6d3,al
+	// mov    DWORD PTR [ebx],edx
+	// xor    al,0x45
+	// lds    edx,FWORD PTR [ecx]
+	// mov    ds:0xe4dba6d3,al
 	test(instructions[20], DataSize::DWord, Operation::MOV, Source::eDX, ScaleIndexBase(Source::eBX));
 	test(instructions[21], DataSize::Byte, Operation::XOR, Source::Immediate, Source::eAX, 0x45);
 	test(instructions[22], DataSize::DWord, Operation::LDS, ScaleIndexBase(Source::eCX), Source::eDX);
 	test(instructions[23], DataSize::Byte, Operation::MOV, Source::eAX, Source::DirectAddress, 0xe4dba6d3);
 	XCTAssertEqual(instructions[23].data_segment(), Source::DS);
 
-	//pop    ds
-	//movs   DWORD PTR es:[edi],DWORD PTR ds:[esi]
-	//jns    0x00000035 (from 0x42)
-	//jge    0x00000060 (from 0x44)
+	// pop    ds
+	// movs   DWORD PTR es:[edi],DWORD PTR ds:[esi]
+	// jns    0x00000035	(from 0x42)
+	// jge    0x00000060	(from 0x44)
 	test(instructions[24], DataSize::Word, Operation::POP, Source::None, Source::DS);
 	test(instructions[25], DataSize::DWord, Operation::MOVS);
 	test(instructions[26], Operation::JNS, 0, -0xd);
 	test(instructions[27], Operation::JNL, 0, 0x1c);
 
-	//mov    eax,0x8a766bda
-	//jns    0x00000073 (from 0x4b)
-	//push   edx
-	//int    0xc4
+	// mov    eax,0x8a766bda
+	// jns    0x00000073	(from 0x4b)
+	// push   edx
+	// int    0xc4
 	test(instructions[28], DataSize::DWord, Operation::MOV, Source::Immediate, Source::eAX, 0x8a766bda);
 	test(instructions[29], Operation::JNS, 0, 0x28);
 	test(instructions[30], DataSize::DWord, Operation::PUSH, Source::eDX);
 	test(instructions[31], Operation::INT, 0xc4);
 
-	//jmp    0x29cf120d (from 0x53)
-	//or     DWORD PTR [esi+0x1a],eax
-	//rcr    BYTE PTR [ebp-0x78],0x34
-	//movs   DWORD PTR es:[edi],DWORD PTR ds:[esi]
+	// jmp    0x29cf120d	(from 0x53)
+	// or     DWORD PTR [esi+0x1a],eax
+	// rcr    BYTE PTR [ebp-0x78],0x34
+	// movs   DWORD PTR es:[edi],DWORD PTR ds:[esi]
 	test(instructions[32], Operation::JMPN, 0x29cf120d - 0x53);
 //	XCTAssertEqual(instructions[32].source(), Source::None);
 	test(instructions[33], DataSize::DWord, Operation::OR, Source::eAX, ScaleIndexBase(Source::eSI), 0, 0x1a);
 	test(instructions[34], DataSize::Byte, Operation::RCR, Source::Immediate, ScaleIndexBase(Source::eBP), 0x34, -0x78);
 	test(instructions[35], DataSize::DWord, Operation::MOVS);
 
-	//and    edx,0xffffffd0
-	//cmc
-	//inc    esp
-	//popf
+	// and    edx,0xffffffd0
+	// cmc
+	// inc    esp
+	// popf
 	test(instructions[36], DataSize::DWord, Operation::AND, Source::Immediate, Source::eDX);
 	test(instructions[37], DataSize::None, Operation::CMC);
 	test(instructions[38], DataSize::DWord, Operation::INC, Source::eSP);
 	test(instructions[39], DataSize::DWord, Operation::POPF);
 
-	//movs   DWORD PTR es:[edi],DWORD PTR ds:[esi]
-	//rcr    DWORD PTR [esi+0x4f],0x7
-	//push   ecx
-	//aam    0xed
+	// movs   DWORD PTR es:[edi],DWORD PTR ds:[esi]
+	// rcr    DWORD PTR [esi+0x4f],0x7
+	// push   ecx
+	// aam    0xed
 	test(instructions[40], DataSize::DWord, Operation::MOVS);
 	test(instructions[41], DataSize::DWord, Operation::RCR, Source::Immediate, ScaleIndexBase(Source::eSI), 0x07, 0x4f);
 	test(instructions[42], DataSize::DWord, Operation::PUSH, Source::eCX);
 	test(instructions[43], Operation::AAM, 0xed);
 
-	//mov    al,0x69
-	//xlat   BYTE PTR ds:[ebx]
-	//add    ch,al
-	//push   ecx
+	// mov    al,0x69
+	// xlat   BYTE PTR ds:[ebx]
+	// add    ch,al
+	// push   ecx
 	test(instructions[44], DataSize::Byte, Operation::MOV, Source::Immediate, Source::eAX, 0x69);
 	test(instructions[45], Operation::XLAT);
 	test(instructions[46], DataSize::Byte, Operation::ADD, Source::eAX, Source::CH);
 	test(instructions[47], DataSize::DWord, Operation::PUSH, Source::eCX);
 
-	//sti
-	//push   0x698b3a85
-	//sub    BYTE PTR [esp+ebp*8],cl
-	//mov    cl,0xb7
+	// sti
+	// push   0x698b3a85
+	// sub    BYTE PTR [esp+ebp*8],cl
+	// mov    cl,0xb7
 	test(instructions[48], Operation::STI);
 	test(instructions[49], DataSize::DWord, Operation::PUSH, Source::Immediate, Source::None, 0x698b3a85);
 	test(instructions[50], DataSize::Byte, Operation::SUB, Source::eCX, ScaleIndexBase(3, Source::eBP, Source::eSP));
 	test(instructions[51], DataSize::Byte, Operation::MOV, Source::Immediate, Source::eCX, 0xb7);
 
-	//cmp    ecx,DWORD PTR [ebp+0x2c87445f]
-	//jecxz  0x00000084	(from 0x82)
-	//sahf
-	//je     0x000000f3	(from 0x85)
+	// cmp    ecx,DWORD PTR [ebp+0x2c87445f]
+	// jecxz  0x00000084	(from 0x82)
+	// sahf
+	// je     0x000000f3	(from 0x85)
 	test(instructions[52], DataSize::DWord, Operation::CMP, ScaleIndexBase(Source::eBP), Source::eCX, 0, 0x2c87445f);
 	test(instructions[53], Operation::JPCX, 0, 0x02);
 	test(instructions[54], Operation::SAHF);
 	test(instructions[55], Operation::JE, 0, 0x6e);
 
-	//sbb    ecx,DWORD PTR [edi+0x433c54d]
-	//lahf
-	//lods   al,BYTE PTR ds:[esi]
-	//ror    cl,0x60
+	// sbb    ecx,DWORD PTR [edi+0x433c54d]
+	// lahf
+	// lods   al,BYTE PTR ds:[esi]
+	// ror    cl,0x60
 	test(instructions[56], DataSize::DWord, Operation::SBB, ScaleIndexBase(Source::eDI), Source::eCX, 0, 0x433c54d);
 	test(instructions[57], Operation::LAHF);
 	test(instructions[58], Operation::LODS);
 	test(instructions[59], DataSize::Byte, Operation::ROR, Source::Immediate, Source::eCX, 0x60);
 
-	//call   0xe21b:0x97d0f58a
-	//fs pusha
-	//mov    al,0xcf
-	//jecxz  0x000000d4	(from 0x9d)
+	// call   0xe21b:0x97d0f58a
+	// fs pusha
+	// mov    al,0xcf
+	// jecxz  0x000000d4	(from 0x9d)
 	test_far(instructions[60], Operation::CALLF, 0xe21b, 0x97d0f58a);
 	test(instructions[61], Operation::PUSHA);
 	test(instructions[62], DataSize::Byte, Operation::MOV, Source::Immediate, Source::eAX, 0xcf);

From ead8b7437e142a5268bbdfe818d047e51a132627 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Wed, 9 Mar 2022 15:26:20 -0500
Subject: [PATCH 077/104] Remove done TODO.

---
 InstructionSets/x86/Instruction.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 7eec36268..33c99dcb7 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -628,7 +628,6 @@ template<bool is_32bit> class Instruction {
 		Repetition repetition() const	{	return Repetition(repetition_size_ & 3);	}
 		DataSize operation_size() const {	return DataSize(repetition_size_ >> 2);		}
 
-		// TODO: confirm whether far call for some reason makes these 32-bit in protected mode.
 		uint16_t segment() const		{	return uint16_t(operand_);					}
 		DisplacementT offset() const	{	return displacement_;						}
 

From 381fd5dbe480ae8f3482e9056b0626ed5eb46bcf Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Wed, 9 Mar 2022 16:37:07 -0500
Subject: [PATCH 078/104] E8 is a relative call.

---
 InstructionSets/x86/Documentation/80386 opcode map.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/InstructionSets/x86/Documentation/80386 opcode map.html b/InstructionSets/x86/Documentation/80386 opcode map.html
index ce5c62204..cbf824478 100644
--- a/InstructionSets/x86/Documentation/80386 opcode map.html	
+++ b/InstructionSets/x86/Documentation/80386 opcode map.html	
@@ -532,7 +532,7 @@
 				<td rowspan=2>JCXZ Jb</td>
 				<td colspan=2>IN</td>
 				<td colspan=2>OUT</td>
-				<td rowspan=2>CALL Av</td>
+				<td rowspan=2>CALL Jv</td>
 				<td colspan=3>JMP</td>
 				<td colspan=2>IN</td>
 				<td colspan=2>OUT</td>

From bbf925a27ecec6fd147f2b0e050134e064c1974b Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Wed, 9 Mar 2022 16:48:06 -0500
Subject: [PATCH 079/104] Clarify, unify and correct decoding and encoding of
 [CALL/RET/JMP][near/far/relative/absolute].

---
 InstructionSets/x86/Decoder.cpp               | 46 ++++++++-----------
 InstructionSets/x86/Instruction.hpp           | 18 ++++----
 .../Mac/Clock SignalTests/x86DecoderTests.mm  | 41 +++++++++++++----
 3 files changed, 61 insertions(+), 44 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index bf46d5180..8a376b8db 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -292,7 +292,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 
 			case 0x98: Complete(CBW, eAX, AH, DataSize::Byte);		break;
 			case 0x99: Complete(CWD, eAX, eDX, data_size_);			break;
-			case 0x9a: Far(CALLF);									break;
+			case 0x9a: Far(CALLfar);								break;
 			case 0x9b: Complete(WAIT, None, None, DataSize::None);	break;
 			case 0x9c: Complete(PUSHF, None, None, data_size_);		break;
 			case 0x9d: Complete(POPF, None, None, data_size_);		break;
@@ -340,12 +340,12 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				source_ = Source::Immediate;
 				operand_size_ = DataSize::Byte;
 			break;
-			case 0xc2: RegData(RETN, None, data_size_);				break;
-			case 0xc3: Complete(RETN, None, None, DataSize::None);	break;
-			case 0xc4: MemRegReg(LES, Reg_MemReg, data_size_);		break;
-			case 0xc5: MemRegReg(LDS, Reg_MemReg, data_size_);		break;
-			case 0xc6: MemRegReg(MOV, MemRegMOV, DataSize::Byte);	break;
-			case 0xc7: MemRegReg(MOV, MemRegMOV, data_size_);		break;
+			case 0xc2: RegData(RETnear, None, data_size_);				break;
+			case 0xc3: Complete(RETnear, None, None, DataSize::None);	break;
+			case 0xc4: MemRegReg(LES, Reg_MemReg, data_size_);			break;
+			case 0xc5: MemRegReg(LDS, Reg_MemReg, data_size_);			break;
+			case 0xc6: MemRegReg(MOV, MemRegMOV, DataSize::Byte);		break;
+			case 0xc7: MemRegReg(MOV, MemRegMOV, data_size_);			break;
 
 			case 0xc8:
 				RequiresMin(i80186);
@@ -356,8 +356,8 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				Complete(LEAVE, None, None, DataSize::None);
 			break;
 
-			case 0xca: RegData(RETF, None, data_size_);				break;
-			case 0xcb: Complete(RETF, None, None, DataSize::DWord);	break;
+			case 0xca: RegData(RETfar, None, data_size_);				break;
+			case 0xcb: Complete(RETfar, None, None, DataSize::DWord);	break;
 
 			case 0xcc: Complete(INT3, None, None, DataSize::None);	break;
 			case 0xcd: RegData(INT, None, DataSize::Byte);			break;
@@ -397,10 +397,10 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0xe6: AddrReg(OUT, eAX, DataSize::Byte, DataSize::Byte);	break;
 			case 0xe7: AddrReg(OUT, eAX, data_size_, DataSize::Byte);		break;
 
-			case 0xe8: RegData(CALLD, None, data_size_);	break;
-			case 0xe9: RegData(JMPN, None, data_size_);		break;
-			case 0xea: Far(JMPF);							break;
-			case 0xeb: Displacement(JMPN, DataSize::Byte);	break;
+			case 0xe8: Displacement(CALLrel, data_size_);		break;
+			case 0xe9: Displacement(JMPrel, data_size_);		break;
+			case 0xea: Far(JMPfar);								break;
+			case 0xeb: Displacement(JMPrel, DataSize::Byte);	break;
 
 			case 0xec: Complete(IN, eDX, eAX, DataSize::Byte);	break;
 			case 0xed: Complete(IN, eDX, eAX, data_size_);		break;
@@ -739,20 +739,12 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				switch(reg) {
 					default: 	undefined();
 
-					case 0:		operation_ = Operation::INC;	break;
-					case 1:		operation_ = Operation::DEC;	break;
-					case 2:		operation_ = Operation::CALLN;	break;
-					case 3:
-						operation_ = Operation::CALLF;
-						operand_size_ = DataSize::DWord;
-						source_ = Source::Immediate;
-					break;
-					case 4:		operation_ = Operation::JMPN;	break;
-					case 5:
-						operation_ = Operation::JMPF;
-						operand_size_ = DataSize::DWord;
-						source_ = Source::Immediate;
-					break;
+					case 0:	operation_ = Operation::INC;		break;
+					case 1:	operation_ = Operation::DEC;		break;
+					case 2:	operation_ = Operation::CALLabs;	break;
+					case 3:	operation_ = Operation::CALLfar;	break;
+					case 4:	operation_ = Operation::JMPabs;		break;
+					case 5:	operation_ = Operation::JMPfar;		break;
 					case 6:	operation_ = Operation::PUSH;		break;
 				}
 			break;
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 33c99dcb7..6bac6ca5e 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -87,21 +87,23 @@ enum class Operation: uint8_t {
 	JS, JNS,	JP, JNP,	JL, JNL,	JLE, JNLE,
 
 	/// Far call; see the segment() and offset() fields.
-	CALLF,
+	CALLfar,
 	/// Displacement call; followed by a 16-bit operand providing a call offset.
-	CALLD,
+	CALLrel,
 	/// Near call.
-	CALLN,
+	CALLabs,
 	/// Return from interrupt.
 	IRET,
 	/// Near return; if source is not ::None then it will be an ::Immediate indicating how many additional bytes to remove from the stack.
-	RETF,
+	RETfar,
 	/// Far return; if source is not ::None then it will be an ::Immediate indicating how many additional bytes to remove from the stack.
-	RETN,
-	/// Near jump; if an operand is not ::None then it gives an absolute destination; otherwise see the displacement.
-	JMPN,
+	RETnear,
+	/// Near jump with an absolute destination.
+	JMPabs,
+	/// Near jump with a relative destination.
+	JMPrel,
 	/// Far jump to the indicated segment and offset.
-	JMPF,
+	JMPfar,
 	/// Relative jump performed only if CX = 0; see the displacement.
 	JPCX,
 	/// Generates a software interrupt of the level stated in the operand.
diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index c188c75af..fa1f1697e 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -146,10 +146,10 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	// [[ omitted: gs insw (%dx),%es:(%di) ]]
 	// jnp		0xffffffaf
 	// ret		$0x4265
-	test(instructions[4], Operation::RETN);
-	test(instructions[5], Operation::RETF, 0x4826);
+	test(instructions[4], Operation::RETnear);
+	test(instructions[5], Operation::RETfar, 0x4826);
 	test(instructions[6], Operation::JNP, std::nullopt, 0xff9f);
-	test(instructions[7], Operation::RETN, 0x4265);
+	test(instructions[7], Operation::RETnear, 0x4265);
 
 	// dec		%si
 	// out		%ax,(%dx)
@@ -215,7 +215,7 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	// fwait
 	// out		%al,$0xd3
 	test(instructions[30], DataSize::Word, Operation::XCHG, Source::eAX, Source::eDI);
-	test(instructions[31], Operation::RETN);
+	test(instructions[31], Operation::RETnear);
 	test(instructions[32], Operation::WAIT);
 	test(instructions[33], DataSize::Byte, Operation::OUT, Source::eAX, Source::DirectAddress, 0xd3);
 
@@ -261,7 +261,7 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	// dec		%dx
 	// mov		$0x9e,%al
 	// stc
-	test(instructions[50], Operation::CALLD, uint16_t(0x16c8));
+	test(instructions[50], Operation::CALLrel, 0, 0x16c8);
 	test(instructions[51], DataSize::Word, Operation::DEC, Source::eDX, Source::eDX);
 	test(instructions[52], DataSize::Byte, Operation::MOV, Source::Immediate, Source::eAX, 0x9e);
 	test(instructions[53], Operation::STC);
@@ -307,7 +307,7 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	});
 
 	XCTAssertEqual(instructions.size(), 1);
-	test_far(instructions[0], Operation::CALLF, 0x7856, 0x3412);
+	test_far(instructions[0], Operation::CALLfar, 0x7856, 0x3412);
 }
 
 - (void)testLDSLESEtc {
@@ -349,6 +349,30 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	XCTAssertEqual(instructions[1].address_size(), AddressSize::b16);
 }
 
+- (void)testJMP {
+	decltype(decode<Model::i80386>({0x00})) instructions;
+
+	instructions = decode<Model::i80386>({
+		// JMP +0x00efcdab
+		0xe9, 0xab, 0xcd, 0xef, 0x00,
+		// JMP 0xc389:0x67452301
+		0xea, 0x01, 0x23, 0x45, 0x67, 0x89, 0xc3,
+		// JMP -79
+		0xeb, 0xb1,
+		// JMP DWORD (edx)
+		0xff, 0x22,
+		// JMP FWORD (eax)
+		0xff, 0x28,
+	}, true);
+
+	XCTAssertEqual(instructions.size(), 5);
+	test(instructions[0], Operation::JMPrel, 0, 0xefcdab);
+	test_far(instructions[1], Operation::JMPfar, 0xc389, 0x67452301);
+	test(instructions[2], Operation::JMPrel, 0, -79);
+	test(instructions[3], DataSize::DWord, Operation::JMPabs, ScaleIndexBase(Source::eDX));
+	test(instructions[4], DataSize::DWord, Operation::JMPfar, ScaleIndexBase(Source::eAX));
+}
+
 - (void)test32bitSequence {
 	const auto instructions = decode<Model::i80386>({
 		0x2e, 0x42, 0x0c, 0x09, 0x81, 0x47, 0xbe, 0xa9, 0x3a, 0x68, 0x9f, 0xf0, 0x7a, 0xe2, 0x3e, 0xb4,
@@ -448,8 +472,7 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	// or     DWORD PTR [esi+0x1a],eax
 	// rcr    BYTE PTR [ebp-0x78],0x34
 	// movs   DWORD PTR es:[edi],DWORD PTR ds:[esi]
-	test(instructions[32], Operation::JMPN, 0x29cf120d - 0x53);
-//	XCTAssertEqual(instructions[32].source(), Source::None);
+	test(instructions[32], Operation::JMPrel, 0, 0x29cf120d - 0x53);
 	test(instructions[33], DataSize::DWord, Operation::OR, Source::eAX, ScaleIndexBase(Source::eSI), 0, 0x1a);
 	test(instructions[34], DataSize::Byte, Operation::RCR, Source::Immediate, ScaleIndexBase(Source::eBP), 0x34, -0x78);
 	test(instructions[35], DataSize::DWord, Operation::MOVS);
@@ -512,7 +535,7 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	// fs pusha
 	// mov    al,0xcf
 	// jecxz  0x000000d4	(from 0x9d)
-	test_far(instructions[60], Operation::CALLF, 0xe21b, 0x97d0f58a);
+	test_far(instructions[60], Operation::CALLfar, 0xe21b, 0x97d0f58a);
 	test(instructions[61], Operation::PUSHA);
 	test(instructions[62], DataSize::Byte, Operation::MOV, Source::Immediate, Source::eAX, 0xcf);
 	test(instructions[63], Operation::JPCX, 0, 0xd4 - 0x9d);

From c1cc4f96df07f43ccffb5f75620fc156f7e3b4c3 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Wed, 9 Mar 2022 16:56:32 -0500
Subject: [PATCH 080/104] Switch to `const auto`.

---
 OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index fa1f1697e..d5778bd65 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -350,9 +350,7 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 }
 
 - (void)testJMP {
-	decltype(decode<Model::i80386>({0x00})) instructions;
-
-	instructions = decode<Model::i80386>({
+	const auto instructions = decode<Model::i80386>({
 		// JMP +0x00efcdab
 		0xe9, 0xab, 0xcd, 0xef, 0x00,
 		// JMP 0xc389:0x67452301

From 520baa6ec8187ebd69fd59ef44b00670a919df77 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Wed, 9 Mar 2022 20:19:40 -0500
Subject: [PATCH 081/104] Formalise `IndirectNoBase` and permit a knowledgeable
 caller to avoid conditionals.

---
 InstructionSets/x86/Instruction.hpp | 67 ++++++++++++++++++++++-------
 1 file changed, 51 insertions(+), 16 deletions(-)

diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 6bac6ca5e..d29c89656 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -201,15 +201,19 @@ enum class Operation: uint8_t {
 	/// Load AL with DS:[AL+BX].
 	XLAT,
 
-	// TODO: expand detail on all operations below.
-
 	//
 	// 80186 additions.
 	//
 
-	/// Checks an array index against bounds.
+	/// Checks whether the signed value in the destination register is within the bounds
+	/// stored at the location indicated by the source register, which will point to two
+	/// 16- or 32-bit words, the first being a signed lower bound and the second a signed upper bound.
+	/// Raises a bounds exception if not.
 	BOUND,
 
+
+	// TODO: expand detail on all operations below.
+
 	/// Create stack frame.
 	ENTER,
 	/// Procedure exit; copies BP to SP, then pops a new BP from the stack.
@@ -326,9 +330,15 @@ enum class Operation: uint8_t {
 	/// Two-operand form of IMUL; multiply the source by the destination and write to the destination.
 	IMUL_2,
 
+	// Various conditional sets; each sets the byte at the location given by the operand
+	// to $01 if the condition is met; $00 otherwise.
 	SETO, SETNO,	SETB, SETNB,	SETZ, SETNZ,	SETBE, SETNBE,
 	SETS, SETNS,	SETP, SETNP,	SETL, SETNL,	SETLE, SETNLE,
 
+	// Various special-case moves (i.e. those where it is impractical to extend the
+	// Source enum, so the requirement for special handling is loaded into the operation).
+	// In all cases the Cx, Dx and Tx Source aliases can be used to reinterpret the relevant
+	// source or destination.
 	MOVtoCr, MOVfromCr,
 	MOVtoDr, MOVfromDr,
 	MOVtoTr, MOVfromTr,
@@ -392,6 +402,11 @@ enum class Source: uint8_t {
 	eSI = eSIorDH,	DH = eSIorDH,
 	eDI = eDIorBH,	BH = eDIorBH,
 
+	// Aliases for control, test and debug registers.
+	C0 = 0, C1 = 1, C2 = 2, C3 = 3, C4 = 4, C5 = 5, C6 = 6, C7 = 7,
+	T0 = 0, T1 = 1, T2 = 2, T3 = 3, T4 = 4, T5 = 5, T6 = 6, T7 = 7,
+	D0 = 0, D1 = 1, D2 = 2, D3 = 3, D4 = 4, D5 = 5, D6 = 6, D7 = 7,
+
 	// Selectors.
 	ES, CS, SS, DS, FS, GS,
 
@@ -403,8 +418,6 @@ enum class Source: uint8_t {
 
 	/// The address included within this instruction should be used as the source.
 	DirectAddress,
-	// TODO: is this better eliminated in favour of an indirect
-	// source with a base() and index() of 0?
 
 	/// The immediate value included within this instruction should be used as the source.
 	Immediate,
@@ -414,8 +427,12 @@ enum class Source: uint8_t {
 	// Elsewhere, as an implementation detail, the low three bits of an indirect source
 	// are reused; (Indirect-1) is also used as a sentinel value but is not a valid member
 	// of the enum and isn't exposed externally.
+
+	/// The ScaleIndexBase associated with this source should be used, but
+	/// its base should be ignored (and is guaranteed to be zero if the default
+	/// getter is used).
+	IndirectNoBase = Indirect - 1,
 };
-constexpr Source SourceIndirectNoBase = Source(uint8_t(Source::Indirect) - 1);
 
 enum class Repetition: uint8_t {
 	None, RepE, RepNE
@@ -436,7 +453,12 @@ class ScaleIndexBase {
 	public:
 		constexpr ScaleIndexBase() noexcept {}
 		constexpr ScaleIndexBase(uint8_t sib) noexcept : sib_(sib) {}
-		constexpr ScaleIndexBase(int scale, Source index, Source base) noexcept : sib_(uint8_t(scale << 6 | (int(index != Source::None ? index : Source::eSI) << 3) | int(base))) {}
+		constexpr ScaleIndexBase(int scale, Source index, Source base) noexcept :
+			sib_(uint8_t(
+				scale << 6 |
+				(int(index != Source::None ? index : Source::eSI) << 3) |
+				int(base)
+			)) {}
 		constexpr ScaleIndexBase(Source index, Source base) noexcept : ScaleIndexBase(0, index, base) {}
 		constexpr explicit ScaleIndexBase(Source base) noexcept : ScaleIndexBase(0, Source::None, base) {}
 
@@ -459,6 +481,10 @@ class ScaleIndexBase {
 			return Source(sib_ & 0x7);
 		}
 
+		constexpr uint8_t without_base() const {
+			return uint8_t(sib_ & ~0x7);	// The base occupies the low three bits (cf. base()); keep only scale and index.
+		}
+
 		bool operator ==(const ScaleIndexBase &rhs) const {
 			// Permit either exact equality or index and base being equal
 			// but transposed with a scale of 1.
@@ -501,20 +527,26 @@ class DataPointer {
 		constexpr DataPointer(Source source, ScaleIndexBase sib) noexcept : source_(source), sib_(sib) {}
 
 		/// Constructs an indirect DataPointer referencing the given base, index and scale.
+		/// Automatically maps Source::Indirect to Source::IndirectNoBase if base is Source::None.
 		constexpr DataPointer(Source base, Source index, int scale) noexcept :
-			source_(base != Source::None ? Source::Indirect : SourceIndirectNoBase),
+			source_(base != Source::None ? Source::Indirect : Source::IndirectNoBase),
 			sib_(scale, index, base) {}
 
 		constexpr bool operator ==(const DataPointer &rhs) const {
 			// Require a SIB match only if source_ is ::Indirect or ::IndirectNoBase.
-			return source_ == rhs.source_ && (source_ < SourceIndirectNoBase || sib_ == rhs.sib_);
+			return
+				source_ == rhs.source_ && (
+					source_ < Source::IndirectNoBase ||
+					(source_ == Source::Indirect && sib_ == rhs.sib_) ||
+					(source_ == Source::IndirectNoBase && sib_.without_base() == rhs.sib_.without_base())
+				);
 		}
 
-		// TODO: determine whether conditionals below
-		// have introduced branching.
-
-		constexpr Source source() const {
-			return (source_ >= SourceIndirectNoBase) ? Source::Indirect : source_;
+		template <bool obscure_indirectNoBase = false> constexpr Source source() const {
+			if constexpr (obscure_indirectNoBase) {
+				return (source_ >= Source::IndirectNoBase) ? Source::Indirect : source_;
+			}
+			return source_;
 		}
 
 		constexpr int scale() const {
@@ -525,8 +557,11 @@ class DataPointer {
 			return sib_.index();
 		}
 
-		constexpr Source base() const {
-			return (source_ <= SourceIndirectNoBase) ? Source::None : sib_.base();
+		template <bool obscure_indirectNoBase = false> constexpr Source base() const {
+			if constexpr (obscure_indirectNoBase) {
+				return (source_ <= Source::IndirectNoBase) ? Source::None : sib_.base();
+			}
+			return sib_.base();
 		}
 
 	private:

From cf6a910630f8a244353a01409f83b6908e2467c8 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Wed, 9 Mar 2022 20:20:32 -0500
Subject: [PATCH 082/104] Handle no-base case directly in existing switch.

---
 InstructionSets/x86/DataPointerResolver.hpp | 52 +++++++++++++--------
 1 file changed, 33 insertions(+), 19 deletions(-)

diff --git a/InstructionSets/x86/DataPointerResolver.hpp b/InstructionSets/x86/DataPointerResolver.hpp
index c6754ee08..2a2197a25 100644
--- a/InstructionSets/x86/DataPointerResolver.hpp
+++ b/InstructionSets/x86/DataPointerResolver.hpp
@@ -152,6 +152,7 @@ template <Model model, typename RegistersT, typename MemoryT> class DataPointerR
 
 		/// Computes the effective address of @c pointer including any displacement applied by @c instruction.
 		/// @c pointer must be of type Source::Indirect.
+		template <bool obscured_indirectNoBase = true, bool has_base = true>
 		static uint32_t effective_address(
 			RegistersT &registers,
 			const Instruction<is_32bit(model)> &instruction,
@@ -211,6 +212,7 @@ template <typename DataT> void DataPointerResolver<model, RegistersT, MemoryT>::
 						rw(v, FS, i);		rw(v, GS, i);
 
 template <Model model, typename RegistersT, typename MemoryT>
+template <bool obscured_indirectNoBase, bool has_base>
 uint32_t DataPointerResolver<model, RegistersT, MemoryT>::effective_address(
 	RegistersT &registers,
 	const Instruction<is_32bit(model)> &instruction,
@@ -218,9 +220,11 @@ uint32_t DataPointerResolver<model, RegistersT, MemoryT>::effective_address(
 		using AddressT = typename Instruction<is_32bit(model)>::AddressT;
 		AddressT base = 0, index = 0;
 
-		switch(pointer.base()) {
-			default: break;
-			ALLREGS(base, false);
+		if constexpr (has_base) {
+			switch(pointer.base<obscured_indirectNoBase>()) {
+				default: break;
+				ALLREGS(base, false);
+			}
 		}
 
 		switch(pointer.index()) {
@@ -257,7 +261,7 @@ template <bool is_write, typename DataT> void DataPointerResolver<model, Registe
 	const Instruction<is_32bit(model)> &instruction,
 	DataPointer pointer,
 	DataT &value) {
-		const Source source = pointer.source();
+		const Source source = pointer.source<false>();
 
 		switch(source) {
 			default:
@@ -279,22 +283,32 @@ template <bool is_write, typename DataT> void DataPointerResolver<model, Registe
 				value = DataT(instruction.operand());
 			break;
 
-			case Source::Indirect: {
-				const auto address = effective_address(registers, instruction, pointer);
+#define indirect(has_base)	{								\
+	const auto address = effective_address<false, has_base>	\
+		(registers, instruction, pointer);					\
+															\
+	if constexpr (is_write) {								\
+		memory.template write(								\
+			instruction.data_segment(),						\
+			address,										\
+			value											\
+		);													\
+	} else {												\
+		value = memory.template read<DataT>(				\
+			instruction.data_segment(),						\
+			address											\
+		);													\
+	}														\
+}
+			case Source::IndirectNoBase:
+				indirect(false);
+			break;
+
+			case Source::Indirect:
+				indirect(true);
+			break;
+#undef indirect
 
-				if constexpr (is_write) {
-					memory.template write(
-						instruction.data_segment(),
-						address,
-						value
-					);
-				} else {
-					value = memory.template read<DataT>(
-						instruction.data_segment(),
-						address
-					);
-				}
-			}
 		}
 	}
 #undef ALLREGS

From 6dc99737543c44ed1e1488f8d78a65f5073cc633 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Thu, 10 Mar 2022 07:12:12 -0500
Subject: [PATCH 083/104] Incorporate length into `Instruction`.

---
 InstructionSets/x86/Decoder.cpp     |  4 +++-
 InstructionSets/x86/Instruction.hpp | 12 +++++++++---
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 8a376b8db..aff9e5fb0 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -908,7 +908,9 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				repetition_,
 				DataSize(operation_size_),
 				static_cast<typename InstructionT::DisplacementT>(displacement_),
-				static_cast<typename InstructionT::ImmediateT>(operand_))
+				static_cast<typename InstructionT::ImmediateT>(operand_),
+				consumed_
+			)
 		);
 		reset_parsing();
 		return result;
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index d29c89656..7e6f4bfe8 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -579,7 +579,8 @@ template<bool is_32bit> class Instruction {
 				sources_ == rhs.sources_ &&
 				displacement_ == rhs.displacement_ &&
 				operand_ == rhs.operand_ &&
-				sib_ == rhs.sib_;
+				sib_ == rhs.sib_ &&
+				length_ == rhs.length_;
 		}
 
 		using DisplacementT = typename std::conditional<is_32bit, int32_t, int16_t>::type;
@@ -637,6 +638,7 @@ template<bool is_32bit> class Instruction {
 		// Fields yet to be properly incorporated...
 		ScaleIndexBase sib_;
 		AddressSize address_size_ = AddressSize::b16;
+		int length_ = 0;
 
 	public:
 		/// @returns The number of bytes used for meaningful content within this class. A receiver must use at least @c sizeof(Instruction) bytes
@@ -671,6 +673,8 @@ template<bool is_32bit> class Instruction {
 		DisplacementT displacement() const	{	return displacement_;					}
 		ImmediateT operand() const			{	return operand_;						}
 
+		int length() const { return length_; }
+
 		Instruction() noexcept {}
 		Instruction(
 			Operation operation,
@@ -683,7 +687,8 @@ template<bool is_32bit> class Instruction {
 			Repetition repetition,
 			DataSize operation_size,
 			DisplacementT displacement,
-			ImmediateT operand) noexcept :
+			ImmediateT operand,
+			int length) noexcept :
 				operation(operation),
 				repetition_size_(uint8_t((int(operation_size) << 2) | int(repetition))),
 				sources_(uint16_t(
@@ -695,7 +700,8 @@ template<bool is_32bit> class Instruction {
 				displacement_(displacement),
 				operand_(operand),
 				sib_(sib),
-				address_size_(address_size) {}
+				address_size_(address_size),
+				length_(length) {}
 };
 
 // TODO: repack.

From 673ffc50da7d7aa81e735d712f735b9a3abc30ee Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Thu, 10 Mar 2022 15:14:50 -0500
Subject: [PATCH 084/104] Switch to intended compact version of `Instruction`.

---
 InstructionSets/x86/Decoder.cpp               |   1 +
 InstructionSets/x86/Instruction.hpp           | 254 ++++++++++++------
 .../Clock SignalTests/x86DataPointerTests.mm  |   8 -
 .../Mac/Clock SignalTests/x86DecoderTests.mm  |  18 ++
 4 files changed, 185 insertions(+), 96 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index aff9e5fb0..3b71c2e7d 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -830,6 +830,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 
 		if(expects_sib && (source_ == Source::Indirect | destination_ == Source::Indirect)) {
 			phase_ = Phase::ScaleIndexBase;
+			// TODO: test for IndirectNoBase.
 		} else {
 			phase_ = (displacement_size_ != DataSize::None || operand_size_ != DataSize::None) ? Phase::DisplacementOrOperand : Phase::ReadyToPost;
 		}
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 7e6f4bfe8..a768b3058 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -434,6 +434,23 @@ enum class Source: uint8_t {
 	IndirectNoBase = Indirect - 1,
 };
 
+constexpr Source default_data_segment([[maybe_unused]] Operation operation) {
+	// TODO: is this really necessary, or can ::DS always be default?
+	// i.e. can the stack operations actually take a segment override?
+	// If not then the actual implementations just won't ask about a segment.
+//	constexpr std::set<Operation> stack_ops = {
+//
+//	};
+//
+//	if(
+//		operation == Operation::PUSH ||
+//		operation == Operation::PUSHF ||
+//		operation == Operation::POP) {
+//		return Source::SS;
+//	}
+	return Source::DS;
+}
+
 enum class Repetition: uint8_t {
 	None, RepE, RepNE
 };
@@ -497,6 +514,10 @@ class ScaleIndexBase {
 				);
 		}
 
+		operator uint8_t() const {
+			return sib_;
+		}
+
 	private:
 		// Data is stored directly as an 80386 SIB byte.
 		uint8_t sib_ = 0;
@@ -571,109 +592,147 @@ class DataPointer {
 
 template<bool is_32bit> class Instruction {
 	public:
-		Operation operation = Operation::Invalid;
+		const Operation operation = Operation::Invalid;
 
-		bool operator ==(const Instruction &rhs) const {
-			return
-				repetition_size_ == rhs.repetition_size_ &&
-				sources_ == rhs.sources_ &&
-				displacement_ == rhs.displacement_ &&
-				operand_ == rhs.operand_ &&
-				sib_ == rhs.sib_ &&
-				length_ == rhs.length_;
+		bool operator ==(const Instruction<is_32bit> &rhs) const {
+			if(	operation != rhs.operation ||
+				mem_exts_source_ != rhs.mem_exts_source_ ||
+				source_data_dest_sib_ != rhs.source_data_dest_sib_) {
+				return false;
+			}
+
+			// Have already established above that this and RHS have the
+			// same extensions, if any.
+			const int extension_count = has_length_extension() + has_displacement() + has_operand();
+			for(int c = 0; c < extension_count; c++) {
+				if(extensions_[c] != rhs.extensions_[c]) return false;
+			}
+
+			return true;
 		}
 
 		using DisplacementT = typename std::conditional<is_32bit, int32_t, int16_t>::type;
 		using ImmediateT = typename std::conditional<is_32bit, uint32_t, uint16_t>::type;
 		using AddressT = ImmediateT;
 
-		/* Note to self — current thinking is:
-
-			First 32bits:
-				5 bits source;
-				5 bits dest;
-				5 bits partial SIB, combined with three low bits of source or dest if indirect;
-				8 bits operation;
-				4 bits original instruction size;
-				2 bits data size;
-				1 bit memory size;
-				2 bits extension flags.
-
-			Extensions (16 or 32 bit, depending on templated size):
-				1) reptition + segment override + lock + original instruction size (= 10 bits);
-				2) displacement;
-				3) immediate operand.
-
-			Presence or absence of extensions is dictated by:
-				* instruction size = 0 => the repetition, etc extension (including the real extension size); and
-				* the extension flags for displacement and/or immediate.
-
-			Therefore an instruction's footprint is:
-				* 4–8 bytes (16-bit processors);
-				* 4–12 bytes (32-bit processors).
-
-			I'll then implement a collection suited to packing these things based on their
-			packing_size(), and later iterating them.
-
-			To verify: the 8086 allows unlimited-length instructions (which I'll probably handle by
-			generating length-15 NOPs and not resetting parser state), the 80386 limits them to
-			15 bytes, but what do the processors in between do?
-		*/
-
 	private:
-		// b0, b1: a Repetition;
-		// b2+: operation size.
-		uint8_t repetition_size_ = 0;
+		// Packing and encoding of fields is admittedly somewhat convoluted; what this
+		// achieves is that instructions will be sized:
+		//
+		//	four bytes + up to three extension words
+		//	(two bytes for 16-bit instructions, four for 32)
+		//
+		// Two of the extension words are used to retain an operand and displacement
+		// if the instruction has those. The other can store sizes greater than 15
+		// bytes (for earlier processors), plus any repetition, segment override or
+		// lock prefixes.
 
-		// b0–b5: source;
-		// b6–b11: destination;
-		// b12–b14: segment override;
-		// b15: lock.
-		uint16_t sources_ = 0;
+		// b7: address size;
+		// b6: has displacement;
+		// b5: has operand;
+		// [b4, b0]: source.
+		const uint8_t mem_exts_source_ = 0xff;
 
-		// Unpackable fields.
-		DisplacementT displacement_ = 0;
-		ImmediateT operand_ = 0;		// ... or used to store a segment for far operations.
+		bool has_displacement() const {
+			return mem_exts_source_ & (1 << 6);
+		}
+		bool has_operand() const {
+			return mem_exts_source_ & (1 << 5);
+		}
 
-		// Fields yet to be properly incorporated...
-		ScaleIndexBase sib_;
-		AddressSize address_size_ = AddressSize::b16;
-		int length_ = 0;
+		// [b15, b14]: data size;
+		// [b13, b10]: source length (0 => has length extension);
+		// [b9, b5]: top five of SIB;
+		// [b4, b0]: dest.
+		const uint16_t source_data_dest_sib_ = 0xffff;
+
+		bool has_length_extension() const {
+			return !((source_data_dest_sib_ >> 10) & 15);
+		}
+
+		// {length extension}, {operand}, {displacement}.
+		// If length extension is present then:
+		//
+		//	[b15, b6]: source length;
+		//	[b5, b4]: repetition;
+		//	[b3, b1]: segment override;
+		//	b0: lock.
+		ImmediateT extensions_[3];
 
 	public:
 		/// @returns The number of bytes used for meaningful content within this class. A receiver must use at least @c sizeof(Instruction) bytes
 		/// to store an @c Instruction but is permitted to reuse the trailing sizeof(Instruction) - packing_size() for any purpose it likes. Teleologically,
 		/// this allows a denser packing of instructions into containers.
-		size_t packing_size() const		{	return sizeof(*this);	/* TODO */	}
+		size_t packing_size() const	{
+			return
+				offsetof(Instruction<is_32bit>, extensions) +
+				(has_displacement() + has_operand() + has_length_extension()) * sizeof(ImmediateT);
+		}
 
-		DataPointer source() const		{	return DataPointer(Source(sources_ & 0x3f), sib_);			}
-		DataPointer destination() const	{	return DataPointer(Source((sources_ >> 6) & 0x3f), sib_);	}
-		bool lock() const				{	return sources_ & 0x8000;									}
+	private:
+		// A lookup table to help with stripping parts of the SIB that have been
+		// hidden within the source/destination fields.
+		static constexpr uint8_t sib_masks[] = {
+			0x1f, 0x1f, 0x1f, 0x18
+		};
+
+	public:
+		DataPointer source() const {
+			return DataPointer(
+				Source(mem_exts_source_ & sib_masks[(mem_exts_source_ >> 3) & 3]),
+				((source_data_dest_sib_ >> 2) & 0xf8) | (mem_exts_source_ & 0x07)
+			);
+		}
+		DataPointer destination() const	{
+			return DataPointer(
+				Source(source_data_dest_sib_ & sib_masks[(source_data_dest_sib_ >> 3) & 3]),
+				((source_data_dest_sib_ >> 2) & 0xf8) | (source_data_dest_sib_ & 0x07)
+			);
+		}
+		bool lock() const {
+			return has_length_extension() && extensions_[0]&1;
+		}
 
 		AddressSize address_size() const {
-			return AddressSize(address_size_);
+			return AddressSize(mem_exts_source_ >> 7);
 		}
 		Source data_segment() const {
-			const auto segment_override = Source(
+			if(!has_length_extension()) return default_data_segment(operation);
+			return Source(
 				int(Source::ES) +
-				((sources_ >> 12) & 7)
+				((extensions_[0] >> 1) & 7)
 			);
-			if(segment_override != Source::None) return segment_override;
-
-			// TODO: default source should be SS for anything touching the stack.
-			return Source::DS;
 		}
 
-		Repetition repetition() const	{	return Repetition(repetition_size_ & 3);	}
-		DataSize operation_size() const {	return DataSize(repetition_size_ >> 2);		}
+		Repetition repetition() const {
+			if(!has_length_extension()) return Repetition::None;
+			return Repetition((extensions_[0] >> 4) & 3);
+		}
+		DataSize operation_size() const {
+			return DataSize(source_data_dest_sib_ >> 14);
+		}
 
-		uint16_t segment() const		{	return uint16_t(operand_);					}
-		DisplacementT offset() const	{	return displacement_;						}
+		int length() const {
+			const int short_length = (source_data_dest_sib_ >> 10) & 15;
+			if(short_length) return short_length;
+			return extensions_[0] >> 6;
+		}
 
-		DisplacementT displacement() const	{	return displacement_;					}
-		ImmediateT operand() const			{	return operand_;						}
+		ImmediateT operand() const	{
+			const ImmediateT ops[] = {0, extensions_[has_length_extension()]};
+			return ops[has_operand()];
+		}
+		DisplacementT displacement() const {
+			return DisplacementT(offset());
+		}
 
-		int length() const { return length_; }
+		uint16_t segment() const		{
+			return uint16_t(operand());
+		}
+		ImmediateT offset() const	{
+			const ImmediateT offsets[] = {0, extensions_[has_length_extension() + has_operand()]};
+			return offsets[has_displacement()];
+		}
 
 		Instruction() noexcept {}
 		Instruction(
@@ -685,27 +744,46 @@ template<bool is_32bit> class Instruction {
 			AddressSize address_size,
 			Source segment_override,
 			Repetition repetition,
-			DataSize operation_size,
+			DataSize data_size,
 			DisplacementT displacement,
 			ImmediateT operand,
 			int length) noexcept :
 				operation(operation),
-				repetition_size_(uint8_t((int(operation_size) << 2) | int(repetition))),
-				sources_(uint16_t(
+				mem_exts_source_(uint8_t(
+					(int(address_size) << 7) |
+					(displacement ? 0x40 : 0x00) |
+					(operand ? 0x20 : 0x00) |
 					int(source) |
-					(int(destination) << 6) |
-					((int(segment_override) & 7) << 12) |
-					(int(lock) << 15)
+					(source == Source::Indirect ? (uint8_t(sib) & 7) : 0)
 				)),
-				displacement_(displacement),
-				operand_(operand),
-				sib_(sib),
-				address_size_(address_size),
-				length_(length) {}
+				source_data_dest_sib_(uint16_t(
+					(int(data_size) << 14) |
+					((
+						(lock || (segment_override != Source::None) || (length > 15) || (repetition != Repetition::None))
+					) ? 0 : (length << 10)) |
+					((uint8_t(sib) & 0xf8) << 2) |
+					int(destination) |
+					(destination == Source::Indirect ? (uint8_t(sib) & 7) : 0)
+				)) {
+
+				int extension = 0;
+				if(has_length_extension()) {
+					if(segment_override == Source::None) segment_override = default_data_segment(operation);
+					extensions_[extension] = ImmediateT(
+						(length << 6) | (int(repetition) << 4) | ((int(segment_override) & 7) << 1) | int(lock)
+					);
+					++extension;
+				}
+				if(has_operand()) {
+					extensions_[extension] = operand;
+					++extension;
+				}
+				extensions_[extension] = ImmediateT(displacement);
+			}
 };
 
-// TODO: repack.
-//static_assert(sizeof(Instruction) <= 8);
+static_assert(sizeof(Instruction<true>) <= 16);
+static_assert(sizeof(Instruction<false>) <= 10);
 
 }
 }
diff --git a/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm b/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm
index e9b01f5e9..8db794516 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DataPointerTests.mm	
@@ -18,14 +18,6 @@ using namespace InstructionSet::x86;
 
 @implementation x86DataPointerTests
 
-//- (InstructionSet::x86::Instruction<false>)instruction16WithSourceDataPointer:(DataPointer)pointer {
-//	return x86::Instruction<false>{
-//		InstructionSet::x86::Operation::AAA,
-//		S
-//	};
-//}
-
-
 - (void)test16bitSize1 {
 	const DataPointer indirectPointer(
 		Source::eAX, Source::eDI, 0
diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index d5778bd65..1df0703a2 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -539,4 +539,22 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	test(instructions[63], Operation::JPCX, 0, 0xd4 - 0x9d);
 }
 
+- (void)testSourceSIB1 {
+	const auto instructions = decode<Model::i80386>({
+		0x62, 0x90, 0x90, 0xdf, 0xcd, 0xf9
+	}, true);
+
+	XCTAssertEqual(instructions.size(), 1);
+	test(instructions[0], DataSize::DWord, Operation::BOUND, ScaleIndexBase(Source::eAX), Source::eDX, 0, -0x6322070);
+}
+
+- (void)testSourceSIB2 {
+	const auto instructions = decode<Model::i80386>({
+		0x81, 0x47, 0xbe, 0xa9, 0x3a, 0x68, 0x9f
+	}, true);
+
+	XCTAssertEqual(instructions.size(), 1);
+	test(instructions[0], DataSize::DWord, Operation::ADD, Source::Immediate, ScaleIndexBase(Source::eDI), 0x9f683aa9, -0x42);
+}
+
 @end

From a2ae3771eb39002d9bf447b95652864c86eca0ec Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Thu, 10 Mar 2022 15:45:56 -0500
Subject: [PATCH 085/104] Add test for switch to Source::IndirectNoBase.

---
 InstructionSets/x86/Decoder.cpp                     | 7 ++++++-
 OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm | 4 ++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 3b71c2e7d..08deedfd2 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -830,7 +830,6 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 
 		if(expects_sib && (source_ == Source::Indirect | destination_ == Source::Indirect)) {
 			phase_ = Phase::ScaleIndexBase;
-			// TODO: test for IndirectNoBase.
 		} else {
 			phase_ = (displacement_size_ != DataSize::None || operand_size_ != DataSize::None) ? Phase::DisplacementOrOperand : Phase::ReadyToPost;
 		}
@@ -845,6 +844,12 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 		++source;
 		++consumed_;
 
+		// Potentially record the lack of a base.
+		if(displacement_size_ == DataSize::None && (uint8_t(sib_)&7) == 5) {
+			source_ = (source_ == Source::Indirect) ? Source::IndirectNoBase : source_;
+			destination_ = (destination_ == Source::Indirect) ? Source::IndirectNoBase : destination_;
+		}
+
 		phase_ = (displacement_size_ != DataSize::None || operand_size_ != DataSize::None) ? Phase::DisplacementOrOperand : Phase::ReadyToPost;
 	}
 
diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index 1df0703a2..0e3b737e9 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -539,7 +539,7 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	test(instructions[63], Operation::JPCX, 0, 0xd4 - 0x9d);
 }
 
-- (void)testSourceSIB1 {
+- (void)testSourceModRegRM1 {
 	const auto instructions = decode<Model::i80386>({
 		0x62, 0x90, 0x90, 0xdf, 0xcd, 0xf9
 	}, true);
@@ -548,7 +548,7 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	test(instructions[0], DataSize::DWord, Operation::BOUND, ScaleIndexBase(Source::eAX), Source::eDX, 0, -0x6322070);
 }
 
-- (void)testSourceSIB2 {
+- (void)testSourceModRegRM2 {
 	const auto instructions = decode<Model::i80386>({
 		0x81, 0x47, 0xbe, 0xa9, 0x3a, 0x68, 0x9f
 	}, true);

From bf7faa80c11b3e79fadc987373a5914378334a40 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Thu, 10 Mar 2022 16:47:54 -0500
Subject: [PATCH 086/104] Add TODO.

---
 InstructionSets/x86/Decoder.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 08deedfd2..dd016adb2 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -14,6 +14,12 @@
 
 using namespace InstructionSet::x86;
 
+// TODO: instruction length limits:
+//
+//	8086/80186: none
+//	80286: 10 bytes
+//	80386: 15 bytes
+
 template <Model model>
 std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(const uint8_t *source, size_t length) {
 	const uint8_t *const end = source + length;

From 641e0c1afc704f3f460f1e4ed37e8373adaa3a4c Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Thu, 10 Mar 2022 20:27:35 -0500
Subject: [PATCH 087/104] Resolve default segment question.

---
 InstructionSets/x86/Instruction.hpp | 28 +++++++++-------------------
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index a768b3058..da0feb950 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -434,23 +434,6 @@ enum class Source: uint8_t {
 	IndirectNoBase = Indirect - 1,
 };
 
-constexpr Source default_data_segment([[maybe_unused]] Operation operation) {
-	// TODO: is this really necessary, or can ::DS always be default?
-	// i.e. can the stack operations actually take a segment override?
-	// If not then the actual implementations just won't ask about a segment.
-//	constexpr std::set<Operation> stack_ops = {
-//
-//	};
-//
-//	if(
-//		operation == Operation::PUSH ||
-//		operation == Operation::PUSHF ||
-//		operation == Operation::POP) {
-//		return Source::SS;
-//	}
-	return Source::DS;
-}
-
 enum class Repetition: uint8_t {
 	None, RepE, RepNE
 };
@@ -696,8 +679,13 @@ template<bool is_32bit> class Instruction {
 		AddressSize address_size() const {
 			return AddressSize(mem_exts_source_ >> 7);
 		}
+
+		/// @returns @c Source::DS if no segment override was found; the overridden segment otherwise.
+		/// On x86 a segment override cannot modify the segment used as a destination in string instructions,
+		/// or that used by stack instructions, but this function does not spend the time necessary to provide
+		/// the correct default for those.
 		Source data_segment() const {
-			if(!has_length_extension()) return default_data_segment(operation);
+			if(!has_length_extension()) return Source::DS;
 			return Source(
 				int(Source::ES) +
 				((extensions_[0] >> 1) & 7)
@@ -768,7 +756,9 @@ template<bool is_32bit> class Instruction {
 
 				int extension = 0;
 				if(has_length_extension()) {
-					if(segment_override == Source::None) segment_override = default_data_segment(operation);
+					// As per the rule stated for data_segment(), this class provides ::DS for any instruction
+					// that doesn't have a segment override.
+					if(segment_override == Source::None) segment_override = Source::DS;
 					extensions_[extension] = ImmediateT(
 						(length << 6) | (int(repetition) << 4) | ((int(segment_override) & 7) << 1) | int(lock)
 					);

From f92ffddb8243ad6c1e39ab512f8e3d4af1ed23ac Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Thu, 10 Mar 2022 20:47:56 -0500
Subject: [PATCH 088/104] Add instruction length limits.

---
 InstructionSets/x86/Decoder.cpp | 26 ++++++++++++++++++--------
 InstructionSets/x86/Decoder.hpp | 21 ++++++++++++++++-----
 2 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index dd016adb2..dce0ca9ed 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -14,15 +14,17 @@
 
 using namespace InstructionSet::x86;
 
-// TODO: instruction length limits:
-//
-//	8086/80186: none
-//	80286: 10 bytes
-//	80386: 15 bytes
-
 template <Model model>
 std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(const uint8_t *source, size_t length) {
-	const uint8_t *const end = source + length;
+	// Instruction length limits:
+	//
+	//	8086/80186: none
+	//	80286: 10 bytes
+	//	80386: 15 bytes
+	constexpr int max_instruction_length = model >= Model::i80386 ? 15 : (model == Model::i80286 ? 10 : 0);
+
+	const uint8_t *const buffer_end = source + length;
+	const uint8_t *const end = max_instruction_length ? std::min(buffer_end, source + max_instruction_length - consumed_) : buffer_end;
 
 	// MARK: - Prefixes (if present) and the opcode.
 
@@ -436,6 +438,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 	}
 
 	// MARK: - Additional F page of instructions.
+
 	if(phase_ == Phase::InstructionPageF && source != end) {
 		// Update the instruction acquired.
 		const uint8_t instr = *source;
@@ -861,7 +864,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 
 	// MARK: - Displacement and operand.
 
-	if(phase_ == Phase::DisplacementOrOperand && source != end) {
+	if(phase_ == Phase::DisplacementOrOperand) {
 		const auto required_bytes = int(byte_size(displacement_size_) + byte_size(operand_size_));
 
 		const int outstanding_bytes = required_bytes - operand_bytes_;
@@ -928,6 +931,13 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 		return result;
 	}
 
+	// Check for a too-long instruction.
+	if(max_instruction_length && consumed_ == max_instruction_length) {
+		const auto result = std::make_pair(consumed_, InstructionT());
+		reset_parsing();
+		return result;
+	}
+
 	// i.e. not done yet.
 	return std::make_pair(0, InstructionT());
 }
diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index 6c837df2a..4491ce959 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -28,11 +28,22 @@ template <Model model> class Decoder {
 		using InstructionT = Instruction<model >= Model::i80386>;
 
 		/*!
-			@returns an @c Instruction plus a size; a positive size to indicate successful decoding; a
-				negative size specifies the [negatived] number of further bytes the caller should ideally
-				collect before calling again. The caller is free to call with fewer, but may not get a decoded
-				instruction in response, and the decoder may still not be able to complete decoding
-				even if given that number of bytes.
+			@returns an @c Instruction plus a size; a positive size indicates successful decoding of
+				an instruction that was that many bytes long in total; a negative size specifies the [negated]
+				minimum number of further bytes the caller should ideally collect before calling again. The
+				caller is free to call with fewer, but may not get a decoded instruction in response, and the
+				decoder may still not be able to complete decoding even if given that number of bytes.
+
+				Successful decoding is defined to mean that all decoding steps are complete. The output
+				may still be an illegal instruction (indicated by Operation::Invalid), if the byte sequence
+				supplied cannot form a valid instruction.
+
+			@discussion although instructions also contain an indicator of their length, on chips prior
+				to the 80286 there is no limit to instruction length and that could in theory overflow the available
+				storage, which can describe instructions only up to 1kb in size.
+
+				The 80286 and 80386 have instruction length limits of 10 and 15 bytes respectively, so
+				cannot overflow the field.
 		*/
 		std::pair<int, InstructionT> decode(const uint8_t *source, size_t length);
 

From 572dc40e6b39e756667d577f2ad0f2845b4df3d5 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Fri, 11 Mar 2022 09:47:23 -0500
Subject: [PATCH 089/104] Allow assignments.

---
 InstructionSets/x86/Instruction.hpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index da0feb950..8ec9890d3 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -575,7 +575,7 @@ class DataPointer {
 
 template<bool is_32bit> class Instruction {
 	public:
-		const Operation operation = Operation::Invalid;
+		Operation operation = Operation::Invalid;
 
 		bool operator ==(const Instruction<is_32bit> &rhs) const {
 			if(	operation != rhs.operation ||
@@ -614,7 +614,7 @@ template<bool is_32bit> class Instruction {
 		// b6: has displacement;
 		// b5: has operand;
 		// [b4, b0]: source.
-		const uint8_t mem_exts_source_ = 0xff;
+		uint8_t mem_exts_source_ = 0xff;
 
 		bool has_displacement() const {
 			return mem_exts_source_ & (1 << 6);
@@ -627,7 +627,7 @@ template<bool is_32bit> class Instruction {
 		// [b13, b10]: source length (0 => has length extension);
 		// [b9, b5]: top five of SIB;
 		// [b4, b0]: dest.
-		const uint16_t source_data_dest_sib_ = 0xffff;
+		uint16_t source_data_dest_sib_ = 0xffff;
 
 		bool has_length_extension() const {
 			return !((source_data_dest_sib_ >> 10) & 15);
@@ -722,8 +722,10 @@ template<bool is_32bit> class Instruction {
 			return offsets[has_displacement()];
 		}
 
-		Instruction() noexcept {}
-		Instruction(
+		constexpr Instruction() noexcept {}
+		constexpr Instruction(Operation operation, int length) noexcept :
+			Instruction(operation, Source::None, Source::None, ScaleIndexBase(), false, AddressSize::b16, Source::None, Repetition::None, DataSize::None, 0, 0, length) {}
+		constexpr Instruction(
 			Operation operation,
 			Source source,
 			Source destination,

From dc8cff364fbde2eb8b2a02027650d031e0f0045f Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Fri, 11 Mar 2022 09:48:02 -0500
Subject: [PATCH 090/104] Switch to common test.

---
 InstructionSets/x86/Decoder.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp
index 4491ce959..02af8bfba 100644
--- a/InstructionSets/x86/Decoder.hpp
+++ b/InstructionSets/x86/Decoder.hpp
@@ -25,7 +25,7 @@ namespace x86 {
 */
 template <Model model> class Decoder {
 	public:
-		using InstructionT = Instruction<model >= Model::i80386>;
+		using InstructionT = Instruction<is_32bit(model)>;
 
 		/*!
 			@returns an @c Instruction plus a size; a positive size indicates successful decoding of

From 91d75d77047b6b3730e29dcd5bdb31236e693cfe Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Fri, 11 Mar 2022 09:48:26 -0500
Subject: [PATCH 091/104] Switch strategy on 8086 instruction lengths.

---
 InstructionSets/x86/Decoder.cpp | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index dce0ca9ed..245a81ec7 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -18,13 +18,16 @@ template <Model model>
 std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(const uint8_t *source, size_t length) {
 	// Instruction length limits:
 	//
-	//	8086/80186: none
+	//	8086/80186: none*
 	//	80286: 10 bytes
 	//	80386: 15 bytes
-	constexpr int max_instruction_length = model >= Model::i80386 ? 15 : (model == Model::i80286 ? 10 : 0);
-
-	const uint8_t *const buffer_end = source + length;
-	const uint8_t *const end = max_instruction_length ? std::min(buffer_end, source + max_instruction_length - consumed_) : buffer_end;
+	//
+	// * but, can treat internally as a limit of 65536 bytes — after that distance the IP will
+	// be back to wherever it started, so it's safe to spit out a NOP and reset parsing
+	// without any loss of context. This reduces the risk of the decoder tricking a caller into
+	// an infinite loop.
+	constexpr int max_instruction_length = model >= Model::i80386 ? 15 : (model == Model::i80286 ? 10 : 65536);
+	const uint8_t *const end = source + std::min(length, size_t(max_instruction_length - consumed_));
 
 	// MARK: - Prefixes (if present) and the opcode.
 
@@ -932,8 +935,13 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 	}
 
 	// Check for a too-long instruction.
-	if(max_instruction_length && consumed_ == max_instruction_length) {
-		const auto result = std::make_pair(consumed_, InstructionT());
+	if(consumed_ == max_instruction_length) {
+		std::pair<int, InstructionT> result;
+		if(max_instruction_length == 65536) {
+			result = std::make_pair(consumed_, InstructionT(Operation::NOP, consumed_));
+		} else {
+			result = std::make_pair(consumed_, InstructionT());
+		}
 		reset_parsing();
 		return result;
 	}

From 40cafb95ed053a669ffc2e16442bffcab1d3ec88 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Fri, 11 Mar 2022 09:48:51 -0500
Subject: [PATCH 092/104] Add 286 and 386 instruction length tests.

---
 .../Mac/Clock SignalTests/x86DecoderTests.mm  | 75 ++++++++++++++++++-
 1 file changed, 73 insertions(+), 2 deletions(-)

diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index 0e3b737e9..68bf1abe1 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -29,14 +29,14 @@ template <typename InstructionT> void test(
 	const InstructionT &instruction,
 	DataSize size,
 	Operation operation,
-	InstructionSet::x86::DataPointer source,
+	std::optional<InstructionSet::x86::DataPointer> source,
 	std::optional<InstructionSet::x86::DataPointer> destination = std::nullopt,
 	std::optional<typename InstructionT::ImmediateT> operand = std::nullopt,
 	std::optional<typename InstructionT::DisplacementT> displacement = std::nullopt) {
 
 	XCTAssertEqual(instruction.operation_size(), InstructionSet::x86::DataSize(size));
 	XCTAssertEqual(instruction.operation, operation);
-	XCTAssert(instruction.source() == source);
+	if(source) XCTAssert(instruction.source() == *source);
 	if(destination) XCTAssert(instruction.destination() == *destination);
 	if(operand)	XCTAssertEqual(instruction.operand(), *operand);
 	if(displacement) XCTAssertEqual(instruction.displacement(), *displacement);
@@ -557,4 +557,75 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	test(instructions[0], DataSize::DWord, Operation::ADD, Source::Immediate, ScaleIndexBase(Source::eDI), 0x9f683aa9, -0x42);
 }
 
+- (void)test286LengthLimit {
+	const auto instructions = decode<Model::i80286>({
+		0x90,
+		0x26, 0x90,
+		0x26, 0x26, 0x90,
+		0x26, 0x26, 0x26, 0x90,
+		0x26, 0x26, 0x26, 0x26, 0x90,
+		0x26, 0x26, 0x26, 0x26, 0x26, 0x90,
+		0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90,
+		0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90,
+		0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90,
+		0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90,
+		0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90,
+	});
+
+	XCTAssertEqual(instructions.size(), 12);
+	test(instructions[0], Operation::NOP);
+	test(instructions[1], Operation::NOP);
+	test(instructions[2], Operation::NOP);
+	test(instructions[3], Operation::NOP);
+	test(instructions[4], Operation::NOP);
+	test(instructions[5], Operation::NOP);
+	test(instructions[6], Operation::NOP);
+	test(instructions[7], Operation::NOP);
+	test(instructions[8], Operation::NOP);
+	test(instructions[9], Operation::NOP);
+	test(instructions[10], Operation::Invalid);
+	test(instructions[11], Operation::NOP);
+}
+
+ - (void)test386LengthLimit {
+	const auto instructions = decode<Model::i80386>({
+		0x90,
+		0x26, 0x90,
+		0x26, 0x26, 0x90,
+		0x26, 0x26, 0x26, 0x90,
+		0x26, 0x26, 0x26, 0x26, 0x90,
+		0x26, 0x26, 0x26, 0x26, 0x26, 0x90,
+		0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90,
+		0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90,
+		0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90,
+		0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90,
+		0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90,
+		0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90,
+		0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90,
+		0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90,
+		0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90,
+		0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x90,
+	});
+
+	XCTAssertEqual(instructions.size(), 17);
+	test(instructions[0], Operation::NOP);
+	test(instructions[1], Operation::NOP);
+	test(instructions[2], Operation::NOP);
+	test(instructions[3], Operation::NOP);
+	test(instructions[4], Operation::NOP);
+	test(instructions[5], Operation::NOP);
+	test(instructions[6], Operation::NOP);
+	test(instructions[7], Operation::NOP);
+	test(instructions[8], Operation::NOP);
+	test(instructions[9], Operation::NOP);
+	test(instructions[10], Operation::NOP);
+	test(instructions[11], Operation::NOP);
+	test(instructions[12], Operation::NOP);
+	test(instructions[13], Operation::NOP);
+	test(instructions[14], Operation::NOP);
+	test(instructions[15], Operation::Invalid);
+	test(instructions[16], Operation::NOP);
+}
+
+
 @end

From c744a97e3c8be996795d78da9034c8d02932169e Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Fri, 11 Mar 2022 11:55:26 -0500
Subject: [PATCH 093/104] Ensure no extensions for default constructed
 Instruction.

---
 InstructionSets/x86/Instruction.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 8ec9890d3..4e797fbca 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -614,7 +614,7 @@ template<bool is_32bit> class Instruction {
 		// b6: has displacement;
 		// b5: has operand;
 		// [b4, b0]: source.
-		uint8_t mem_exts_source_ = 0xff;
+		uint8_t mem_exts_source_ = 0;
 
 		bool has_displacement() const {
 			return mem_exts_source_ & (1 << 6);
@@ -627,7 +627,7 @@ template<bool is_32bit> class Instruction {
 		// [b13, b10]: source length (0 => has length extension);
 		// [b9, b5]: top five of SIB;
 		// [b4, b0]: dest.
-		uint16_t source_data_dest_sib_ = 0xffff;
+		uint16_t source_data_dest_sib_ = 1 << 10;	// So that ::Invalid doesn't seem to have a length extension.
 
 		bool has_length_extension() const {
 			return !((source_data_dest_sib_ >> 10) & 15);
@@ -640,7 +640,7 @@ template<bool is_32bit> class Instruction {
 		//	[b5, b4]: repetition;
 		//	[b3, b1]: segment override;
 		//	b0: lock.
-		ImmediateT extensions_[3];
+		ImmediateT extensions_[3]{};
 
 	public:
 		/// @returns The number of bytes used for meaningful content within this class. A receiver must use at least @c sizeof(Instruction) bytes

From 727342134c503534c0944e46bbaf0065a1066ece Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Fri, 11 Mar 2022 11:55:41 -0500
Subject: [PATCH 094/104] Add 8086 length limit test.

---
 .../Mac/Clock SignalTests/x86DecoderTests.mm  | 50 ++++++++++++++-----
 1 file changed, 38 insertions(+), 12 deletions(-)

diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index 68bf1abe1..b702b9605 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -64,8 +64,28 @@ template <typename InstructionT> void test_far(
 
 // MARK: - Decoder
 
+template <Model model, typename CollectionT>
+std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT>
+decode(const CollectionT &stream, bool set_32_bit = false) {
+	// Build instructions list with a byte-by-byte decoding.
+	std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> instructions;
+
+	InstructionSet::x86::Decoder<model> decoder;
+	decoder.set_32bit_protected_mode(set_32_bit);
+
+	for(uint8_t item: stream) {
+		const auto [size, next] = decoder.decode(&item, 1);
+		if(size > 0) {
+			instructions.push_back(next);
+		}
+	}
+
+	return instructions;
+}
+
 template <Model model>
-std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(const std::initializer_list<uint8_t> &stream, bool set_32_bit = false) {
+std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT>
+decode(const std::initializer_list<uint8_t> &stream, bool set_32_bit = false) {
 	// Decode by offering up all data at once.
 	std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> instructions;
 	InstructionSet::x86::Decoder<model> decoder;
@@ -80,18 +100,17 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	}
 
 	// Grab a byte-at-a-time decoding and check that it matches the previous.
-	{
-		InstructionSet::x86::Decoder<model> decoder;
-		decoder.set_32bit_protected_mode(set_32_bit);
+	const auto byte_instructions = decode<model>(std::vector<uint8_t>{stream}, set_32_bit);
 
-		auto previous_instruction = instructions.begin();
-		for(auto item: stream) {
-			const auto [size, next] = decoder.decode(&item, 1);
-			if(size > 0) {
-				XCTAssert(next == *previous_instruction);
-				++previous_instruction;
-			}
-		}
+	XCTAssertEqual(byte_instructions.size(), instructions.size());
+
+	auto previous_instruction = instructions.begin();
+	auto byte_instruction = byte_instructions.begin();
+	while(previous_instruction != instructions.end()) {
+		XCTAssert(*previous_instruction == *byte_instruction);
+
+		++previous_instruction;
+		++byte_instruction;
 	}
 
 	return instructions;
@@ -557,6 +576,13 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
 	test(instructions[0], DataSize::DWord, Operation::ADD, Source::Immediate, ScaleIndexBase(Source::eDI), 0x9f683aa9, -0x42);
 }
 
+- (void)test8086LengthLimit {
+	const std::vector<uint8_t> all_prefix(65536, 0x26);
+	const auto instructions = decode<Model::i8086>(all_prefix);
+	XCTAssertEqual(instructions.size(), 1);
+	test(instructions[0], Operation::NOP);
+}
+
 - (void)test286LengthLimit {
 	const auto instructions = decode<Model::i80286>({
 		0x90,

From 9b4048ec6e30065065ac41b2f0b00b84fa389339 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Fri, 11 Mar 2022 12:46:07 -0500
Subject: [PATCH 095/104] The address size modifier doesn't seem to affect far
 address sizes.

It's meant to affect only instructions with operands that reside in memory, I think. So probably only ::DirectAddress in my nomenclature. More research to do.
---
 InstructionSets/x86/Decoder.cpp               |  4 +-
 .../Mac/Clock SignalTests/x86DecoderTests.mm  | 40 +++++++++++++++++++
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 245a81ec7..4e863ebe2 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -84,12 +84,12 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 	phase_ = Phase::DisplacementOrOperand;			\
 	operand_size_ = size
 
-/// Handles far CALL and far JMP — fixed four byte operand operations.
+/// Handles far CALL and far JMP — fixed four or six byte operand operations.
 #define Far(op)										\
 	operation_ = Operation::op;						\
 	phase_ = Phase::DisplacementOrOperand;			\
 	operand_size_ = DataSize::Word;					\
-	displacement_size_ = data_size(address_size_)
+	displacement_size_ = data_size(default_address_size_)
 
 /// Handles ENTER — a fixed three-byte operation.
 #define Displacement16Operand8(op)					\
diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index b702b9605..a5266214a 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -653,5 +653,45 @@ decode(const std::initializer_list<uint8_t> &stream, bool set_32_bit = false) {
 	test(instructions[16], Operation::NOP);
 }
 
+- (void)testAddressSizeModifier {
+	const auto instructions = decode<Model::i80386>({
+		0x67, 0xf3, 0x5d, 0x67, 0x3f, 0x67, 0x5a, 0x67, 0xea, 0x17, 0xa2, 0x38, 0x0b, 0xeb, 0xbc, 0x67,
+		0x4c, 0x67, 0x3a, 0x1f, 0x67, 0x00, 0x8d, 0xf9, 0x43, 0x67, 0xb1, 0x7c, 0x67, 0x88, 0xd1, 0x67,
+		0x31, 0xed, 0x67, 0x22, 0x00, 0x67, 0x79, 0xa7, 0x67, 0x87, 0x3c, 0x67, 0xd4, 0xa2, 0x67, 0x57,
+		0x67, 0x02, 0x21, 0x67, 0x48, 0x67, 0x33, 0x5d, 0xd7, 0x67, 0x3c, 0xe1, 0x67, 0x91, 0x67, 0x1b,
+		0x84, 0x43, 0x7f, 0x67, 0x15, 0xf6, 0x06, 0x2b, 0x6d
+	}, true);
+
+	XCTAssertEqual(instructions.size(), 22);
+
+	// addr16 repz pop ebp
+	// addr16 aas
+	// addr16 pop edx
+	// addr16 jmp 0xbceb:0xb38a217
+
+	// addr16 dec esp
+	// cmp    bl,BYTE PTR [bx]
+	// add    BYTE PTR
+	// addr16 mov cl,0x7c
+
+	// addr16 mov cl,dl
+	// addr16 xor ebp,ebp
+	// and    al,BYTE PTR [bx+si]
+	// addr16 jns 0xffffffcf
+
+	// xchg   DWORD PTR [si],edi
+	// addr16 aam 0xa2
+	// addr16 push edi
+	// add    ah,BYTE PTR [bx+di]
+
+	// addr16 dec eax
+	// xor    ebx,DWORD PTR
+	// addr16 cmp al,0xe1
+	// addr16 xchg ecx,eax
+
+	// sbb    eax,DWORD PTR
+	// addr16 adc eax,0x6d2b06f6
+
+}
 
 @end

From dc1d1f132e00e7f4314a837b4d9799a8ac19dae7 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Fri, 11 Mar 2022 13:01:02 -0500
Subject: [PATCH 096/104] Add one more address size modifier test.

---
 .../Mac/Clock SignalTests/x86DecoderTests.mm  | 40 ++++++-------------
 1 file changed, 13 insertions(+), 27 deletions(-)

diff --git a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm
index a5266214a..77118a266 100644
--- a/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
+++ b/OSBindings/Mac/Clock SignalTests/x86DecoderTests.mm	
@@ -662,36 +662,22 @@ decode(const std::initializer_list<uint8_t> &stream, bool set_32_bit = false) {
 		0x84, 0x43, 0x7f, 0x67, 0x15, 0xf6, 0x06, 0x2b, 0x6d
 	}, true);
 
+	// Lazy: just check that the right number of operations came out.
+	// Since the potential issue is reading the wrong size of address, that'll do.
 	XCTAssertEqual(instructions.size(), 22);
+}
 
-	// addr16 repz pop ebp
-	// addr16 aas
-	// addr16 pop edx
-	// addr16 jmp 0xbceb:0xb38a217
-
-	// addr16 dec esp
-	// cmp    bl,BYTE PTR [bx]
-	// add    BYTE PTR
-	// addr16 mov cl,0x7c
-
-	// addr16 mov cl,dl
-	// addr16 xor ebp,ebp
-	// and    al,BYTE PTR [bx+si]
-	// addr16 jns 0xffffffcf
-
-	// xchg   DWORD PTR [si],edi
-	// addr16 aam 0xa2
-	// addr16 push edi
-	// add    ah,BYTE PTR [bx+di]
-
-	// addr16 dec eax
-	// xor    ebx,DWORD PTR
-	// addr16 cmp al,0xe1
-	// addr16 xchg ecx,eax
-
-	// sbb    eax,DWORD PTR
-	// addr16 adc eax,0x6d2b06f6
+- (void)testAddressSizeModifierSIB {
+	const auto instructions = decode<Model::i80386>({
+		// add dword ptr [bx + si + 256], eax
+		0x67, 0x01, 0x80, 0x00, 0x01,
+		// add [eax + 256], eax
+		0x01, 0x80, 0x00, 0x01, 0x00, 0x00
+	}, true);
 
+	XCTAssertEqual(instructions.size(), 2);
+	test(instructions[0], DataSize::DWord, Operation::ADD, Source::eAX, ScaleIndexBase(Source::eBX, Source::eSI), 0, 0x100);
+	test(instructions[1], DataSize::DWord, Operation::ADD, Source::eAX, ScaleIndexBase(Source::eAX), 0, 0x100);
 }
 
 @end

From f694620087ddb8c4e7ecc85b85fc352ca660653f Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Fri, 11 Mar 2022 13:10:44 -0500
Subject: [PATCH 097/104] Resolve TODO.

---
 InstructionSets/x86/Decoder.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 4e863ebe2..34de2c9ba 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -610,7 +610,9 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 
 		Source memreg;
 
-		// TODO: can I just eliminate these lookup tables given the deliberate ordering within Source?
+		// These tables are fairly redundant due to the register ordering within
+		// Source, but act to improve readability and permit further Source
+		// reordering in the future.
 		constexpr Source reg_table[8] = {
 			Source::eAX,		Source::eCX,		Source::eDX,		Source::eBX,
 			Source::eSPorAH,	Source::eBPorCH,	Source::eSIorDH,	Source::eDIorBH,

From 4b4f92780e9a4da2ab957267b2b20a1067c25946 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Fri, 11 Mar 2022 13:24:45 -0500
Subject: [PATCH 098/104] Shuffle extension word order.

The primary objective here is simplifying index calculation, but as per the note it does also potentially open up options with regard to packing in the future.
---
 InstructionSets/x86/Instruction.hpp | 47 +++++++++++++++++++++--------
 1 file changed, 35 insertions(+), 12 deletions(-)

diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 4e797fbca..cfb67eec6 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -633,7 +633,8 @@ template<bool is_32bit> class Instruction {
 			return !((source_data_dest_sib_ >> 10) & 15);
 		}
 
-		// {length extension}, {operand}, {displacement}.
+		// {operand}, {displacement}, {length extension}.
+		//
 		// If length extension is present then:
 		//
 		//	[b15, b6]: source length;
@@ -642,6 +643,16 @@ template<bool is_32bit> class Instruction {
 		//	b0: lock.
 		ImmediateT extensions_[3]{};
 
+		ImmediateT operand_extension() const {
+			return extensions_[0];
+		}
+		ImmediateT displacement_extension() const {
+			return extensions_[(mem_exts_source_ >> 5) & 1];
+		}
+		ImmediateT length_extension() const {
+			return extensions_[((mem_exts_source_ >> 5) & 1) + ((mem_exts_source_ >> 6) & 1)];
+		}
+
 	public:
 		/// @returns The number of bytes used for meaningful content within this class. A receiver must use at least @c sizeof(Instruction) bytes
 		/// to store an @c Instruction but is permitted to reuse the trailing sizeof(Instruction) - packing_size() for any purpose it likes. Teleologically,
@@ -650,6 +661,13 @@ template<bool is_32bit> class Instruction {
 			return
 				offsetof(Instruction<is_32bit>, extensions) +
 				(has_displacement() + has_operand() + has_length_extension()) * sizeof(ImmediateT);
+
+			// To consider in the future: the length extension is always the last one,
+			// and uses only 8 bits of content within 32-bit instructions, so it'd be
+			// possible further to trim the packing size on little endian machines.
+			//
+			// ... but is that a speed improvement? How much space does it save, and
+			// is it enough to undo the costs of unaligned data?
 		}
 
 	private:
@@ -673,7 +691,7 @@ template<bool is_32bit> class Instruction {
 			);
 		}
 		bool lock() const {
-			return has_length_extension() && extensions_[0]&1;
+			return has_length_extension() && length_extension()&1;
 		}
 
 		AddressSize address_size() const {
@@ -688,13 +706,13 @@ template<bool is_32bit> class Instruction {
 			if(!has_length_extension()) return Source::DS;
 			return Source(
 				int(Source::ES) +
-				((extensions_[0] >> 1) & 7)
+				((length_extension() >> 1) & 7)
 			);
 		}
 
 		Repetition repetition() const {
 			if(!has_length_extension()) return Repetition::None;
-			return Repetition((extensions_[0] >> 4) & 3);
+			return Repetition((length_extension() >> 4) & 3);
 		}
 		DataSize operation_size() const {
 			return DataSize(source_data_dest_sib_ >> 14);
@@ -703,11 +721,11 @@ template<bool is_32bit> class Instruction {
 		int length() const {
 			const int short_length = (source_data_dest_sib_ >> 10) & 15;
 			if(short_length) return short_length;
-			return extensions_[0] >> 6;
+			return length_extension() >> 6;
 		}
 
 		ImmediateT operand() const	{
-			const ImmediateT ops[] = {0, extensions_[has_length_extension()]};
+			const ImmediateT ops[] = {0, operand_extension()};
 			return ops[has_operand()];
 		}
 		DisplacementT displacement() const {
@@ -718,7 +736,7 @@ template<bool is_32bit> class Instruction {
 			return uint16_t(operand());
 		}
 		ImmediateT offset() const	{
-			const ImmediateT offsets[] = {0, extensions_[has_length_extension() + has_operand()]};
+			const ImmediateT offsets[] = {0, displacement_extension()};
 			return offsets[has_displacement()];
 		}
 
@@ -756,7 +774,17 @@ template<bool is_32bit> class Instruction {
 					(destination == Source::Indirect ? (uint8_t(sib) & 7) : 0)
 				)) {
 
+				// Decisions on whether to include operand, displacement and/or size extension words
+				// have implicitly been made in the int packing above; honour them here.
 				int extension = 0;
+				if(has_operand()) {
+					extensions_[extension] = operand;
+					++extension;
+				}
+				if(has_displacement()) {
+					extensions_[extension] = ImmediateT(displacement);
+					++extension;
+				}
 				if(has_length_extension()) {
 					// As per the rule stated for segment(), this class provides ::DS for any instruction
 					// that doesn't have a segment override.
@@ -766,11 +794,6 @@ template<bool is_32bit> class Instruction {
 					);
 					++extension;
 				}
-				if(has_operand()) {
-					extensions_[extension] = operand;
-					++extension;
-				}
-				extensions_[extension] = ImmediateT(displacement);
 			}
 };
 

From 44252984c2f299215b2d68f527281dea6311f325 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Fri, 11 Mar 2022 14:03:46 -0500
Subject: [PATCH 099/104] Eliminate INT3 special case.

---
 InstructionSets/x86/Decoder.cpp     | 7 ++++++-
 InstructionSets/x86/Instruction.hpp | 2 --
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 34de2c9ba..58e7f01b7 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -370,7 +370,12 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0xca: RegData(RETfar, None, data_size_);				break;
 			case 0xcb: Complete(RETfar, None, None, DataSize::DWord);	break;
 
-			case 0xcc: Complete(INT3, None, None, DataSize::None);	break;
+			case 0xcc:
+				// Encode INT3 as though it were INT with an
+				// immediate operand of 3.
+				Complete(INT, Immediate, None, DataSize::Byte);
+				operand_ = 3;
+			break;
 			case 0xcd: RegData(INT, None, DataSize::Byte);			break;
 			case 0xce: Complete(INTO, None, None, DataSize::None);	break;
 			case 0xcf: Complete(IRET, None, None, DataSize::None);	break;
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index cfb67eec6..3b1c9b3d8 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -108,8 +108,6 @@ enum class Operation: uint8_t {
 	JPCX,
 	/// Generates a software interrupt of the level stated in the operand.
 	INT,
-	/// Generates a software interrupt of level 3.
-	INT3,
 	/// Generates a software interrupt of level 4 if overflow is set.
 	INTO,
 

From c22e8112e7fd8ffc955e269f9f9bb0f9f1d32ad7 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Fri, 11 Mar 2022 20:30:56 -0500
Subject: [PATCH 100/104] Expand exposition.

---
 InstructionSets/x86/Instruction.hpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 3b1c9b3d8..1777a1081 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -311,18 +311,24 @@ enum class Operation: uint8_t {
 	/// [Early 80386s only] Insert bit string.
 	IBTS,
 
-	/// Inputs a double word from a port, incrementing or decrementing the destination.
+	/// Inputs a double word the port specified by DX, writing it to
+	/// ES:[e]DI and incrementing or decrementing [e]DI as per the
+	/// current EFLAGS DF flag.
 	INSD,
 
-	/// Convert dword to qword.
+	/// Convert dword to qword; fills EDX with the sign bit of EAX.
 	CDQ,
 	/// Convert word to dword; AX will be expanded to fill EAX.
 	/// Compare and contrast to CWD which would expand AX to DX:AX.
 	CWDE,
 
-	/// Move with zero extension.
+	/// Move from the source to the destination, extending the source with zeros.
+	/// The instruction data size dictates the size of the source; the destination will
+	/// be either 16- or 32-bit depending on the current processor operating mode.
 	MOVZX,
-	/// Move with sign extension.
+	/// Move from the source to the destination, applying a sign extension.
+	/// The instruction data size dictates the size of the source; the destination will
+	/// be either 16- or 32-bit depending on the current processor operating mode.
 	MOVSX,
 
 	/// Two-operand form of IMUL; multiply the source by the destination and write to the destination.

From e6bd2657291821ab985b459d2d66533b63869f65 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Fri, 11 Mar 2022 20:34:28 -0500
Subject: [PATCH 101/104] Explain which BOUNDs operand is which.

---
 InstructionSets/x86/Instruction.hpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 1777a1081..75ecfe73b 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -210,13 +210,14 @@ enum class Operation: uint8_t {
 	BOUND,
 
 
-	// TODO: expand detail on all operations below.
-
-	/// Create stack frame.
+	/// Create stack frame. See operand() for the nesting level and offset()
+	/// for the dynamic storage size.
 	ENTER,
 	/// Procedure exit; copies BP to SP, then pops a new BP from the stack.
 	LEAVE,
 
+	// TODO: expand detail on all operations below.
+
 	/// Inputs from a port, incrementing or decrementing the destination.
 	INS,
 	/// Outputs to a port, incrementing or decrementing the destination.

From f1c486401634b9a9e0f68c0bec940ee971eadb85 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sat, 12 Mar 2022 11:37:21 -0500
Subject: [PATCH 102/104] Eliminate INSD.

---
 InstructionSets/x86/Decoder.cpp     |  4 ++--
 InstructionSets/x86/Instruction.hpp | 18 ++++++++----------
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index 58e7f01b7..bcd657a96 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -244,7 +244,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				RequiresMin(i80186);
 				Complete(INS, None, None, DataSize::Byte);
 			break;
-			case 0x6d:	// INSW
+			case 0x6d:	// INSW/INSD
 				RequiresMin(i80186);
 				Complete(INS, None, None, data_size_);
 			break;
@@ -252,7 +252,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 				RequiresMin(i80186);
 				Complete(OUTS, None, None, DataSize::Byte);
 			break;
-			case 0x6f:	// OUTSW
+			case 0x6f:	// OUTSW/OUTSD
 				RequiresMin(i80186);
 				Complete(OUTS, None, None, data_size_);
 			break;
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 75ecfe73b..8cf45a768 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -88,7 +88,7 @@ enum class Operation: uint8_t {
 
 	/// Far call; see the segment() and offset() fields.
 	CALLfar,
-	/// Displacement call; followed by a 16-bit operand providing a call offset.
+	/// Relative call; see displacement().
 	CALLrel,
 	/// Near call.
 	CALLabs,
@@ -216,11 +216,12 @@ enum class Operation: uint8_t {
 	/// Procedure exit; copies BP to SP, then pops a new BP from the stack.
 	LEAVE,
 
-	// TODO: expand detail on all operations below.
-
-	/// Inputs from a port, incrementing or decrementing the destination.
+	/// Inputs a byte, word or double word from the port specified by DX, writing it to
+	/// ES:[e]DI and incrementing or decrementing [e]DI as per the
+	/// current EFLAGS DF flag.
 	INS,
-	/// Outputs to a port, incrementing or decrementing the destination.
+	/// Outputs a byte, word or double word from DS:[e]SI to the port specified by DX,
+	/// incrementing or decrementing [e]SI as per the current EFLAGS DF flag.
 	OUTS,
 
 	/// Pushes all general purpose registers to the stack, in the order:
@@ -234,6 +235,8 @@ enum class Operation: uint8_t {
 	// 80286 additions.
 	//
 
+	// TODO: expand detail on all operations below.
+
 	/// Adjusts requested privilege level.
 	ARPL,
 	/// Clears the task-switched flag.
@@ -312,11 +315,6 @@ enum class Operation: uint8_t {
 	/// [Early 80386s only] Insert bit string.
 	IBTS,
 
-	/// Inputs a double word the port specified by DX, writing it to
-	/// ES:[e]DI and incrementing or decrementing [e]DI as per the
-	/// current EFLAGS DF flag.
-	INSD,
-
 	/// Convert dword to qword; fills EDX with the sign bit of EAX.
 	CDQ,
 	/// Convert word to dword; AX will be expanded to fill EAX.

From fd4f85eb198f79fd033ffc270a4aa63c3a33a442 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sat, 12 Mar 2022 12:23:48 -0500
Subject: [PATCH 103/104] Add SMSW.

---
 InstructionSets/x86/Documentation/80386 opcode map.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/InstructionSets/x86/Documentation/80386 opcode map.html b/InstructionSets/x86/Documentation/80386 opcode map.html
index cbf824478..3bab74c42 100644
--- a/InstructionSets/x86/Documentation/80386 opcode map.html	
+++ b/InstructionSets/x86/Documentation/80386 opcode map.html	
@@ -895,10 +895,10 @@
 				<td>SIDT Ms</td>
 				<td>LGDT Ms</td>
 				<td>LIDT Ms</td>
+				<td>SMSW Ew</td>
 				<td></td>
 				<td>LMSW Ew</td>
 				<td></td>
-				<td></td>
 			</tr>
 			<tr>
 				<th>Group 8</th>

From 1725894fe9527887aa5204012a3012aa071e08d6 Mon Sep 17 00:00:00 2001
From: Thomas Harte <thomas.harte@gmail.com>
Date: Sat, 12 Mar 2022 12:24:44 -0500
Subject: [PATCH 104/104] Eliminate redundant CMPSD, CDQ, CWDE.

Also removes IBTS for now, as I'm unclear where it should sit in the opcode map.
---
 InstructionSets/x86/Decoder.cpp     |  4 +++-
 InstructionSets/x86/Instruction.hpp | 19 ++++++-------------
 2 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp
index bcd657a96..ae8447e14 100644
--- a/InstructionSets/x86/Decoder.cpp
+++ b/InstructionSets/x86/Decoder.cpp
@@ -301,7 +301,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 			case 0x96: Complete(XCHG, eAX, eSI, data_size_);		break;
 			case 0x97: Complete(XCHG, eAX, eDI, data_size_);		break;
 
-			case 0x98: Complete(CBW, eAX, AH, DataSize::Byte);		break;
+			case 0x98: Complete(CBW, eAX, AH, data_size_);			break;
 			case 0x99: Complete(CWD, eAX, eDX, data_size_);			break;
 			case 0x9a: Far(CALLfar);								break;
 			case 0x9b: Complete(WAIT, None, None, DataSize::None);	break;
@@ -823,7 +823,9 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
 					default: undefined();
 
 					case 0: 	operation_ = Operation::SGDT;	break;
+					case 1: 	operation_ = Operation::SIDT;	break;
 					case 2: 	operation_ = Operation::LGDT;	break;
+					case 3: 	operation_ = Operation::LIDT;	break;
 					case 4: 	operation_ = Operation::SMSW;	break;
 					case 6: 	operation_ = Operation::LMSW;	break;
 				}
diff --git a/InstructionSets/x86/Instruction.hpp b/InstructionSets/x86/Instruction.hpp
index 8cf45a768..d34acb44c 100644
--- a/InstructionSets/x86/Instruction.hpp
+++ b/InstructionSets/x86/Instruction.hpp
@@ -42,9 +42,13 @@ enum class Operation: uint8_t {
 	/// Decimal adjust after subtraction; source and destination will be AL.
 	DAS,
 
-	/// Convert byte into word; source will be AL, destination will be AH.
+	/// If data size is word, convert byte into word; source will be AL, destination will be AH.
+	/// If data size is DWord, convert word to dword; AX will be expanded to fill EAX.
+	/// In both cases, conversion will be by sign extension.
 	CBW,
-	/// Convert word to double word; source will be AX and destination will be DX.
+	/// If data size is Word, converts word to double word; source will be AX and destination will be DX.
+	/// If data size is DWord, converts double word to quad word (i.e. CDQ); source will be EAX and destination will be EDX:EAX.
+	/// In both cases, conversion will be by sign extension.
 	CWD,
 
 	/// Escape, for a coprocessor; perform the bus cycles necessary to read the source and destination and perform a NOP.
@@ -310,17 +314,6 @@ enum class Operation: uint8_t {
 	/// Bit test and set.
 	BTS,
 
-	/// Compare string double word.
-	CMPSD,
-	/// [Early 80386s only] Insert bit string.
-	IBTS,
-
-	/// Convert dword to qword; fills EDX with the sign bit of EAX.
-	CDQ,
-	/// Convert word to dword; AX will be expanded to fill EAX.
-	/// Compare and contrast to CWD which would expand AX to DX:AX.
-	CWDE,
-
 	/// Move from the source to the destination, extending the source with zeros.
 	/// The instruction data size dictates the size of the source; the destination will
 	/// be either 16- or 32-bit depending on the current processor operating mode.

A	Direct address; the instruction has no MODRM field; the address of the operand is encoded in the instruction; no base register, index register, or scaling factor can be applied; e.g., far JMP (EA).
C	The reg field of the MODRM field selects a control register; e.g., MOV (0F20, 0F22).
D	The reg field of the MODRM field selects a debug register; e.g., MOV (0F21, 0F23).
E	A MODRM field follows the opcode and specifies the operand. The operand is either a general register or a memory address. If it is a memory address, the address is computed from a segment register and any of the following values: a base register, an index register, a scaling factor, a displacement.
F	Flags register
G	The reg field of the MODRM field selects a general register; e.g., ADD (00).
I	Immediate data. The value of the operand is encoded in subsequent bytes of the instruction.
J	The instruction contains a relative offset to be added to the instruction-pointer register; e.g., JMP short, LOOP.
M	The MODRM field may refer only to memory; e.g., BOUND, LES, LDS, LSS, LFS, LGS.
O	The instruction has no MODRM field; the offset of the operand is coded as a word or dword (depending on address size attribute) in the instruction. No base register, index register, or scaling factor can be applied; e.g., MOV (A0–A3).
R	The mod field of the MODRM field may refer only to a general register; e.g., MOV(0F20–0F24, 0F26).
S	The reg field of the MODRM field selects a segment register; e.g., MOV (8C, 8E).
T	The reg field of the MODRM field selects a test register; e.g., MOV (0F24, 0F26).
X	Memory addressed by DS:SI; e.g., MOVS, CMPS, OUTS, LODS, SCAS.
Y	Memory addressed by ES:DI; e.g., MOVS, CMPS, INS, STOS.
a	Two one-word operands in memory or two dword operands in memory, depending on operand size attribute (used only by BOUND).
b	Byte (regardless of operand size attribute).
c	Byte or word, depending on operand size attribute.
d	Dword (regardless of operand size attribute).
p	32-bit or 48-bit pointer, depending on operand size attribute.
s	Six-byte pseudo-descriptor.
v	Word or dword, depending on operand size attribute.
w	Word (regardless of operand size attribute).
	0	1	2	3	4	5	6	7	8	9	A	B	C	D	E	F
0	ADD						PUSH ES	POP ES	OR						PUSH CS	2-byte escape codes
0	Eb, Gb	Ev, Gv	Gb, Eb	Gv, Ev	AL, Ib	eAX, Iv	PUSH ES	POP ES	Eb, Gb	Ev, Gv	Gb, Eb	Gv, Ev	AL, Ib	eAX, Iv	PUSH CS	2-byte escape codes
1	ADC						PUSH SS	POP SS	SBB						PUSH DS	POP DS
1	Eb, Gb	Ev, Gv	Gb, Eb	Gv, Ev	AL, Ib	eAX, Iv	PUSH SS	POP SS	Eb, Gb	Ev, Gv	Gb, Eb	Gv, Ev	AL, Ib	eAX, Iv	PUSH DS	POP DS
2	AND						SEG =ES	DAA	SUB						SEG =CS	DAS
2	Eb, Gb	Ev, Gv	Gb, Eb	Gv, Ev	AL, Ib	eAX, Iv	SEG =ES	DAA	Eb, Gb	Ev, Gv	Gb, Eb	Gv, Ev	AL, Ib	eAX, Iv	SEG =CS	DAS
3	XOR						SEG =SS	AAA	CMP						SEG =DS	AAS
3	Eb, Gb	Ev, Gv	Gb, Eb	Gv, Ev	AL, Ib	eAX, Iv	SEG =SS	AAA	Eb, Gb	Ev, Gv	Gb, Eb	Gv, Ev	AL, Ib	eAX, Iv	SEG =DS	AAS
4	INC general register								DEC general register
4	eAX	eCX	eDX	eBX	eSP	eBP	eSI	eDI	eAX	eCX	eDX	eBX	eSP	eBP	eSI	eDI
5	PUSH general register								POP general register
5	eAX	eCX	eDX	eBX	eSP	eBP	eSI	eDI	eAX	eCX	eDX	eBX	eSP	eBP	eSI	eDI
6	PUSHA	POPA	BOUND Gv, Ma	ARPL Gv, Ma	SEG =FS	SEG =GS	Operand Size	Address Size	PUSH Iv	IMUL GvEvIv	PUSH Ib	IMUL GvEvIb	INSB Yb, Dx	INSW/D Yv, Dx	OUTSB Dx, Xb	OUTSW/D Dx, Xv
6	PUSHA	POPA	BOUND Gv, Ma	ARPL Gv, Ma	SEG =FS	SEG =GS	Operand Size	Address Size	PUSH Iv	IMUL GvEvIv	PUSH Ib	IMUL GvEvIb	INSB Yb, Dx	INSW/D Yv, Dx	OUTSB Dx, Xb	OUTSW/D Dx, Xv
7	Short-displacement jump on condition (Jb)
7	JO	JNO	JB	JNB	JZ	JNZ	JBE	JNBE	JS	JNS	JP	JNP	JL	JNL	JLE	JNLE
8	Immediate Grp1			Grp1 Ev, Ib	TEST		XCHG		MOV				MOV Ew, Sw	LEA Gv, M	MOV Sw, Ew	POP Ev
8	Eb, Ib	Ev, Iv		Grp1 Ev, Ib	Eb, Gb	Ev, Gv	Eb, Gb	Ev, Gv	Eb, Gb	Ev, Gv	Gb, Eb	Gv, Ev	MOV Ew, Sw	LEA Gv, M	MOV Sw, Ew	POP Ev
9	NOP	XCHG word or double-word register with eAX							CBW	CWD	CALL Ap	WAIT	PUSHF Fv	POPF Fv	SAHF	LAHF
9	NOP	eCX	eDX	eBX	eSP	eBP	eSI	eDI	CBW	CWD	CALL Ap	WAIT	PUSHF Fv	POPF Fv	SAHF	LAHF
A	MOV				MOVSB Xb, Yb	MOVSW/D Xv, Yv	CMPSB Xb, Yb	CMPSW/D Xv, Yv	TEST		STOSB Yb, AL	STOSW/D Yv, eAX	LODSB AL, Xb	LODSW/D eAX, Xv	SCASB AL, Xb	SCASW/D eAX, Xv
A	AL, Ob	eAX, Ov	Ob, AL	Ov, eAX	MOVSB Xb, Yb	MOVSW/D Xv, Yv	CMPSB Xb, Yb	CMPSW/D Xv, Yv	AL, Ib	eAX, Iv	STOSB Yb, AL	STOSW/D Yv, eAX	LODSB AL, Xb	LODSW/D eAX, Xv	SCASB AL, Xb	SCASW/D eAX, Xv
B	MOV immediate byte into byte register								MOV immediate word or double into word or double register
B	AL	CL	DL	BL	AH	CH	DH	BH	eAX	eCX	eDX	eBX	eSP	eBP	eSI	eDI
C	Shift Grp2		RET near		LES Gv, Mp	LDS Gv, Mp	MOV		ENTER	LEAVE	RET far		INT 3	INT Ib	INTO	IRET
C	Eb, Ib	Ev, Iv	Iw		LES Gv, Mp	LDS Gv, Mp	Eb, Ib	Ev, Iv	ENTER	LEAVE	Iw		INT 3	INT Ib	INTO	IRET
D	Shift Grp2				AAM	AAD		XLAT	ESC (Escape to coprocessor instruction set)
D	Eb, 1	Ev, 1	Eb, CL	Ev, CL	AAM	AAD		XLAT	ESC (Escape to coprocessor instruction set)
E	LOOPNE Jb	LOOPE Jb	LOOP Jb	JCXZ Jb	IN		OUT		CALL Av	JMP			IN		OUT
E	LOOPNE Jb	LOOPE Jb	LOOP Jb	JCXZ Jb	AL, Ib	eAX, Ib	Ib, AL	Ib, eAX	CALL Av	Jv	Ap	Jb	AL, DX	eAX, DX	DX, AL	DX, eAX
F	LOCK		REPNE	REP / REPE	HLT	CMC	Unary Grp3		CLC	STC	CLI	STI	CLD	STD	INC/DEC Grp4	Indirect Grp5
F	LOCK		REPNE	REP / REPE	HLT	CMC	Eb	Ev	CLC	STC	CLI	STI	CLD	STD	INC/DEC Grp4	Indirect Grp5
	0	1	2	3	4	5	6	7	8	9	A	B	C	D	E	F
0	Grp6	Grp7	LAR Gv, Ew	LSL Gv, Ew			CLTS
0	Grp6	Grp7	LAR Gv, Ew	LSL Gv, Ew			CLTS
1
1
2	MOV Cr, Rd	MOV Dd, Rd	MOV Rd, Cd	MOV Rd, Dd	MOV Td, Rd		MOV Rd, Td
2	MOV Cr, Rd	MOV Dd, Rd	MOV Rd, Cd	MOV Rd, Dd	MOV Td, Rd		MOV Rd, Td
≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈
≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈
8	Long-displacement jump on condition (Jv)
8	JO	JNO	JB	JNB	JZ	JNZ	JBE	JNBE	JS	JNS	JP	JNP	JL	JNL	JLE	JNLE
9	Byte set on condition (Eb)								SETS	SETNS	SETP	SETNP	SETL	SETNL	SETLE	SETNLE
9	SETO	SETNO	SETB	SETNB	SETZ	SETNZ	SETBE	SETNBE	SETS	SETNS	SETP	SETNP	SETL	SETNL	SETLE	SETNLE
A	PUSH FS	POP FS		BT Ev, Gv	SHLD EvGvIb	SHLD EvGvCL			PUSH GS	POP GS		BTS Ev, Gv	SHRD EvGvIb	SHRD EvGvCL		IMUL Gv, Ev
A	PUSH FS	POP FS		BT Ev, Gv	SHLD EvGvIb	SHLD EvGvCL			PUSH GS	POP GS		BTS Ev, Gv	SHRD EvGvIb	SHRD EvGvCL		IMUL Gv, Ev
B			LSS Mp	BTR Ev, Gv	LFS Mp	LGS Mp	MOVZX				Grp8 Ev, Ib	BTC Ev, Gv	BSF Gv, Ev	BSR Gv, Ev	MOVSX
B			LSS Mp	BTR Ev, Gv	LFS Mp	LGS Mp	Gv, Eb	Gv, Ew			Grp8 Ev, Ib	BTC Ev, Gv	BSF Gv, Ev	BSR Gv, Ev	Gv, Eb	Gv, Ew
≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈
≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈	≈
F
F
	000	001	010	011	100	101	110	111
Group 1	ADD	OR	ADC	SBB	AND	SUB	XOR	CMP
Group 2	ROL	ROR	RCL	RCR	SHL	SHR		SAR
Group 3	TEST Ib/Iv		NOT	NEG	MUL AL/eAX	IMUL AL/eAX	DIV AL/eAX	IDIV AL/eAX
Group 4	INC Eb	DEC Eb
Group 5	INC Ev	DEC Ev	CALL Ev	CALL Ep	JMP Ev	JMP Ep	PUSH Ev
Group 6	SLDT Ew	STR Ew	LLDT Ew	LTR Ew	VERR Ew	VERW Ew
Group 7	SGDT Ms	SIDT Ms	LGDT Ms	LIDT Ms	SMSW Ew		LMSW Ew
Group 8					BT	BTS	BTR	BTC