1
0
mirror of https://github.com/TomHarte/CLK.git synced 2024-07-04 18:29:40 +00:00

Refactor: (i) to expose effective address calculation; and (ii) to include address size in Instruction.

This commit is contained in:
Thomas Harte 2022-03-01 09:36:37 -05:00
parent b920507f34
commit 2c816db45e
2 changed files with 213 additions and 161 deletions

View File

@ -39,6 +39,63 @@ enum class Register: uint8_t {
None
};
/// Maps @c source to the concrete @c Register it denotes when accessed at the width of @c DataT.
///
/// * 1-byte accesses select AL, CL, DL, BL, AH, CH, DH or BH;
/// * 2-byte accesses select AX, CX, DX, BX, SP, BP, SI or DI, or one of the segment registers ES, CS, SS, DS, FS or GS;
/// * 4-byte accesses select EAX, ECX, EDX, EBX, ESP, EBP, ESI or EDI.
///
/// @returns The selected register, or @c Register::None if @c source names no register at that width.
template <typename DataT> constexpr Register register_for_source(Source source) {
	constexpr auto width = sizeof(DataT);
	static_assert(width == 1 || width == 2 || width == 4);

	if constexpr (width == 1) {
		// At byte width the upper four encodings name the high halves of AX–BX.
		switch(source) {
			case Source::eAX:		return Register::AL;
			case Source::eCX:		return Register::CL;
			case Source::eDX:		return Register::DL;
			case Source::eBX:		return Register::BL;
			case Source::eSPorAH:	return Register::AH;
			case Source::eBPorCH:	return Register::CH;
			case Source::eSIorDH:	return Register::DH;
			case Source::eDIorBH:	return Register::BH;

			default: break;
		}
	} else if constexpr (width == 2) {
		// Word width is the only width at which the segment registers are reachable.
		switch(source) {
			case Source::eAX:		return Register::AX;
			case Source::eCX:		return Register::CX;
			case Source::eDX:		return Register::DX;
			case Source::eBX:		return Register::BX;
			case Source::eSPorAH:	return Register::SP;
			case Source::eBPorCH:	return Register::BP;
			case Source::eSIorDH:	return Register::SI;
			case Source::eDIorBH:	return Register::DI;
			case Source::ES:		return Register::ES;
			case Source::CS:		return Register::CS;
			case Source::SS:		return Register::SS;
			case Source::DS:		return Register::DS;
			case Source::FS:		return Register::FS;
			case Source::GS:		return Register::GS;

			default: break;
		}
	} else if constexpr (width == 4) {
		switch(source) {
			case Source::eAX:		return Register::EAX;
			case Source::eCX:		return Register::ECX;
			case Source::eDX:		return Register::EDX;
			case Source::eBX:		return Register::EBX;
			case Source::eSPorAH:	return Register::ESP;
			case Source::eBPorCH:	return Register::EBP;
			case Source::eSIorDH:	return Register::ESI;
			case Source::eDIorBH:	return Register::EDI;

			default: break;
		}
	}

	// No register corresponds to this source at this width.
	return Register::None;
}
/// Reads from or writes to the source or target identified by a DataPointer, relying upon two user-supplied classes:
///
/// * a register bank; and
@ -51,26 +108,28 @@ enum class Register: uint8_t {
/// `template<typename DataT> void write(Source segment, uint32_t address, DataT value)`.
template <Model model, typename RegistersT, typename MemoryT> class DataPointerResolver {
public:
public:
/// Reads the data pointed to by @c pointer, referencing @c instruction, @c memory and @c registers as necessary.
template <typename DataT> static DataT read(
RegistersT &registers,
MemoryT &memory,
const Instruction<is_32bit(model)> &instruction,
DataPointer pointer,
typename Instruction<is_32bit(model)>::AddressT memory_mask = ~0) {
DataT result;
access<true>(registers, memory, instruction, pointer, memory_mask, result);
return result;
}
DataPointer pointer);
/// Writes @c value to the data pointed to by @c pointer, referencing @c instruction, @c memory and @c registers as necessary.
template <typename DataT> static void write(
RegistersT &registers,
MemoryT &memory,
const Instruction<is_32bit(model)> &instruction,
DataPointer pointer,
DataT value,
typename Instruction<is_32bit(model)>::AddressT memory_mask = ~0) {
access<false>(registers, memory, instruction, pointer, memory_mask, value);
}
DataT value);
/// Computes the effective address of @c pointer including any displacement applied by @c instruction.
/// @c pointer must be of type Source::Indirect.
static uint32_t effective_address(
RegistersT &registers,
const Instruction<is_32bit(model)> &instruction,
DataPointer pointer);
private:
template <bool is_write, typename DataT> static void access(
@ -78,150 +137,139 @@ template <Model model, typename RegistersT, typename MemoryT> class DataPointerR
MemoryT &memory,
const Instruction<is_32bit(model)> &instruction,
DataPointer pointer,
typename Instruction<is_32bit(model)>::AddressT memory_mask,
DataT &value) {
assert(memory_mask == 0xffff'ffff || memory_mask == 0xffff);
const Source source = pointer.source();
#define read_or_write(v, x, is_for_indirection) \
case Source::x: \
if constexpr (!is_for_indirection && is_write) { \
registers.template write<decltype(v), register_for_source<decltype(v)>(Source::x)>(v); \
} else { \
v = registers.template read<decltype(v), register_for_source<decltype(v)>(Source::x)>(); \
} \
break;
#define ALLREGS(v) f(v, eAX); f(v, eCX); f(v, eDX); f(v, eBX); \
f(v, eSPorAH); f(v, eBPorCH); f(v, eSIorDH); f(v, eDIorBH); \
f(v, ES); f(v, CS); f(v, SS); f(v, DS); f(v, FS); f(v, GS);
switch(source) {
default:
if constexpr (!is_write) {
value = 0;
}
return;
#define f(x, y) read_or_write(x, y, false)
ALLREGS(value);
#undef f
case Source::DirectAddress:
if constexpr(is_write) {
memory.template write<DataT>(instruction.data_segment(), instruction.displacement(), value);
} else {
value = memory.template read<DataT>(instruction.data_segment(), instruction.displacement());
}
break;
case Source::Immediate:
value = DataT(instruction.operand());
break;
case Source::Indirect: {
using AddressT = typename Instruction<is_32bit(model)>::AddressT;
AddressT base = 0, index = 0;
#define f(x, y) read_or_write(x, y, true)
switch(pointer.base()) {
default: break;
ALLREGS(base);
}
switch(pointer.index()) {
default: break;
ALLREGS(index);
}
#undef f
// Always compute address as 32-bit.
// TODO: verify application of memory_mask here.
// The point of memory_mask is that 32-bit x86 offers the memory size modifier,
// permitting 16-bit addresses to be generated in 32-bit mode and vice versa.
// What remains to be determined is the point in the calculation at which the
// 16-bit constraint is applied when active.
uint32_t address = index;
if constexpr (model >= Model::i80386) {
address <<= pointer.scale();
} else {
assert(!pointer.scale());
}
address = (address & memory_mask) + (base & memory_mask) + instruction.displacement();
if constexpr (is_write) {
value = memory.template read<DataT>(
instruction.data_segment(),
address
);
} else {
memory.template write<DataT>(
instruction.data_segment(),
address,
value
);
}
}
}
#undef ALLREGS
}
/// Maps @c source to the concrete @c Register it denotes when accessed at the width of @c DataT.
///
/// Segment registers are reachable only through 2-byte accesses; any combination of
/// source and width that names no register — including any width other than
/// 1, 2 or 4 bytes — yields @c Register::None.
template <typename DataT> constexpr static Register register_for_source(Source source) {
	constexpr auto width = sizeof(DataT);

	if constexpr (width == 1) {
		// At byte width the upper four encodings name the high halves of AX–BX.
		switch(source) {
			case Source::eAX:		return Register::AL;
			case Source::eCX:		return Register::CL;
			case Source::eDX:		return Register::DL;
			case Source::eBX:		return Register::BL;
			case Source::eSPorAH:	return Register::AH;
			case Source::eBPorCH:	return Register::CH;
			case Source::eSIorDH:	return Register::DH;
			case Source::eDIorBH:	return Register::BH;

			default: break;
		}
	} else if constexpr (width == 2) {
		switch(source) {
			case Source::eAX:		return Register::AX;
			case Source::eCX:		return Register::CX;
			case Source::eDX:		return Register::DX;
			case Source::eBX:		return Register::BX;
			case Source::eSPorAH:	return Register::SP;
			case Source::eBPorCH:	return Register::BP;
			case Source::eSIorDH:	return Register::SI;
			case Source::eDIorBH:	return Register::DI;
			case Source::ES:		return Register::ES;
			case Source::CS:		return Register::CS;
			case Source::SS:		return Register::SS;
			case Source::DS:		return Register::DS;
			case Source::FS:		return Register::FS;
			case Source::GS:		return Register::GS;

			default: break;
		}
	} else if constexpr (width == 4) {
		switch(source) {
			case Source::eAX:		return Register::EAX;
			case Source::eCX:		return Register::ECX;
			case Source::eDX:		return Register::EDX;
			case Source::eBX:		return Register::EBX;
			case Source::eSPorAH:	return Register::ESP;
			case Source::eBPorCH:	return Register::EBP;
			case Source::eSIorDH:	return Register::ESI;
			case Source::eDIorBH:	return Register::EDI;

			default: break;
		}
	}

	// No register corresponds to this source at this width.
	return Register::None;
}
DataT &value);
};
//
// Implementation begins here.
//
/// Reads the data pointed to by @c pointer, referencing @c instruction, @c memory and @c registers as necessary.
///
/// @returns The value read; 0 if @c pointer names a source that @c access does not recognise.
template <Model model, typename RegistersT, typename MemoryT>
template <typename DataT> DataT DataPointerResolver<model, RegistersT, MemoryT>::read(
	RegistersT &registers,
	MemoryT &memory,
	const Instruction<is_32bit(model)> &instruction,
	DataPointer pointer) {
		DataT result{};
		// A read is an access with is_write = false.
		access<false>(registers, memory, instruction, pointer, result);
		return result;
	}

/// Writes @c value to the data pointed to by @c pointer, referencing @c instruction, @c memory and @c registers as necessary.
template <Model model, typename RegistersT, typename MemoryT>
template <typename DataT> void DataPointerResolver<model, RegistersT, MemoryT>::write(
	RegistersT &registers,
	MemoryT &memory,
	const Instruction<is_32bit(model)> &instruction,
	DataPointer pointer,
	DataT value) {
		// A write is an access with is_write = true.
		access<true>(registers, memory, instruction, pointer, value);
	}

// rw(T, v, r, is_write) expands to a `case` for Source::r that either stores v to, or loads v from,
// the register that Source::r denotes at the width of T.
//
// T is passed explicitly rather than being deduced via decltype(v): when v is a reference
// (as `value` is within access) decltype would yield a reference type, which is not a
// meaningful data type with which to instantiate the register bank's accessors.
#define rw(T, v, r, is_write) \
	case Source::r: { \
		if constexpr (is_write) { \
			registers.template write<T, register_for_source<T>(Source::r)>(v); \
		} else { \
			v = registers.template read<T, register_for_source<T>(Source::r)>(); \
		} \
	} break;

// ALLREGS(T, v, i) expands to an rw case for every general-purpose and segment register source.
#define ALLREGS(T, v, i) \
	rw(T, v, eAX, i);		rw(T, v, eCX, i); \
	rw(T, v, eDX, i);		rw(T, v, eBX, i); \
	rw(T, v, eSPorAH, i);	rw(T, v, eBPorCH, i); \
	rw(T, v, eSIorDH, i);	rw(T, v, eDIorBH, i); \
	rw(T, v, ES, i);		rw(T, v, CS, i); \
	rw(T, v, SS, i);		rw(T, v, DS, i); \
	rw(T, v, FS, i);		rw(T, v, GS, i);

/// Computes the effective address of @c pointer including any displacement applied by @c instruction.
/// @c pointer must be of type Source::Indirect.
///
/// The base and index registers named by @c pointer are read from @c registers; a base or index
/// that names no register contributes 0.
template <Model model, typename RegistersT, typename MemoryT>
uint32_t DataPointerResolver<model, RegistersT, MemoryT>::effective_address(
	RegistersT &registers,
	const Instruction<is_32bit(model)> &instruction,
	DataPointer pointer) {
		using AddressT = typename Instruction<is_32bit(model)>::AddressT;
		AddressT base = 0, index = 0;

		// Registers are only ever read here, regardless of the direction of the eventual access.
		switch(pointer.base()) {
			default: break;
			ALLREGS(AddressT, base, false);
		}

		switch(pointer.index()) {
			default: break;
			ALLREGS(AddressT, index, false);
		}

		// Always compute address as 32-bit.
		// TODO: verify application of memory_mask around here.
		// The point of memory_mask is that 32-bit x86 offers the memory size modifier,
		// permitting 16-bit addresses to be generated in 32-bit mode and vice versa.
		// What remains to be determined is the point in the calculation at which the
		// 16-bit constraint is applied when active.
		uint32_t address = index;
		if constexpr (model >= Model::i80386) {
			address <<= pointer.scale();
		} else {
			// Index scaling is applied only for the 80386 and later; earlier models must not request it.
			assert(!pointer.scale());
		}

		// Select a 32-bit or 16-bit mask according to the instruction's address size.
		const uint32_t memory_mask = instruction.address_size_is_32() ? 0xffff'ffff : 0x0000'ffff;
		address = (address & memory_mask) + (base & memory_mask) + instruction.displacement();
		return address;
	}

/// Performs a read from (if @c is_write is @c false) or a write to (if @c is_write is @c true)
/// the location identified by @c pointer, transferring the data through @c value.
///
/// * register sources are read from or written to @c registers;
/// * direct addresses use the instruction's displacement as an offset into its data segment;
/// * immediates load the instruction's operand into @c value in either mode;
/// * indirect sources access @c memory at the address computed by @c effective_address;
/// * any other source leaves a read with a @c value of 0, and a write with no effect.
template <Model model, typename RegistersT, typename MemoryT>
template <bool is_write, typename DataT> void DataPointerResolver<model, RegistersT, MemoryT>::access(
	RegistersT &registers,
	MemoryT &memory,
	const Instruction<is_32bit(model)> &instruction,
	DataPointer pointer,
	DataT &value) {
		const Source source = pointer.source();

		switch(source) {
			default:
				if constexpr (!is_write) {
					value = 0;
				}
			return;

			ALLREGS(DataT, value, is_write);

			case Source::DirectAddress:
				if constexpr (is_write) {
					memory.template write<DataT>(instruction.data_segment(), instruction.displacement(), value);
				} else {
					value = memory.template read<DataT>(instruction.data_segment(), instruction.displacement());
				}
			break;

			case Source::Immediate:
				// There is nowhere to store to for an immediate; the operand is loaded in either mode.
				value = DataT(instruction.operand());
			break;

			case Source::Indirect: {
				const auto address = effective_address(registers, instruction, pointer);

				if constexpr (is_write) {
					memory.template write<DataT>(
						instruction.data_segment(),
						address,
						value
					);
				} else {
					value = memory.template read<DataT>(
						instruction.data_segment(),
						address
					);
				}
			} break;
		}
	}

#undef ALLREGS
#undef rw
}
}

View File

@ -522,14 +522,18 @@ template<bool is_32bit> class Instruction {
8 bits operation;
4 bits original instruction size;
2 bits data size;
3 bits extension flags.
1 bit memory size;
2 bits extension flags.
Extensions (16 or 32 bit, depending on templated size):
1) repetition + segment override + lock + memory size toggle (= 7 bits);
1) repetition + segment override + lock + original instruction size (= 10 bits);
2) displacement;
3) immediate operand.
Presence or absence of extensions is dictated by the extension flags.
Presence or absence of extensions is dictated by:
* instruction size = 0 => the repetition, etc extension (including the real extension size); and
* the extension flags for displacement and/or immediate.
Therefore an instruction's footprint is:
* 4–8 bytes (16-bit processors);
* 4–12 bytes (32-bit processors).
@ -537,9 +541,9 @@ template<bool is_32bit> class Instruction {
I'll then implement a collection suited to packing these things based on their
packing_size(), and later iterating them.
To verify: do the 8086 and 80286 limit instructions to 15 bytes as later members
of the family do? If not then consider original instruction size = 0 to imply an
extension of one word prior to the other extensions.
To verify: the 8086 allows unlimited-length instructions (which I'll probably handle by
generating length-15 NOPs and not resetting parser state), the 80386 limits them to
15 bytes, but what do the processors in between do?
*/
private:
@ -570,7 +574,7 @@ template<bool is_32bit> class Instruction {
/// @returns A pointer to this instruction's source, formed from the low six bits of @c sources_ plus @c sib_.
DataPointer source() const {
	const Source source_field = Source(sources_ & 0x3f);
	return DataPointer(source_field, sib_);
}
/// @returns A pointer to this instruction's destination, formed from bits 6–11 of @c sources_ plus @c sib_.
DataPointer destination() const {
	const Source destination_field = Source((sources_ >> 6) & 0x3f);
	return DataPointer(destination_field, sib_);
}
/// @returns @c true if bit 15 of @c sources_ is set; @c false otherwise.
bool lock() const {
	return (sources_ & 0x8000) != 0;
}
bool address_size() const { return address_size_; }
/// @returns The stored address-size flag; as the name indicates, @c true selects 32-bit addressing.
bool address_size_is_32() const { return address_size_; }
Source data_segment() const {
const auto segment_override = Source((sources_ >> 12) & 7);
if(segment_override != Source::None) return segment_override;
@ -586,8 +590,8 @@ template<bool is_32bit> class Instruction {
/// @returns The stored operand, converted to 16 bits, for use as a segment.
uint16_t segment() const {
	return static_cast<uint16_t>(operand_);
}
/// @returns The stored displacement, converted to 16 bits, for use as an offset.
uint16_t offset() const {
	return static_cast<uint16_t>(displacement_);
}
DisplacementT displacement() const { return displacement_; }
ImmediateT operand() const { return operand_; }
/// @returns The displacement stored for this instruction, at its full @c DisplacementT width.
DisplacementT displacement() const { return displacement_; }
/// @returns The immediate operand stored for this instruction, at its full @c ImmediateT width.
ImmediateT operand() const { return operand_; }
/// Default constructor; the body is empty, so each member takes whatever default
/// initialisation its own declaration provides.
Instruction() noexcept {}
Instruction(