1
0
mirror of https://github.com/TomHarte/CLK.git synced 2024-11-26 08:49:37 +00:00

Switch to intended compact version of Instruction.

This commit is contained in:
Thomas Harte 2022-03-10 15:14:50 -05:00
parent 6dc9973754
commit 673ffc50da
4 changed files with 185 additions and 96 deletions

View File

@ -830,6 +830,7 @@ std::pair<int, typename Decoder<model>::InstructionT> Decoder<model>::decode(con
// Move to the ScaleIndexBase phase only if an SIB is expected and at least one of the
// operands is indirect. The two comparisons are combined with a logical OR; the previous
// bitwise OR produced the same value but obscured that this is a boolean test.
if(expects_sib && (source_ == Source::Indirect || destination_ == Source::Indirect)) {
	phase_ = Phase::ScaleIndexBase;
	// TODO: test for IndirectNoBase.
} else {
	// Otherwise collect any displacement or operand that is still outstanding; if there is neither, post now.
	phase_ = (displacement_size_ != DataSize::None || operand_size_ != DataSize::None) ? Phase::DisplacementOrOperand : Phase::ReadyToPost;
}

View File

@ -434,6 +434,23 @@ enum class Source: uint8_t {
IndirectNoBase = Indirect - 1,
};
/// @returns The data segment to use for @c operation when no segment override is supplied.
/// At present the operation is ignored and the result is always @c Source::DS.
constexpr Source default_data_segment([[maybe_unused]] Operation operation) {
// TODO: is this really necessary, or can ::DS always be default?
// i.e. can the stack operations actually take a segment override?
// If not then the actual implementations just won't ask about a segment.
// The disabled sketch below would instead return ::SS for stack operations.
// constexpr std::set<Operation> stack_ops = {
//
// };
//
// if(
// operation == Operation::PUSH ||
// operation == Operation::PUSHF ||
// operation == Operation::POP) {
// return Source::SS;
// }
return Source::DS;
}
enum class Repetition: uint8_t {
None, RepE, RepNE
};
@ -497,6 +514,10 @@ class ScaleIndexBase {
);
}
/// @returns The stored byte, @c sib_, unmodified.
operator uint8_t() const {
return sib_;
}
private:
// Data is stored directly as an 80386 SIB byte.
uint8_t sib_ = 0;
@ -571,109 +592,147 @@ class DataPointer {
template<bool is_32bit> class Instruction {
public:
Operation operation = Operation::Invalid;
const Operation operation = Operation::Invalid;
bool operator ==(const Instruction &rhs) const {
return
repetition_size_ == rhs.repetition_size_ &&
sources_ == rhs.sources_ &&
displacement_ == rhs.displacement_ &&
operand_ == rhs.operand_ &&
sib_ == rhs.sib_ &&
length_ == rhs.length_;
/// @returns @c true if this instruction and @c rhs have the same operation, the same packed
/// fields and the same values in every extension word that is in use; @c false otherwise.
bool operator ==(const Instruction<is_32bit> &rhs) const {
	const bool headers_match =
		operation == rhs.operation &&
		mem_exts_source_ == rhs.mem_exts_source_ &&
		source_data_dest_sib_ == rhs.source_data_dest_sib_;
	if(!headers_match) {
		return false;
	}

	// Identical packed fields mean identical extension flags, so the count
	// computed for this instruction applies equally to rhs.
	const int extension_count = has_length_extension() + has_displacement() + has_operand();

	// Walk forward until either a mismatch or the end of the in-use extensions.
	int index = 0;
	while(index < extension_count && extensions_[index] == rhs.extensions_[index]) {
		++index;
	}
	return index == extension_count;
}
using DisplacementT = typename std::conditional<is_32bit, int32_t, int16_t>::type;
using ImmediateT = typename std::conditional<is_32bit, uint32_t, uint16_t>::type;
using AddressT = ImmediateT;
/* Note to self — current thinking is:
First 32bits:
5 bits source;
5 bits dest;
5 bits partial SIB, combined with three low bits of source or dest if indirect;
8 bits operation;
4 bits original instruction size;
2 bits data size;
1 bit memory size;
2 bits extension flags.
Extensions (16 or 32 bit, depending on templated size):
1) repetition + segment override + lock + original instruction size (= 10 bits);
2) displacement;
3) immediate operand.
Presence or absence of extensions is dictated by:
* instruction size = 0 => the repetition, etc extension (including the real extension size); and
* the extension flags for displacement and/or immediate.
Therefore an instruction's footprint is:
* 4-8 bytes (16-bit processors);
* 4-12 bytes (32-bit processors).
I'll then implement a collection suited to packing these things based on their
packing_size(), and later iterating them.
To verify: the 8086 allows unlimited-length instructions (which I'll probably handle by
generating length-15 NOPs and not resetting parser state), the 80386 limits them to
15 bytes, but what do the processors in between do?
*/
private:
// b0, b1: a Repetition;
// b2+: operation size.
uint8_t repetition_size_ = 0;
// Packing and encoding of fields is admittedly somewhat convoluted; what this
// achieves is that instructions will be sized:
//
// four bytes + up to three extension words
// (two bytes for 16-bit instructions, four for 32)
//
// Two of the extension words are used to retain an operand and displacement
// if the instruction has those. The other can store sizes greater than 15
// bytes (for earlier processors), plus any repetition, segment override or
// lock prefixes.
// b0-b5: source;
// b6-b11: destination;
// b12-b14: segment override;
// b15: lock.
uint16_t sources_ = 0;
// b7: address size;
// b6: has displacement;
// b5: has operand;
// [b4, b0]: source.
const uint8_t mem_exts_source_ = 0xff;
// Unpackable fields.
DisplacementT displacement_ = 0;
ImmediateT operand_ = 0; // ... or used to store a segment for far operations.
/// @returns @c true if b6 of @c mem_exts_source_, the 'has displacement' flag, is set.
bool has_displacement() const {
	constexpr uint8_t displacement_flag = 0x40;
	return (mem_exts_source_ & displacement_flag) != 0;
}
/// @returns @c true if b5 of @c mem_exts_source_, the 'has operand' flag, is set.
bool has_operand() const {
	constexpr uint8_t operand_flag = 0x20;
	return (mem_exts_source_ & operand_flag) != 0;
}
// Fields yet to be properly incorporated...
ScaleIndexBase sib_;
AddressSize address_size_ = AddressSize::b16;
int length_ = 0;
// [b15, b14]: data size;
// [b13, b10]: source length (0 => has length extension);
// [b9, b5]: top five of SIB;
// [b4, b0]: dest.
const uint16_t source_data_dest_sib_ = 0xffff;
/// @returns @c true if the four-bit length stored in [b13, b10] of @c source_data_dest_sib_
/// is zero, which is the marker for a length extension word being present.
bool has_length_extension() const {
	const int compact_length = (source_data_dest_sib_ >> 10) & 15;
	return compact_length == 0;
}
// {length extension}, {operand}, {displacement}.
// If length extension is present then:
//
// [b15, b6]: source length;
// [b5, b4]: repetition;
// [b3, b1]: segment override;
// b0: lock.
//
// Value-initialised to zero: the default values of the packed fields set both the
// 'has operand' and 'has displacement' flags, so a default-constructed Instruction
// would otherwise expose indeterminate words via operator ==, operand() and offset().
ImmediateT extensions_[3]{};
public:
/// @returns The number of bytes used for meaningful content within this class. A receiver must use at least @c sizeof(Instruction) bytes
/// to store an @c Instruction but is permitted to reuse the trailing sizeof(Instruction) - packing_size() for any purpose it likes. Teleologically,
/// this allows a denser packing of instructions into containers.
size_t packing_size() const { return sizeof(*this); /* TODO */ }
size_t packing_size() const {
	// The fixed header is everything stored ahead of the extension words; each
	// extension that is in use then adds one ImmediateT.
	//
	// The member is named extensions_ (with trailing underscore); naming it without
	// the underscore fails to compile as soon as this function is instantiated.
	return
		offsetof(Instruction<is_32bit>, extensions_) +
		(has_displacement() + has_operand() + has_length_extension()) * sizeof(ImmediateT);
}
DataPointer source() const { return DataPointer(Source(sources_ & 0x3f), sib_); }
DataPointer destination() const { return DataPointer(Source((sources_ >> 6) & 0x3f), sib_); }
bool lock() const { return sources_ & 0x8000; }
private:
// A lookup table to help with stripping parts of the SIB that have been
// hidden within the source/destination fields.
//
// Indexed by bits 4 and 3 of a five-bit source or destination field. Only when
// both are set (index 3) are the low three bits masked off, leaving 0x18;
// every other index keeps all five bits.
static constexpr uint8_t sib_masks[] = {
0x1f, 0x1f, 0x1f, 0x18
};
public:
/// @returns A DataPointer built from the source field in the low five bits of @c mem_exts_source_
/// and an SIB value reassembled from the two places its bits are stored.
DataPointer source() const {
return DataPointer(
// Select the source field, removing any SIB bits folded into its low three bits; see sib_masks.
Source(mem_exts_source_ & sib_masks[(mem_exts_source_ >> 3) & 3]),
// The top five SIB bits come from [b9, b5] of source_data_dest_sib_; the low three from the source field.
((source_data_dest_sib_ >> 2) & 0xf8) | (mem_exts_source_ & 0x07)
);
}
/// @returns A DataPointer built from the destination field in the low five bits of
/// @c source_data_dest_sib_ and an SIB value reassembled from bits of that same word.
DataPointer destination() const {
return DataPointer(
// Select the destination field, removing any SIB bits folded into its low three bits; see sib_masks.
Source(source_data_dest_sib_ & sib_masks[(source_data_dest_sib_ >> 3) & 3]),
// The top five SIB bits come from [b9, b5]; the low three from the destination field itself.
((source_data_dest_sib_ >> 2) & 0xf8) | (source_data_dest_sib_ & 0x07)
);
}
/// @returns @c true if a length extension is present and its b0, the lock flag, is set.
bool lock() const {
	if(!has_length_extension()) {
		return false;
	}
	return extensions_[0] & 1;
}
AddressSize address_size() const {
return AddressSize(address_size_);
return AddressSize(mem_exts_source_ >> 7);
}
Source data_segment() const {
const auto segment_override = Source(
if(!has_length_extension()) return default_data_segment(operation);
return Source(
int(Source::ES) +
((sources_ >> 12) & 7)
((extensions_[0] >> 1) & 7)
);
if(segment_override != Source::None) return segment_override;
// TODO: default source should be SS for anything touching the stack.
return Source::DS;
}
Repetition repetition() const { return Repetition(repetition_size_ & 3); }
DataSize operation_size() const { return DataSize(repetition_size_ >> 2); }
/// @returns The repetition stored in [b5, b4] of the length extension if one is present;
/// @c Repetition::None otherwise.
Repetition repetition() const {
	return has_length_extension() ? Repetition((extensions_[0] >> 4) & 3) : Repetition::None;
}
/// @returns The data size held in the top two bits, [b15, b14], of @c source_data_dest_sib_.
DataSize operation_size() const {
return DataSize(source_data_dest_sib_ >> 14);
}
uint16_t segment() const { return uint16_t(operand_); }
DisplacementT offset() const { return displacement_; }
/// @returns The instruction length: the four-bit value in [b13, b10] of
/// @c source_data_dest_sib_ if that is non-zero, otherwise the length held from b6 upwards
/// of the length extension word.
int length() const {
	const int compact_length = (source_data_dest_sib_ >> 10) & 15;
	if(!compact_length) {
		// A compact length of zero signals that the real length lives in the extension.
		return extensions_[0] >> 6;
	}
	return compact_length;
}
DisplacementT displacement() const { return displacement_; }
ImmediateT operand() const { return operand_; }
/// @returns The stored operand if the 'has operand' flag is set; 0 otherwise.
ImmediateT operand() const {
	// The operand word, when present, sits directly after the length extension, when present.
	const int operand_index = has_length_extension();
	const ImmediateT candidates[] = {0, extensions_[operand_index]};
	return candidates[has_operand()];
}
/// @returns The value of offset() converted to the signed @c DisplacementT.
DisplacementT displacement() const {
return DisplacementT(offset());
}
int length() const { return length_; }
/// @returns The value of operand() truncated to 16 bits.
uint16_t segment() const {
return uint16_t(operand());
}
/// @returns The stored displacement word, unsigned, if the 'has displacement' flag is set; 0 otherwise.
ImmediateT offset() const {
	// The displacement word follows the length extension and the operand, whichever are present.
	const int displacement_index = has_length_extension() + has_operand();
	const ImmediateT candidates[] = {0, extensions_[displacement_index]};
	return candidates[has_displacement()];
}
/// Default constructor; performs no initialisation beyond the members' in-class default values.
Instruction() noexcept {}
Instruction(
@ -685,27 +744,46 @@ template<bool is_32bit> class Instruction {
AddressSize address_size,
Source segment_override,
Repetition repetition,
DataSize operation_size,
DataSize data_size,
DisplacementT displacement,
ImmediateT operand,
int length) noexcept :
operation(operation),
repetition_size_(uint8_t((int(operation_size) << 2) | int(repetition))),
sources_(uint16_t(
mem_exts_source_(uint8_t(
(int(address_size) << 7) |
(displacement ? 0x40 : 0x00) |
(operand ? 0x20 : 0x00) |
int(source) |
(int(destination) << 6) |
((int(segment_override) & 7) << 12) |
(int(lock) << 15)
(source == Source::Indirect ? (uint8_t(sib) & 7) : 0)
)),
displacement_(displacement),
operand_(operand),
sib_(sib),
address_size_(address_size),
length_(length) {}
source_data_dest_sib_(uint16_t(
(int(data_size) << 14) |
((
(lock || (segment_override != Source::None) || (length > 15) || (repetition != Repetition::None))
) ? 0 : (length << 10)) |
((uint8_t(sib) & 0xf8) << 2) |
int(destination) |
(destination == Source::Indirect ? (uint8_t(sib) & 7) : 0)
)) {
int extension = 0;
if(has_length_extension()) {
if(segment_override == Source::None) segment_override = default_data_segment(operation);
extensions_[extension] = ImmediateT(
(length << 6) | (int(repetition) << 4) | ((int(segment_override) & 7) << 1) | int(lock)
);
++extension;
}
if(has_operand()) {
extensions_[extension] = operand;
++extension;
}
extensions_[extension] = ImmediateT(displacement);
}
};
// TODO: repack.
//static_assert(sizeof(Instruction) <= 8);
static_assert(sizeof(Instruction<true>) <= 16);
static_assert(sizeof(Instruction<false>) <= 10);
}
}

View File

@ -18,14 +18,6 @@ using namespace InstructionSet::x86;
@implementation x86DataPointerTests
//- (InstructionSet::x86::Instruction<false>)instruction16WithSourceDataPointer:(DataPointer)pointer {
// return x86::Instruction<false>{
// InstructionSet::x86::Operation::AAA,
// S
// };
//}
- (void)test16bitSize1 {
const DataPointer indirectPointer(
Source::eAX, Source::eDI, 0

View File

@ -539,4 +539,22 @@ std::vector<typename InstructionSet::x86::Decoder<model>::InstructionT> decode(c
test(instructions[63], Operation::JPCX, 0, 0xd4 - 0x9d);
}
- (void)testSourceSIB1 {
	// Opcode 0x62 with ModRM 0x90 (mod = 2, reg = 2, rm = 0), then the four
	// little-endian bytes of a 32-bit displacement: 0xf9cddf90, i.e. -0x6322070.
	const auto decoded = decode<Model::i80386>({
		0x62, 0x90, 0x90, 0xdf, 0xcd, 0xf9
	}, true);

	XCTAssertEqual(decoded.size(), 1);
	test(decoded[0], DataSize::DWord, Operation::BOUND, ScaleIndexBase(Source::eAX), Source::eDX, 0, -0x6322070);
}
- (void)testSourceSIB2 {
	// Opcode 0x81 with ModRM 0x47 (mod = 1, reg = 0, rm = 7), then an 8-bit
	// displacement of 0xbe, i.e. -0x42, and the four little-endian bytes of the
	// 32-bit immediate 0x9f683aa9.
	const auto decoded = decode<Model::i80386>({
		0x81, 0x47, 0xbe, 0xa9, 0x3a, 0x68, 0x9f
	}, true);

	XCTAssertEqual(decoded.size(), 1);
	test(decoded[0], DataSize::DWord, Operation::ADD, Source::Immediate, ScaleIndexBase(Source::eDI), 0x9f683aa9, -0x42);
}
@end