From f92ffddb8243ad6c1e39ab512f8e3d4af1ed23ac Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Thu, 10 Mar 2022 20:47:56 -0500 Subject: [PATCH] Add instruction length limits. --- InstructionSets/x86/Decoder.cpp | 26 ++++++++++++++++++-------- InstructionSets/x86/Decoder.hpp | 21 ++++++++++++++++----- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/InstructionSets/x86/Decoder.cpp b/InstructionSets/x86/Decoder.cpp index dd016adb2..dce0ca9ed 100644 --- a/InstructionSets/x86/Decoder.cpp +++ b/InstructionSets/x86/Decoder.cpp @@ -14,15 +14,17 @@ using namespace InstructionSet::x86; -// TODO: instruction length limits: -// -// 8086/80186: none -// 80286: 10 bytes -// 80386: 15 bytes - template std::pair::InstructionT> Decoder::decode(const uint8_t *source, size_t length) { - const uint8_t *const end = source + length; + // Instruction length limits: + // + // 8086/80186: none + // 80286: 10 bytes + // 80386: 15 bytes + constexpr int max_instruction_length = model >= Model::i80386 ? 15 : (model == Model::i80286 ? 10 : 0); + + const uint8_t *const buffer_end = source + length; + const uint8_t *const end = max_instruction_length ? std::min(buffer_end, source + max_instruction_length - consumed_) : buffer_end; // MARK: - Prefixes (if present) and the opcode. @@ -436,6 +438,7 @@ std::pair::InstructionT> Decoder::decode(con } // MARK: - Additional F page of instructions. + if(phase_ == Phase::InstructionPageF && source != end) { // Update the instruction acquired. const uint8_t instr = *source; @@ -861,7 +864,7 @@ std::pair::InstructionT> Decoder::decode(con // MARK: - Displacement and operand. 
- if(phase_ == Phase::DisplacementOrOperand && source != end) { + if(phase_ == Phase::DisplacementOrOperand) { const auto required_bytes = int(byte_size(displacement_size_) + byte_size(operand_size_)); const int outstanding_bytes = required_bytes - operand_bytes_; @@ -928,6 +931,13 @@ std::pair::InstructionT> Decoder::decode(con return result; } + // Check for a too-long instruction. + if(max_instruction_length && consumed_ == max_instruction_length) { + const auto result = std::make_pair(consumed_, InstructionT()); + reset_parsing(); + return result; + } + // i.e. not done yet. return std::make_pair(0, InstructionT()); } diff --git a/InstructionSets/x86/Decoder.hpp b/InstructionSets/x86/Decoder.hpp index 6c837df2a..4491ce959 100644 --- a/InstructionSets/x86/Decoder.hpp +++ b/InstructionSets/x86/Decoder.hpp @@ -28,11 +28,22 @@ template class Decoder { using InstructionT = Instruction= Model::i80386>; /*! - @returns an @c Instruction plus a size; a positive size to indicate successful decoding; a - negative size specifies the [negatived] number of further bytes the caller should ideally - collect before calling again. The caller is free to call with fewer, but may not get a decoded - instruction in response, and the decoder may still not be able to complete decoding - even if given that number of bytes. + @returns an @c Instruction plus a size; a positive size indicates successful decoding of + an instruction that was that many bytes long in total; a negative size specifies the [negatived] + minimum number of further bytes the caller should ideally collect before calling again. The + caller is free to call with fewer, but may not get a decoded instruction in response, and the + decoder may still not be able to complete decoding even if given that number of bytes. + + Successful decoding is defined to mean that all decoding steps are complete. 
The output + may still be an illegal instruction (indicated by Operation::Invalid) if the byte sequence + supplied cannot form a valid instruction. + + @discussion Although instructions also contain an indicator of their length, on chips prior + to the 80286 there is no limit to instruction length, and that could in theory overflow the available + storage, which can describe instructions only up to 1kb in size. + + The 80286 and 80386 have instruction length limits of 10 and 15 bytes respectively, so they + cannot overflow that length field. */ std::pair decode(const uint8_t *source, size_t length);