From 251b8e69ad35f02e163eea16c33938ce08c42a23 Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Mon, 4 Sep 2023 15:13:06 -0400 Subject: [PATCH] Attempt to support 'exhaustive' disassemblies (i.e. ones that chase every byte). --- /* NOTE(review): (1) Kernel.hpp sizes `touched` from memory.size() and only compares entry points against memory.size() after passing them through address_mapper, but the Z80.cpp hunk fills `touched` using instruction.address and accessor.address() directly. If those are unmapped addresses (the MSX caller passes OffsetMapper(start_address)), the fill range may fall outside `touched` - TODO confirm, and map through address_mapper before indexing. (2) Kernel.hpp pushes the index of the first untouched flag as the next entry point without converting it back to an address - TODO confirm that is intended. (3) If a pass from that entry point marks nothing as touched (for example because the address is already in instructions_by_address), the outer while(true) looks like it would pick the same index again and never terminate - TODO confirm. */ Analyser/Static/Disassembler/6502.cpp | 2 +- Analyser/Static/Disassembler/Kernel.hpp | 48 +++++++++++++++++-------- Analyser/Static/Disassembler/Z80.cpp | 19 ++++++++-- Analyser/Static/Disassembler/Z80.hpp | 11 +++++- Analyser/Static/MSX/StaticAnalyser.cpp | 3 +- 5 files changed, 64 insertions(+), 19 deletions(-) diff --git a/Analyser/Static/Disassembler/6502.cpp b/Analyser/Static/Disassembler/6502.cpp index 146559da5..9b360aa31 100644 --- a/Analyser/Static/Disassembler/6502.cpp +++ b/Analyser/Static/Disassembler/6502.cpp @@ -316,5 +316,5 @@ Disassembly Analyser::Static::MOS6502::Disassemble( const std::vector &memory, const std::function &address_mapper, std::vector entry_points) { - return Analyser::Static::Disassembly::Disassemble(memory, address_mapper, entry_points); + return Analyser::Static::Disassembly::Disassemble(memory, address_mapper, entry_points, false); } diff --git a/Analyser/Static/Disassembler/Kernel.hpp b/Analyser/Static/Disassembler/Kernel.hpp index e6817b760..380b7198a 100644 --- a/Analyser/Static/Disassembler/Kernel.hpp +++ b/Analyser/Static/Disassembler/Kernel.hpp @@ -14,30 +14,50 @@ namespace Analyser::Static::Disassembly { template struct PartialDisassembly { D disassembly; std::vector remaining_entry_points; + std::vector touched; }; template D Disassemble( const std::vector &memory, const std::function &address_mapper, - std::vector entry_points) { + std::vector entry_points, + bool exhaustive) { PartialDisassembly partial_disassembly; partial_disassembly.remaining_entry_points = entry_points; + partial_disassembly.touched.resize(memory.size()); - while(!partial_disassembly.remaining_entry_points.empty()) { - // pull the next entry point from the back of the vector - S next_entry_point = 
partial_disassembly.remaining_entry_points.back(); - partial_disassembly.remaining_entry_points.pop_back(); + while(true) { + // Do a recursive-style disassembly for all current entry points. + while(!partial_disassembly.remaining_entry_points.empty()) { + // Pull the next entry point from the back of the vector. + S next_entry_point = partial_disassembly.remaining_entry_points.back(); + partial_disassembly.remaining_entry_points.pop_back(); - // if that address has already been visited, forget about it - if( partial_disassembly.disassembly.instructions_by_address.find(next_entry_point) - != partial_disassembly.disassembly.instructions_by_address.end()) continue; + // If that address has already been visited, forget about it. + if( partial_disassembly.disassembly.instructions_by_address.find(next_entry_point) + != partial_disassembly.disassembly.instructions_by_address.end()) continue; - // if it's outgoing, log it as such and forget about it; otherwise disassemble - std::size_t mapped_entry_point = address_mapper(next_entry_point); - if(mapped_entry_point >= memory.size()) - partial_disassembly.disassembly.outward_calls.insert(next_entry_point); - else - Disassembler::AddToDisassembly(partial_disassembly, memory, address_mapper, next_entry_point); + // If it's outgoing, log it as such and forget about it; otherwise disassemble. + std::size_t mapped_entry_point = address_mapper(next_entry_point); + if(mapped_entry_point >= memory.size()) + partial_disassembly.disassembly.outward_calls.insert(next_entry_point); + else + Disassembler::AddToDisassembly(partial_disassembly, memory, address_mapper, next_entry_point); + } + + // If this is not an exhaustive disassembly, that's your lot. + if(!exhaustive) { + break; + } + + // Otherwise, find the first address that isn't yet disassembled and chuck it onto the list. 
+ auto first_untouched = std::find(partial_disassembly.touched.begin(), partial_disassembly.touched.end(), false); + if(first_untouched == partial_disassembly.touched.end()) { + break; + } + partial_disassembly.remaining_entry_points.push_back( + static_cast(first_untouched - partial_disassembly.touched.begin()) + ); } return partial_disassembly.disassembly; diff --git a/Analyser/Static/Disassembler/Z80.cpp b/Analyser/Static/Disassembler/Z80.cpp index 81998c8b6..20b4c8cab 100644 --- a/Analyser/Static/Disassembler/Z80.cpp +++ b/Analyser/Static/Disassembler/Z80.cpp @@ -558,6 +558,12 @@ struct Z80Disassembler { // Store the instruction away. disassembly.disassembly.instructions_by_address[instruction.address] = instruction; + // Apply all touches. + std::fill( + disassembly.touched.begin() + instruction.address, + disassembly.touched.begin() + accessor.address(), + true); + // Update access tables. int access_type = ((instruction.source == Instruction::Location::Operand_Indirect) ? 1 : 0) | @@ -611,9 +617,18 @@ struct Z80Disassembler { } // end of anonymous namespace + + Disassembly Analyser::Static::Z80::Disassemble( const std::vector &memory, const std::function &address_mapper, - std::vector entry_points) { - return Analyser::Static::Disassembly::Disassemble(memory, address_mapper, entry_points); + std::vector entry_points, + Approach approach) +{ + return Analyser::Static::Disassembly::Disassemble( + memory, + address_mapper, + entry_points, + approach == Approach::Exhaustive + ); } diff --git a/Analyser/Static/Disassembler/Z80.hpp b/Analyser/Static/Disassembler/Z80.hpp index ebed669d2..bc8b9818e 100644 --- a/Analyser/Static/Disassembler/Z80.hpp +++ b/Analyser/Static/Disassembler/Z80.hpp @@ -76,10 +76,19 @@ struct Disassembly { std::set internal_stores, internal_loads, internal_modifies; }; +enum class Approach { + /// Disassemble from the supplied entry points until an indeterminate branch or return only, adding other fully-static + /// entry points as they are 
observed. + Recursive, + /// Disassemble all supplied bytes, regardless of what nonsense may be encountered by accidental parsing of data areas. + Exhaustive, +}; + Disassembly Disassemble( const std::vector &memory, const std::function &address_mapper, - std::vector entry_points); + std::vector entry_points, + Approach approach); } diff --git a/Analyser/Static/MSX/StaticAnalyser.cpp b/Analyser/Static/MSX/StaticAnalyser.cpp index 322189ca4..edb8f6d34 100644 --- a/Analyser/Static/MSX/StaticAnalyser.cpp +++ b/Analyser/Static/MSX/StaticAnalyser.cpp @@ -119,7 +119,8 @@ static Analyser::Static::TargetList CartridgeTargetsFrom( Analyser::Static::Z80::Disassemble( first_8k, Analyser::Static::Disassembler::OffsetMapper(start_address), - { init_address } + { init_address }, + Analyser::Static::Z80::Approach::Exhaustive ); // // Look for a indirect store followed by an unconditional JP or CALL into another