From 251b8e69ad35f02e163eea16c33938ce08c42a23 Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Mon, 4 Sep 2023 15:13:06 -0400 Subject: [PATCH 1/3] Attempt to support 'exhaustive' disassemblies (i.e. ones that chase every byte). --- Analyser/Static/Disassembler/6502.cpp | 2 +- Analyser/Static/Disassembler/Kernel.hpp | 48 +++++++++++++++++-------- Analyser/Static/Disassembler/Z80.cpp | 19 ++++++++-- Analyser/Static/Disassembler/Z80.hpp | 11 +++++- Analyser/Static/MSX/StaticAnalyser.cpp | 3 +- 5 files changed, 64 insertions(+), 19 deletions(-) diff --git a/Analyser/Static/Disassembler/6502.cpp b/Analyser/Static/Disassembler/6502.cpp index 146559da5..9b360aa31 100644 --- a/Analyser/Static/Disassembler/6502.cpp +++ b/Analyser/Static/Disassembler/6502.cpp @@ -316,5 +316,5 @@ Disassembly Analyser::Static::MOS6502::Disassemble( const std::vector &memory, const std::function &address_mapper, std::vector entry_points) { - return Analyser::Static::Disassembly::Disassemble(memory, address_mapper, entry_points); + return Analyser::Static::Disassembly::Disassemble(memory, address_mapper, entry_points, false); } diff --git a/Analyser/Static/Disassembler/Kernel.hpp b/Analyser/Static/Disassembler/Kernel.hpp index e6817b760..380b7198a 100644 --- a/Analyser/Static/Disassembler/Kernel.hpp +++ b/Analyser/Static/Disassembler/Kernel.hpp @@ -14,30 +14,50 @@ namespace Analyser::Static::Disassembly { template struct PartialDisassembly { D disassembly; std::vector remaining_entry_points; + std::vector touched; }; template D Disassemble( const std::vector &memory, const std::function &address_mapper, - std::vector entry_points) { + std::vector entry_points, + bool exhaustive) { PartialDisassembly partial_disassembly; partial_disassembly.remaining_entry_points = entry_points; + partial_disassembly.touched.resize(memory.size()); - while(!partial_disassembly.remaining_entry_points.empty()) { - // pull the next entry point from the back of the vector - S next_entry_point = partial_disassembly.remaining_entry_points.back(); - partial_disassembly.remaining_entry_points.pop_back(); + while(true) { + // Do a recursive-style disassembly for all current entry points. + while(!partial_disassembly.remaining_entry_points.empty()) { + // Pull the next entry point from the back of the vector. + S next_entry_point = partial_disassembly.remaining_entry_points.back(); + partial_disassembly.remaining_entry_points.pop_back(); - // if that address has already been visited, forget about it - if( partial_disassembly.disassembly.instructions_by_address.find(next_entry_point) - != partial_disassembly.disassembly.instructions_by_address.end()) continue; + // If that address has already been visited, forget about it. + if( partial_disassembly.disassembly.instructions_by_address.find(next_entry_point) + != partial_disassembly.disassembly.instructions_by_address.end()) continue; - // if it's outgoing, log it as such and forget about it; otherwise disassemble - std::size_t mapped_entry_point = address_mapper(next_entry_point); - if(mapped_entry_point >= memory.size()) - partial_disassembly.disassembly.outward_calls.insert(next_entry_point); - else - Disassembler::AddToDisassembly(partial_disassembly, memory, address_mapper, next_entry_point); + // If it's outgoing, log it as such and forget about it; otherwise disassemble. + std::size_t mapped_entry_point = address_mapper(next_entry_point); + if(mapped_entry_point >= memory.size()) + partial_disassembly.disassembly.outward_calls.insert(next_entry_point); + else + Disassembler::AddToDisassembly(partial_disassembly, memory, address_mapper, next_entry_point); + } + + // If this is not an exhaustive disassembly, that's your lot. + if(!exhaustive) { + break; + } + + // Otherwise, find the first address that isn't yet disassembled and chuck it onto the list. + auto first_untouched = std::find(partial_disassembly.touched.begin(), partial_disassembly.touched.end(), false); + if(first_untouched == partial_disassembly.touched.end()) { + break; + } + partial_disassembly.remaining_entry_points.push_back( + static_cast(first_untouched - partial_disassembly.touched.begin()) + ); } return partial_disassembly.disassembly; diff --git a/Analyser/Static/Disassembler/Z80.cpp b/Analyser/Static/Disassembler/Z80.cpp index 81998c8b6..20b4c8cab 100644 --- a/Analyser/Static/Disassembler/Z80.cpp +++ b/Analyser/Static/Disassembler/Z80.cpp @@ -558,6 +558,12 @@ struct Z80Disassembler { // Store the instruction away. disassembly.disassembly.instructions_by_address[instruction.address] = instruction; + // Apply all touches. + std::fill( + disassembly.touched.begin() + instruction.address, + disassembly.touched.begin() + accessor.address(), + true); + // Update access tables. int access_type = ((instruction.source == Instruction::Location::Operand_Indirect) ? 1 : 0) | @@ -611,9 +617,18 @@ struct Z80Disassembler { } // end of anonymous namespace + + Disassembly Analyser::Static::Z80::Disassemble( const std::vector &memory, const std::function &address_mapper, - std::vector entry_points) { - return Analyser::Static::Disassembly::Disassemble(memory, address_mapper, entry_points); + std::vector entry_points, + Approach approach) +{ + return Analyser::Static::Disassembly::Disassemble( + memory, + address_mapper, + entry_points, + approach == Approach::Exhaustive + ); } diff --git a/Analyser/Static/Disassembler/Z80.hpp b/Analyser/Static/Disassembler/Z80.hpp index ebed669d2..bc8b9818e 100644 --- a/Analyser/Static/Disassembler/Z80.hpp +++ b/Analyser/Static/Disassembler/Z80.hpp @@ -76,10 +76,19 @@ struct Disassembly { std::set internal_stores, internal_loads, internal_modifies; }; +enum class Approach { + /// Disassemble from the supplied entry points until an indeterminate branch or return only, adding other fully-static + /// entry points as they are observed. + Recursive, + /// Disassemble all supplied bytes, regardless of what nonsense may be encountered by accidental parsing of data areas. + Exhaustive, +}; + Disassembly Disassemble( const std::vector &memory, const std::function &address_mapper, - std::vector entry_points); + std::vector entry_points, + Approach approach); } diff --git a/Analyser/Static/MSX/StaticAnalyser.cpp b/Analyser/Static/MSX/StaticAnalyser.cpp index 322189ca4..edb8f6d34 100644 --- a/Analyser/Static/MSX/StaticAnalyser.cpp +++ b/Analyser/Static/MSX/StaticAnalyser.cpp @@ -119,7 +119,8 @@ static Analyser::Static::TargetList CartridgeTargetsFrom( Analyser::Static::Z80::Disassemble( first_8k, Analyser::Static::Disassembler::OffsetMapper(start_address), - { init_address } + { init_address }, + Analyser::Static::Z80::Approach::Exhaustive ); // // Look for a indirect store followed by an unconditional JP or CALL into another From 8c3ebe23f6adcdf594ea748ffdc2b0174c0ea960 Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Wed, 6 Sep 2023 22:26:15 -0400 Subject: [PATCH 2/3] Use ranges properly to apply address mapping. --- Analyser/Static/Disassembler/Kernel.hpp | 28 ++++++++++++++----------- Analyser/Static/Disassembler/Z80.cpp | 8 +++---- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/Analyser/Static/Disassembler/Kernel.hpp b/Analyser/Static/Disassembler/Kernel.hpp index 380b7198a..7b9ef0f8e 100644 --- a/Analyser/Static/Disassembler/Kernel.hpp +++ b/Analyser/Static/Disassembler/Kernel.hpp @@ -14,23 +14,23 @@ namespace Analyser::Static::Disassembly { template struct PartialDisassembly { D disassembly; std::vector remaining_entry_points; - std::vector touched; + std::map touched; // Maps from start of range to end. }; template D Disassemble( const std::vector &memory, const std::function &address_mapper, std::vector entry_points, - bool exhaustive) { + bool exhaustive) +{ PartialDisassembly partial_disassembly; partial_disassembly.remaining_entry_points = entry_points; - partial_disassembly.touched.resize(memory.size()); - while(true) { + while(!partial_disassembly.remaining_entry_points.empty()) { // Do a recursive-style disassembly for all current entry points. while(!partial_disassembly.remaining_entry_points.empty()) { // Pull the next entry point from the back of the vector. - S next_entry_point = partial_disassembly.remaining_entry_points.back(); + const S next_entry_point = partial_disassembly.remaining_entry_points.back(); partial_disassembly.remaining_entry_points.pop_back(); // If that address has already been visited, forget about it. @@ -50,14 +50,18 @@ template D Disassemble( break; } - // Otherwise, find the first address that isn't yet disassembled and chuck it onto the list. - auto first_untouched = std::find(partial_disassembly.touched.begin(), partial_disassembly.touched.end(), false); - if(first_untouched == partial_disassembly.touched.end()) { - break; + // Otherwise, find the first area between or just beyond a disassembled range + // that isn't yet disassembled and chuck it onto the list. + for(const auto &pair: partial_disassembly.touched) { + const auto end = pair.second; + if(partial_disassembly.touched.find(end) == partial_disassembly.touched.end()) { + if(address_mapper(end) < memory.size()) { + partial_disassembly.remaining_entry_points.push_back(end); + } + + break; + } } - partial_disassembly.remaining_entry_points.push_back( - static_cast(first_untouched - partial_disassembly.touched.begin()) - ); } return partial_disassembly.disassembly; diff --git a/Analyser/Static/Disassembler/Z80.cpp b/Analyser/Static/Disassembler/Z80.cpp index 20b4c8cab..92c6e53d0 100644 --- a/Analyser/Static/Disassembler/Z80.cpp +++ b/Analyser/Static/Disassembler/Z80.cpp @@ -546,6 +546,9 @@ struct Z80Disassembler { disassembly.disassembly.internal_calls.insert(entry_point); Accessor accessor(memory, address_mapper, entry_point); + auto &touched = disassembly.touched[entry_point]; + touched = entry_point; + while(!accessor.at_end()) { Instruction instruction; instruction.address = accessor.address(); @@ -559,10 +562,7 @@ struct Z80Disassembler { disassembly.disassembly.instructions_by_address[instruction.address] = instruction; // Apply all touches. - std::fill( - disassembly.touched.begin() + instruction.address, - disassembly.touched.begin() + accessor.address(), - true); + touched = accessor.address(); // Update access tables. int access_type = From e98f78316bac24435a602742c35a08102f89c644 Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Wed, 6 Sep 2023 22:40:39 -0400 Subject: [PATCH 3/3] Accept a paging scheme if it becomes 60% likely. --- Analyser/Static/MSX/StaticAnalyser.cpp | 147 ++++++------------------- 1 file changed, 31 insertions(+), 116 deletions(-) diff --git a/Analyser/Static/MSX/StaticAnalyser.cpp b/Analyser/Static/MSX/StaticAnalyser.cpp index edb8f6d34..9bf91d7cb 100644 --- a/Analyser/Static/MSX/StaticAnalyser.cpp +++ b/Analyser/Static/MSX/StaticAnalyser.cpp @@ -123,90 +123,8 @@ static Analyser::Static::TargetList CartridgeTargetsFrom( Analyser::Static::Z80::Approach::Exhaustive ); -// // Look for a indirect store followed by an unconditional JP or CALL into another -// // segment, that's a fairly explicit sign where found. using Instruction = Analyser::Static::Z80::Instruction; - std::map &instructions = disassembly.instructions_by_address; - bool is_ascii = false; -// auto iterator = instructions.begin(); -// while(iterator != instructions.end()) { -// auto next_iterator = iterator; -// next_iterator++; -// if(next_iterator == instructions.end()) break; -// -// if( iterator->second.operation == Instruction::Operation::LD && -// iterator->second.destination == Instruction::Location::Operand_Indirect && -// ( -// iterator->second.operand == 0x5000 || -// iterator->second.operand == 0x6000 || -// iterator->second.operand == 0x6800 || -// iterator->second.operand == 0x7000 || -// iterator->second.operand == 0x77ff || -// iterator->second.operand == 0x7800 || -// iterator->second.operand == 0x8000 || -// iterator->second.operand == 0x9000 || -// iterator->second.operand == 0xa000 -// ) && -// ( -// next_iterator->second.operation == Instruction::Operation::CALL || -// next_iterator->second.operation == Instruction::Operation::JP -// ) && -// ((next_iterator->second.operand >> 13) != (0x4000 >> 13)) -// ) { -// const uint16_t address = uint16_t(next_iterator->second.operand); -// switch(iterator->second.operand) { -// case 0x6000: -// if(address >= 0x6000 && address < 0x8000) { -// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::KonamiWithSCC; -// } -// break; -// case 0x6800: -// if(address >= 0x6000 && address < 0x6800) { -// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::ASCII8kb; -// } -// break; -// case 0x7000: -// if(address >= 0x6000 && address < 0x8000) { -// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::KonamiWithSCC; -// } -// if(address >= 0x7000 && address < 0x7800) { -// is_ascii = true; -// } -// break; -// case 0x77ff: -// if(address >= 0x7000 && address < 0x7800) { -// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::ASCII16kb; -// } -// break; -// case 0x7800: -// if(address >= 0xa000 && address < 0xc000) { -// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::ASCII8kb; -// } -// break; -// case 0x8000: -// if(address >= 0x8000 && address < 0xa000) { -// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::KonamiWithSCC; -// } -// break; -// case 0x9000: -// if(address >= 0x8000 && address < 0xa000) { -// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::KonamiWithSCC; -// } -// break; -// case 0xa000: -// if(address >= 0xa000 && address < 0xc000) { -// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::Konami; -// } -// break; -// case 0xb000: -// if(address >= 0xa000 && address < 0xc000) { -// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::KonamiWithSCC; -// } -// break; -// } -// } -// -// iterator = next_iterator; + const std::map &instructions = disassembly.instructions_by_address; // Look for LD (nnnn), A instructions, and collate those addresses. std::map address_counts; @@ -218,49 +136,46 @@ static Analyser::Static::TargetList CartridgeTargetsFrom( } } - // Weight confidences by number of observed hits. - float total_hits = - float( - address_counts[0x6000] + address_counts[0x6800] + - address_counts[0x7000] + address_counts[0x7800] + - address_counts[0x77ff] + address_counts[0x8000] + - address_counts[0xa000] + address_counts[0x5000] + - address_counts[0x9000] + address_counts[0xb000] - ); + // Weight confidences by number of observed hits; if any is above 60% confidence, just use it. + const auto ascii_8kb_total = address_counts[0x6000] + address_counts[0x6800] + address_counts[0x7000] + address_counts[0x7800]; + const auto ascii_16kb_total = address_counts[0x6000] + address_counts[0x7000] + address_counts[0x77ff]; + const auto konami_total = address_counts[0x6000] + address_counts[0x8000] + address_counts[0xa000]; + const auto konami_with_scc_total = address_counts[0x5000] + address_counts[0x7000] + address_counts[0x9000] + address_counts[0xb000]; - targets.push_back(CartridgeTarget( - segment, - start_address, - Analyser::Static::MSX::Cartridge::ASCII8kb, - float( address_counts[0x6000] + - address_counts[0x6800] + - address_counts[0x7000] + - address_counts[0x7800]) / total_hits)); - targets.push_back(CartridgeTarget( - segment, - start_address, - Analyser::Static::MSX::Cartridge::ASCII16kb, - float( address_counts[0x6000] + - address_counts[0x7000] + - address_counts[0x77ff]) / total_hits)); - if(!is_ascii) { + const auto total_hits = ascii_8kb_total + ascii_16kb_total + konami_total + konami_with_scc_total; + + const bool is_ascii_8kb = (ascii_8kb_total * 5) / (total_hits * 3); + const bool is_ascii_16kb = (ascii_16kb_total * 5) / (total_hits * 3); + const bool is_konami = (konami_total * 5) / (total_hits * 3); + const bool is_konami_with_scc = (konami_with_scc_total * 5) / (total_hits * 3); + + if(!is_ascii_16kb && !is_konami && !is_konami_with_scc) { + targets.push_back(CartridgeTarget( + segment, + start_address, + Analyser::Static::MSX::Cartridge::ASCII8kb, + float(ascii_8kb_total) / float(total_hits))); + } + if(!is_ascii_8kb && !is_konami && !is_konami_with_scc) { + targets.push_back(CartridgeTarget( + segment, + start_address, + Analyser::Static::MSX::Cartridge::ASCII16kb, + float(ascii_16kb_total) / float(total_hits))); + } + if(!is_ascii_8kb && !is_ascii_16kb && !is_konami_with_scc) { targets.push_back(CartridgeTarget( segment, start_address, Analyser::Static::MSX::Cartridge::Konami, - float( address_counts[0x6000] + - address_counts[0x8000] + - address_counts[0xa000]) / total_hits)); + float(konami_total) / float(total_hits))); } - if(!is_ascii) { + if(!is_ascii_8kb && !is_ascii_16kb && !is_konami) { targets.push_back(CartridgeTarget( segment, start_address, Analyser::Static::MSX::Cartridge::KonamiWithSCC, - float( address_counts[0x5000] + - address_counts[0x7000] + - address_counts[0x9000] + - address_counts[0xb000]) / total_hits)); + float(konami_with_scc_total) / float(total_hits))); } }