diff --git a/Analyser/Static/Disassembler/6502.cpp b/Analyser/Static/Disassembler/6502.cpp index 146559da5..9b360aa31 100644 --- a/Analyser/Static/Disassembler/6502.cpp +++ b/Analyser/Static/Disassembler/6502.cpp @@ -316,5 +316,5 @@ Disassembly Analyser::Static::MOS6502::Disassemble( const std::vector &memory, const std::function &address_mapper, std::vector entry_points) { - return Analyser::Static::Disassembly::Disassemble(memory, address_mapper, entry_points); + return Analyser::Static::Disassembly::Disassemble(memory, address_mapper, entry_points, false); } diff --git a/Analyser/Static/Disassembler/Kernel.hpp b/Analyser/Static/Disassembler/Kernel.hpp index e6817b760..7b9ef0f8e 100644 --- a/Analyser/Static/Disassembler/Kernel.hpp +++ b/Analyser/Static/Disassembler/Kernel.hpp @@ -14,30 +14,54 @@ namespace Analyser::Static::Disassembly { template struct PartialDisassembly { D disassembly; std::vector remaining_entry_points; + std::map touched; // Maps from start of range to end. }; template D Disassemble( const std::vector &memory, const std::function &address_mapper, - std::vector entry_points) { + std::vector entry_points, + bool exhaustive) +{ PartialDisassembly partial_disassembly; partial_disassembly.remaining_entry_points = entry_points; while(!partial_disassembly.remaining_entry_points.empty()) { - // pull the next entry point from the back of the vector - S next_entry_point = partial_disassembly.remaining_entry_points.back(); - partial_disassembly.remaining_entry_points.pop_back(); + // Do a recursive-style disassembly for all current entry points. + while(!partial_disassembly.remaining_entry_points.empty()) { + // Pull the next entry point from the back of the vector. 
+ const S next_entry_point = partial_disassembly.remaining_entry_points.back(); + partial_disassembly.remaining_entry_points.pop_back(); - // if that address has already been visited, forget about it - if( partial_disassembly.disassembly.instructions_by_address.find(next_entry_point) - != partial_disassembly.disassembly.instructions_by_address.end()) continue; + // If that address has already been visited, forget about it. + if( partial_disassembly.disassembly.instructions_by_address.find(next_entry_point) + != partial_disassembly.disassembly.instructions_by_address.end()) continue; - // if it's outgoing, log it as such and forget about it; otherwise disassemble - std::size_t mapped_entry_point = address_mapper(next_entry_point); - if(mapped_entry_point >= memory.size()) - partial_disassembly.disassembly.outward_calls.insert(next_entry_point); - else - Disassembler::AddToDisassembly(partial_disassembly, memory, address_mapper, next_entry_point); + // If it's outgoing, log it as such and forget about it; otherwise disassemble. + std::size_t mapped_entry_point = address_mapper(next_entry_point); + if(mapped_entry_point >= memory.size()) + partial_disassembly.disassembly.outward_calls.insert(next_entry_point); + else + Disassembler::AddToDisassembly(partial_disassembly, memory, address_mapper, next_entry_point); + } + + // If this is not an exhaustive disassembly, that's your lot. + if(!exhaustive) { + break; + } + + // Otherwise, find the first area between or just beyond a disassembled range + // that isn't yet disassembled and chuck it onto the list. 
+ for(const auto &pair: partial_disassembly.touched) { + const auto end = pair.second; + if(partial_disassembly.touched.find(end) == partial_disassembly.touched.end() && partial_disassembly.disassembly.instructions_by_address.find(end) == partial_disassembly.disassembly.instructions_by_address.end()) { // Skip ends already decoded by another run: the loop above would discard them as visited and this search would re-queue them forever. + if(address_mapper(end) < memory.size()) { + partial_disassembly.remaining_entry_points.push_back(end); + } + + break; + } + } } return partial_disassembly.disassembly; diff --git a/Analyser/Static/Disassembler/Z80.cpp b/Analyser/Static/Disassembler/Z80.cpp index 81998c8b6..92c6e53d0 100644 --- a/Analyser/Static/Disassembler/Z80.cpp +++ b/Analyser/Static/Disassembler/Z80.cpp @@ -546,6 +546,9 @@ struct Z80Disassembler { disassembly.disassembly.internal_calls.insert(entry_point); Accessor accessor(memory, address_mapper, entry_point); + auto &touched = disassembly.touched[entry_point]; + touched = entry_point; + while(!accessor.at_end()) { Instruction instruction; instruction.address = accessor.address(); @@ -558,6 +561,9 @@ struct Z80Disassembler { // Store the instruction away. disassembly.disassembly.instructions_by_address[instruction.address] = instruction; + // Apply all touches. + touched = accessor.address(); + // Update access tables. int access_type = ((instruction.source == Instruction::Location::Operand_Indirect) ? 
1 : 0) | @@ -611,9 +617,18 @@ struct Z80Disassembler { } // end of anonymous namespace + + Disassembly Analyser::Static::Z80::Disassemble( const std::vector &memory, const std::function &address_mapper, - std::vector entry_points) { - return Analyser::Static::Disassembly::Disassemble(memory, address_mapper, entry_points); + std::vector entry_points, + Approach approach) +{ + return Analyser::Static::Disassembly::Disassemble( + memory, + address_mapper, + entry_points, + approach == Approach::Exhaustive + ); } diff --git a/Analyser/Static/Disassembler/Z80.hpp b/Analyser/Static/Disassembler/Z80.hpp index ebed669d2..bc8b9818e 100644 --- a/Analyser/Static/Disassembler/Z80.hpp +++ b/Analyser/Static/Disassembler/Z80.hpp @@ -76,10 +76,19 @@ struct Disassembly { std::set internal_stores, internal_loads, internal_modifies; }; +enum class Approach { + /// Disassemble from the supplied entry points until an indeterminate branch or return only, adding other fully-static + /// entry points as they are observed. + Recursive, + /// Disassemble all supplied bytes, regardless of what nonsense may be encountered by accidental parsing of data areas. + Exhaustive, +}; + Disassembly Disassemble( const std::vector &memory, const std::function &address_mapper, - std::vector entry_points); + std::vector entry_points, + Approach approach); } diff --git a/Analyser/Static/MSX/StaticAnalyser.cpp b/Analyser/Static/MSX/StaticAnalyser.cpp index 322189ca4..9bf91d7cb 100644 --- a/Analyser/Static/MSX/StaticAnalyser.cpp +++ b/Analyser/Static/MSX/StaticAnalyser.cpp @@ -119,93 +119,12 @@ static Analyser::Static::TargetList CartridgeTargetsFrom( Analyser::Static::Z80::Disassemble( first_8k, Analyser::Static::Disassembler::OffsetMapper(start_address), - { init_address } + { init_address }, + Analyser::Static::Z80::Approach::Exhaustive ); -// // Look for a indirect store followed by an unconditional JP or CALL into another -// // segment, that's a fairly explicit sign where found. 
using Instruction = Analyser::Static::Z80::Instruction; - std::map &instructions = disassembly.instructions_by_address; - bool is_ascii = false; -// auto iterator = instructions.begin(); -// while(iterator != instructions.end()) { -// auto next_iterator = iterator; -// next_iterator++; -// if(next_iterator == instructions.end()) break; -// -// if( iterator->second.operation == Instruction::Operation::LD && -// iterator->second.destination == Instruction::Location::Operand_Indirect && -// ( -// iterator->second.operand == 0x5000 || -// iterator->second.operand == 0x6000 || -// iterator->second.operand == 0x6800 || -// iterator->second.operand == 0x7000 || -// iterator->second.operand == 0x77ff || -// iterator->second.operand == 0x7800 || -// iterator->second.operand == 0x8000 || -// iterator->second.operand == 0x9000 || -// iterator->second.operand == 0xa000 -// ) && -// ( -// next_iterator->second.operation == Instruction::Operation::CALL || -// next_iterator->second.operation == Instruction::Operation::JP -// ) && -// ((next_iterator->second.operand >> 13) != (0x4000 >> 13)) -// ) { -// const uint16_t address = uint16_t(next_iterator->second.operand); -// switch(iterator->second.operand) { -// case 0x6000: -// if(address >= 0x6000 && address < 0x8000) { -// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::KonamiWithSCC; -// } -// break; -// case 0x6800: -// if(address >= 0x6000 && address < 0x6800) { -// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::ASCII8kb; -// } -// break; -// case 0x7000: -// if(address >= 0x6000 && address < 0x8000) { -// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::KonamiWithSCC; -// } -// if(address >= 0x7000 && address < 0x7800) { -// is_ascii = true; -// } -// break; -// case 0x77ff: -// if(address >= 0x7000 && address < 0x7800) { -// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::ASCII16kb; -// } -// break; -// case 0x7800: -// if(address >= 0xa000 && address < 0xc000) 
{ -// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::ASCII8kb; -// } -// break; -// case 0x8000: -// if(address >= 0x8000 && address < 0xa000) { -// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::KonamiWithSCC; -// } -// break; -// case 0x9000: -// if(address >= 0x8000 && address < 0xa000) { -// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::KonamiWithSCC; -// } -// break; -// case 0xa000: -// if(address >= 0xa000 && address < 0xc000) { -// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::Konami; -// } -// break; -// case 0xb000: -// if(address >= 0xa000 && address < 0xc000) { -// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::KonamiWithSCC; -// } -// break; -// } -// } -// -// iterator = next_iterator; + const std::map &instructions = disassembly.instructions_by_address; // Look for LD (nnnn), A instructions, and collate those addresses. std::map address_counts; @@ -217,49 +136,46 @@ static Analyser::Static::TargetList CartridgeTargetsFrom( } } - // Weight confidences by number of observed hits. - float total_hits = - float( - address_counts[0x6000] + address_counts[0x6800] + - address_counts[0x7000] + address_counts[0x7800] + - address_counts[0x77ff] + address_counts[0x8000] + - address_counts[0xa000] + address_counts[0x5000] + - address_counts[0x9000] + address_counts[0xb000] - ); + // Weight confidences by number of observed hits; if any is above 60% confidence, just use it. 
+ const auto ascii_8kb_total = address_counts[0x6000] + address_counts[0x6800] + address_counts[0x7000] + address_counts[0x7800]; + const auto ascii_16kb_total = address_counts[0x6000] + address_counts[0x7000] + address_counts[0x77ff]; + const auto konami_total = address_counts[0x6000] + address_counts[0x8000] + address_counts[0xa000]; + const auto konami_with_scc_total = address_counts[0x5000] + address_counts[0x7000] + address_counts[0x9000] + address_counts[0xb000]; - targets.push_back(CartridgeTarget( - segment, - start_address, - Analyser::Static::MSX::Cartridge::ASCII8kb, - float( address_counts[0x6000] + - address_counts[0x6800] + - address_counts[0x7000] + - address_counts[0x7800]) / total_hits)); - targets.push_back(CartridgeTarget( - segment, - start_address, - Analyser::Static::MSX::Cartridge::ASCII16kb, - float( address_counts[0x6000] + - address_counts[0x7000] + - address_counts[0x77ff]) / total_hits)); - if(!is_ascii) { + const auto total_hits = ascii_8kb_total + ascii_16kb_total + konami_total + konami_with_scc_total; // Zero when no candidate stores were found, so the 60% tests below compare rather than divide. + + const bool is_ascii_8kb = total_hits > 0 && (ascii_8kb_total * 5) >= (total_hits * 3); + const bool is_ascii_16kb = total_hits > 0 && (ascii_16kb_total * 5) >= (total_hits * 3); + const bool is_konami = total_hits > 0 && (konami_total * 5) >= (total_hits * 3); + const bool is_konami_with_scc = total_hits > 0 && (konami_with_scc_total * 5) >= (total_hits * 3); + + if(!is_ascii_16kb && !is_konami && !is_konami_with_scc) { + targets.push_back(CartridgeTarget( + segment, + start_address, + Analyser::Static::MSX::Cartridge::ASCII8kb, + float(ascii_8kb_total) / float(total_hits))); + } + if(!is_ascii_8kb && !is_konami && !is_konami_with_scc) { + targets.push_back(CartridgeTarget( + segment, + start_address, + Analyser::Static::MSX::Cartridge::ASCII16kb, + float(ascii_16kb_total) / float(total_hits))); + } + if(!is_ascii_8kb && !is_ascii_16kb && !is_konami_with_scc) { targets.push_back(CartridgeTarget( segment, start_address, Analyser::Static::MSX::Cartridge::Konami, - float( address_counts[0x6000] + - 
address_counts[0x8000] + - address_counts[0xa000]) / total_hits)); + float(konami_total) / float(total_hits))); } - if(!is_ascii) { + if(!is_ascii_8kb && !is_ascii_16kb && !is_konami) { targets.push_back(CartridgeTarget( segment, start_address, Analyser::Static::MSX::Cartridge::KonamiWithSCC, - float( address_counts[0x5000] + - address_counts[0x7000] + - address_counts[0x9000] + - address_counts[0xb000]) / total_hits)); + float(konami_with_scc_total) / float(total_hits))); } }