mirror of
https://github.com/TomHarte/CLK.git
synced 2024-11-23 03:32:32 +00:00
Merge pull request #1165 from TomHarte/MSX2Detection
Improve MSX cartridge type detection.
This commit is contained in:
commit
b7a27fbc6b
@ -316,5 +316,5 @@ Disassembly Analyser::Static::MOS6502::Disassemble(
|
||||
const std::vector<uint8_t> &memory,
|
||||
const std::function<std::size_t(uint16_t)> &address_mapper,
|
||||
std::vector<uint16_t> entry_points) {
|
||||
return Analyser::Static::Disassembly::Disassemble<Disassembly, uint16_t, MOS6502Disassembler>(memory, address_mapper, entry_points);
|
||||
return Analyser::Static::Disassembly::Disassemble<Disassembly, uint16_t, MOS6502Disassembler>(memory, address_mapper, entry_points, false);
|
||||
}
|
||||
|
@ -14,30 +14,54 @@ namespace Analyser::Static::Disassembly {
|
||||
template <typename D, typename S> struct PartialDisassembly {
|
||||
D disassembly;
|
||||
std::vector<S> remaining_entry_points;
|
||||
std::map<S, S> touched; // Maps from start of range to end.
|
||||
};
|
||||
|
||||
template <typename D, typename S, typename Disassembler> D Disassemble(
|
||||
const std::vector<uint8_t> &memory,
|
||||
const std::function<std::size_t(S)> &address_mapper,
|
||||
std::vector<S> entry_points) {
|
||||
std::vector<S> entry_points,
|
||||
bool exhaustive)
|
||||
{
|
||||
PartialDisassembly<D, S> partial_disassembly;
|
||||
partial_disassembly.remaining_entry_points = entry_points;
|
||||
|
||||
while(!partial_disassembly.remaining_entry_points.empty()) {
|
||||
// pull the next entry point from the back of the vector
|
||||
S next_entry_point = partial_disassembly.remaining_entry_points.back();
|
||||
partial_disassembly.remaining_entry_points.pop_back();
|
||||
// Do a recursive-style disassembly for all current entry points.
|
||||
while(!partial_disassembly.remaining_entry_points.empty()) {
|
||||
// Pull the next entry point from the back of the vector.
|
||||
const S next_entry_point = partial_disassembly.remaining_entry_points.back();
|
||||
partial_disassembly.remaining_entry_points.pop_back();
|
||||
|
||||
// if that address has already been visited, forget about it
|
||||
if( partial_disassembly.disassembly.instructions_by_address.find(next_entry_point)
|
||||
!= partial_disassembly.disassembly.instructions_by_address.end()) continue;
|
||||
// If that address has already been visited, forget about it.
|
||||
if( partial_disassembly.disassembly.instructions_by_address.find(next_entry_point)
|
||||
!= partial_disassembly.disassembly.instructions_by_address.end()) continue;
|
||||
|
||||
// if it's outgoing, log it as such and forget about it; otherwise disassemble
|
||||
std::size_t mapped_entry_point = address_mapper(next_entry_point);
|
||||
if(mapped_entry_point >= memory.size())
|
||||
partial_disassembly.disassembly.outward_calls.insert(next_entry_point);
|
||||
else
|
||||
Disassembler::AddToDisassembly(partial_disassembly, memory, address_mapper, next_entry_point);
|
||||
// If it's outgoing, log it as such and forget about it; otherwise disassemble.
|
||||
std::size_t mapped_entry_point = address_mapper(next_entry_point);
|
||||
if(mapped_entry_point >= memory.size())
|
||||
partial_disassembly.disassembly.outward_calls.insert(next_entry_point);
|
||||
else
|
||||
Disassembler::AddToDisassembly(partial_disassembly, memory, address_mapper, next_entry_point);
|
||||
}
|
||||
|
||||
// If this is not an exhaustive disassembly, that's your lot.
|
||||
if(!exhaustive) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Otherwise, find the first area between or just beyond a disassembled range
|
||||
// that isn't yet disassembled and chuck it onto the list.
|
||||
for(const auto &pair: partial_disassembly.touched) {
|
||||
const auto end = pair.second;
|
||||
if(partial_disassembly.touched.find(end) == partial_disassembly.touched.end()) {
|
||||
if(address_mapper(end) < memory.size()) {
|
||||
partial_disassembly.remaining_entry_points.push_back(end);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return partial_disassembly.disassembly;
|
||||
|
@ -546,6 +546,9 @@ struct Z80Disassembler {
|
||||
disassembly.disassembly.internal_calls.insert(entry_point);
|
||||
Accessor accessor(memory, address_mapper, entry_point);
|
||||
|
||||
auto &touched = disassembly.touched[entry_point];
|
||||
touched = entry_point;
|
||||
|
||||
while(!accessor.at_end()) {
|
||||
Instruction instruction;
|
||||
instruction.address = accessor.address();
|
||||
@ -558,6 +561,9 @@ struct Z80Disassembler {
|
||||
// Store the instruction away.
|
||||
disassembly.disassembly.instructions_by_address[instruction.address] = instruction;
|
||||
|
||||
// Apply all touches.
|
||||
touched = accessor.address();
|
||||
|
||||
// Update access tables.
|
||||
int access_type =
|
||||
((instruction.source == Instruction::Location::Operand_Indirect) ? 1 : 0) |
|
||||
@ -611,9 +617,18 @@ struct Z80Disassembler {
|
||||
|
||||
} // end of anonymous namespace
|
||||
|
||||
|
||||
|
||||
Disassembly Analyser::Static::Z80::Disassemble(
|
||||
const std::vector<uint8_t> &memory,
|
||||
const std::function<std::size_t(uint16_t)> &address_mapper,
|
||||
std::vector<uint16_t> entry_points) {
|
||||
return Analyser::Static::Disassembly::Disassemble<Disassembly, uint16_t, Z80Disassembler>(memory, address_mapper, entry_points);
|
||||
std::vector<uint16_t> entry_points,
|
||||
Approach approach)
|
||||
{
|
||||
return Analyser::Static::Disassembly::Disassemble<Disassembly, uint16_t, Z80Disassembler>(
|
||||
memory,
|
||||
address_mapper,
|
||||
entry_points,
|
||||
approach == Approach::Exhaustive
|
||||
);
|
||||
}
|
||||
|
@ -76,10 +76,19 @@ struct Disassembly {
|
||||
std::set<uint16_t> internal_stores, internal_loads, internal_modifies;
|
||||
};
|
||||
|
||||
enum class Approach {
|
||||
/// Disassemble from the supplied entry points until an indeterminate branch or return only, adding other fully-static
|
||||
/// entry points as they are observed.
|
||||
Recursive,
|
||||
/// Disassemble all supplied bytes, regardless of what nonsense may be encountered by accidental parsing of data areas.
|
||||
Exhaustive,
|
||||
};
|
||||
|
||||
Disassembly Disassemble(
|
||||
const std::vector<uint8_t> &memory,
|
||||
const std::function<std::size_t(uint16_t)> &address_mapper,
|
||||
std::vector<uint16_t> entry_points);
|
||||
std::vector<uint16_t> entry_points,
|
||||
Approach approach);
|
||||
|
||||
}
|
||||
|
||||
|
@ -119,93 +119,12 @@ static Analyser::Static::TargetList CartridgeTargetsFrom(
|
||||
Analyser::Static::Z80::Disassemble(
|
||||
first_8k,
|
||||
Analyser::Static::Disassembler::OffsetMapper(start_address),
|
||||
{ init_address }
|
||||
{ init_address },
|
||||
Analyser::Static::Z80::Approach::Exhaustive
|
||||
);
|
||||
|
||||
// // Look for a indirect store followed by an unconditional JP or CALL into another
|
||||
// // segment, that's a fairly explicit sign where found.
|
||||
using Instruction = Analyser::Static::Z80::Instruction;
|
||||
std::map<uint16_t, Instruction> &instructions = disassembly.instructions_by_address;
|
||||
bool is_ascii = false;
|
||||
// auto iterator = instructions.begin();
|
||||
// while(iterator != instructions.end()) {
|
||||
// auto next_iterator = iterator;
|
||||
// next_iterator++;
|
||||
// if(next_iterator == instructions.end()) break;
|
||||
//
|
||||
// if( iterator->second.operation == Instruction::Operation::LD &&
|
||||
// iterator->second.destination == Instruction::Location::Operand_Indirect &&
|
||||
// (
|
||||
// iterator->second.operand == 0x5000 ||
|
||||
// iterator->second.operand == 0x6000 ||
|
||||
// iterator->second.operand == 0x6800 ||
|
||||
// iterator->second.operand == 0x7000 ||
|
||||
// iterator->second.operand == 0x77ff ||
|
||||
// iterator->second.operand == 0x7800 ||
|
||||
// iterator->second.operand == 0x8000 ||
|
||||
// iterator->second.operand == 0x9000 ||
|
||||
// iterator->second.operand == 0xa000
|
||||
// ) &&
|
||||
// (
|
||||
// next_iterator->second.operation == Instruction::Operation::CALL ||
|
||||
// next_iterator->second.operation == Instruction::Operation::JP
|
||||
// ) &&
|
||||
// ((next_iterator->second.operand >> 13) != (0x4000 >> 13))
|
||||
// ) {
|
||||
// const uint16_t address = uint16_t(next_iterator->second.operand);
|
||||
// switch(iterator->second.operand) {
|
||||
// case 0x6000:
|
||||
// if(address >= 0x6000 && address < 0x8000) {
|
||||
// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::KonamiWithSCC;
|
||||
// }
|
||||
// break;
|
||||
// case 0x6800:
|
||||
// if(address >= 0x6000 && address < 0x6800) {
|
||||
// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::ASCII8kb;
|
||||
// }
|
||||
// break;
|
||||
// case 0x7000:
|
||||
// if(address >= 0x6000 && address < 0x8000) {
|
||||
// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::KonamiWithSCC;
|
||||
// }
|
||||
// if(address >= 0x7000 && address < 0x7800) {
|
||||
// is_ascii = true;
|
||||
// }
|
||||
// break;
|
||||
// case 0x77ff:
|
||||
// if(address >= 0x7000 && address < 0x7800) {
|
||||
// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::ASCII16kb;
|
||||
// }
|
||||
// break;
|
||||
// case 0x7800:
|
||||
// if(address >= 0xa000 && address < 0xc000) {
|
||||
// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::ASCII8kb;
|
||||
// }
|
||||
// break;
|
||||
// case 0x8000:
|
||||
// if(address >= 0x8000 && address < 0xa000) {
|
||||
// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::KonamiWithSCC;
|
||||
// }
|
||||
// break;
|
||||
// case 0x9000:
|
||||
// if(address >= 0x8000 && address < 0xa000) {
|
||||
// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::KonamiWithSCC;
|
||||
// }
|
||||
// break;
|
||||
// case 0xa000:
|
||||
// if(address >= 0xa000 && address < 0xc000) {
|
||||
// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::Konami;
|
||||
// }
|
||||
// break;
|
||||
// case 0xb000:
|
||||
// if(address >= 0xa000 && address < 0xc000) {
|
||||
// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::KonamiWithSCC;
|
||||
// }
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// iterator = next_iterator;
|
||||
const std::map<uint16_t, Instruction> &instructions = disassembly.instructions_by_address;
|
||||
|
||||
// Look for LD (nnnn), A instructions, and collate those addresses.
|
||||
std::map<uint16_t, int> address_counts;
|
||||
@ -217,49 +136,46 @@ static Analyser::Static::TargetList CartridgeTargetsFrom(
|
||||
}
|
||||
}
|
||||
|
||||
// Weight confidences by number of observed hits.
|
||||
float total_hits =
|
||||
float(
|
||||
address_counts[0x6000] + address_counts[0x6800] +
|
||||
address_counts[0x7000] + address_counts[0x7800] +
|
||||
address_counts[0x77ff] + address_counts[0x8000] +
|
||||
address_counts[0xa000] + address_counts[0x5000] +
|
||||
address_counts[0x9000] + address_counts[0xb000]
|
||||
);
|
||||
// Weight confidences by number of observed hits; if any is above 60% confidence, just use it.
|
||||
const auto ascii_8kb_total = address_counts[0x6000] + address_counts[0x6800] + address_counts[0x7000] + address_counts[0x7800];
|
||||
const auto ascii_16kb_total = address_counts[0x6000] + address_counts[0x7000] + address_counts[0x77ff];
|
||||
const auto konami_total = address_counts[0x6000] + address_counts[0x8000] + address_counts[0xa000];
|
||||
const auto konami_with_scc_total = address_counts[0x5000] + address_counts[0x7000] + address_counts[0x9000] + address_counts[0xb000];
|
||||
|
||||
targets.push_back(CartridgeTarget(
|
||||
segment,
|
||||
start_address,
|
||||
Analyser::Static::MSX::Cartridge::ASCII8kb,
|
||||
float( address_counts[0x6000] +
|
||||
address_counts[0x6800] +
|
||||
address_counts[0x7000] +
|
||||
address_counts[0x7800]) / total_hits));
|
||||
targets.push_back(CartridgeTarget(
|
||||
segment,
|
||||
start_address,
|
||||
Analyser::Static::MSX::Cartridge::ASCII16kb,
|
||||
float( address_counts[0x6000] +
|
||||
address_counts[0x7000] +
|
||||
address_counts[0x77ff]) / total_hits));
|
||||
if(!is_ascii) {
|
||||
const auto total_hits = ascii_8kb_total + ascii_16kb_total + konami_total + konami_with_scc_total;
|
||||
|
||||
const bool is_ascii_8kb = (ascii_8kb_total * 5) / (total_hits * 3);
|
||||
const bool is_ascii_16kb = (ascii_16kb_total * 5) / (total_hits * 3);
|
||||
const bool is_konami = (konami_total * 5) / (total_hits * 3);
|
||||
const bool is_konami_with_scc = (konami_with_scc_total * 5) / (total_hits * 3);
|
||||
|
||||
if(!is_ascii_16kb && !is_konami && !is_konami_with_scc) {
|
||||
targets.push_back(CartridgeTarget(
|
||||
segment,
|
||||
start_address,
|
||||
Analyser::Static::MSX::Cartridge::ASCII8kb,
|
||||
float(ascii_8kb_total) / float(total_hits)));
|
||||
}
|
||||
if(!is_ascii_8kb && !is_konami && !is_konami_with_scc) {
|
||||
targets.push_back(CartridgeTarget(
|
||||
segment,
|
||||
start_address,
|
||||
Analyser::Static::MSX::Cartridge::ASCII16kb,
|
||||
float(ascii_16kb_total) / float(total_hits)));
|
||||
}
|
||||
if(!is_ascii_8kb && !is_ascii_16kb && !is_konami_with_scc) {
|
||||
targets.push_back(CartridgeTarget(
|
||||
segment,
|
||||
start_address,
|
||||
Analyser::Static::MSX::Cartridge::Konami,
|
||||
float( address_counts[0x6000] +
|
||||
address_counts[0x8000] +
|
||||
address_counts[0xa000]) / total_hits));
|
||||
float(konami_total) / float(total_hits)));
|
||||
}
|
||||
if(!is_ascii) {
|
||||
if(!is_ascii_8kb && !is_ascii_16kb && !is_konami) {
|
||||
targets.push_back(CartridgeTarget(
|
||||
segment,
|
||||
start_address,
|
||||
Analyser::Static::MSX::Cartridge::KonamiWithSCC,
|
||||
float( address_counts[0x5000] +
|
||||
address_counts[0x7000] +
|
||||
address_counts[0x9000] +
|
||||
address_counts[0xb000]) / total_hits));
|
||||
float(konami_with_scc_total) / float(total_hits)));
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user