1
0
mirror of https://github.com/TomHarte/CLK.git synced 2024-08-23 02:29:01 +00:00

Merge pull request #1165 from TomHarte/MSX2Detection

Improve MSX cartridge type detection.
This commit is contained in:
Thomas Harte 2023-09-06 22:50:53 -04:00 committed by GitHub
commit b7a27fbc6b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 98 additions and 134 deletions

View File

@ -316,5 +316,5 @@ Disassembly Analyser::Static::MOS6502::Disassemble(
const std::vector<uint8_t> &memory,
const std::function<std::size_t(uint16_t)> &address_mapper,
std::vector<uint16_t> entry_points) {
return Analyser::Static::Disassembly::Disassemble<Disassembly, uint16_t, MOS6502Disassembler>(memory, address_mapper, entry_points);
return Analyser::Static::Disassembly::Disassemble<Disassembly, uint16_t, MOS6502Disassembler>(memory, address_mapper, entry_points, false);
}

View File

@ -14,30 +14,54 @@ namespace Analyser::Static::Disassembly {
template <typename D, typename S> struct PartialDisassembly {
D disassembly;
std::vector<S> remaining_entry_points;
std::map<S, S> touched; // Maps from start of range to end.
};
template <typename D, typename S, typename Disassembler> D Disassemble(
const std::vector<uint8_t> &memory,
const std::function<std::size_t(S)> &address_mapper,
std::vector<S> entry_points) {
std::vector<S> entry_points,
bool exhaustive)
{
PartialDisassembly<D, S> partial_disassembly;
partial_disassembly.remaining_entry_points = entry_points;
while(!partial_disassembly.remaining_entry_points.empty()) {
// pull the next entry point from the back of the vector
S next_entry_point = partial_disassembly.remaining_entry_points.back();
partial_disassembly.remaining_entry_points.pop_back();
// Do a recursive-style disassembly for all current entry points.
while(!partial_disassembly.remaining_entry_points.empty()) {
// Pull the next entry point from the back of the vector.
const S next_entry_point = partial_disassembly.remaining_entry_points.back();
partial_disassembly.remaining_entry_points.pop_back();
// if that address has already been visited, forget about it
if( partial_disassembly.disassembly.instructions_by_address.find(next_entry_point)
!= partial_disassembly.disassembly.instructions_by_address.end()) continue;
// If that address has already been visited, forget about it.
if( partial_disassembly.disassembly.instructions_by_address.find(next_entry_point)
!= partial_disassembly.disassembly.instructions_by_address.end()) continue;
// if it's outgoing, log it as such and forget about it; otherwise disassemble
std::size_t mapped_entry_point = address_mapper(next_entry_point);
if(mapped_entry_point >= memory.size())
partial_disassembly.disassembly.outward_calls.insert(next_entry_point);
else
Disassembler::AddToDisassembly(partial_disassembly, memory, address_mapper, next_entry_point);
// If it's outgoing, log it as such and forget about it; otherwise disassemble.
std::size_t mapped_entry_point = address_mapper(next_entry_point);
if(mapped_entry_point >= memory.size())
partial_disassembly.disassembly.outward_calls.insert(next_entry_point);
else
Disassembler::AddToDisassembly(partial_disassembly, memory, address_mapper, next_entry_point);
}
// If this is not an exhaustive disassembly, that's your lot.
if(!exhaustive) {
break;
}
// Otherwise, find the first area between or just beyond a disassembled range
// that isn't yet disassembled and chuck it onto the list.
for(const auto &pair: partial_disassembly.touched) {
const auto end = pair.second;
if(partial_disassembly.touched.find(end) == partial_disassembly.touched.end()) {
if(address_mapper(end) < memory.size()) {
partial_disassembly.remaining_entry_points.push_back(end);
}
break;
}
}
}
return partial_disassembly.disassembly;

View File

@ -546,6 +546,9 @@ struct Z80Disassembler {
disassembly.disassembly.internal_calls.insert(entry_point);
Accessor accessor(memory, address_mapper, entry_point);
auto &touched = disassembly.touched[entry_point];
touched = entry_point;
while(!accessor.at_end()) {
Instruction instruction;
instruction.address = accessor.address();
@ -558,6 +561,9 @@ struct Z80Disassembler {
// Store the instruction away.
disassembly.disassembly.instructions_by_address[instruction.address] = instruction;
// Apply all touches.
touched = accessor.address();
// Update access tables.
int access_type =
((instruction.source == Instruction::Location::Operand_Indirect) ? 1 : 0) |
@ -611,9 +617,18 @@ struct Z80Disassembler {
} // end of anonymous namespace
Disassembly Analyser::Static::Z80::Disassemble(
const std::vector<uint8_t> &memory,
const std::function<std::size_t(uint16_t)> &address_mapper,
std::vector<uint16_t> entry_points) {
return Analyser::Static::Disassembly::Disassemble<Disassembly, uint16_t, Z80Disassembler>(memory, address_mapper, entry_points);
std::vector<uint16_t> entry_points,
Approach approach)
{
return Analyser::Static::Disassembly::Disassemble<Disassembly, uint16_t, Z80Disassembler>(
memory,
address_mapper,
entry_points,
approach == Approach::Exhaustive
);
}

View File

@ -76,10 +76,19 @@ struct Disassembly {
std::set<uint16_t> internal_stores, internal_loads, internal_modifies;
};
enum class Approach {
/// Disassemble from the supplied entry points until an indeterminate branch or return only, adding other fully-static
/// entry points as they are observed.
Recursive,
/// Disassemble all supplied bytes, regardless of what nonsense may be encountered by accidental parsing of data areas.
Exhaustive,
};
Disassembly Disassemble(
const std::vector<uint8_t> &memory,
const std::function<std::size_t(uint16_t)> &address_mapper,
std::vector<uint16_t> entry_points);
std::vector<uint16_t> entry_points,
Approach approach);
}

View File

@ -119,93 +119,12 @@ static Analyser::Static::TargetList CartridgeTargetsFrom(
Analyser::Static::Z80::Disassemble(
first_8k,
Analyser::Static::Disassembler::OffsetMapper(start_address),
{ init_address }
{ init_address },
Analyser::Static::Z80::Approach::Exhaustive
);
// // Look for a indirect store followed by an unconditional JP or CALL into another
// // segment, that's a fairly explicit sign where found.
using Instruction = Analyser::Static::Z80::Instruction;
std::map<uint16_t, Instruction> &instructions = disassembly.instructions_by_address;
bool is_ascii = false;
// auto iterator = instructions.begin();
// while(iterator != instructions.end()) {
// auto next_iterator = iterator;
// next_iterator++;
// if(next_iterator == instructions.end()) break;
//
// if( iterator->second.operation == Instruction::Operation::LD &&
// iterator->second.destination == Instruction::Location::Operand_Indirect &&
// (
// iterator->second.operand == 0x5000 ||
// iterator->second.operand == 0x6000 ||
// iterator->second.operand == 0x6800 ||
// iterator->second.operand == 0x7000 ||
// iterator->second.operand == 0x77ff ||
// iterator->second.operand == 0x7800 ||
// iterator->second.operand == 0x8000 ||
// iterator->second.operand == 0x9000 ||
// iterator->second.operand == 0xa000
// ) &&
// (
// next_iterator->second.operation == Instruction::Operation::CALL ||
// next_iterator->second.operation == Instruction::Operation::JP
// ) &&
// ((next_iterator->second.operand >> 13) != (0x4000 >> 13))
// ) {
// const uint16_t address = uint16_t(next_iterator->second.operand);
// switch(iterator->second.operand) {
// case 0x6000:
// if(address >= 0x6000 && address < 0x8000) {
// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::KonamiWithSCC;
// }
// break;
// case 0x6800:
// if(address >= 0x6000 && address < 0x6800) {
// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::ASCII8kb;
// }
// break;
// case 0x7000:
// if(address >= 0x6000 && address < 0x8000) {
// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::KonamiWithSCC;
// }
// if(address >= 0x7000 && address < 0x7800) {
// is_ascii = true;
// }
// break;
// case 0x77ff:
// if(address >= 0x7000 && address < 0x7800) {
// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::ASCII16kb;
// }
// break;
// case 0x7800:
// if(address >= 0xa000 && address < 0xc000) {
// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::ASCII8kb;
// }
// break;
// case 0x8000:
// if(address >= 0x8000 && address < 0xa000) {
// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::KonamiWithSCC;
// }
// break;
// case 0x9000:
// if(address >= 0x8000 && address < 0xa000) {
// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::KonamiWithSCC;
// }
// break;
// case 0xa000:
// if(address >= 0xa000 && address < 0xc000) {
// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::Konami;
// }
// break;
// case 0xb000:
// if(address >= 0xa000 && address < 0xc000) {
// target.msx.cartridge_type = Analyser::Static::MSXCartridgeType::KonamiWithSCC;
// }
// break;
// }
// }
//
// iterator = next_iterator;
const std::map<uint16_t, Instruction> &instructions = disassembly.instructions_by_address;
// Look for LD (nnnn), A instructions, and collate those addresses.
std::map<uint16_t, int> address_counts;
@ -217,49 +136,46 @@ static Analyser::Static::TargetList CartridgeTargetsFrom(
}
}
// Weight confidences by number of observed hits.
float total_hits =
float(
address_counts[0x6000] + address_counts[0x6800] +
address_counts[0x7000] + address_counts[0x7800] +
address_counts[0x77ff] + address_counts[0x8000] +
address_counts[0xa000] + address_counts[0x5000] +
address_counts[0x9000] + address_counts[0xb000]
);
// Weight confidences by number of observed hits; if any is above 60% confidence, just use it.
const auto ascii_8kb_total = address_counts[0x6000] + address_counts[0x6800] + address_counts[0x7000] + address_counts[0x7800];
const auto ascii_16kb_total = address_counts[0x6000] + address_counts[0x7000] + address_counts[0x77ff];
const auto konami_total = address_counts[0x6000] + address_counts[0x8000] + address_counts[0xa000];
const auto konami_with_scc_total = address_counts[0x5000] + address_counts[0x7000] + address_counts[0x9000] + address_counts[0xb000];
targets.push_back(CartridgeTarget(
segment,
start_address,
Analyser::Static::MSX::Cartridge::ASCII8kb,
float( address_counts[0x6000] +
address_counts[0x6800] +
address_counts[0x7000] +
address_counts[0x7800]) / total_hits));
targets.push_back(CartridgeTarget(
segment,
start_address,
Analyser::Static::MSX::Cartridge::ASCII16kb,
float( address_counts[0x6000] +
address_counts[0x7000] +
address_counts[0x77ff]) / total_hits));
if(!is_ascii) {
const auto total_hits = ascii_8kb_total + ascii_16kb_total + konami_total + konami_with_scc_total;
const bool is_ascii_8kb = (ascii_8kb_total * 5) / (total_hits * 3);
const bool is_ascii_16kb = (ascii_16kb_total * 5) / (total_hits * 3);
const bool is_konami = (konami_total * 5) / (total_hits * 3);
const bool is_konami_with_scc = (konami_with_scc_total * 5) / (total_hits * 3);
if(!is_ascii_16kb && !is_konami && !is_konami_with_scc) {
targets.push_back(CartridgeTarget(
segment,
start_address,
Analyser::Static::MSX::Cartridge::ASCII8kb,
float(ascii_8kb_total) / float(total_hits)));
}
if(!is_ascii_8kb && !is_konami && !is_konami_with_scc) {
targets.push_back(CartridgeTarget(
segment,
start_address,
Analyser::Static::MSX::Cartridge::ASCII16kb,
float(ascii_16kb_total) / float(total_hits)));
}
if(!is_ascii_8kb && !is_ascii_16kb && !is_konami_with_scc) {
targets.push_back(CartridgeTarget(
segment,
start_address,
Analyser::Static::MSX::Cartridge::Konami,
float( address_counts[0x6000] +
address_counts[0x8000] +
address_counts[0xa000]) / total_hits));
float(konami_total) / float(total_hits)));
}
if(!is_ascii) {
if(!is_ascii_8kb && !is_ascii_16kb && !is_konami) {
targets.push_back(CartridgeTarget(
segment,
start_address,
Analyser::Static::MSX::Cartridge::KonamiWithSCC,
float( address_counts[0x5000] +
address_counts[0x7000] +
address_counts[0x9000] +
address_counts[0xb000]) / total_hits));
float(konami_with_scc_total) / float(total_hits)));
}
}