1
0
mirror of https://github.com/TomHarte/CLK.git synced 2024-12-13 00:29:14 +00:00

Attempt to locate and disassemble machine code.

This commit is contained in:
Thomas Harte 2024-12-04 21:41:05 -05:00
parent 3d2eefc7e7
commit 65a118d1f3
6 changed files with 116 additions and 72 deletions

View File

@ -1,47 +0,0 @@
//
// File.cpp
// Clock Signal
//
// Created by Thomas Harte on 10/09/2016.
// Copyright 2016 Thomas Harte. All rights reserved.
//
#include "File.hpp"
bool Analyser::Static::Commodore::File::is_basic() {
// BASIC files are always relocatable (?)
if(type != File::RelocatableProgram) return false;
uint16_t line_address = starting_address;
int line_number = -1;
// decide whether this is a BASIC file based on the proposition that:
// (1) they're always relocatable; and
// (2) they have a per-line structure of:
// [4 bytes: address of start of next line]
// [4 bytes: this line number]
// ... null-terminated code ...
// (with a next line address of 0000 indicating end of program)
while(1) {
if(size_t(line_address - starting_address) + 1 >= data.size()) break;
uint16_t next_line_address = data[line_address - starting_address];
next_line_address |= data[line_address - starting_address + 1] << 8;
if(!next_line_address) {
return true;
}
if(next_line_address < line_address + 5) break;
if(size_t(line_address - starting_address) + 3 >= data.size()) break;
uint16_t next_line_number = data[line_address - starting_address + 2];
next_line_number |= data[line_address - starting_address + 3] << 8;
if(next_line_number <= line_number) break;
line_number = uint16_t(next_line_number);
line_address = next_line_address;
}
return false;
}

View File

@ -29,8 +29,6 @@ struct File {
Relative
} type;
std::vector<uint8_t> data;
bool is_basic();
};
}

View File

@ -15,23 +15,28 @@
#include "../../../Storage/Cartridge/Encodings/CommodoreROM.hpp"
#include "../../../Outputs/Log.hpp"
#include "../Disassembler/6502.hpp"
#include "../Disassembler/AddressMapper.hpp"
#include <algorithm>
#include <cstring>
#include <sstream>
using namespace Analyser::Static::Commodore;
static std::vector<std::shared_ptr<Storage::Cartridge::Cartridge>>
namespace {
std::vector<std::shared_ptr<Storage::Cartridge::Cartridge>>
Vic20CartridgesFrom(const std::vector<std::shared_ptr<Storage::Cartridge::Cartridge>> &cartridges) {
std::vector<std::shared_ptr<Storage::Cartridge::Cartridge>> vic20_cartridges;
for(const auto &cartridge : cartridges) {
const auto &segments = cartridge->get_segments();
// only one mapped item is allowed
// Only one mapped item is allowed ...
if(segments.size() != 1) continue;
// which must be 16 kb in size
// ... which must be 16 kb in size.
Storage::Cartridge::Cartridge::Segment segment = segments.front();
if(segment.start_address != 0xa000) continue;
if(!Storage::Cartridge::Encodings::CommodoreROM::isROM(segment.data)) continue;
@ -39,28 +44,111 @@ Vic20CartridgesFrom(const std::vector<std::shared_ptr<Storage::Cartridge::Cartri
vic20_cartridges.push_back(cartridge);
}
// TODO: other machines?
return vic20_cartridges;
}
struct BASICAnalysis {
enum class Version {
BASIC2,
BASIC4,
BASIC3_5,
} minimum_version = Version::BASIC2;
std::vector<uint16_t> machine_code_addresses;
};
std::optional<BASICAnalysis> analyse(const File &file) {
// Accept only 'program' types.
if(file.type != File::RelocatableProgram && file.type != File::NonRelocatableProgram) {
return std::nullopt;
}
uint16_t line_address = file.starting_address;
int previous_line_number = -1;
const auto byte = [&](uint16_t address) {
return file.data[address - file.starting_address];
};
const auto word = [&](uint16_t address) {
return uint16_t(byte(address) | byte(address + 1) << 8);
};
// BASIC programs have a per-line structure of:
// [2 bytes: address of start of next line]
// [2 bytes: this line number]
// ... null-terminated code ...
// (with a next line address of 0000 indicating end of program)
//
// If a SYS is encountered that jumps into the BASIC program then treat that as
// a machine code entry point.
BASICAnalysis analysis;
while(true) {
// Analysis has failed if there isn't at least one complete BASIC line from here.
if(size_t(line_address - file.starting_address) + 5 >= file.data.size()) {
return std::nullopt;
}
const auto next_line_address = word(line_address);
const auto line_number = word(line_address + 2);
uint16_t code = line_address + 4;
const auto next = [&]() -> uint8_t {
if(code >= file.starting_address + file.data.size()) {
return 0;
}
return byte(code++);
};
while(true) {
const auto token = next();
if(!token) break;
switch(token) {
case 0x9e: { // SYS; parse following ASCII argument.
uint16_t address = 0;
while(true) {
const auto c = next();
if(c < '0' || c > '9') {
break;
}
address = (address * 10) + (c - '0');
};
analysis.machine_code_addresses.push_back(address);
} break;
}
}
if(!next_line_address) {
break;
}
previous_line_number = line_number;
line_address = next_line_address;
}
return analysis;
}
}
Analyser::Static::TargetList Analyser::Static::Commodore::GetTargets(
const Media &media,
const std::string &file_name,
TargetPlatform::IntType
) {
TargetList destination;
auto target = std::make_unique<Target>();
target->machine = Machine::Vic20; // TODO: machine estimation
target->confidence = 0.5; // TODO: a proper estimation
int device = 0;
std::vector<File> files;
bool is_disk = false;
// strip out inappropriate cartridges
// Strip out inappropriate cartridges.
target->media.cartridges = Vic20CartridgesFrom(media.cartridges);
// check disks
// Find all valid Commodore files on disks.
for(auto &disk : media.disks) {
std::vector<File> disk_files = GetFiles(disk);
if(!disk_files.empty()) {
@ -71,7 +159,7 @@ Analyser::Static::TargetList Analyser::Static::Commodore::GetTargets(
}
}
// check tapes
// Find all valid Commodore files on tapes.
for(auto &tape : media.tapes) {
std::vector<File> tape_files = GetFiles(tape);
tape->reset();
@ -82,15 +170,28 @@ Analyser::Static::TargetList Analyser::Static::Commodore::GetTargets(
}
}
// Inspect discovered files to try to divine machine and memory model.
if(!files.empty()) {
const auto &file = files.front();
auto memory_model = Target::MemoryModel::Unexpanded;
std::ostringstream string_stream;
string_stream << "LOAD\"" << (is_disk ? "*" : "") << "\"," << device << ",";
if(files.front().is_basic()) {
string_stream << "0";
} else {
const auto analysis = analyse(file);
if(!analysis->machine_code_addresses.empty()) {
string_stream << "1";
const auto disassembly = Analyser::Static::MOS6502::Disassemble(
file.data,
Analyser::Static::Disassembler::OffsetMapper(file.starting_address),
analysis->machine_code_addresses
);
// TODO: disassemble.
printf("");
}
string_stream << "\nRUN\n";
target->loading_command = string_stream.str();

View File

@ -64,7 +64,7 @@ struct Target {
Machine machine;
Media media;
float confidence = 0.0f;
float confidence = 0.5f;
};
typedef std::vector<std::unique_ptr<Target>> TargetList;

View File

@ -565,7 +565,6 @@
4B778F5E23A5F3230000D260 /* Oric.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B8805F91DCFF807003085B1 /* Oric.cpp */; };
4B778F6023A5F3460000D260 /* Disk.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B8944EC201967B4007DE474 /* Disk.cpp */; };
4B778F6123A5F3560000D260 /* Disk.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B8944FC201967B4007DE474 /* Disk.cpp */; };
4B778F6223A5F35F0000D260 /* File.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B894500201967B4007DE474 /* File.cpp */; };
4B778F6323A5F3630000D260 /* Tape.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B894501201967B4007DE474 /* Tape.cpp */; };
4B7962A02819681F008130F9 /* Decoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B79629F2819681F008130F9 /* Decoder.cpp */; };
4B7962A12819681F008130F9 /* Decoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B79629F2819681F008130F9 /* Decoder.cpp */; };
@ -639,8 +638,6 @@
4B894527201967B4007DE474 /* StaticAnalyser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B8944FA201967B4007DE474 /* StaticAnalyser.cpp */; };
4B894528201967B4007DE474 /* Disk.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B8944FC201967B4007DE474 /* Disk.cpp */; };
4B894529201967B4007DE474 /* Disk.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B8944FC201967B4007DE474 /* Disk.cpp */; };
4B89452A201967B4007DE474 /* File.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B894500201967B4007DE474 /* File.cpp */; };
4B89452B201967B4007DE474 /* File.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B894500201967B4007DE474 /* File.cpp */; };
4B89452C201967B4007DE474 /* Tape.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B894501201967B4007DE474 /* Tape.cpp */; };
4B89452D201967B4007DE474 /* Tape.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B894501201967B4007DE474 /* Tape.cpp */; };
4B89452E201967B4007DE474 /* StaticAnalyser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B894503201967B4007DE474 /* StaticAnalyser.cpp */; };
@ -1756,7 +1753,6 @@
4B8944FD201967B4007DE474 /* StaticAnalyser.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = StaticAnalyser.hpp; sourceTree = "<group>"; };
4B8944FE201967B4007DE474 /* File.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = File.hpp; sourceTree = "<group>"; };
4B8944FF201967B4007DE474 /* Tape.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = Tape.hpp; sourceTree = "<group>"; };
4B894500201967B4007DE474 /* File.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = File.cpp; sourceTree = "<group>"; };
4B894501201967B4007DE474 /* Tape.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Tape.cpp; sourceTree = "<group>"; };
4B894502201967B4007DE474 /* Disk.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = Disk.hpp; sourceTree = "<group>"; };
4B894503201967B4007DE474 /* StaticAnalyser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = StaticAnalyser.cpp; sourceTree = "<group>"; };
@ -3810,7 +3806,6 @@
isa = PBXGroup;
children = (
4B8944FC201967B4007DE474 /* Disk.cpp */,
4B894500201967B4007DE474 /* File.cpp */,
4B894503201967B4007DE474 /* StaticAnalyser.cpp */,
4B894501201967B4007DE474 /* Tape.cpp */,
4B894502201967B4007DE474 /* Disk.hpp */,
@ -6006,7 +6001,6 @@
4B055AA31FAE85DF0060FFFF /* ImplicitSectors.cpp in Sources */,
4B8318B322D3E540006DB630 /* Audio.cpp in Sources */,
4B055AAE1FAE85FD0060FFFF /* TrackSerialiser.cpp in Sources */,
4B89452B201967B4007DE474 /* File.cpp in Sources */,
4B6AAEAC230E40250078E864 /* SCSI.cpp in Sources */,
4B055A981FAE85C50060FFFF /* Drive.cpp in Sources */,
4BD424E62193B5830097291A /* Shader.cpp in Sources */,
@ -6331,7 +6325,6 @@
4B228CD524D773B40077EF25 /* CSScanTarget.mm in Sources */,
4BCD634922D6756400F567F1 /* MacintoshDoubleDensityDrive.cpp in Sources */,
4B0F94FE208C1A1600FE41D9 /* NIB.cpp in Sources */,
4B89452A201967B4007DE474 /* File.cpp in Sources */,
4BC080D026A257A200D03FD8 /* StaticAnalyser.cpp in Sources */,
4B4DC8211D2C2425003C5BF8 /* Vic20.cpp in Sources */,
4B71368E1F788112008B8ED9 /* Parser.cpp in Sources */,
@ -6581,7 +6574,6 @@
4BEDA3BB25B25563000C2DBD /* Decoder.cpp in Sources */,
4B778F2423A5EDEE0000D260 /* PRG.cpp in Sources */,
4B778F5A23A5F2D50000D260 /* 6502.cpp in Sources */,
4B778F6223A5F35F0000D260 /* File.cpp in Sources */,
4B06AB0F2C6461780034D014 /* MultiProducer.cpp in Sources */,
4B778F3523A5F1040000D260 /* SCSI.cpp in Sources */,
4BD388882239E198002D14B5 /* 68000Tests.mm in Sources */,

View File

@ -29,11 +29,11 @@ PRG::PRG(const std::string &file_name) {
int loading_address = fgetc(file);
loading_address |= fgetc(file) << 8;
std::size_t data_length = size_t(file_stats.st_size) - 2;
const std::size_t data_length = size_t(file_stats.st_size) - 2;
std::size_t padded_data_length = 1;
while(padded_data_length < data_length) padded_data_length <<= 1;
std::vector<uint8_t> contents(padded_data_length);
std::size_t length = std::fread(contents.data(), 1, size_t(data_length), file);
const std::size_t length = std::fread(contents.data(), 1, size_t(data_length), file);
std::fclose(file);
// accept only files intended to load at 0xa000