From 65a118d1f386858f474c20b90516d3de81592ee2 Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Wed, 4 Dec 2024 21:41:05 -0500 Subject: [PATCH] Attempt to locate and disassemble machine code. --- Analyser/Static/Commodore/File.cpp | 47 ------- Analyser/Static/Commodore/File.hpp | 2 - Analyser/Static/Commodore/StaticAnalyser.cpp | 125 ++++++++++++++++-- Analyser/Static/StaticAnalyser.hpp | 2 +- .../Clock Signal.xcodeproj/project.pbxproj | 8 -- Storage/Cartridge/Formats/PRG.cpp | 4 +- 6 files changed, 116 insertions(+), 72 deletions(-) delete mode 100644 Analyser/Static/Commodore/File.cpp diff --git a/Analyser/Static/Commodore/File.cpp b/Analyser/Static/Commodore/File.cpp deleted file mode 100644 index 6bcb9bfa6..000000000 --- a/Analyser/Static/Commodore/File.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// -// File.cpp -// Clock Signal -// -// Created by Thomas Harte on 10/09/2016. -// Copyright 2016 Thomas Harte. All rights reserved. -// - -#include "File.hpp" - -bool Analyser::Static::Commodore::File::is_basic() { - // BASIC files are always relocatable (?) - if(type != File::RelocatableProgram) return false; - - uint16_t line_address = starting_address; - int line_number = -1; - - // decide whether this is a BASIC file based on the proposition that: - // (1) they're always relocatable; and - // (2) they have a per-line structure of: - // [4 bytes: address of start of next line] - // [4 bytes: this line number] - // ... null-terminated code ... 
- // (with a next line address of 0000 indicating end of program) - while(1) { - if(size_t(line_address - starting_address) + 1 >= data.size()) break; - - uint16_t next_line_address = data[line_address - starting_address]; - next_line_address |= data[line_address - starting_address + 1] << 8; - - if(!next_line_address) { - return true; - } - if(next_line_address < line_address + 5) break; - - if(size_t(line_address - starting_address) + 3 >= data.size()) break; - uint16_t next_line_number = data[line_address - starting_address + 2]; - next_line_number |= data[line_address - starting_address + 3] << 8; - - if(next_line_number <= line_number) break; - - line_number = uint16_t(next_line_number); - line_address = next_line_address; - } - - return false; -} diff --git a/Analyser/Static/Commodore/File.hpp b/Analyser/Static/Commodore/File.hpp index e84b00d50..35394ce11 100644 --- a/Analyser/Static/Commodore/File.hpp +++ b/Analyser/Static/Commodore/File.hpp @@ -29,8 +29,6 @@ struct File { Relative } type; std::vector data; - - bool is_basic(); }; } diff --git a/Analyser/Static/Commodore/StaticAnalyser.cpp b/Analyser/Static/Commodore/StaticAnalyser.cpp index dbe78bbc5..f4da89d1e 100644 --- a/Analyser/Static/Commodore/StaticAnalyser.cpp +++ b/Analyser/Static/Commodore/StaticAnalyser.cpp @@ -15,23 +15,28 @@ #include "../../../Storage/Cartridge/Encodings/CommodoreROM.hpp" #include "../../../Outputs/Log.hpp" +#include "../Disassembler/6502.hpp" +#include "../Disassembler/AddressMapper.hpp" + #include #include #include using namespace Analyser::Static::Commodore; -static std::vector> +namespace { + +std::vector> Vic20CartridgesFrom(const std::vector> &cartridges) { std::vector> vic20_cartridges; for(const auto &cartridge : cartridges) { const auto &segments = cartridge->get_segments(); - // only one mapped item is allowed + // Only one mapped item is allowed ... if(segments.size() != 1) continue; - // which must be 16 kb in size + // ... which must be 16 kb in size. 
Storage::Cartridge::Cartridge::Segment segment = segments.front(); if(segment.start_address != 0xa000) continue; if(!Storage::Cartridge::Encodings::CommodoreROM::isROM(segment.data)) continue; @@ -39,28 +44,111 @@ Vic20CartridgesFrom(const std::vector machine_code_addresses; +}; + +std::optional analyse(const File &file) { + // Accept only 'program' types. + if(file.type != File::RelocatableProgram && file.type != File::NonRelocatableProgram) { + return std::nullopt; + } + + uint16_t line_address = file.starting_address; + int previous_line_number = -1; + + const auto byte = [&](uint16_t address) { + return file.data[address - file.starting_address]; + }; + const auto word = [&](uint16_t address) { + return uint16_t(byte(address) | byte(address + 1) << 8); + }; + + // BASIC programs have a per-line structure of: + // [2 bytes: address of start of next line] + // [2 bytes: this line number] + // ... null-terminated code ... + // (with a next line address of 0000 indicating end of program) + // + // If a SYS is encountered that jumps into the BASIC program then treat that as + // a machine code entry point. + + BASICAnalysis analysis; + while(true) { + // Analysis has failed if there isn't at least one complete BASIC line from here. + if(size_t(line_address - file.starting_address) + 5 >= file.data.size()) { + return std::nullopt; + } + + const auto next_line_address = word(line_address); + const auto line_number = word(line_address + 2); + + uint16_t code = line_address + 4; + const auto next = [&]() -> uint8_t { + if(code >= file.starting_address + file.data.size()) { + return 0; + } + return byte(code++); + }; + + while(true) { + const auto token = next(); + if(!token) break; + + switch(token) { + case 0x9e: { // SYS; parse following ASCII argument. 
+ uint16_t address = 0; + while(true) { + const auto c = next(); + if(c < '0' || c > '9') { + break; + } + address = (address * 10) + (c - '0'); + }; + analysis.machine_code_addresses.push_back(address); + } break; + } + } + + if(!next_line_address) { + break; + } + + previous_line_number = line_number; + line_address = next_line_address; + } + + return analysis; +} + +} + Analyser::Static::TargetList Analyser::Static::Commodore::GetTargets( const Media &media, const std::string &file_name, TargetPlatform::IntType ) { TargetList destination; - auto target = std::make_unique(); - target->machine = Machine::Vic20; // TODO: machine estimation - target->confidence = 0.5; // TODO: a proper estimation int device = 0; std::vector files; bool is_disk = false; - // strip out inappropriate cartridges + // Strip out inappropriate cartridges. target->media.cartridges = Vic20CartridgesFrom(media.cartridges); - // check disks + // Find all valid Commodore files on disks. for(auto &disk : media.disks) { std::vector disk_files = GetFiles(disk); if(!disk_files.empty()) { @@ -71,7 +159,7 @@ Analyser::Static::TargetList Analyser::Static::Commodore::GetTargets( } } - // check tapes + // Find all valid Commodore files on tapes. for(auto &tape : media.tapes) { std::vector tape_files = GetFiles(tape); tape->reset(); @@ -82,15 +170,28 @@ Analyser::Static::TargetList Analyser::Static::Commodore::GetTargets( } } + // Inspect discovered files to try to divine machine and memory model. if(!files.empty()) { + const auto &file = files.front(); + auto memory_model = Target::MemoryModel::Unexpanded; std::ostringstream string_stream; string_stream << "LOAD\"" << (is_disk ? 
"*" : "") << "\"," << device << ","; - if(files.front().is_basic()) { - string_stream << "0"; - } else { + + const auto analysis = analyse(file); + if(analysis && !analysis->machine_code_addresses.empty()) { // analyse() may return std::nullopt; check before dereferencing. string_stream << "1"; + + const auto disassembly = Analyser::Static::MOS6502::Disassemble( + file.data, + Analyser::Static::Disassembler::OffsetMapper(file.starting_address), + analysis->machine_code_addresses + ); + // TODO: disassemble. + + printf(""); } + string_stream << "\nRUN\n"; target->loading_command = string_stream.str(); diff --git a/Analyser/Static/StaticAnalyser.hpp b/Analyser/Static/StaticAnalyser.hpp index 8e8a1648a..acfe1d9f9 100644 --- a/Analyser/Static/StaticAnalyser.hpp +++ b/Analyser/Static/StaticAnalyser.hpp @@ -64,7 +64,7 @@ struct Target { Machine machine; Media media; - float confidence = 0.0f; + float confidence = 0.5f; }; typedef std::vector> TargetList; diff --git a/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj b/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj index aae7c1a32..2eb27a5c2 100644 --- a/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj +++ b/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj @@ -565,7 +565,6 @@ 4B778F5E23A5F3230000D260 /* Oric.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B8805F91DCFF807003085B1 /* Oric.cpp */; }; 4B778F6023A5F3460000D260 /* Disk.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B8944EC201967B4007DE474 /* Disk.cpp */; }; 4B778F6123A5F3560000D260 /* Disk.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B8944FC201967B4007DE474 /* Disk.cpp */; }; - 4B778F6223A5F35F0000D260 /* File.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B894500201967B4007DE474 /* File.cpp */; }; 4B778F6323A5F3630000D260 /* Tape.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B894501201967B4007DE474 /* Tape.cpp */; }; 4B7962A02819681F008130F9 /* Decoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B79629F2819681F008130F9 /* Decoder.cpp */; }; 4B7962A12819681F008130F9 /* Decoder.cpp
in Sources */ = {isa = PBXBuildFile; fileRef = 4B79629F2819681F008130F9 /* Decoder.cpp */; }; @@ -639,8 +638,6 @@ 4B894527201967B4007DE474 /* StaticAnalyser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B8944FA201967B4007DE474 /* StaticAnalyser.cpp */; }; 4B894528201967B4007DE474 /* Disk.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B8944FC201967B4007DE474 /* Disk.cpp */; }; 4B894529201967B4007DE474 /* Disk.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B8944FC201967B4007DE474 /* Disk.cpp */; }; - 4B89452A201967B4007DE474 /* File.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B894500201967B4007DE474 /* File.cpp */; }; - 4B89452B201967B4007DE474 /* File.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B894500201967B4007DE474 /* File.cpp */; }; 4B89452C201967B4007DE474 /* Tape.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B894501201967B4007DE474 /* Tape.cpp */; }; 4B89452D201967B4007DE474 /* Tape.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B894501201967B4007DE474 /* Tape.cpp */; }; 4B89452E201967B4007DE474 /* StaticAnalyser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B894503201967B4007DE474 /* StaticAnalyser.cpp */; }; @@ -1756,7 +1753,6 @@ 4B8944FD201967B4007DE474 /* StaticAnalyser.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = StaticAnalyser.hpp; sourceTree = ""; }; 4B8944FE201967B4007DE474 /* File.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = File.hpp; sourceTree = ""; }; 4B8944FF201967B4007DE474 /* Tape.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = Tape.hpp; sourceTree = ""; }; - 4B894500201967B4007DE474 /* File.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = File.cpp; sourceTree = ""; }; 4B894501201967B4007DE474 /* Tape.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = 
Tape.cpp; sourceTree = ""; }; 4B894502201967B4007DE474 /* Disk.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = Disk.hpp; sourceTree = ""; }; 4B894503201967B4007DE474 /* StaticAnalyser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = StaticAnalyser.cpp; sourceTree = ""; }; @@ -3810,7 +3806,6 @@ isa = PBXGroup; children = ( 4B8944FC201967B4007DE474 /* Disk.cpp */, - 4B894500201967B4007DE474 /* File.cpp */, 4B894503201967B4007DE474 /* StaticAnalyser.cpp */, 4B894501201967B4007DE474 /* Tape.cpp */, 4B894502201967B4007DE474 /* Disk.hpp */, @@ -6006,7 +6001,6 @@ 4B055AA31FAE85DF0060FFFF /* ImplicitSectors.cpp in Sources */, 4B8318B322D3E540006DB630 /* Audio.cpp in Sources */, 4B055AAE1FAE85FD0060FFFF /* TrackSerialiser.cpp in Sources */, - 4B89452B201967B4007DE474 /* File.cpp in Sources */, 4B6AAEAC230E40250078E864 /* SCSI.cpp in Sources */, 4B055A981FAE85C50060FFFF /* Drive.cpp in Sources */, 4BD424E62193B5830097291A /* Shader.cpp in Sources */, @@ -6331,7 +6325,6 @@ 4B228CD524D773B40077EF25 /* CSScanTarget.mm in Sources */, 4BCD634922D6756400F567F1 /* MacintoshDoubleDensityDrive.cpp in Sources */, 4B0F94FE208C1A1600FE41D9 /* NIB.cpp in Sources */, - 4B89452A201967B4007DE474 /* File.cpp in Sources */, 4BC080D026A257A200D03FD8 /* StaticAnalyser.cpp in Sources */, 4B4DC8211D2C2425003C5BF8 /* Vic20.cpp in Sources */, 4B71368E1F788112008B8ED9 /* Parser.cpp in Sources */, @@ -6581,7 +6574,6 @@ 4BEDA3BB25B25563000C2DBD /* Decoder.cpp in Sources */, 4B778F2423A5EDEE0000D260 /* PRG.cpp in Sources */, 4B778F5A23A5F2D50000D260 /* 6502.cpp in Sources */, - 4B778F6223A5F35F0000D260 /* File.cpp in Sources */, 4B06AB0F2C6461780034D014 /* MultiProducer.cpp in Sources */, 4B778F3523A5F1040000D260 /* SCSI.cpp in Sources */, 4BD388882239E198002D14B5 /* 68000Tests.mm in Sources */, diff --git a/Storage/Cartridge/Formats/PRG.cpp b/Storage/Cartridge/Formats/PRG.cpp index 0bcc4af28..de94446ac 
100644 --- a/Storage/Cartridge/Formats/PRG.cpp +++ b/Storage/Cartridge/Formats/PRG.cpp @@ -29,11 +29,11 @@ PRG::PRG(const std::string &file_name) { int loading_address = fgetc(file); loading_address |= fgetc(file) << 8; - std::size_t data_length = size_t(file_stats.st_size) - 2; + const std::size_t data_length = size_t(file_stats.st_size) - 2; std::size_t padded_data_length = 1; while(padded_data_length < data_length) padded_data_length <<= 1; std::vector contents(padded_data_length); - std::size_t length = std::fread(contents.data(), 1, size_t(data_length), file); + const std::size_t length = std::fread(contents.data(), 1, size_t(data_length), file); std::fclose(file); // accept only files intended to load at 0xa000