From 1c255b9e7dfced356685d1823056dc266341b684 Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Sun, 31 Dec 2017 18:49:35 -0500 Subject: [PATCH 1/4] Generalises some of the disassembler, and provides Z80 logic to create a [first attempt at a] Z80 disassembler. --- .../Clock Signal.xcodeproj/project.pbxproj | 30 +- StaticAnalyser/Atari/StaticAnalyser.cpp | 2 +- .../{Disassembler6502.cpp => 6502.cpp} | 49 +- StaticAnalyser/Disassembler/6502.hpp | 99 +++ StaticAnalyser/Disassembler/AddressMapper.cpp | 9 + StaticAnalyser/Disassembler/AddressMapper.hpp | 30 + .../Disassembler/Disassembler6502.hpp | 79 --- StaticAnalyser/Disassembler/Kernel.hpp | 50 ++ StaticAnalyser/Disassembler/Z80.cpp | 619 ++++++++++++++++++ StaticAnalyser/Disassembler/Z80.hpp | 87 +++ StaticAnalyser/Oric/StaticAnalyser.cpp | 5 +- 11 files changed, 936 insertions(+), 123 deletions(-) rename StaticAnalyser/Disassembler/{Disassembler6502.cpp => 6502.cpp} (87%) create mode 100644 StaticAnalyser/Disassembler/6502.hpp create mode 100644 StaticAnalyser/Disassembler/AddressMapper.cpp create mode 100644 StaticAnalyser/Disassembler/AddressMapper.hpp delete mode 100644 StaticAnalyser/Disassembler/Disassembler6502.hpp create mode 100644 StaticAnalyser/Disassembler/Kernel.hpp create mode 100644 StaticAnalyser/Disassembler/Z80.cpp create mode 100644 StaticAnalyser/Disassembler/Z80.hpp diff --git a/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj b/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj index d8ccf7d1f..f9e560b36 100644 --- a/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj +++ b/OSBindings/Mac/Clock Signal.xcodeproj/project.pbxproj @@ -20,7 +20,7 @@ 4B055A851FAE85480060FFFF /* File.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4BE77A2C1D84ADFB00BC3827 /* File.cpp */; }; 4B055A861FAE854C0060FFFF /* StaticAnalyser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4BC5E4901D7ED365008CF980 /* StaticAnalyser.cpp */; }; 4B055A871FAE854F0060FFFF /* Tape.cpp in Sources */ = {isa = 
PBXBuildFile; fileRef = 4BC830CF1D6E7C690000A26F /* Tape.cpp */; }; - 4B055A881FAE85530060FFFF /* Disassembler6502.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B5A12551DD55862007A2231 /* Disassembler6502.cpp */; }; + 4B055A881FAE85530060FFFF /* 6502.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B5A12551DD55862007A2231 /* 6502.cpp */; }; 4B055A891FAE85580060FFFF /* StaticAnalyser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4BCF1FA91DADD41B0039D2E7 /* StaticAnalyser.cpp */; }; 4B055A8A1FAE855B0060FFFF /* Tape.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B8805FC1DD02552003085B1 /* Tape.cpp */; }; 4B055A8B1FAE85670060FFFF /* StaticAnalyser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B1497891EE4AC5E00CE2596 /* StaticAnalyser.cpp */; }; @@ -219,7 +219,7 @@ 4B55CE5F1C3B7D960093A61B /* MachineDocument.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4B55CE5E1C3B7D960093A61B /* MachineDocument.swift */; }; 4B58601E1F806AB200AEE2E3 /* MFMSectorDump.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B58601C1F806AB200AEE2E3 /* MFMSectorDump.cpp */; }; 4B59199C1DAC6C46005BB85C /* OricTAP.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B59199A1DAC6C46005BB85C /* OricTAP.cpp */; }; - 4B5A12571DD55862007A2231 /* Disassembler6502.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B5A12551DD55862007A2231 /* Disassembler6502.cpp */; }; + 4B5A12571DD55862007A2231 /* 6502.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B5A12551DD55862007A2231 /* 6502.cpp */; }; 4B5FADBA1DE3151600AEC565 /* FileHolder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B5FADB81DE3151600AEC565 /* FileHolder.cpp */; }; 4B5FADC01DE3BF2B00AEC565 /* Microdisc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B5FADBE1DE3BF2B00AEC565 /* Microdisc.cpp */; }; 4B643F3A1D77AD1900D431D6 /* CSStaticAnalyser.mm in Sources */ = {isa = PBXBuildFile; fileRef = 4B643F391D77AD1900D431D6 /* CSStaticAnalyser.mm */; }; @@ -267,6 +267,8 @@ 4B92EACA1B7C112B00246143 /* 
6502TimingTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4B92EAC91B7C112B00246143 /* 6502TimingTests.swift */; }; 4B95FA9D1F11893B0008E395 /* ZX8081OptionsPanel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4B95FA9C1F11893B0008E395 /* ZX8081OptionsPanel.swift */; }; 4B96F7221D75119A0058BB2D /* Tape.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B96F7201D75119A0058BB2D /* Tape.cpp */; }; + 4B9C9D751FF81CC00030A129 /* Z80.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B9C9D731FF81CC00030A129 /* Z80.cpp */; }; + 4B9C9D781FF81ED30030A129 /* AddressMapper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4B9C9D761FF81ED30030A129 /* AddressMapper.cpp */; }; 4B9CCDA11DA279CA0098B625 /* Vic20OptionsPanel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4B9CCDA01DA279CA0098B625 /* Vic20OptionsPanel.swift */; }; 4BA0F68E1EEA0E8400E9489E /* ZX8081.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4BA0F68C1EEA0E8400E9489E /* ZX8081.cpp */; }; 4BA22B071D8817CE0008C640 /* Disk.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4BA22B051D8817CE0008C640 /* Disk.cpp */; }; @@ -795,6 +797,7 @@ 4B5073051DDD3B9400C48FBD /* ArrayBuilder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ArrayBuilder.cpp; sourceTree = ""; }; 4B5073061DDD3B9400C48FBD /* ArrayBuilder.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = ArrayBuilder.hpp; sourceTree = ""; }; 4B5073091DDFCFDF00C48FBD /* ArrayBuilderTests.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = ArrayBuilderTests.mm; sourceTree = ""; }; + 4B5342211FF9A30800D42660 /* Kernel.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = Kernel.hpp; path = ../../StaticAnalyser/Disassembler/Kernel.hpp; sourceTree = ""; }; 4B54C0BB1F8D8E790050900F /* KeyboardMachine.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = 
KeyboardMachine.cpp; sourceTree = ""; }; 4B54C0BD1F8D8F450050900F /* Keyboard.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = Keyboard.cpp; path = Oric/Keyboard.cpp; sourceTree = ""; }; 4B54C0BE1F8D8F450050900F /* Keyboard.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = Keyboard.hpp; path = Oric/Keyboard.hpp; sourceTree = ""; }; @@ -813,8 +816,8 @@ 4B58601D1F806AB200AEE2E3 /* MFMSectorDump.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; path = MFMSectorDump.hpp; sourceTree = ""; }; 4B59199A1DAC6C46005BB85C /* OricTAP.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = OricTAP.cpp; sourceTree = ""; }; 4B59199B1DAC6C46005BB85C /* OricTAP.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = OricTAP.hpp; sourceTree = ""; }; - 4B5A12551DD55862007A2231 /* Disassembler6502.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Disassembler6502.cpp; path = ../../StaticAnalyser/Disassembler/Disassembler6502.cpp; sourceTree = ""; }; - 4B5A12561DD55862007A2231 /* Disassembler6502.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = Disassembler6502.hpp; path = ../../StaticAnalyser/Disassembler/Disassembler6502.hpp; sourceTree = ""; }; + 4B5A12551DD55862007A2231 /* 6502.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = 6502.cpp; path = ../../StaticAnalyser/Disassembler/6502.cpp; sourceTree = ""; }; + 4B5A12561DD55862007A2231 /* 6502.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = 6502.hpp; path = ../../StaticAnalyser/Disassembler/6502.hpp; sourceTree = ""; }; 4B5FADB81DE3151600AEC565 /* FileHolder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = FileHolder.cpp; 
sourceTree = ""; }; 4B5FADB91DE3151600AEC565 /* FileHolder.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = FileHolder.hpp; sourceTree = ""; }; 4B5FADBE1DE3BF2B00AEC565 /* Microdisc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Microdisc.cpp; path = Oric/Microdisc.cpp; sourceTree = ""; }; @@ -898,6 +901,10 @@ 4B95FA9C1F11893B0008E395 /* ZX8081OptionsPanel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ZX8081OptionsPanel.swift; sourceTree = ""; }; 4B96F7201D75119A0058BB2D /* Tape.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Tape.cpp; path = ../../StaticAnalyser/Acorn/Tape.cpp; sourceTree = ""; }; 4B96F7211D75119A0058BB2D /* Tape.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = Tape.hpp; path = ../../StaticAnalyser/Acorn/Tape.hpp; sourceTree = ""; }; + 4B9C9D731FF81CC00030A129 /* Z80.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = Z80.cpp; path = ../../StaticAnalyser/Disassembler/Z80.cpp; sourceTree = ""; }; + 4B9C9D741FF81CC00030A129 /* Z80.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = Z80.hpp; path = ../../StaticAnalyser/Disassembler/Z80.hpp; sourceTree = ""; }; + 4B9C9D761FF81ED30030A129 /* AddressMapper.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = AddressMapper.cpp; path = ../../StaticAnalyser/Disassembler/AddressMapper.cpp; sourceTree = ""; }; + 4B9C9D771FF81ED30030A129 /* AddressMapper.hpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.h; name = AddressMapper.hpp; path = ../../StaticAnalyser/Disassembler/AddressMapper.hpp; sourceTree = ""; }; 4B9CCDA01DA279CA0098B625 /* Vic20OptionsPanel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = 
Vic20OptionsPanel.swift; sourceTree = ""; }; 4BA0F68C1EEA0E8400E9489E /* ZX8081.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ZX8081.cpp; path = Data/ZX8081.cpp; sourceTree = ""; }; 4BA0F68D1EEA0E8400E9489E /* ZX8081.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = ZX8081.hpp; path = Data/ZX8081.hpp; sourceTree = ""; }; @@ -1832,8 +1839,13 @@ 4B5A12581DD55873007A2231 /* Disassembler */ = { isa = PBXGroup; children = ( - 4B5A12551DD55862007A2231 /* Disassembler6502.cpp */, - 4B5A12561DD55862007A2231 /* Disassembler6502.hpp */, + 4B5A12551DD55862007A2231 /* 6502.cpp */, + 4B9C9D761FF81ED30030A129 /* AddressMapper.cpp */, + 4B9C9D731FF81CC00030A129 /* Z80.cpp */, + 4B5A12561DD55862007A2231 /* 6502.hpp */, + 4B9C9D771FF81ED30030A129 /* AddressMapper.hpp */, + 4B5342211FF9A30800D42660 /* Kernel.hpp */, + 4B9C9D741FF81CC00030A129 /* Z80.hpp */, ); name = Disassembler; sourceTree = ""; @@ -3287,7 +3299,7 @@ 4B055AB31FAE860F0060FFFF /* CSW.cpp in Sources */, 4B055ACF1FAE9B030060FFFF /* SoundGenerator.cpp in Sources */, 4B055AEE1FAE9BBF0060FFFF /* Keyboard.cpp in Sources */, - 4B055A881FAE85530060FFFF /* Disassembler6502.cpp in Sources */, + 4B055A881FAE85530060FFFF /* 6502.cpp in Sources */, 4B055AED1FAE9BA20060FFFF /* Z80Storage.cpp in Sources */, 4B055AD11FAE9B030060FFFF /* Video.cpp in Sources */, 4B055AA21FAE85DA0060FFFF /* SSD.cpp in Sources */, @@ -3393,6 +3405,7 @@ 4BA799951D8B656E0045123D /* StaticAnalyser.cpp in Sources */, 4B54C0BF1F8D8F450050900F /* Keyboard.cpp in Sources */, 4B3FE75E1F3CF68B00448EE4 /* CPM.cpp in Sources */, + 4B9C9D781FF81ED30030A129 /* AddressMapper.cpp in Sources */, 4B2BFDB21DAEF5FF001A68B8 /* Video.cpp in Sources */, 4B4DC82B1D2C27A4003C5BF8 /* SerialBus.cpp in Sources */, 4BBFFEE61F7B27F1005F3FEB /* TrackSerialiser.cpp in Sources */, @@ -3406,6 +3419,7 @@ 4B4518811F75E91A00926311 /* PCMPatchedTrack.cpp in Sources */, 
4B8805F71DCFF6C9003085B1 /* Commodore.cpp in Sources */, 4BBF99181C8FBA6F0075DAFB /* TextureTarget.cpp in Sources */, + 4B9C9D751FF81CC00030A129 /* Z80.cpp in Sources */, 4BC76E691C98E31700E6EF73 /* FIRFilter.cpp in Sources */, 4B3BF5B01F146265005B6C36 /* CSW.cpp in Sources */, 4B4518A51F75FD1C00926311 /* SSD.cpp in Sources */, @@ -3445,7 +3459,7 @@ 4B5FADBA1DE3151600AEC565 /* FileHolder.cpp in Sources */, 4B643F3A1D77AD1900D431D6 /* CSStaticAnalyser.mm in Sources */, 4B1497881EE4A1DA00CE2596 /* ZX80O81P.cpp in Sources */, - 4B5A12571DD55862007A2231 /* Disassembler6502.cpp in Sources */, + 4B5A12571DD55862007A2231 /* 6502.cpp in Sources */, 4B2B3A4B1F9B8FA70062DABF /* Typer.cpp in Sources */, 4B4518821F75E91A00926311 /* PCMSegment.cpp in Sources */, 4BE7C9181E3D397100A5496D /* TIA.cpp in Sources */, diff --git a/StaticAnalyser/Atari/StaticAnalyser.cpp b/StaticAnalyser/Atari/StaticAnalyser.cpp index a0b8d8170..7490c7861 100644 --- a/StaticAnalyser/Atari/StaticAnalyser.cpp +++ b/StaticAnalyser/Atari/StaticAnalyser.cpp @@ -8,7 +8,7 @@ #include "StaticAnalyser.hpp" -#include "../Disassembler/Disassembler6502.hpp" +#include "../Disassembler/6502.hpp" using namespace StaticAnalyser::Atari; diff --git a/StaticAnalyser/Disassembler/Disassembler6502.cpp b/StaticAnalyser/Disassembler/6502.cpp similarity index 87% rename from StaticAnalyser/Disassembler/Disassembler6502.cpp rename to StaticAnalyser/Disassembler/6502.cpp index 4dd48b51f..6ddfb67a5 100644 --- a/StaticAnalyser/Disassembler/Disassembler6502.cpp +++ b/StaticAnalyser/Disassembler/6502.cpp @@ -6,24 +6,25 @@ // Copyright © 2016 Thomas Harte. All rights reserved. 
// -#include "Disassembler6502.hpp" -#include +#include "6502.hpp" + +#include "Kernel.hpp" using namespace StaticAnalyser::MOS6502; +namespace { -struct PartialDisassembly { - Disassembly disassembly; - std::vector remaining_entry_points; -}; +using PartialDisassembly = StaticAnalyser::Disassembly::PartialDisassembly; + +struct MOS6502Disassembler { static void AddToDisassembly(PartialDisassembly &disassembly, const std::vector &memory, const std::function &address_mapper, uint16_t entry_point) { disassembly.disassembly.internal_calls.insert(entry_point); uint16_t address = entry_point; - while(1) { + while(true) { std::size_t local_address = address_mapper(address); if(local_address >= memory.size()) return; - struct Instruction instruction; + Instruction instruction; instruction.address = address; address++; @@ -307,31 +308,13 @@ static void AddToDisassembly(PartialDisassembly &disassembly, const std::vector< } } -Disassembly StaticAnalyser::MOS6502::Disassemble(const std::vector &memory, const std::function &address_mapper, std::vector entry_points) { - PartialDisassembly partialDisassembly; - partialDisassembly.remaining_entry_points = entry_points; +}; - while(!partialDisassembly.remaining_entry_points.empty()) { - // pull the next entry point from the back of the vector - uint16_t next_entry_point = partialDisassembly.remaining_entry_points.back(); - partialDisassembly.remaining_entry_points.pop_back(); +} // end of anonymous namespace - // if that address has already bene visited, forget about it - if(partialDisassembly.disassembly.instructions_by_address.find(next_entry_point) != partialDisassembly.disassembly.instructions_by_address.end()) continue; - - // if it's outgoing, log it as such and forget about it; otherwise disassemble - std::size_t mapped_entry_point = address_mapper(next_entry_point); - if(mapped_entry_point >= memory.size()) - partialDisassembly.disassembly.outward_calls.insert(next_entry_point); - else - 
AddToDisassembly(partialDisassembly, memory, address_mapper, next_entry_point); - } - - return std::move(partialDisassembly.disassembly); -} - -std::function StaticAnalyser::MOS6502::OffsetMapper(uint16_t start_address) { - return [start_address](uint16_t argument) { - return static_cast(argument - start_address); - }; +Disassembly StaticAnalyser::MOS6502::Disassemble( + const std::vector &memory, + const std::function &address_mapper, + std::vector entry_points) { + return StaticAnalyser::Disassembly::Disassemble(memory, address_mapper, entry_points); } diff --git a/StaticAnalyser/Disassembler/6502.hpp b/StaticAnalyser/Disassembler/6502.hpp new file mode 100644 index 000000000..a5b0b1fe4 --- /dev/null +++ b/StaticAnalyser/Disassembler/6502.hpp @@ -0,0 +1,99 @@ +// +// 6502.hpp +// Clock Signal +// +// Created by Thomas Harte on 10/11/2016. +// Copyright © 2016 Thomas Harte. All rights reserved. +// + +#ifndef StaticAnalyser_Disassembler_6502_hpp +#define StaticAnalyser_Disassembler_6502_hpp + +#include +#include +#include +#include +#include +#include + +namespace StaticAnalyser { +namespace MOS6502 { + +/*! + Describes a 6502 instruction — its address, the operation it performs, its addressing mode + and its operand, if any. +*/ +struct Instruction { + /*! The address this instruction starts at. This is a mapped address. */ + uint16_t address = 0; + /*! The operation this instruction performs. */ + enum { + BRK, JSR, RTI, RTS, JMP, + CLC, SEC, CLD, SED, CLI, SEI, CLV, + NOP, + + SLO, RLA, SRE, RRA, ALR, ARR, + SAX, LAX, DCP, ISC, + ANC, XAA, AXS, + AND, EOR, ORA, BIT, + ADC, SBC, + AHX, SHY, SHX, TAS, LAS, + + LDA, STA, LDX, STX, LDY, STY, + + BPL, BMI, BVC, BVS, BCC, BCS, BNE, BEQ, + + CMP, CPX, CPY, + INC, DEC, DEX, DEY, INX, INY, + ASL, ROL, LSR, ROR, + TAX, TXA, TAY, TYA, TSX, TXS, + PLA, PHA, PLP, PHP, + + KIL + } operation = NOP; + /*! The addressing mode used by the instruction. 
*/ + enum { + Absolute, + AbsoluteX, + AbsoluteY, + Immediate, + Implied, + ZeroPage, + ZeroPageX, + ZeroPageY, + Indirect, + IndexedIndirectX, + IndirectIndexedY, + Relative, + } addressing_mode = Implied; + /*! The instruction's operand, if any. */ + uint16_t operand = 0; +}; + +/*! Represents the disassembled form of a program. */ +struct Disassembly { + /*! All instructions found, mapped by address. */ + std::map instructions_by_address; + /*! The set of all calls or jumps that land outside of the area covered by the data provided for disassembly. */ + std::set outward_calls; + /*! The set of all calls or jumps that land inside of the area covered by the data provided for disassembly. */ + std::set internal_calls; + /*! The sets of all stores, loads and modifies that occur to data outside of the area covered by the data provided for disassembly. */ + std::set external_stores, external_loads, external_modifies; + /*! The sets of all stores, loads and modifies that occur to data inside of the area covered by the data provided for disassembly. */ + std::set internal_stores, internal_loads, internal_modifies; +}; + +/*! + Disassembles the data provided as @c memory, mapping it into the 6502's full address range via the @c address_mapper, + starting disassembly from each of the @c entry_points. +*/ +Disassembly Disassemble( + const std::vector &memory, + const std::function &address_mapper, + std::vector entry_points); + +} +} + +#endif /* StaticAnalyser_Disassembler_6502_hpp */ diff --git a/StaticAnalyser/Disassembler/AddressMapper.cpp b/StaticAnalyser/Disassembler/AddressMapper.cpp new file mode 100644 index 000000000..6963e02d7 --- /dev/null +++ b/StaticAnalyser/Disassembler/AddressMapper.cpp @@ -0,0 +1,9 @@ +// +// AddressMapper.cpp +// Clock Signal +// +// Created by Thomas Harte on 30/12/2017. +// Copyright © 2017 Thomas Harte. All rights reserved. 
+// + +#include "AddressMapper.hpp" diff --git a/StaticAnalyser/Disassembler/AddressMapper.hpp b/StaticAnalyser/Disassembler/AddressMapper.hpp new file mode 100644 index 000000000..ffb3f8d14 --- /dev/null +++ b/StaticAnalyser/Disassembler/AddressMapper.hpp @@ -0,0 +1,30 @@ +// +// AddressMapper.hpp +// Clock Signal +// +// Created by Thomas Harte on 30/12/2017. +// Copyright © 2017 Thomas Harte. All rights reserved. +// + +#ifndef AddressMapper_hpp +#define AddressMapper_hpp + +#include + +namespace StaticAnalyser { +namespace Disassembler { + +/*! + Provides an address mapper that relocates a chunk of memory so that it starts at + address @c start_address. +*/ +template std::function OffsetMapper(T start_address) { + return [start_address](T argument) { + return static_cast(argument - start_address); + }; +} + +} +} + +#endif /* AddressMapper_hpp */ diff --git a/StaticAnalyser/Disassembler/Disassembler6502.hpp b/StaticAnalyser/Disassembler/Disassembler6502.hpp deleted file mode 100644 index c8805a33b..000000000 --- a/StaticAnalyser/Disassembler/Disassembler6502.hpp +++ /dev/null @@ -1,79 +0,0 @@ -// -// Disassembler6502.hpp -// Clock Signal -// -// Created by Thomas Harte on 10/11/2016. -// Copyright © 2016 Thomas Harte. All rights reserved. 
-// - -#ifndef Disassembler6502_hpp -#define Disassembler6502_hpp - -#include -#include -#include -#include -#include -#include - -namespace StaticAnalyser { -namespace MOS6502 { - -struct Instruction { - uint16_t address; - enum { - BRK, JSR, RTI, RTS, JMP, - CLC, SEC, CLD, SED, CLI, SEI, CLV, - NOP, - - SLO, RLA, SRE, RRA, ALR, ARR, - SAX, LAX, DCP, ISC, - ANC, XAA, AXS, - AND, EOR, ORA, BIT, - ADC, SBC, - AHX, SHY, SHX, TAS, LAS, - - LDA, STA, LDX, STX, LDY, STY, - - BPL, BMI, BVC, BVS, BCC, BCS, BNE, BEQ, - - CMP, CPX, CPY, - INC, DEC, DEX, DEY, INX, INY, - ASL, ROL, LSR, ROR, - TAX, TXA, TAY, TYA, TSX, TXS, - PLA, PHA, PLP, PHP, - - KIL - } operation; - enum { - Absolute, - AbsoluteX, - AbsoluteY, - Immediate, - Implied, - ZeroPage, - ZeroPageX, - ZeroPageY, - Indirect, - IndexedIndirectX, - IndirectIndexedY, - Relative, - } addressing_mode; - uint16_t operand; -}; - -struct Disassembly { - std::map instructions_by_address; - std::set outward_calls; - std::set internal_calls; - std::set external_stores, external_loads, external_modifies; - std::set internal_stores, internal_loads, internal_modifies; -}; - -Disassembly Disassemble(const std::vector &memory, const std::function &address_mapper, std::vector entry_points); -std::function OffsetMapper(uint16_t start_address); - -} -} - -#endif /* Disassembler6502_hpp */ diff --git a/StaticAnalyser/Disassembler/Kernel.hpp b/StaticAnalyser/Disassembler/Kernel.hpp new file mode 100644 index 000000000..6c6d45e7a --- /dev/null +++ b/StaticAnalyser/Disassembler/Kernel.hpp @@ -0,0 +1,50 @@ +// +// Kernel.hpp +// Clock Signal +// +// Created by Thomas Harte on 31/12/2017. +// Copyright © 2017 Thomas Harte. All rights reserved. 
+// + +#ifndef Kernel_hpp +#define Kernel_hpp + +namespace StaticAnalyser { +namespace Disassembly { + +template struct PartialDisassembly { + D disassembly; + std::vector remaining_entry_points; +}; + +template D Disassemble( + const std::vector &memory, + const std::function &address_mapper, + std::vector entry_points) { + PartialDisassembly partial_disassembly; + partial_disassembly.remaining_entry_points = entry_points; + + while(!partial_disassembly.remaining_entry_points.empty()) { + // pull the next entry point from the back of the vector + S next_entry_point = partial_disassembly.remaining_entry_points.back(); + partial_disassembly.remaining_entry_points.pop_back(); + + // if that address has already been visited, forget about it + if( partial_disassembly.disassembly.instructions_by_address.find(next_entry_point) + != partial_disassembly.disassembly.instructions_by_address.end()) continue; + + // if it's outgoing, log it as such and forget about it; otherwise disassemble + std::size_t mapped_entry_point = address_mapper(next_entry_point); + if(mapped_entry_point >= memory.size()) + partial_disassembly.disassembly.outward_calls.insert(next_entry_point); + else + Disassembler::AddToDisassembly(partial_disassembly, memory, address_mapper, next_entry_point); + } + + return partial_disassembly.disassembly; +} + +} +} + +#endif /* Kernel_hpp */ diff --git a/StaticAnalyser/Disassembler/Z80.cpp b/StaticAnalyser/Disassembler/Z80.cpp new file mode 100644 index 000000000..918a915d3 --- /dev/null +++ b/StaticAnalyser/Disassembler/Z80.cpp @@ -0,0 +1,619 @@ +// +// Z80.cpp +// Clock Signal +// +// Created by Thomas Harte on 30/12/2017. +// Copyright © 2017 Thomas Harte. All rights reserved. 
+// + +#include "Z80.hpp" + +#include "Kernel.hpp" + +using namespace StaticAnalyser::Z80; +namespace { + +using PartialDisassembly = StaticAnalyser::Disassembly::PartialDisassembly; + +class Accessor { + public: + Accessor(const std::vector &memory, const std::function &address_mapper, uint16_t address) : + memory_(memory), address_mapper_(address_mapper), address_(address) {} + + uint8_t byte() { + std::size_t mapped_address = address_mapper_(address_); + address_++; + if(mapped_address >= memory_.size()) { + overrun_ = true; + return 0xff; + } + return memory_[mapped_address]; + } + + uint16_t word() { + uint8_t low = byte(); + uint8_t high = byte(); + return static_cast(low | (high << 8)); + } + + bool overrun() { + return overrun_; + } + + bool at_end() { + std::size_t mapped_address = address_mapper_(address_); + return mapped_address >= memory_.size(); + } + + uint16_t address() { + return address_; + } + + private: + const std::vector &memory_; + const std::function &address_mapper_; + uint16_t address_; + bool overrun_ = false; +}; + +#define x(v) (v >> 6) +#define y(v) ((v >> 3) & 7) +#define q(v) ((v >> 3) & 1) +#define p(v) ((v >> 4) & 3) +#define z(v) (v & 7) + +Instruction::Condition condition_table[] = { + Instruction::Condition::NZ, Instruction::Condition::Z, + Instruction::Condition::NC, Instruction::Condition::C, + Instruction::Condition::PO, Instruction::Condition::PE, + Instruction::Condition::P, Instruction::Condition::M +}; + +Instruction::Location register_pair_table[] = { + Instruction::Location::BC, + Instruction::Location::DE, + Instruction::Location::HL, + Instruction::Location::SP +}; + +Instruction::Location register_pair_table2[] = { + Instruction::Location::BC, + Instruction::Location::DE, + Instruction::Location::HL, + Instruction::Location::AF +}; + +Instruction::Location RegisterTableEntry(int offset, Accessor &accessor, Instruction &instruction, bool needs_indirect_offset) { + Instruction::Location register_table[] = { + 
Instruction::Location::B, Instruction::Location::C, + Instruction::Location::D, Instruction::Location::E, + Instruction::Location::H, Instruction::Location::L, + Instruction::Location::HL_Indirect, + Instruction::Location::A + }; + + Instruction::Location location = register_table[offset]; + if(location == Instruction::Location::HL_Indirect && needs_indirect_offset) { + instruction.offset = accessor.byte() - 128; + } + + return location; +} + +Instruction::Operation alu_table[] = { + Instruction::Operation::ADD, + Instruction::Operation::ADC, + Instruction::Operation::SUB, + Instruction::Operation::SBC, + Instruction::Operation::AND, + Instruction::Operation::XOR, + Instruction::Operation::OR, + Instruction::Operation::CP +}; + +Instruction::Operation rotation_table[] = { + Instruction::Operation::RLC, + Instruction::Operation::RRC, + Instruction::Operation::RL, + Instruction::Operation::RR, + Instruction::Operation::SLA, + Instruction::Operation::SRA, + Instruction::Operation::SLL, + Instruction::Operation::SRL +}; + +Instruction::Operation block_table[][4] = { + {Instruction::Operation::LDI, Instruction::Operation::CPI, Instruction::Operation::INI, Instruction::Operation::OUTI}, + {Instruction::Operation::LDD, Instruction::Operation::CPD, Instruction::Operation::IND, Instruction::Operation::OUTD}, + {Instruction::Operation::LDIR, Instruction::Operation::CPIR, Instruction::Operation::INIR, Instruction::Operation::OTIR}, + {Instruction::Operation::LDDR, Instruction::Operation::CPDR, Instruction::Operation::INDR, Instruction::Operation::OTDR}, +}; + +void DisassembleCBPage(Accessor &accessor, Instruction &instruction, bool needs_indirect_offset) { + const uint8_t operation = accessor.byte(); + + if(!x(operation)) { + instruction.operation = rotation_table[y(operation)]; + instruction.source = instruction.destination = RegisterTableEntry(z(operation), accessor, instruction, needs_indirect_offset); + } else { + instruction.destination = 
RegisterTableEntry(z(operation), accessor, instruction, needs_indirect_offset); + instruction.source = Instruction::Location::Operand; + instruction.operand = y(operation); + + switch(x(operation)) { + case 1: instruction.operation = Instruction::Operation::BIT; break; + case 2: instruction.operation = Instruction::Operation::RES; break; + case 3: instruction.operation = Instruction::Operation::SET; break; + } + } +} + +void DisassembleEDPage(Accessor &accessor, Instruction &instruction, bool needs_indirect_offset) { + const uint8_t operation = accessor.byte(); + + switch(x(operation)) { + default: + instruction.operation = Instruction::Operation::Invalid; + break; + case 2: + if(z(operation) < 4 && y(operation) >= 4) { + instruction.operation = block_table[y(operation)-4][z(operation)]; + } else { + instruction.operation = Instruction::Operation::Invalid; + } + break; + case 3: + switch(z(operation)) { + case 0: + instruction.operation = Instruction::Operation::IN; + instruction.source = Instruction::Location::BC_Indirect; + if(y(operation) == 6) { + instruction.destination = Instruction::Location::None; + } else { + instruction.destination = RegisterTableEntry(y(operation), accessor, instruction, needs_indirect_offset); + } + break; + case 1: + instruction.operation = Instruction::Operation::OUT; + instruction.destination = Instruction::Location::BC_Indirect; + if(y(operation) == 6) { + instruction.source = Instruction::Location::None; + } else { + instruction.source = RegisterTableEntry(y(operation), accessor, instruction, needs_indirect_offset); + } + break; + case 2: + instruction.operation = (y(operation)&1) ? 
Instruction::Operation::ADC : Instruction::Operation::SBC; + instruction.destination = Instruction::Location::HL; + instruction.source = register_pair_table[y(operation) >> 1]; + break; + case 3: + instruction.operation = Instruction::Operation::LD; + if(q(operation)) { + instruction.destination = RegisterTableEntry(p(operation), accessor, instruction, needs_indirect_offset); + instruction.source = Instruction::Location::Operand_Indirect; + } else { + instruction.destination = Instruction::Location::Operand_Indirect; + instruction.source = RegisterTableEntry(p(operation), accessor, instruction, needs_indirect_offset); + } + instruction.operand = accessor.word(); + break; + case 4: + instruction.operation = Instruction::Operation::NEG; + break; + case 5: + instruction.operation = (y(operation) == 1) ? Instruction::Operation::RETI : Instruction::Operation::RETN; + break; + case 6: + instruction.operation = Instruction::Operation::IM; + instruction.source = Instruction::Location::Operand; + switch(y(operation)&3) { + case 0: instruction.operand = 0; break; + case 1: instruction.operand = 0; break; + case 2: instruction.operand = 1; break; + case 3: instruction.operand = 2; break; + } + break; + case 7: + switch(y(operation)) { + case 0: + instruction.operation = Instruction::Operation::LD; + instruction.destination = Instruction::Location::I; + instruction.source = Instruction::Location::A; + break; + case 1: + instruction.operation = Instruction::Operation::LD; + instruction.destination = Instruction::Location::R; + instruction.source = Instruction::Location::A; + break; + case 2: + instruction.operation = Instruction::Operation::LD; + instruction.destination = Instruction::Location::A; + instruction.source = Instruction::Location::I; + break; + case 3: + instruction.operation = Instruction::Operation::LD; + instruction.destination = Instruction::Location::A; + instruction.source = Instruction::Location::R; + break; + case 4: instruction.operation = 
Instruction::Operation::RRD; break; + case 5: instruction.operation = Instruction::Operation::RLD; break; + default: instruction.operation = Instruction::Operation::NOP; break; + } + break; + } + break; + } +} + +void DisassembleMainPage(Accessor &accessor, Instruction &instruction) { + bool needs_indirect_offset = false; + enum HLSubstitution { + None, IX, IY + } hl_substitution = None; + + while(true) { + uint8_t operation = accessor.byte(); + + switch(x(operation)) { + case 0: + switch(z(operation)) { + case 0: + switch(y(operation)) { + case 0: instruction.operation = Instruction::Operation::NOP; break; + case 1: instruction.operation = Instruction::Operation::EXAFAFd; break; + case 2: + instruction.operation = Instruction::Operation::DJNZ; + instruction.operand = accessor.byte() - 128; + break; + default: + instruction.operation = Instruction::Operation::JR; + instruction.operand = accessor.byte() - 128; + if(y(operation) >= 4) instruction.condition = condition_table[y(operation) - 4]; + break; + } + break; + case 1: + if(y(operation)&1) { + instruction.operation = Instruction::Operation::ADD; + instruction.destination = Instruction::Location::HL; + instruction.source = register_pair_table[y(operation) >> 1]; + } else { + instruction.operation = Instruction::Operation::LD; + instruction.destination = register_pair_table[y(operation) >> 1]; + instruction.source = Instruction::Location::Operand; + instruction.operand = accessor.word(); + } + break; + case 2: + switch(y(operation)) { + case 0: + instruction.operation = Instruction::Operation::LD; + instruction.destination = Instruction::Location::BC_Indirect; + instruction.source = Instruction::Location::A; + break; + case 1: + instruction.operation = Instruction::Operation::LD; + instruction.destination = Instruction::Location::A; + instruction.source = Instruction::Location::BC_Indirect; + break; + case 2: + instruction.operation = Instruction::Operation::LD; + instruction.destination = 
Instruction::Location::DE_Indirect; + instruction.source = Instruction::Location::A; + break; + case 3: + instruction.operation = Instruction::Operation::LD; + instruction.destination = Instruction::Location::A; + instruction.source = Instruction::Location::DE_Indirect; + break; + case 4: + instruction.operation = Instruction::Operation::LD; + instruction.destination = Instruction::Location::Operand_Indirect; + instruction.source = Instruction::Location::HL; + break; + case 5: + instruction.operation = Instruction::Operation::LD; + instruction.destination = Instruction::Location::HL; + instruction.source = Instruction::Location::Operand_Indirect; + break; + case 6: + instruction.operation = Instruction::Operation::LD; + instruction.destination = Instruction::Location::Operand_Indirect; + instruction.source = Instruction::Location::A; + break; + case 7: + instruction.operation = Instruction::Operation::LD; + instruction.destination = Instruction::Location::A; + instruction.source = Instruction::Location::Operand_Indirect; + break; + } + + if(y(operation) > 3) { + instruction.operand = accessor.word(); + } + break; + case 3: + if(y(operation)&1) { + instruction.operation = Instruction::Operation::DEC; + } else { + instruction.operation = Instruction::Operation::INC; + } + instruction.source = instruction.destination = register_pair_table[y(operation) >> 1]; + break; + case 4: + instruction.operation = Instruction::Operation::INC; + instruction.source = instruction.destination = RegisterTableEntry(y(operation), accessor, instruction, needs_indirect_offset); + break; + case 5: + instruction.operation = Instruction::Operation::DEC; + instruction.source = instruction.destination = RegisterTableEntry(y(operation), accessor, instruction, needs_indirect_offset); + break; + case 6: + instruction.operation = Instruction::Operation::LD; + instruction.destination = RegisterTableEntry(y(operation), accessor, instruction, needs_indirect_offset); + instruction.source = 
Instruction::Location::Operand; + instruction.operand = accessor.byte(); + break; + case 7: + switch(y(operation)) { + case 0: instruction.operation = Instruction::Operation::RLCA; break; + case 1: instruction.operation = Instruction::Operation::RRCA; break; + case 2: instruction.operation = Instruction::Operation::RLA; break; + case 3: instruction.operation = Instruction::Operation::RRA; break; + case 4: instruction.operation = Instruction::Operation::DAA; break; + case 5: instruction.operation = Instruction::Operation::CPL; break; + case 6: instruction.operation = Instruction::Operation::SCF; break; + case 7: instruction.operation = Instruction::Operation::CCF; break; + } + break; + } + break; + case 1: + if(y(operation) == 6 && z(operation) == 6) { + instruction.operation = Instruction::Operation::HALT; + } else { + instruction.operation = Instruction::Operation::LD; + instruction.source = RegisterTableEntry(z(operation), accessor, instruction, needs_indirect_offset); + instruction.destination = RegisterTableEntry(y(operation), accessor, instruction, needs_indirect_offset); + } + break; + case 2: + instruction.operation = alu_table[y(operation)]; + instruction.source = RegisterTableEntry(z(operation), accessor, instruction, needs_indirect_offset); + instruction.destination = Instruction::Location::A; + break; + case 3: + switch(z(operation)) { + case 0: + instruction.operation = Instruction::Operation::RET; + instruction.condition = condition_table[y(operation)]; + break; + case 1: + switch(y(operation)) { + default: + instruction.operation = Instruction::Operation::POP; + instruction.source = register_pair_table2[y(operation) >> 1]; + break; + case 1: + instruction.operation = Instruction::Operation::RET; + break; + case 3: + instruction.operation = Instruction::Operation::EXX; + break; + case 5: + instruction.operation = Instruction::Operation::JP; + instruction.source = Instruction::Location::HL; + break; + case 7: + instruction.operation = 
Instruction::Operation::LD; + instruction.destination = Instruction::Location::SP; + instruction.source = Instruction::Location::HL; + break; + } + break; + case 2: + instruction.operation = Instruction::Operation::JP; + instruction.condition = condition_table[y(operation)]; + instruction.operand = accessor.word(); + break; + case 3: + switch(y(operation)) { + case 0: + instruction.operation = Instruction::Operation::JP; + instruction.source = Instruction::Location::Operand; + instruction.operand = accessor.word(); + break; + case 1: + DisassembleCBPage(accessor, instruction, needs_indirect_offset); + break; + case 2: + instruction.operation = Instruction::Operation::OUT; + instruction.source = Instruction::Location::A; + instruction.destination = Instruction::Location::Operand_Indirect; + instruction.operand = accessor.byte(); + break; + case 3: + instruction.operation = Instruction::Operation::IN; + instruction.destination = Instruction::Location::A; + instruction.source = Instruction::Location::Operand_Indirect; + instruction.operand = accessor.byte(); + break; + case 4: + instruction.operation = Instruction::Operation::EX; + instruction.destination = Instruction::Location::SP_Indirect; + instruction.source = Instruction::Location::HL; + break; + case 5: + instruction.operation = Instruction::Operation::EX; + instruction.destination = Instruction::Location::DE; + instruction.source = Instruction::Location::HL; + break; + case 6: + instruction.operation = Instruction::Operation::DI; + break; + case 7: + instruction.operation = Instruction::Operation::EI; + break; + } + break; + case 4: + instruction.operation = Instruction::Operation::CALL; + instruction.source = Instruction::Location::Operand_Indirect; + instruction.operand = accessor.word(); + instruction.condition = condition_table[y(operation)]; + break; + case 5: + switch(y(operation)) { + default: + instruction.operation = Instruction::Operation::PUSH; + instruction.source = 
register_pair_table2[y(operation) >> 1]; + break; + case 1: + instruction.operation = Instruction::Operation::CALL; + instruction.source = Instruction::Location::Operand; + instruction.operand = accessor.word(); + break; + case 3: + needs_indirect_offset = true; + hl_substitution = IX; + continue; // i.e. repeat loop. + case 5: + DisassembleEDPage(accessor, instruction, needs_indirect_offset); + break; + case 7: + needs_indirect_offset = true; + hl_substitution = IY; + continue; // i.e. repeat loop. + } + break; + case 6: + instruction.operation = alu_table[y(operation)]; + instruction.source = Instruction::Location::Operand; + instruction.destination = Instruction::Location::A; + instruction.operand = accessor.byte(); + break; + case 7: + instruction.operation = Instruction::Operation::RST; + instruction.source = Instruction::Location::Operand; + instruction.operand = y(operation) << 3; + break; + } + break; + } + + // This while(true) isn't an infinite loop for everything except those paths that opt in + // via continue. + break; + } + + // Perform IX/IY substitution for HL, if applicable. + if(hl_substitution != None) { + // EX DE, HL is not affected. + if(instruction.operation == Instruction::Operation::EX) return; + + // If an (HL) is involved, switch it for IX+d or IY+d. + if( instruction.source == Instruction::Location::HL_Indirect || + instruction.destination == Instruction::Location::HL_Indirect) { + + if(instruction.source == Instruction::Location::HL_Indirect) { + instruction.source = (hl_substitution == IX) ? Instruction::Location::IX_Indirect_Offset : Instruction::Location::IY_Indirect_Offset; + } + if(instruction.destination == Instruction::Location::HL_Indirect) { + instruction.destination = (hl_substitution == IX) ? Instruction::Location::IX_Indirect_Offset : Instruction::Location::IY_Indirect_Offset; + } + return; + } + + // Otherwise, switch either of H or L for I[X/Y]h and I[X/Y]l. 
+ if(instruction.source == Instruction::Location::H) { + instruction.source = (hl_substitution == IX) ? Instruction::Location::IXh : Instruction::Location::IYh; + } + if(instruction.source == Instruction::Location::L) { + instruction.source = (hl_substitution == IX) ? Instruction::Location::IXl : Instruction::Location::IYl; + } + if(instruction.destination == Instruction::Location::H) { + instruction.destination = (hl_substitution == IX) ? Instruction::Location::IXh : Instruction::Location::IYh; + } + if(instruction.destination == Instruction::Location::L) { + instruction.destination = (hl_substitution == IX) ? Instruction::Location::IXl : Instruction::Location::IYl; + } + } +} + +struct Z80Disassembler { + static void AddToDisassembly(PartialDisassembly &disassembly, const std::vector &memory, const std::function &address_mapper, uint16_t entry_point) { + disassembly.disassembly.internal_calls.insert(entry_point); + Accessor accessor(memory, address_mapper, entry_point); + + while(!accessor.at_end()) { + Instruction instruction; + instruction.address = accessor.address(); + + DisassembleMainPage(accessor, instruction); + + // If any memory access was invalid, end disassembly. + if(accessor.overrun()) return; + + // Store the instruction away. + disassembly.disassembly.instructions_by_address[instruction.address] = instruction; + + // Update access tables. + int access_type = + ((instruction.source == Instruction::Location::Operand_Indirect) ? 1 : 0) | + ((instruction.destination == Instruction::Location::Operand_Indirect) ? 
2 : 0); + uint16_t address = static_cast(instruction.operand); + bool is_internal = address_mapper(address) < memory.size(); + switch(access_type) { + default: break; + case 1: + if(is_internal) { + disassembly.disassembly.internal_loads.insert(address); + } else { + disassembly.disassembly.external_loads.insert(address); + } + break; + case 2: + if(is_internal) { + disassembly.disassembly.internal_stores.insert(address); + } else { + disassembly.disassembly.external_stores.insert(address); + } + break; + case 3: + if(is_internal) { + disassembly.disassembly.internal_modifies.insert(address); + } else { + disassembly.disassembly.external_modifies.insert(address); + } + break; + } + + // Add any (potentially) newly discovered entry point. + if( instruction.operation == Instruction::Operation::JP || + instruction.operation == Instruction::Operation::JR || + instruction.operation == Instruction::Operation::CALL || + instruction.operation == Instruction::Operation::RST) { + disassembly.remaining_entry_points.push_back(static_cast(instruction.operand)); + } + + // This is it if: an unconditional RET, RETI, RETN, JP or JR is found. 
+ if(instruction.condition != Instruction::Condition::None) continue; + + if(instruction.operation == Instruction::Operation::RET) return; + if(instruction.operation == Instruction::Operation::RETI) return; + if(instruction.operation == Instruction::Operation::RETN) return; + if(instruction.operation == Instruction::Operation::JP) return; + if(instruction.operation == Instruction::Operation::JR) return; + } + } +}; + +} // end of anonymous namespace + +Disassembly StaticAnalyser::Z80::Disassemble( + const std::vector &memory, + const std::function &address_mapper, + std::vector entry_points) { + return StaticAnalyser::Disassembly::Disassemble(memory, address_mapper, entry_points); +} diff --git a/StaticAnalyser/Disassembler/Z80.hpp b/StaticAnalyser/Disassembler/Z80.hpp new file mode 100644 index 000000000..a26396b9b --- /dev/null +++ b/StaticAnalyser/Disassembler/Z80.hpp @@ -0,0 +1,87 @@ +// +// Z80.hpp +// Clock Signal +// +// Created by Thomas Harte on 30/12/2017. +// Copyright © 2017 Thomas Harte. All rights reserved. +// + +#ifndef StaticAnalyser_Disassembler_Z80_hpp +#define StaticAnalyser_Disassembler_Z80_hpp + +#include +#include +#include +#include + +namespace StaticAnalyser { +namespace Z80 { + +struct Instruction { + /*! The address this instruction starts at. This is a mapped address. */ + uint16_t address = 0; + + /*! The operation this instruction performs. */ + enum class Operation { + NOP, + EXAFAFd, EXX, EX, + LD, HALT, + ADD, ADC, SUB, SBC, AND, XOR, OR, CP, + INC, DEC, + RLCA, RRCA, RLA, RRA, DAA, CPL, SCF, CCF, + RLD, RRD, + DJNZ, JR, JP, CALL, RST, RET, RETI, RETN, + PUSH, POP, + IN, OUT, + EI, DI, + RLC, RRC, RL, RR, SLA, SRA, SLL, SRL, + BIT, RES, SET, + LDI, CPI, INI, OUTI, + LDD, CPD, IND, OUTD, + LDIR, CPIR, INIR, OTIR, + LDDR, CPDR, INDR, OTDR, + NEG, + IM, + Invalid + } operation = Operation::NOP; + + /*! The condition required for this instruction to take effect. 
*/ + enum class Condition { + None, NZ, Z, NC, C, PO, PE, P, M + } condition = Condition::None; + + enum class Location { + B, C, D, E, H, L, HL_Indirect, A, I, R, + BC, DE, HL, SP, AF, Operand, + IX_Indirect_Offset, IY_Indirect_Offset, IXh, IXl, IYh, IYl, + Operand_Indirect, + BC_Indirect, DE_Indirect, SP_Indirect, + None + }; + /*! The locations of source data for this instruction. */ + Location source = Location::None; + /*! The locations of destination data from this instruction. */ + Location destination = Location::None; + /*! The operand, if any; if this is used then it'll be referenced by either the source or destination location. */ + int operand = 0; + /*! The offset to apply, if any; applies to IX_Indirect_Offset and IY_Indirect_Offset locations. */ + int offset = 0; +}; + +struct Disassembly { + std::map instructions_by_address; + std::set outward_calls; + std::set internal_calls; + std::set external_stores, external_loads, external_modifies; + std::set internal_stores, internal_loads, internal_modifies; +}; + +Disassembly Disassemble( + const std::vector &memory, + const std::function &address_mapper, + std::vector entry_points); + +} +} + +#endif /* StaticAnalyser_Disassembler_Z80_hpp */ diff --git a/StaticAnalyser/Oric/StaticAnalyser.cpp b/StaticAnalyser/Oric/StaticAnalyser.cpp index cfc8b22ac..09ec12734 100644 --- a/StaticAnalyser/Oric/StaticAnalyser.cpp +++ b/StaticAnalyser/Oric/StaticAnalyser.cpp @@ -9,7 +9,8 @@ #include "StaticAnalyser.hpp" #include "Tape.hpp" -#include "../Disassembler/Disassembler6502.hpp" +#include "../Disassembler/6502.hpp" +#include "../Disassembler/AddressMapper.hpp" using namespace StaticAnalyser::Oric; @@ -88,7 +89,7 @@ void StaticAnalyser::Oric::AddTargets(const Media &media, std::list &des if(file.data_type == File::MachineCode) { std::vector entry_points = {file.starting_address}; StaticAnalyser::MOS6502::Disassembly disassembly = - StaticAnalyser::MOS6502::Disassemble(file.data, 
StaticAnalyser::MOS6502::OffsetMapper(file.starting_address), entry_points); + StaticAnalyser::MOS6502::Disassemble(file.data, StaticAnalyser::Disassembler::OffsetMapper(file.starting_address), entry_points); int basic10_score = Basic10Score(disassembly); int basic11_score = Basic11Score(disassembly); From c70dbc6a498b96082b03cc8fd5ac7a4d41a174a3 Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Sun, 31 Dec 2017 21:23:30 -0500 Subject: [PATCH 2/4] Introduces the most basic attempt to guess MSX cartridge type. --- StaticAnalyser/MSX/StaticAnalyser.cpp | 46 +++++++++++++++++++++++++-- StaticAnalyser/StaticAnalyser.hpp | 13 ++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/StaticAnalyser/MSX/StaticAnalyser.cpp b/StaticAnalyser/MSX/StaticAnalyser.cpp index 3d4d340a0..12705c65e 100644 --- a/StaticAnalyser/MSX/StaticAnalyser.cpp +++ b/StaticAnalyser/MSX/StaticAnalyser.cpp @@ -9,6 +9,8 @@ #include "StaticAnalyser.hpp" #include "Tape.hpp" +#include "../Disassembler/Z80.hpp" +#include "../Disassembler/AddressMapper.hpp" /* Expected standard cartridge format: @@ -21,7 +23,7 @@ DEFS 6,0 ; room reserved for future extensions */ static std::list> - MSXCartridgesFrom(const std::list> &cartridges) { + MSXCartridgesFrom(const std::list> &cartridges, StaticAnalyser::Target &target) { std::list> msx_cartridges; for(const auto &cartridge : cartridges) { @@ -41,6 +43,46 @@ static std::list> // Check for the expansion ROM header and the reserved bytes. if(segment.data[0] != 0x41 || segment.data[1] != 0x42) continue; + uint16_t init_address = static_cast(segment.data[2] | (segment.data[3] << 8)); + // TODO: check for a rational init address? + + // If this ROM is greater than 32kb in size then some sort of MegaROM scheme must + // be at play; disassemble to try to figure it out. 
+ if(data_size > 0x8000) { + std::vector first_segment; + first_segment.insert(first_segment.begin(), segment.data.begin(), segment.data.begin() + 32768); + StaticAnalyser::Z80::Disassembly disassembly = + StaticAnalyser::Z80::Disassemble( + first_segment, + StaticAnalyser::Disassembler::OffsetMapper(0x4000), + { init_address } + ); + + // Look for LD (nnnn), A instructions, and collate those addresses. + using Instruction = StaticAnalyser::Z80::Instruction; + std::map address_counts; + for(const auto &instruction_pair : disassembly.instructions_by_address) { + if( instruction_pair.second.operation == Instruction::Operation::LD && + instruction_pair.second.destination == Instruction::Location::Operand_Indirect && + instruction_pair.second.source == Instruction::Location::A) { + address_counts[static_cast(instruction_pair.second.operand)]++; + } + } + + // Sort possible cartridge types. + using Possibility = std::pair; + std::vector possibilities; + possibilities.push_back(std::make_pair(StaticAnalyser::MSXCartridgeType::Konami, address_counts[0x6000] + address_counts[0x8000] + address_counts[0xa000])); + possibilities.push_back(std::make_pair(StaticAnalyser::MSXCartridgeType::KonamiWithSCC, address_counts[0x5000] + address_counts[0x7000] + address_counts[0x9000] + address_counts[0xb000])); + possibilities.push_back(std::make_pair(StaticAnalyser::MSXCartridgeType::ASCII8kb, address_counts[0x6000] + address_counts[0x6800] + address_counts[0x7000] + address_counts[0x7800])); + possibilities.push_back(std::make_pair(StaticAnalyser::MSXCartridgeType::ASCII16kb, address_counts[0x6000] + address_counts[0x7000] + address_counts[0x77ff])); + std::sort(possibilities.begin(), possibilities.end(), [](const Possibility &a, const Possibility &b) { + return a.second > b.second; + }); + + target.msx.paging_model = possibilities[0].first; + } + // Apply the standard MSX start address. 
msx_cartridges.emplace_back(new Storage::Cartridge::Cartridge({ Storage::Cartridge::Cartridge::Segment(0x4000, segment.data) @@ -54,7 +96,7 @@ void StaticAnalyser::MSX::AddTargets(const Media &media, std::list &dest Target target; // Obtain only those cartridges which it looks like an MSX would understand. - target.media.cartridges = MSXCartridgesFrom(media.cartridges); + target.media.cartridges = MSXCartridgesFrom(media.cartridges, target); // Check tapes for loadable files. for(const auto &tape : media.tapes) { diff --git a/StaticAnalyser/StaticAnalyser.hpp b/StaticAnalyser/StaticAnalyser.hpp index 227d982bf..693c4a9cf 100644 --- a/StaticAnalyser/StaticAnalyser.hpp +++ b/StaticAnalyser/StaticAnalyser.hpp @@ -40,6 +40,15 @@ enum class Atari2600PagingModel { Pitfall2 }; +enum class MSXCartridgeType { + None, + Konami, + KonamiWithSCC, + ASCII8kb, + ASCII16kb, + RType +}; + enum class ZX8081MemoryModel { Unexpanded, SixteenKB, @@ -113,6 +122,10 @@ struct Target { struct { AmstradCPCModel model; } amstradcpc; + + struct { + MSXCartridgeType paging_model; + } msx; }; std::string loading_command; From 4f6abc90598b010e952ef2422a77e88eeb75e7d2 Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Sun, 31 Dec 2017 21:34:35 -0500 Subject: [PATCH 3/4] Introduces missing header. --- StaticAnalyser/Disassembler/Z80.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/StaticAnalyser/Disassembler/Z80.hpp b/StaticAnalyser/Disassembler/Z80.hpp index a26396b9b..8d822f7da 100644 --- a/StaticAnalyser/Disassembler/Z80.hpp +++ b/StaticAnalyser/Disassembler/Z80.hpp @@ -10,6 +10,7 @@ #define StaticAnalyser_Disassembler_Z80_hpp #include +#include #include #include #include From f75590253d66d9bde94636949ef6125181525bd6 Mon Sep 17 00:00:00 2001 From: Thomas Harte Date: Sun, 31 Dec 2017 21:36:24 -0500 Subject: [PATCH 4/4] Introduces necessary header for std::sort. 
--- StaticAnalyser/MSX/StaticAnalyser.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/StaticAnalyser/MSX/StaticAnalyser.cpp b/StaticAnalyser/MSX/StaticAnalyser.cpp index 12705c65e..3006b7b6c 100644 --- a/StaticAnalyser/MSX/StaticAnalyser.cpp +++ b/StaticAnalyser/MSX/StaticAnalyser.cpp @@ -12,6 +12,8 @@ #include "../Disassembler/Z80.hpp" #include "../Disassembler/AddressMapper.hpp" +#include + /* Expected standard cartridge format: