diff --git a/6502.py b/6502.py
new file mode 100755
index 0000000..9c41bd2
--- /dev/null
+++ b/6502.py
@@ -0,0 +1,212 @@
+##########################################################################
+#
+# Processor specific code
+
+# CPU = "6502"
+# Description = "MOS Technology (and others) 6502 8-bit microprocessor."
+# DataWidth = 8 # 8-bit data
+# AddressWidth = 16 # 16-bit addresses
+
+# Maximum length of an instruction (for formatting purposes)
+maxLength = 3
+
+# Leadin bytes for multibyte instructions (none are defined for this CPU)
+leadInBytes = []
+
+# Addressing mode table
+# Maps each addressing mode to the str.format() string for its operands; {0} is the first operand byte, {1} the second.
+addressModeTable = {
+"implicit" : "",
+"absolute" : "${1:02X}{0:02X}",
+"absolutex" : "${1:02X}{0:02X},x",
+"absolutey" : "${1:02X}{0:02X},y",
+"accumulator" : "a",
+"immediate" : "#${0:02X}",
+"indirectx" : "(${0:02X},x)",
+"indirecty" : "(${0:02X}),y",
+"indirect" : "(${1:02X}{0:02X})",
+"relative" : "${0:04X}",
+"zeropage" : "${0:02X}",
+"zeropagex" : "${0:02X},x",
+"zeropagey" : "${0:02X},y",
+}
+
+# Op Code Table
+# Key is numeric opcode (possibly multiple bytes)
+# Value is a list:
+# number of bytes in the instruction, including the opcode
+# mnemonic
+# addressing mode (a key of addressModeTable)
+# flags (optional, e.g. pcr; pcr is defined by udis.py, which exec()s this file)
+opcodeTable = {
+0x00 : [ 1, "brk", "implicit" ],
+0x01 : [ 2, "ora", "indirectx" ],
+0x05 : [ 2, "ora", "zeropage" ],
+0x06 : [ 2, "asl", "zeropage" ],
+0x08 : [ 1, "php", "implicit" ],
+0x09 : [ 2, "ora", "immediate" ],
+0x0a : [ 1, "asl", "accumulator" ],
+0x0d : [ 3, "ora", "absolute" ],
+0x0e : [ 3, "asl", "absolute" ],
+
+0x10 : [ 2, "bpl", "relative", pcr ],
+0x11 : [ 2, "ora", "indirecty" ],
+0x15 : [ 2, "ora", "zeropagex" ],
+0x16 : [ 2, "asl", "zeropagex" ],
+0x18 : [ 1, "clc", "implicit" ],
+0x19 : [ 3, "ora", "absolutey" ],
+0x1d : [ 3, "ora", "absolutex" ],
+0x1e : [ 3, "asl", "absolutex" ],
+
+0x20 : [ 3, "jsr", "absolute" ],
+0x21 : [ 2, "and", "indirectx" ],
+0x24 : [ 2, "bit", "zeropage" ],
+0x25 : [ 2, "and", "zeropage" ],
+0x26 : [ 2, "rol", "zeropage" ],
+0x28 : [ 1, "plp", "implicit" ],
+0x29 : [ 2, "and", "immediate" ],
+0x2a : [ 1, "rol", "accumulator" ],
+0x2c : [ 3, "bit", "absolute" ],
+0x2d : [ 3, "and", "absolute" ],
+0x2e : [ 3, "rol", "absolute" ],
+
+0x30 : [ 2, "bmi", "relative", pcr ],
+0x31 : [ 2, "and", "indirecty" ],
+0x35 : [ 2, "and", "zeropagex" ],
+0x36 : [ 2, "rol", "zeropagex" ],
+0x38 : [ 1, "sec", "implicit" ],
+0x39 : [ 3, "and", "absolutey" ],
+0x3d : [ 3, "and", "absolutex" ],
+0x3e : [ 3, "rol", "absolutex" ],
+
+0x40 : [ 1, "rti", "implicit" ],
+0x41 : [ 2, "eor", "indirectx" ],
+0x45 : [ 2, "eor", "zeropage" ],
+0x46 : [ 2, "lsr", "zeropage" ],
+0x48 : [ 1, "pha", "implicit" ],
+0x49 : [ 2, "eor", "immediate" ],
+0x4a : [ 1, "lsr", "accumulator" ],
+0x4c : [ 3, "jmp", "absolute" ],
+0x4d : [ 3, "eor", "absolute" ],
+0x4e : [ 3, "lsr", "absolute" ],
+
+0x50 : [ 2, "bvc", "relative", pcr ],
+0x51 : [ 2, "eor", "indirecty" ],
+0x55 : [ 2, "eor", "zeropagex" ],
+0x56 : [ 2, "lsr", "zeropagex" ],
+0x58 : [ 1, "cli", "implicit" ],
+0x59 : [ 3, "eor", "absolutey" ],
+0x5d : [ 3, "eor", "absolutex" ],
+0x5e : [ 3, "lsr", "absolutex" ],
+
+0x60 : [ 1, "rts", "implicit" ],
+0x61 : [ 2, "adc", "indirectx" ],
+0x65 : [ 2, "adc", "zeropage" ],
+0x66 : [ 2, "ror", "zeropage" ],
+0x68 : [ 1, "pla", "implicit" ],
+0x69 : [ 2, "adc", "immediate" ],
+0x6a : [ 1, "ror", "accumulator" ],
+0x6c : [ 3, "jmp", "indirect" ],
+0x6d : [ 3, "adc", "absolute" ],
+0x6e : [ 3, "ror", "absolute" ],
+
+0x70 : [ 2, "bvs", "relative", pcr ],
+0x71 : [ 2, "adc", "indirecty" ],
+0x75 : [ 2, "adc", "zeropagex" ],
+0x76 : [ 2, "ror", "zeropagex" ],
+0x78 : [ 1, "sei", "implicit" ],
+0x79 : [ 3, "adc", "absolutey" ],
+0x7d : [ 3, "adc", "absolutex" ],
+0x7e : [ 3, "ror", "absolutex" ],
+
+0x81 : [ 2, "sta", "indirectx" ],
+0x84 : [ 2, "sty", "zeropage" ],
+0x85 : [ 2, "sta", "zeropage" ],
+0x86 : [ 2, "stx", "zeropage" ],
+0x88 : [ 1, "dey", "implicit" ],
+0x8a : [ 1, "txa", "implicit" ],
+0x8c : [ 3, "sty", "absolute" ],
+0x8d : [ 3, "sta", "absolute" ],
+0x8e : [ 3, "stx", "absolute" ],
+
+0x90 : [ 2, "bcc", "relative", pcr ],
+0x91 : [ 2, "sta", "indirecty" ],
+0x94 : [ 2, "sty", "zeropagex" ],
+0x95 : [ 2, "sta", "zeropagex" ],
+0x96 : [ 2, "stx", "zeropagey" ],
+0x98 : [ 1, "tya", "implicit" ],
+0x99 : [ 3, "sta", "absolutey" ],
+0x9a : [ 1, "txs", "implicit" ],
+0x9d : [ 3, "sta", "absolutex" ],
+
+0xa0 : [ 2, "ldy", "immediate" ],
+0xa1 : [ 2, "lda", "indirectx" ],
+0xa2 : [ 2, "ldx", "immediate" ],
+0xa4 : [ 2, "ldy", "zeropage" ],
+0xa5 : [ 2, "lda", "zeropage" ],
+0xa6 : [ 2, "ldx", "zeropage" ],
+0xa8 : [ 1, "tay", "implicit" ],
+0xa9 : [ 2, "lda", "immediate" ],
+0xaa : [ 1, "tax", "implicit" ],
+0xac : [ 3, "ldy", "absolute" ],
+0xad : [ 3, "lda", "absolute" ],
+0xae : [ 3, "ldx", "absolute" ],
+
+0xb0 : [ 2, "bcs", "relative", pcr ],
+0xb1 : [ 2, "lda", "indirecty" ],
+0xb4 : [ 2, "ldy", "zeropagex" ],
+0xb5 : [ 2, "lda", "zeropagex" ],
+0xb6 : [ 2, "ldx", "zeropagey" ],
+0xb8 : [ 1, "clv", "implicit" ],
+0xb9 : [ 3, "lda", "absolutey" ],
+0xba : [ 1, "tsx", "implicit" ],
+0xbc : [ 3, "ldy", "absolutex" ],
+0xbd : [ 3, "lda", "absolutex" ],
+0xbe : [ 3, "ldx", "absolutey" ],
+
+0xc0 : [ 2, "cpy", "immediate" ],
+0xc1 : [ 2, "cmp", "indirectx" ],
+0xc4 : [ 2, "cpy", "zeropage" ],
+0xc5 : [ 2, "cmp", "zeropage" ],
+0xc6 : [ 2, "dec", "zeropage" ],
+0xc8 : [ 1, "iny", "implicit" ],
+0xc9 : [ 2, "cmp", "immediate" ],
+0xca : [ 1, "dex", "implicit" ],
+0xcc : [ 3, "cpy", "absolute" ],
+0xcd : [ 3, "cmp", "absolute" ],
+0xce : [ 3, "dec", "absolute" ],
+
+0xd0 : [ 2, "bne", "relative", pcr ],
+0xd1 : [ 2, "cmp", "indirecty" ],
+0xd5 : [ 2, "cmp", "zeropagex" ],
+0xd6 : [ 2, "dec", "zeropagex" ],
+0xd8 : [ 1, "cld", "implicit" ],
+0xd9 : [ 3, "cmp", "absolutey" ],
+0xdd : [ 3, "cmp", "absolutex" ],
+0xde : [ 3, "dec", "absolutex" ],
+
+0xe0 : [ 2, "cpx", "immediate" ],
+0xe1 : [ 2, "sbc", "indirectx" ],
+0xe4 : [ 2, "cpx", "zeropage" ],
+0xe5 : [ 2, "sbc", "zeropage" ],
+0xe6 : [ 2, "inc", "zeropage" ],
+0xe8 : [ 1, "inx", "implicit" ],
+0xe9 : [ 2, "sbc", "immediate" ],
+0xea : [ 1, "nop", "implicit" ],
+0xec : [ 3, "cpx", "absolute" ],
+0xed : [ 3, "sbc", "absolute" ],
+0xee : [ 3, "inc", "absolute" ],
+
+0xf0 : [ 2, "beq", "relative", pcr ],
+0xf1 : [ 2, "sbc", "indirecty" ],
+0xf5 : [ 2, "sbc", "zeropagex" ],
+0xf6 : [ 2, "inc", "zeropagex" ],
+0xf8 : [ 1, "sed", "implicit" ],
+0xf9 : [ 3, "sbc", "absolutey" ],
+0xfd : [ 3, "sbc", "absolutex" ],
+0xfe : [ 3, "inc", "absolutex" ],
+
+}
+
+# End of processor specific code
+##########################################################################
diff --git a/6811.py b/6811.py
new file mode 100755
index 0000000..c249309
--- /dev/null
+++ b/6811.py
@@ -0,0 +1,44 @@
+##########################################################################
+#
+# Processor specific code
+
+CPU = "6811"
+Description = "FreeScale 68HC11 8-bit microcontroller."
+DataWidth = 8 # 8-bit data
+AddressWidth = 16 # 16-bit addresses
+
+# Maximum length of an instruction (for formatting purposes)
+maxLength = 5
+
+# Leadin bytes for multibyte instructions
+leadInBytes = [0x18, 0x1a, 0xcd]
+
+# Addressing mode table: maps each mode to the str.format() string for its operands.
+addressModeTable = {
+"inherent" : "",
+"immediate" : "#${0:02X}",
+"direct" : "${0:02X}",
+"extended" : "${0:02X}{1:02X}",
+"indirectx" : "(${0:02X}),X",
+"indirecty" : "(${0:02X}),Y",
+"relative" : "${0:04X}",
+}
+
+# Op Code Table (NOTE(review): only a few sample entries so far; verify them against the 68HC11 manual)
+# Key is numeric opcode (possibly multiple bytes)
+# Value is a list:
+# number of bytes in the instruction, including all opcode bytes
+# mnemonic
+# addressing mode (a key of addressModeTable)
+# flags (optional, e.g. pcr)
+opcodeTable = {
+0x00 : [ 1, "test", "inherent" ],
+0x01 : [ 1, "nop", "inherent" ],
+0x02 : [ 2, "ora", "direct" ],
+0x03 : [ 3, "jmp", "extended" ],
+0x183a : [ 2, "aby", "inherent" ],
+0x18a9 : [ 5, "adca", "indirecty" ],
+}
+
+# End of processor specific code
+##########################################################################
diff --git a/README.md b/README.md
index 1f75d58..26846cf 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,37 @@
-# udis
 Universal Disassembler program for 8-bit microprocessors
+
+This is a simple disassembler for various 8-bit microprocessors. It
+reads a binary file specified on the command line and produces a
+disassembly. It requires Python 3. It has been tested on Linux but
+should work on any platform that supports Python. See the source code
+for more details.
+
+The following CPUs are either supported or planned to be supported:
+
+| CPU   | Status      |
+| ----- | ----------- |
+| 1802  | planned     |
+| 6502  | implemented |
+| 65816 | planned     |
+| 65C02 | planned     |
+| 6800  | planned     |
+| 6809  | planned     |
+| 6811  | planned     |
+| 8008  | planned     |
+| 8051  | planned     |
+| F8    | planned     |
+| Z80   | planned     |
+
+    usage: udis.py [-h] [-c CPU] [-n] [-a ADDRESS] [-i] filename
+
+    positional arguments:
+      filename              Binary file to disassemble
+
+    optional arguments:
+      -h, --help            show this help message and exit
+      -c CPU, --cpu CPU     Specify CPU type (defaults to 6502)
+      -n, --nolist          Don't list instruction bytes (make output suitable for
+                            assembler)
+      -a ADDRESS, --address ADDRESS
+                            Specify decimal starting address (defaults to 0)
+      -i, --invalid         Show invalid opcodes as ??? rather than constants
diff --git a/udis.py b/udis.py
new file mode 100755
index 0000000..7bdb829
--- /dev/null
+++ b/udis.py
@@ -0,0 +1,215 @@
+#! /usr/bin/env python3
+#
+# Universal Disassembler
+# Copyright (c) 2013-2015 by Jeff Tranter
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import fileinput
+import argparse
+import signal
+
+# Flags (optional fourth element of an opcodeTable entry)
+
+
+pcr = 1 # operand is a PC-relative branch offset
+
+# Functions
+
+
+def isprint(c):
+    "Return True if c is between '@' and '~', the characters shown as literals"
+    # NOTE(review): narrower than printable ASCII (space to '~') - confirm intent.
+    return '@' <= c <= '~'
+
+
+# Avoids an error when output piped, e.g. to "less". SIGPIPE is not defined
+# on every platform (e.g. Windows), so only reset it where it exists.
+if hasattr(signal, "SIGPIPE"):
+    signal.signal(signal.SIGPIPE, signal.SIG_DFL)
+
+# TODO: Add command line option to show available CPUs.
+
+# Parse command line options
+parser = argparse.ArgumentParser()
+parser.add_argument("filename", help="Binary file to disassemble")
+parser.add_argument("-c", "--cpu", help="Specify CPU type (defaults to 6502)", default="6502")
+parser.add_argument("-n", "--nolist", help="Don't list instruction bytes (make output suitable for assembler)", action="store_true")
+parser.add_argument("-a", "--address", help="Specify decimal starting address (defaults to 0)", default=0, type=int)
+parser.add_argument("-i", "--invalid", help="Show invalid opcodes as ??? rather than constants", action="store_true")
+args = parser.parse_args()
+
+# Load CPU plugin based on command line option. NOTE: the file is exec()'d, so it must be trusted code.
+plugin = args.cpu + ".py"
+try:
+    with open(plugin) as pluginFile:
+        exec(pluginFile.read())
+except FileNotFoundError:
+    print(("error: cpu plugin file '{}' not found.".format(plugin)), file=sys.stderr)
+    sys.exit(1)
+
+# Get filename from command line arguments.
+filename = args.filename
+
+# Current instruction address. Silently force it to be in valid range.
+address = args.address & 0xffff
+
+# Any flags for current instruction.
+flags = 0
+# Contains a line of output.
+line = ""
+
+# Open input file.
+# Display error and exit if filename does not exist.
+try:
+    with open(filename, "rb") as f:
+        # Read the whole image up front: the file is closed promptly and the
+        # decoder can check that operand bytes exist before using them.
+        data = f.read()
+except FileNotFoundError:
+    print(("error: input file '{}' not found.".format(filename)), file=sys.stderr)
+    sys.exit(1)
+
+
+def relativeTarget(address, offset):
+    "Return the target address of a two-byte branch located at address"
+    # offset is the raw operand byte, a signed 8-bit displacement from the
+    # address of the following instruction. The result wraps around at 64K.
+    if offset >= 128:
+        offset -= 256
+    return (address + 2 + offset) & 0xffff
+
+
+# Variables:
+# address - current instruction address
+# data - contents of the input file
+# pos - offset within data of the current instruction
+# opcode - binary instruction opcode (may be multiple bytes)
+# length - length of current instruction, including the opcode bytes
+# mnemonic - assembler mnemonic for current instruction
+# format - operand format string
+# operands - operand bytes of current instruction
+# flags - optional flags of current instruction (e.g. pcr)
+# line - line to output
+# leadin - extended opcode (true/false)
+pos = 0
+
+# Print initial origin address
+if args.nolist is False:
+    print("{0:04X} .org ${1:04X}".format(address, address))
+else:
+    print(" .org ${0:04X}".format(address))
+
+# Main loop: decode and print one instruction per iteration. pos advances by
+# the length of each instruction, so the loop stops at the end of the input.
+try:
+    while pos < len(data):
+        # Get op code
+        opcode = data[pos]
+
+        # Handle if opcode is a leadin byte: the next byte extends the opcode.
+        leadin = opcode in leadInBytes and pos + 1 < len(data)
+        if leadin:
+            opcode = (opcode << 8) + data[pos + 1]
+
+        # Given opcode, get data from opcode table and address mode table for CPU.
+        # An instruction whose bytes would run past the end of the input is
+        # treated as invalid instead of being decoded from missing operands.
+        entry = opcodeTable.get(opcode)
+        if entry is not None and pos + entry[0] <= len(data):
+            length = entry[0]
+            mnemonic = entry[1]
+            mode = entry[2]
+            if len(entry) > 3:
+                flags = entry[3] # Get optional flags
+            else:
+                flags = 0
+            if mode in addressModeTable:
+                format = addressModeTable[mode]
+            else:
+                print(("error: mode '{}' not found in addressModeTable.".format(mode)), file=sys.stderr)
+                sys.exit(1)
+        else:
+            # Invalid opcode: output only the first byte as data, so the byte
+            # after an unrecognized leadin is decoded as its own instruction.
+            leadin = False
+            opcode = data[pos]
+            length = 1
+            format = ""
+            mnemonic = "???"
+            flags = 0
+
+        # Operand bytes follow the one or two opcode bytes.
+        if leadin:
+            operands = list(data[pos + 2:pos + length])
+        else:
+            operands = list(data[pos + 1:pos + length])
+
+# Disassembly format:
+# XXXX XX XX XX XX XX nop ($1234,X)
+# With --nolist option:
+# nop ($1234,X)
+
+        # Add current address and instruction bytes to output line, padding
+        # the byte columns to maxLength so that the mnemonics line up.
+        if args.nolist is False:
+            line += "{0:04X}".format(address)
+            for b in data[pos:pos + length]:
+                line += " {0:02X}".format(b)
+            line += "   " * (maxLength - length)
+
+        # Handle relative addresses. Indicated by the flag pcr being set.
+        # The offset byte is replaced by the absolute branch target, which the
+        # relative addressing mode formats as a 16-bit address.
+        # TODO: Needs changes if more flags are added.
+        if flags == pcr and operands:
+            operands[0] = relativeTarget(address, operands[0])
+
+        # Format the operand using format string and any operands.
+        operand = format.format(*operands)
+
+        # Special check for invalid op code. Display as ??? or .byte depending on command line option.
+        if mnemonic == "???" and not args.invalid:
+            if isprint(chr(opcode)):
+                mnemonic = ".byte '{0:c}'".format(opcode)
+            else:
+                mnemonic = ".byte ${0:02X}".format(opcode)
+
+        # Need one more space if not in no list mode.
+        if args.nolist is False:
+            line += " "
+
+        # Add mnemonic and any operands to the output line.
+        if operand == "":
+            line += " {0:s}".format(mnemonic)
+        else:
+            line += " {0:s} {1:s}".format(mnemonic, operand)
+
+        # Print line of output
+        print(line)
+
+        # Update address, handling wraparound at 64K.
+        address = (address + length) & 0xffff
+        pos += length
+
+        # Reset variables for next line of output.
+        line = ""
+        flags = 0
+
+    # End of input reached.
+    if args.nolist is False:
+        print("{0:04X} end".format(address))
+
+except KeyboardInterrupt:
+    print("Interrupted by Control-C", file=sys.stderr)