#!/usr/bin/env python3
"""Convert assembler source from MASM syntax to ACME syntax, line by line."""
import sys


def _finish_string(part):
    """Return the output form of a complete quoted string (quotes included).

    One-character strings are rewritten with single quotes; a lone backslash,
    single quote or double quote is written with a backslash escape.
    """
    # use singlequotes for 1-char strings
    if len(part) == 3:
        part = "'" + part[1] + "'"
    # escape backslash, singlequote, doublequote
    if part == "'\\'":
        return "'\\\\'"
    if part == "'''":
        return "'\\''"
    if part == '"\\""':
        return "'\"'"
    return part


def line_preprocess(line):
    """Split a line into comment, strings and everything else.

    Returns a tuple ``(parts, comment)``.  ``parts`` is a list of strings in
    input order: quoted strings (still including their quotes) and the
    unquoted text between them, with runs of blanks collapsed to one blank.
    ``comment`` is the text from the first unquoted ';' to the end of the
    line, or None if there is no comment.

    A doubled quote character inside a string ("a""b" really means a"b) is
    emitted as a backslash-escaped quote.

    Raises ValueError if a string is still open at the end of the line.
    """
    result = []
    part = ""
    quotes = None  # the quote character of the string we are inside, if any
    pos = 0
    end = len(line)
    while pos < end:
        char = line[pos]
        pos += 1
        # are we inside quotes?
        if quotes:
            if char == quotes and pos < end and line[pos] == quotes:
                # doubled quote: this is an escaped quote character, not the
                # end of the string. Look-ahead is needed because the string
                # may only be finalized once we know where it really ends.
                part += "\\" + char
                pos += 1
                continue
            part += char
            if char == quotes:
                # end of quotes: move quoted string to result list
                result.append(_finish_string(part))
                part = ""
                quotes = None
            continue
        # not in quotes
        if char == '"' or char == "'":
            # new quotes, so finish old part (a lone blank is dropped)...
            if part and part != ' ':
                result.append(part)
            # ...and start new one
            part = char
            quotes = char
            continue
        # comment?
        if char == ';':
            # finish old part; everything from ';' onwards is the comment
            if part and part != ' ':
                result.append(part)
            return result, line[pos - 1:]
        # skip blanks after blank, keep all other characters
        if not (char == ' ' and part.endswith(' ')):
            part += char
    # quotes still open at end of line?
    if quotes:
        raise ValueError("Unterminated string constant in input data")
    # append last part
    if part:
        result.append(part)
    return result, None


def single_out(items, substring):
    """Split every item containing substring into separate list entries.

    Each occurrence of ``substring`` becomes its own entry, surrounded by the
    text before and after it.  Empty pieces are dropped.
    """
    result = []
    for item in items:
        rest = item
        while substring in rest:
            before, _, rest = rest.partition(substring)
            if before:
                result.append(before)
            result.append(substring)
        if rest:
            result.append(rest)
    return result


def unquoted_tokenize(part):
    """Split part into tokens (so do not pass string literals!)."""
    # split at whitespace (and throw away all whitespace)
    items = part.split()
    # then make each of these characters a token of its own
    for separator in (',', '/', '=', '+', '-', '*'):
        items = single_out(items, separator)
    return items


# mnemonics that are copied unchanged (prefixed with a tab)
opcodes_to_keep = [
    # std 6502:
    "brk", "rti", "rts", "nop",
    "php", "plp", "pha", "pla",
    "bpl", "bmi", "bvc", "bvs", "bcc", "bcs", "bne", "beq",
    "clc", "sec", "cli", "sei", "clv", "cld", "sed",
    "dex", "dey", "inx", "iny",
    "tax", "tay", "txa", "tya", "tsx", "txs",
    # new in 65c02:
    "phx", "plx", "phy", "ply", "bra"
    # inc, dec
]

# mnemonics that are written with a leading tab once any addressing mode
# suffix has been stripped
opcodes_with_arg = [
    # std 6502:
    "ora", "and", "eor", "adc", "sta", "lda", "cmp", "sbc",
    "asl", "rol", "lsr", "ror", "dec", "inc",
    "ldx", "stx", "cpx", "ldy", "sty", "cpy",
    "jsr", "jmp", "bit",
    # new in 65c02:
    "tsb", "trb", "stz"
]

# tokens that are replaced wherever they occur outside of string literals
token_substitutions = {
    ".": "*",  # program counter
    ":not:": "not",  # operator
    ":eor:": "xor",  # operator
    ":msb:": ">",  # operator?
}

# first-token replacements (pseudo opcodes and accumulator shorthands)
opcodes_to_replace = {
    "org": "*=",
    "cpu": ";!cpu",  # TODO: support properly!
    "=": "!tx",
    "$": "!wo",  # actually & instead of $, but substitution was done earlier
    "end": "!eof",
    "assert": "+assert",
    "lnk": ";!source",  # TODO: support properly!
    "asla": "\tasl",
    "lsra": "\tlsr",
    "rola": "\trol",
    "rora": "\tror",
    "dea": "\tdec",  # 65c02
    "ina": "\tinc",  # 65c02
}

# mnemonics that get a different name
opcodes_to_rename = {
    "clr": "stz"  # 65c02
}


def convert_opcodes(parts):
    """Convert mnemonics and pseudo opcodes.

    ``parts`` is a list of tokens whose first entry is the (pseudo) opcode.
    Returns a new token list; the caller's list is not modified.  An empty
    list is returned as is.
    """
    if not parts:
        return parts
    op = parts[0]
    parts = parts[1:]  # copy, so the operands can be edited in place
    if op in opcodes_to_keep:
        return ["\t" + op] + parts
    if op in opcodes_to_replace:
        return [opcodes_to_replace[op]] + parts
    # these two do not follow the "mnemonic + suffix" scheme, so bring them
    # into that form first
    if op == "jmi":
        op = "jmpi"
    elif op == "jmix":
        op = "jmpxi"
    # convert addressing modes: characters after the first three are an
    # addressing mode suffix. A suffix is only meaningful if there is an
    # operand to apply it to; without operands the name is left untouched
    # instead of failing on the missing operand.
    if len(op) > 3 and parts:
        mode = op[3:]
        known_mode = True
        if mode == "im":
            if parts[0] == '/':
                parts[0] = "#>"
            elif 'a' <= parts[0][0] <= 'z':
                parts[0] = "#< " + parts[0]
            else:
                parts[0] = "#" + parts[0]
        elif mode in ("ax", "zx"):
            parts[-1] += ", x"
        elif mode in ("ay", "zy"):
            parts[-1] += ", y"
        elif mode == "xi":
            parts[0] = "(" + parts[0]
            parts[-1] += ", x)"
        elif mode == "iy":
            parts[0] = "(" + parts[0]
            parts[-1] += "), y"
        elif mode == "i":
            parts[0] = "(" + parts[0]
            parts[-1] += ")"
        else:
            known_mode = False
        if known_mode:
            op = op[:3]
    # convert
    if op in opcodes_to_rename:
        op = opcodes_to_rename[op]
    if op in opcodes_with_arg:
        return ["\t" + op] + parts
    return [op] + parts


def process_code(parts):
    """Tokenize and convert the code parts of one line.

    ``parts`` is the part list returned by line_preprocess().  Returns a
    tuple ``(prefix, tokens)``: ``prefix`` is the label (followed by a tab
    if code follows it) or an empty string, ``tokens`` is the list of
    converted tokens.
    """
    if not parts:
        return "", []
    # remember if line starts with whitespace, because whitespace is dropped
    # by the tokenizer. A leading tab counts as indentation just like a blank.
    indented = parts[0][0] in " \t"
    tokens = []
    for part in parts:
        # do not process quoted strings any further
        if part.startswith(("'", '"')):
            tokens.append(part)
            continue
        # convert to lower case and substitute: & becomes $
        part = part.lower().replace("&", "$")
        # all other parts are split up into tokens
        tokens.extend(unquoted_tokenize(part))
    # convert some tokens (string literals are not in danger, as they
    # include quotes)
    tokens = [token_substitutions.get(token, token) for token in tokens]
    # whitespace-only code (e.g. a tab in front of a comment): nothing to do
    if not tokens:
        return "", []
    if indented:
        # code
        return "", convert_opcodes(tokens)
    # label or symbol definition
    if len(tokens) > 1 and tokens[1] == "*":
        # symbol definition
        return "", [tokens[0] + "\t="] + convert_opcodes(tokens[2:])
    # label, possibly followed by code
    label = tokens[0]
    code = convert_opcodes(tokens[1:])
    if code:
        label += "\t"
    return label, code


def process_line(line):
    """Process a single line of input and return the converted version.

    The returned line always ends with a single "\\n".
    """
    # remove line ending, if there is one. don't care if NL or CR or
    # combination
    line = line.rstrip("\r\n")
    # step 1: split into strings, comments and everything else
    codeparts, comment = line_preprocess(line)
    # step 2: if there is anything before comment, process that
    if codeparts:
        prefix, codeparts = process_code(codeparts)
        # reassemble line
        line = prefix + " ".join(codeparts)
    else:
        line = ""
    if comment:
        line += comment
    return line + "\n"


def convert_file(input, output):
    """Convert input file to output file line-by-line.

    ``input`` and ``output`` are file names.  The output starts with a
    ";ACME 0.97" header line.
    """
    with open(input, "rt") as infile, open(output, "wt") as outfile:
        outfile.write(";ACME 0.97\n")
        for line in infile:
            outfile.write(process_line(line))


if __name__ == '__main__':
    if len(sys.argv) != 3:
        sys.exit(
            "Error: wrong number of arguments\n"
            "\n"
            "masm2acme.py converts a file from MASM to ACME syntax.\n"
            "Usage: masm2acme.py INPUTFILE OUTPUTFILE\n"
        )
    convert_file(sys.argv[1], sys.argv[2])