#!/usr/bin/env python3
# mirror of https://github.com/uffejakobsen/acme.git
# (synced 2025-01-09 13:30:54 +00:00)
import sys
def _normalize_literal(part):
    "rewrite a finished string literal for output; all other parts pass through unchanged"
    if part[0] not in "\"'":
        return part
    # use singlequotes for 1-char strings
    if len(part) == 3:
        part = "'" + part[1] + "'"
    # escape backslash, singlequote, doublequote
    if part == "'\\'":
        return "'\\\\'"
    if part == "'''":
        return "'\\''"
    if part == '"\\""':
        # a string holding only one (escaped) doublequote is a 1-char string as well
        return "'\"'"
    return part


def line_preprocess(line):
    """Split a line into comment, strings and everything else.

    The line is scanned once, character by character:

    * text outside of string literals is collected into "code" parts, with
      runs of blanks collapsed to a single blank (tabs are kept as they are);
      a part consisting of a single blank is dropped when a string literal
      or the comment starts right after it;
    * every string literal becomes a part of its own, including its quotes;
    * everything from the first ';' outside of a string literal up to the
      end of the line is the comment.

    String literals are rewritten on the way.  A quote character that is
    doubled inside a literal ("a""b" really means a"b) is turned into a
    single backslash-escaped quote.  Literals holding exactly one character
    are written with singlequotes, with backslash and singlequote escaped
    by a backslash.

    Args:
        line: one line of input, without its line ending.

    Returns:
        A tuple (parts, comment).  parts is the list of code and string
        parts in input order; comment is the comment text including the
        leading ';', or None if the line has no comment.

    Raises:
        ValueError: if a string literal is still open at the end of line.
    """
    result = []
    part = ""
    comment = None
    quotes = None   # quote character of the literal being read, if any
    closed = None   # quote character of a literal that ended on the previous character
    merge = False   # does the literal being read continue the previous literal?
    for pos, char in enumerate(line):
        # are we inside quotes?
        if quotes:
            part += char
            if char == quotes:
                # end of quotes
                if merge:
                    # the previous literal was closed by the first half of a
                    # doubled quote, so both literals are really one.
                    # literals are kept in raw form until the end of the
                    # line, otherwise a 1-char literal would already have
                    # lost its original quotes here.
                    part = result.pop()[:-1] + "\\" + part
                result.append(part)
                part = ""
                closed = quotes
                quotes = None
            continue
        # not in quotes.
        # only a quote that *directly* follows a closing quote of the same
        # kind is a doubled quote; anything in between separates two strings.
        merge = (char == closed)
        closed = None
        if char == '"' or char == "'":
            # new quotes, so finish old part
            if part and part != ' ':
                result.append(part)
            # ...and start new one
            part = char
            quotes = char
            continue
        # comment?
        if char == ';':
            # finish old part
            if part and part != ' ':
                result.append(part)
            part = ""
            # the comment takes the whole rest of the line
            comment = line[pos:]
            break
        # skip blanks after blank, keep all other characters
        if not (char == ' ' and part.endswith(' ')):
            part += char
    # quotes still open at end of line?
    if quotes:
        raise ValueError("Unterminated string constant in input data")
    # append last part
    if part:
        result.append(part)
    return [_normalize_literal(item) for item in result], comment
def single_out(items, substring):
    """Make every occurrence of substring an item of its own.

    Any item containing substring is split into the text before it, the
    substring itself and the text after it; the text after it is then
    searched again.  Empty pieces are dropped, so no empty strings are
    added to the result.  Items not containing substring are copied as
    they are (empty items are dropped as well).

    Args:
        items: iterable of strings.
        substring: non-empty string to single out.

    Returns:
        A new list of strings; items itself is not modified.
    """
    result = []
    for item in items:
        rest = item
        while substring in rest:
            head, _, rest = rest.partition(substring)
            if head:
                result.append(head)
            result.append(substring)
        # whatever follows the last occurrence (or the untouched item)
        if rest:
            result.append(rest)
    return result
def unquoted_tokenize(part):
    """Split part into tokens (so do not pass string literals!).

    The text is split at whitespace first (all whitespace is thrown away),
    then each of the characters , / = + - * is made a token of its own.

    Args:
        part: a piece of a source line that is not a string literal.

    Returns:
        The list of tokens in input order; empty if part holds nothing
        but whitespace.
    """
    # split at spaces (and throw away all spaces)
    items = part.split()
    # split at commas, operators, ...
    for separator in (',', '/', '=', '+', '-', '*'):
        items = single_out(items, separator)
    return items
# mnemonics that convert_opcodes() passes through as they are (only a
# leading tab is added); whatever follows them is not touched
opcodes_to_keep = [
    # std 6502:
    "brk", "rti", "rts", "nop",
    "php", "plp", "pha", "pla",
    "bpl", "bmi", "bvc", "bvs", "bcc", "bcs", "bne", "beq",
    "clc", "sec", "cli", "sei", "clv", "cld", "sed",
    "dex", "dey", "inx", "iny",
    "tax", "tay", "txa", "tya", "tsx", "txs",
    # new in 65c02:
    "phx", "plx", "phy", "ply", "bra"  # inc, dec
]

# mnemonics that get a leading tab after convert_opcodes() has stripped and
# converted an addressing mode suffix (if there was one)
opcodes_with_arg = [
    # std 6502:
    "ora", "and", "eor", "adc", "sta", "lda", "cmp", "sbc",
    "asl", "rol", "lsr", "ror", "dec", "inc",
    "ldx", "stx", "cpx", "ldy", "sty", "cpy",
    "jsr", "jmp", "bit",
    # new in 65c02:
    "tsb", "trb", "stz"
]

# tokens that process_code() replaces wherever they occur outside of string
# literals (tokens are already lower case at that point)
token_substitutions = {
    ".": "*",  # program counter
    ":not:": "not",  # operator
    ":eor:": "xor",  # operator
    ":msb:": ">",  # operator?
}

# first tokens of a statement that convert_opcodes() replaces as a whole;
# the remaining tokens of the statement are kept
opcodes_to_replace = {
    "org": "*=",
    "cpu": ";!cpu",  # TODO: support properly!
    "=": "!tx",
    "$": "!wo",  # actually & instead of $, but substitution was done earlier
    "end": "!eof",
    "assert": "+assert",
    "lnk": ";!source",  # TODO: support properly!
    "asla": "\tasl",
    "lsra": "\tlsr",
    "rola": "\trol",
    "rora": "\tror",
    "dea": "\tdec",  # 65c02
    "ina": "\tinc",  # 65c02
}

# mnemonics that convert_opcodes() renames after the addressing mode suffix
# has been stripped
opcodes_to_rename = {
    "clr": "stz"  # 65c02
}
def convert_opcodes(parts):
    """Convert mnemonics and pseudo opcodes.

    The first token is taken as the (pseudo) opcode, all following tokens
    as its operand:

    * opcodes listed in opcodes_to_keep are kept, with a leading tab;
    * opcodes listed in opcodes_to_replace are replaced as a whole;
    * otherwise everything after the third character is taken as an
      addressing mode suffix (im, ax, zx, ay, zy, xi, iy, i).  A known
      suffix is removed and the operand tokens are decorated accordingly;
      an unknown suffix leaves the opcode as it is.  "jmi" and "jmix" are
      handled as "jmp" with suffix "i" and "xi";
    * finally the opcode is renamed if listed in opcodes_to_rename and
      gets a leading tab if listed in opcodes_with_arg.

    An opcode with a suffix but without any operand tokens is not
    converted, as there is nothing the suffix could be applied to.

    Args:
        parts: list of tokens of one statement, opcode first.  May be empty.

    Returns:
        The converted list of tokens.  parts itself is not modified (an
        empty list is returned as it is).
    """
    if not parts:
        return parts
    op = parts[0]
    parts = parts[1:]   # this is a copy, so the caller's list stays untouched
    if op in opcodes_to_keep:
        return ["\t" + op] + parts
    if op in opcodes_to_replace:
        return [opcodes_to_replace[op]] + parts
    # the indirect jumps do not follow the "mnemonic + suffix" scheme,
    # so make them look as if they did
    if op == "jmi":
        op = "jmpi"
    elif op == "jmix":
        op = "jmpxi"
    # convert addressing modes (needs at least one operand token to work on)
    if len(op) > 3 and parts:
        am = op[3:]
        known_mode = True
        if am == "im":
            first = parts[0]
            if first == '/':
                # the '/' token itself is replaced
                # NOTE(review): presumably '/' selects the high byte in MASM - confirm
                parts[0] = "#>"
            elif 'a' <= first[0] <= 'z':
                # operand starts with a letter (tokens are lower case already)
                parts[0] = "#< " + first
            else:
                parts[0] = "#" + first
        elif am == "ax" or am == "zx":
            parts[-1] = parts[-1] + ", x"
        elif am == "ay" or am == "zy":
            parts[-1] = parts[-1] + ", y"
        elif am == "xi":
            parts[0] = "(" + parts[0]
            parts[-1] = parts[-1] + ", x)"
        elif am == "iy":
            parts[0] = "(" + parts[0]
            parts[-1] = parts[-1] + "), y"
        elif am == "i":
            parts[0] = "(" + parts[0]
            parts[-1] = parts[-1] + ")"
        else:
            # not an addressing mode, so this is just a long name
            known_mode = False
        if known_mode:
            op = op[:3]
    # convert
    if op in opcodes_to_rename:
        op = opcodes_to_rename[op]
    if op in opcodes_with_arg:
        return ["\t" + op] + parts
    return [op] + parts
def process_code(parts):
    """Split code parts at special characters and convert them.

    String literals (parts starting with a quote character) are kept as
    they are.  All other parts are converted to lower case, '&' is
    replaced by '$', and the text is split into tokens; tokens listed in
    token_substitutions are then replaced.

    What happens next depends on whether the first part starts with a
    space:

    * if so, all tokens form a statement and go through convert_opcodes();
    * if not and the second token is "*", the line is a symbol definition:
      the first token gets "<tab>=" appended and the tokens after the "*"
      go through convert_opcodes();
    * otherwise the first token is a label and the remaining tokens go
      through convert_opcodes().

    Args:
        parts: list of code and string parts of one line, as returned by
            line_preprocess().

    Returns:
        A tuple (label, tokens).  label is the label (followed by a tab if
        tokens is not empty), or "" if the line does not have one; tokens
        is the list of converted tokens.  Lines without any tokens give
        ("", []).
    """
    # remember if line starts with space, because now spaces are dropped
    indented = bool(parts) and parts[0].startswith(" ")
    tokens = []
    for part in parts:
        # do not process quoted strings any further
        if part.startswith(("'", '"')):
            tokens.append(part)
            continue
        # convert to lower case and substitute: & becomes $
        part = part.lower().replace("&", "$")
        # all other parts are split up into tokens
        tokens.extend(unquoted_tokenize(part))
    # convert some tokens (string literals are not in danger, as they include quotes)
    tokens = [token_substitutions.get(token, token) for token in tokens]
    # nothing but whitespace (e.g. a line holding only a tab)? then there is
    # neither a label nor a statement
    if not tokens:
        return "", []
    # now convert
    if indented:
        # code
        return "", convert_opcodes(tokens)
    if len(tokens) > 1 and tokens[1] == "*":
        # symbol definition
        symdef = tokens[0] + "\t="
        return "", [symdef] + convert_opcodes(tokens[2:])
    # label
    label = tokens[0]
    result = convert_opcodes(tokens[1:])
    if result:
        label = label + "\t"
    return label, result
def process_line(line):
    """Process a single line of input and return converted version.

    Args:
        line: one line of input, with or without line ending.

    Returns:
        The converted line, always terminated by a single "\\n".

    Raises:
        Whatever line_preprocess() raises for an unterminated string
        literal.
    """
    # remove line ending, if there is one. don't care if NL or CR or combination
    line = line.rstrip("\r\n")
    # step 1: split into strings, comments and everything else
    codeparts, comment = line_preprocess(line)
    # step 2: if there is anything before comment, process that
    if codeparts:
        prefix, codeparts = process_code(codeparts)
        # reassemble line
        line = prefix + " ".join(codeparts)
    else:
        line = ""
    # step 3: the comment is passed on unchanged
    if comment:
        line += comment
    return line + "\n"
def convert_file(input, output):
    """Convert input file to output file line-by-line.

    The output starts with a ";ACME 0.97" line, followed by one converted
    line for every line of input.

    Args:
        input: name of the file to read (the parameter name shadows the
            builtin, but is kept for callers passing it by keyword).
        output: name of the file to write; an existing file is overwritten.
    """
    with open(input, "rt") as infile, open(output, "wt") as outfile:
        outfile.write(";ACME 0.97\n")
        outfile.writelines(process_line(line) for line in infile)
# script entry point: expects exactly two arguments, input file and output file
if __name__ == '__main__':
    if len(sys.argv) != 3:
        # sys.exit() with a string prints it to stderr and exits with status 1
        sys.exit(
            "Error: wrong number of arguments\n"
            "\n"
            "masm2acme.py converts a file from MASM to ACME syntax.\n"
            "Usage: masm2acme.py INPUTFILE OUTPUTFILE\n"
        )
    convert_file(sys.argv[1], sys.argv[2])