mirror of
https://github.com/uffejakobsen/acme.git
synced 2024-11-22 03:30:46 +00:00
added python script to convert MASM to ACME syntax.
git-svn-id: https://svn.code.sf.net/p/acme-crossass/code-0/trunk@319 4df02467-bbd4-4a76-a152-e7ce94205b78
This commit is contained in:
parent
550d1a3976
commit
836227f9da
277
contrib/toacme/masm2acme.py
Executable file
277
contrib/toacme/masm2acme.py
Executable file
@ -0,0 +1,277 @@
|
||||
#!/usr/bin/env python3
|
||||
import sys
|
||||
|
||||
def line_preprocess(line):
|
||||
"split line into comment, strings and everything else"
|
||||
result = []
|
||||
part = ""
|
||||
comment = None
|
||||
quotes = None
|
||||
for char in line:
|
||||
# are we inside comment?
|
||||
if comment:
|
||||
comment += char
|
||||
continue
|
||||
# are we inside quotes?
|
||||
if quotes:
|
||||
part += char
|
||||
if char == quotes:
|
||||
# end of quotes
|
||||
# if previous part was also quoted, we have to combine them,
|
||||
# because "a""b" really means a"b
|
||||
if result and result[-1][-1] == quotes:
|
||||
part = result[-1][:-1] + "\\" + part
|
||||
result.pop()
|
||||
# use singlequotes for 1-char strings
|
||||
if len(part) == 3:
|
||||
part = "'" + part[1] + "'"
|
||||
# escape backslash, singlequote, doublequote
|
||||
if part == "'\\'":
|
||||
part = "'\\\\'"
|
||||
elif part == "'''":
|
||||
part = "'\\''"
|
||||
elif part == '"\\""':
|
||||
part = "'\"'"
|
||||
result.append(part) # move quoted string to result list
|
||||
part = ""
|
||||
quotes = None
|
||||
continue
|
||||
# not in quotes
|
||||
if char == '"' or char == "'":
|
||||
# new quotes, so finish old part
|
||||
if part and part != ' ':
|
||||
result.append(part)
|
||||
# ...and start new one
|
||||
part = char
|
||||
quotes = char
|
||||
continue
|
||||
# comment?
|
||||
if char == ';':
|
||||
# finish old part
|
||||
if part and part != ' ':
|
||||
result.append(part)
|
||||
part = ""
|
||||
# ...and start comment
|
||||
comment = char
|
||||
continue
|
||||
## tab-to-space:
|
||||
#if char == '\t':
|
||||
# char = ' '
|
||||
# skip blanks after blank
|
||||
if part.endswith(' ') and char == ' ':
|
||||
pass
|
||||
else:
|
||||
# all other characters:
|
||||
part += char
|
||||
# quotes still open at end of line?
|
||||
if quotes:
|
||||
raise Exception("Unterminated string constant in input data")
|
||||
# append last part
|
||||
if part:
|
||||
result.append(part)
|
||||
return result, comment
|
||||
|
||||
def single_out(items, substring):
|
||||
"split any item containing substring into first part, substring, second part. empty parts are dropped."
|
||||
result = []
|
||||
for i in items:
|
||||
while substring in i:
|
||||
parts = i.partition(substring)
|
||||
if parts[0]:
|
||||
result.append(parts[0])
|
||||
result.append(substring)
|
||||
i = parts[2]
|
||||
if i:
|
||||
result.append(i)
|
||||
return result
|
||||
|
||||
def unquoted_tokenize(part):
|
||||
"split part into tokens (so do not pass string literals!)"
|
||||
# split at spaces (and throw away all spaces)
|
||||
items = part.split()
|
||||
# split at commas, braces, ...
|
||||
items = single_out(items, ',')
|
||||
items = single_out(items, '/')
|
||||
items = single_out(items, '=')
|
||||
items = single_out(items, '+')
|
||||
items = single_out(items, '-')
|
||||
items = single_out(items, '*')
|
||||
return items
|
||||
|
||||
opcodes_to_keep = [
|
||||
# std 6502:
|
||||
"brk", "rti", "rts", "nop",
|
||||
"php", "plp", "pha", "pla",
|
||||
"bpl", "bmi", "bvc", "bvs", "bcc", "bcs", "bne", "beq",
|
||||
"clc", "sec", "cli", "sei", "clv", "cld", "sed",
|
||||
"dex", "dey", "inx", "iny",
|
||||
"tax", "tay", "txa", "tya", "tsx", "txs",
|
||||
# new in 65c02:
|
||||
"phx", "plx", "phy", "ply", "bra" # inc, dec
|
||||
]
|
||||
|
||||
opcodes_with_arg = [
|
||||
# std 6502:
|
||||
"ora", "and", "eor", "adc", "sta", "lda", "cmp", "sbc",
|
||||
"asl", "rol", "lsr", "ror", "dec", "inc",
|
||||
"ldx", "stx", "cpx", "ldy", "sty", "cpy",
|
||||
"jsr", "jmp", "bit",
|
||||
# new in 65c02:
|
||||
"tsb", "trb", "stz"
|
||||
]
|
||||
|
||||
token_substitutions = {
|
||||
".": "*", # program counter
|
||||
":not:": "not", # operator
|
||||
":eor:": "xor", # operator
|
||||
":msb:": ">", # operator?
|
||||
}
|
||||
|
||||
opcodes_to_replace = {
|
||||
"org": "*=",
|
||||
"cpu": ";!cpu", # TODO: support properly!
|
||||
"=": "!tx",
|
||||
"$": "!wo", # actually & instead of $, but substitution was done earlier
|
||||
"end": "!eof",
|
||||
"assert": "+assert",
|
||||
"lnk": ";!source", # TODO: support properly!
|
||||
"asla": "\tasl",
|
||||
"lsra": "\tlsr",
|
||||
"rola": "\trol",
|
||||
"rora": "\tror",
|
||||
"dea": "\tdec", # 65c02
|
||||
"ina": "\tinc", # 65c02
|
||||
}
|
||||
|
||||
opcodes_to_rename = {
|
||||
"clr": "stz" # 65c02
|
||||
}
|
||||
|
||||
def convert_opcodes(parts):
|
||||
"convert mnemonics and pseudo opcodes"
|
||||
if not parts:
|
||||
return parts
|
||||
op = parts[0]
|
||||
parts = parts[1:]
|
||||
if op in opcodes_to_keep:
|
||||
return ["\t" + op] + parts
|
||||
if op in opcodes_to_replace:
|
||||
return [opcodes_to_replace[op]] + parts
|
||||
# wtf?!
|
||||
if op == "jmi":
|
||||
op = "jmpi"
|
||||
elif op == "jmix":
|
||||
op = "jmpxi"
|
||||
# convert addressing modes
|
||||
if len(op) > 3:
|
||||
oldop = op
|
||||
am = op[3:]
|
||||
op = op[:3]
|
||||
if am == "im":
|
||||
if parts[0] == '/':
|
||||
parts[0] = "#>"
|
||||
elif parts[0][0] >= 'a' and parts[0][0] <= 'z':
|
||||
parts[0] = "#< " + parts[0]
|
||||
else:
|
||||
parts[0] = "#" + parts[0]
|
||||
elif am == "ax" or am == "zx":
|
||||
parts[-1] = parts[-1] + ", x"
|
||||
elif am == "ay" or am == "zy":
|
||||
parts[-1] = parts[-1] + ", y"
|
||||
elif am == "xi":
|
||||
parts[0] = "(" + parts[0]
|
||||
parts[-1] = parts[-1] + ", x)"
|
||||
elif am == "iy":
|
||||
parts[0] = "(" + parts[0]
|
||||
parts[-1] = parts[-1] + "), y"
|
||||
elif am == "i":
|
||||
parts[0] = "(" + parts[0]
|
||||
parts[-1] = parts[-1] + ")"
|
||||
else:
|
||||
op = oldop
|
||||
# convert
|
||||
if op in opcodes_to_rename:
|
||||
op = opcodes_to_rename[op]
|
||||
if op in opcodes_with_arg:
|
||||
return ["\t" + op] + parts
|
||||
return [op] + parts
|
||||
|
||||
def process_code(parts):
|
||||
"split code parts at special characters"
|
||||
prefix = ""
|
||||
# remember if line starts with space
|
||||
indented = (parts[0][0] == " ")
|
||||
# because now spaces are dropped
|
||||
result = []
|
||||
for part in parts:
|
||||
# do not process quoted strings any further
|
||||
if part.startswith("'") or part.startswith('"'):
|
||||
result.append(part)
|
||||
continue
|
||||
# convert to lower case
|
||||
part = part.lower()
|
||||
# substitute: & becomes $
|
||||
part = "$".join(part.split("&"))
|
||||
# all other parts are split up into tokens
|
||||
result.extend(unquoted_tokenize(part))
|
||||
# convert some tokens (string literals are not in danger, as they include quotes)
|
||||
parts = result
|
||||
result = []
|
||||
for part in parts:
|
||||
if part in token_substitutions:
|
||||
part = token_substitutions[part]
|
||||
result.append(part)
|
||||
# now convert
|
||||
label = ""
|
||||
if indented:
|
||||
# code
|
||||
result = convert_opcodes(result)
|
||||
else:
|
||||
# label or symbol definition
|
||||
if len(result) > 1 and result[1] == "*":
|
||||
# symbol definition
|
||||
symdef = result[0] + "\t="
|
||||
result = [symdef] + convert_opcodes(result[2:])
|
||||
else:
|
||||
# label
|
||||
label = result[0]
|
||||
result = convert_opcodes(result[1:])
|
||||
if result:
|
||||
label = label + "\t"
|
||||
return label, result
|
||||
|
||||
def process_line(line):
|
||||
"process a single line of input and return converted version"
|
||||
# remove line ending, if there is one. don't care if NL or CR or combination
|
||||
while len(line) != 0 and (line[-1] == "\r" or line[-1] == "\n"):
|
||||
line = line[:-1]
|
||||
# step 1: split into strings, comments and everything else
|
||||
codeparts, comment = line_preprocess(line)
|
||||
# step 2: if there is anything before comment, process that
|
||||
if codeparts:
|
||||
prefix, codeparts = process_code(codeparts)
|
||||
# reassemble line
|
||||
line = prefix + " ".join(codeparts)
|
||||
else:
|
||||
line = ""
|
||||
if comment:
|
||||
line = line + comment
|
||||
return line + "\n"
|
||||
|
||||
def convert_file(input, output):
|
||||
"convert input file to output file line-by-line"
|
||||
with open(input, "rt") as infile:
|
||||
with open(output, "wt") as outfile:
|
||||
outfile.write(";ACME 0.97\n")
|
||||
for line in infile:
|
||||
outfile.write(process_line(line))
|
||||
|
||||
if __name__ == '__main__':
|
||||
if len(sys.argv) != 3:
|
||||
sys.exit(
|
||||
"Error: wrong number of arguments\n"
|
||||
"\n"
|
||||
"masm2acme.py converts a file from MASM to ACME syntax.\n"
|
||||
"Usage: masm2acme.py INPUTFILE OUTPUTFILE\n"
|
||||
)
|
||||
convert_file(sys.argv[1], sys.argv[2])
|
Loading…
Reference in New Issue
Block a user