mirror of
https://github.com/maximumspatium/PowerRomDasm.git
synced 2025-01-05 10:30:46 +00:00
336 lines
11 KiB
Python
336 lines
11 KiB
Python
|
'''
FCode DeTokenizer in pure Python.

Author: Max Poliakovski 2019-2021
'''
|
||
|
import struct
|
||
|
|
||
|
# Built-in FCode token table.
#
# Each entry maps an FCode token number to a tuple
#     (word name, [handler names])
# where every handler name is the name of a DeTokenizer method.
# DeTokenizer.decode_stream() prints the word name and then calls the listed
# handlers in order (looked up with getattr) so they can consume and print the
# token's inline arguments. An empty list means the token has no inline data.
#
# Token numbers of 0x100 and above are the two-byte tokens assembled by
# DeTokenizer.next_toknum() from a 0x01-0x0F prefix byte and the byte after it.
FORTH_WORDS = {
    0x10: ('b(lit)', ['num32']),
    0x11: ("b(')", ['fcode_num']),
    0x12: ('b(")', ['p_string']),
    0x13: ('bbranch', ['offset']),
    0x14: ('b?branch', ['offset']),
    0x15: ('b(loop)', ['offset']),
    0x16: ('b(+loop)', ['offset']),
    0x17: ('b(do)', ['offset']),
    0x18: ('b(?do)', ['offset']),
    0x19: ('i', []),
    0x1A: ('j', []),
    0x1B: ('b(leave)', []),
    0x1C: ('b(of)', ['offset']),
    0x1D: ('execute', []),
    0x1E: ('+', []),
    0x1F: ('-', []),
    0x20: ('*', []),
    0x21: ('/', []),
    0x22: ('mod', []),
    0x23: ('and', []),
    0x24: ('or', []),
    0x25: ('xor', []),
    0x26: ('invert', []),
    0x27: ('lshift', []),
    0x28: ('rshift', []),
    0x29: ('>>a', []),
    0x2A: ('/mod', []),
    0x2B: ('u/mod', []),
    0x2C: ('negate', []),
    0x2D: ('abs', []),
    0x2E: ('min', []),
    0x2F: ('max', []),
    0x30: ('>r', []),
    0x31: ('r>', []),
    0x32: ('r@', []),
    0x33: ('exit', []),
    0x34: ('0=', []),
    0x35: ('0<>', []),
    0x36: ('0<', []),
    0x37: ('0<=', []),
    0x38: ('0>', []),
    0x39: ('0>=', []),
    0x3A: ('<', []),
    0x3B: ('>', []),
    0x3C: ('=', []),
    0x3D: ('<>', []),
    0x3E: ('u>', []),
    0x3F: ('u<=', []),
    0x40: ('u<', []),
    0x41: ('u>=', []),
    0x42: ('>=', []),
    0x43: ('<=', []),
    0x44: ('between', []),
    0x45: ('within', []),
    0x46: ('drop', []),
    0x47: ('dup', []),
    0x48: ('over', []),
    0x49: ('swap', []),
    0x4A: ('rot', []),
    0x4B: ('-rot', []),
    0x4C: ('tuck', []),
    0x4D: ('nip', []),
    0x4E: ('pick', []),
    0x4F: ('roll', []),
    0x50: ('?dup', []),
    0x51: ('depth', []),
    0x52: ('2drop', []),
    0x53: ('2dup', []),
    0x54: ('2over', []),
    0x55: ('2swap', []),
    0x57: ('2/', []),
    0x59: ('2*', []),
    0x5C: ('/l', []),
    0x5E: ('ca+', []),
    0x5F: ('wa+', []),
    0x60: ('la+', []),
    0x61: ('na+', []),
    0x62: ('char+', []),
    0x63: ('wa1+', []),
    0x64: ('la1+', []),
    0x65: ('cell+', []),
    0x68: ('/l*', []),
    0x69: ('cells', []),
    0x6A: ('on', []),
    0x6B: ('off', []),
    0x6C: ('+!', []),
    0x6D: ('@', []),
    0x6E: ('l@', []),
    0x6F: ('w@', []),
    0x71: ('c@', []),
    0x72: ('!', []),
    0x73: ('l!', []),
    0x74: ('w!', []),
    0x75: ('c!', []),
    0x76: ('2@', []),
    0x77: ('2!', []),
    0x78: ('move', []),
    0x79: ('fill', []),
    0x7A: ('comp', []),
    0x7C: ('lwsplit', []),
    0x7D: ('wljoin', []),
    0x7E: ('lbsplit', []),
    0x7F: ('bljoin', []),
    0x80: ('wbflip', []),
    0x83: ('pack', []),
    0x84: ('count', []),
    0x85: ('body>', []),
    0x86: ('>body', []),
    0x89: ('unloop', []),
    0x8B: ('alloc-mem', []),
    0x8C: ('free-mem', []),
    0x8D: ('key?', []),
    0x8E: ('key', []),
    0x90: ('type', []),
    0x92: ('cr', []),
    0x9D: ('.', []),
    0xA0: ('base', []),
    0xA4: ('-1', []),
    0xA5: ('0', []),
    0xA6: ('1', []),
    0xA7: ('2', []),
    0xA8: ('3', []),
    0xA9: ('bl', []),
    0xAA: ('bs', []),
    0xAB: ('bell', []),
    0xAC: ('bounds', []),
    0xAD: ('here', []),
    0xAE: ('aligned', []),
    0xAF: ('wbsplit', []),
    0xB0: ('bwjoin', []),
    0xB1: ('b(<mark)', []),
    0xB2: ('b(>resolve)', []),
    0xB5: ('new-token', ['unnamed_tok']),
    0xB7: ('b(:)', []),
    0xB8: ('b(value)', ['line_break']),
    0xB9: ('b(variable)', ['line_break']),
    0xBA: ('b(constant)', ['line_break']),
    0xBB: ('b(create)', ['line_break']),
    0xBC: ('b(defer)', ['line_break']),
    0xBD: ('b(buffer:)', ['line_break']),
    0xBE: ('b(field)', ['line_break']),
    0xC0: ('instance', []),
    0xC2: ('b(;)', ['line_break']),
    0xC3: ('b(to)', ['fcode_num']),
    0xC4: ('b(case)', []),
    0xC5: ('b(endcase)', []),
    0xC6: ('b(endof)', ['offset']),
    0xCA: ('external-token', ['named_tok']),
    0xD0: ('c,', []),
    0xD1: ('w,', []),
    0xD2: ('l,', []),
    0xD3: (',', []),
    0xD4: ('um*', []),
    0xD5: ('um/mod', []),
    0xD8: ('d+', []),
    0xD9: ('d-', []),
    0xDA: ('get-token', []),
    0xDB: ('set-token', []),
    0xDC: ('state', []),
    0xF1: ('start1', ['fcode_hdr', 'offset16']),
    0xFD: ('version1', ['fcode_hdr', 'offset8']),
    0x102: ('my-address', []),
    0x103: ('my-space', []),
    0x110: ('property', []),
    0x111: ('encode-int', []),
    0x112: ('encode+', []),
    0x113: ('encode-phys', []),
    0x114: ('encode-string', []),
    0x115: ('encode-bytes', []),
    0x119: ('model', []),
    0x11A: ('device-type', []),
    0x11C: ('is-install', []),
    0x11D: ('is-remove', []),
    0x125: ('get-msecs', []),
    0x126: ('ms', []),
    0x128: ('decode-phys', []),
    0x150: ('#lines', []),
    0x15A: ('erase-screen', []),
    0x166: ('window-left', []),
    0x16A: ('default-font', []),
    0x16B: ('set-font', []),
    0x16C: ('char-height', []),
    0x16D: ('char-width', []),
    0x18B: ('fb8-install', []),
    0x201: ('device-name', []),
    0x203: ('my-self', []),
    0x207: ('find-method', []),
    0x209: ('$call-parent', []),
    0x20A: ('my-parent', []),
    0x20B: ('ihandle>phandle', []),
    0x216: ('abort', []),
    0x21A: ('get-my-property', []),
    0x21B: ('decode-int', []),
    0x21D: ('get-inherited-property', []),
    0x21E: ('delete-property', []),
    0x226: ('lwflip', []),
    0x227: ('lbflip', []),
    0x230: ('rb@', []),
    0x231: ('rb!', []),
    0x232: ('rw@', []),
    0x233: ('rw!', []),
    0x234: ('rl@', []),
    0x235: ('rl!', []),
}
|
||
|
|
||
|
class DeTokenizer:
    '''Print a token-by-token listing of an FCode byte stream.

    The stream is walked from ``pos`` up to ``code_length``. Every token is
    looked up first in the built-in table and then in the table of tokens
    defined by the stream itself; the listing is written with ``print``.

    ``code_stream`` is indexed and sliced, and the slices are handed to
    ``struct.unpack``, so it is expected to be a bytes-like object.
    '''

    def __init__(self, code_stream, code_len, pos=0):
        '''Create a detokenizer positioned at ``pos`` in ``code_stream``.

        code_stream -- buffer holding the FCode program
        code_len    -- index at which decode_stream() stops reading tokens
        pos         -- index of the first byte to decode
        '''
        self.builtin_dict = FORTH_WORDS  # token number -> (name, handlers)
        self.user_dict = {}  # token number -> name, filled while decoding
        self.reinit(code_stream, code_len, pos)

    def reinit(self, code_stream, code_len, pos=0):
        '''Point the detokenizer at another stream.

        The read position, offset width and line-break flag are reset.
        ``user_dict`` is left alone, so names collected from a previously
        decoded stream stay known.
        '''
        self.pos = pos
        self.code_stream = code_stream
        self.code_length = code_len
        self.offset_bits = 8  # width of branch offsets until a header changes it
        self.new_line = False  # True right after line_break() printed a blank line

    def next_toknum(self):
        '''Read the next token number and advance past it.

        A first byte in the range 0x01-0x0F is a prefix: it is combined with
        the following byte into a two-byte token number. Any other byte
        (including 0) is the token number itself.
        '''
        tok_num = self.code_stream[self.pos]
        self.pos += 1
        if 0 < tok_num <= 0xF:
            tok_num = (tok_num << 8) | self.code_stream[self.pos]
            self.pos += 1
        return tok_num

    def fcode_hdr(self):
        '''Consume the 7-byte FCode header and print its fields.

        The header is unpacked big-endian as one byte (format), a 16-bit
        checksum and a 32-bit program length.
        '''
        fmt, checksum, prog_len = struct.unpack(
            '>BHL', self.code_stream[self.pos:self.pos + 7])
        self.pos += 7
        print("FCode header:")
        print("- format = 0x%X" % fmt)
        print("- checksum = 0x%X" % checksum)
        print("- prog_len = 0x%X\n" % prog_len)

    def offset8(self):
        '''Switch branch offsets to 8 bits.'''
        self.offset_bits = 8

    def offset16(self):
        '''Switch branch offsets to 16 bits.'''
        self.offset_bits = 16

    def offset(self):
        '''Consume a signed branch offset, print it and return it.

        The offset occupies one byte or two big-endian bytes depending on
        ``offset_bits`` and is interpreted as a two's complement number.

        Raises ValueError if ``offset_bits`` is neither 8 nor 16.
        '''
        if self.offset_bits == 8:
            val = self.code_stream[self.pos]
            self.pos += 1
        elif self.offset_bits == 16:
            val = (self.code_stream[self.pos] << 8) | \
                self.code_stream[self.pos + 1]
            self.pos += 2
        else:
            raise ValueError("Unsupported offset width: %r bits" %
                             (self.offset_bits,))

        # Sign-extend: the top bit carries weight -2**(bits-1).
        sign = 1 << (self.offset_bits - 1)
        offset = (val & (sign - 1)) - (val & sign)
        print("- offset: %d" % offset)
        return offset

    def num32(self):
        '''Consume a big-endian 32-bit number, print it in hex and return it.'''
        num = (self.code_stream[self.pos] << 24) | \
              (self.code_stream[self.pos + 1] << 16) | \
              (self.code_stream[self.pos + 2] << 8) | \
              self.code_stream[self.pos + 3]
        self.pos += 4
        print("- number: 0x%X" % num)
        return num

    def fcode_num(self):
        '''Consume a token number used as an argument, print and return it.'''
        num = self.next_toknum()
        print("- FCode #: 0x%X" % num)
        return num

    def unnamed_tok(self):
        '''Register the following token number under a generated name.

        The name is ``unnamed_`` followed by the number in lowercase hex.
        '''
        tok_num = self.fcode_num()
        # Unconditional on purpose: a redefinition replaces the earlier name.
        self.user_dict[tok_num] = 'unnamed_' + format(tok_num, 'x')

    def named_tok(self):
        '''Register the following token number under the name preceding it.

        The name is read with p_string(), so it is a tuple of byte values
        rather than a str when the bytes are not valid UTF-8.
        '''
        tok_name = self.p_string()
        tok_num = self.fcode_num()
        # Unconditional on purpose: a redefinition replaces the earlier name.
        self.user_dict[tok_num] = tok_name

    def p_string(self):
        '''Consume a length-prefixed string, print it and return it.

        The first byte is the length, followed by that many bytes of data.
        Data that decodes as UTF-8 is printed as text and returned as str.
        Otherwise the bytes are printed as space-separated hex pairs and
        returned as a tuple of ints.
        '''
        length = self.code_stream[self.pos]
        self.pos += 1
        data = self.code_stream[self.pos:self.pos + length]

        try:
            text = struct.unpack('%ds' % length, data)[0].decode('utf-8')
        except UnicodeDecodeError:  # Forth string may contain non-printable chars!
            raw = struct.unpack('%dB' % length, data)
            self.pos += length
            print(' '.join(format(x, '02x') for x in raw))
            return raw

        self.pos += length
        print('- String: " %s"' % text)
        return text

    def line_break(self):
        '''Print an empty line and remember that one was just printed.'''
        print("")
        self.new_line = True

    def insert_newline(self, tok_num):
        '''Separate token definitions in the listing with a blank line.

        Before external-token (0xCA) and new-token (0xB5) a blank line is
        printed unless the previous output already ended with one. Any other
        token clears the "blank line just printed" flag.
        '''
        if tok_num in (0xCA, 0xB5):
            if not self.new_line:
                self.line_break()
        else:
            self.new_line = False

    def decode_stream(self):
        '''Decode and print tokens until end0 or the end of the stream.

        Decoding stops at token 0 (end0) or when the read position reaches
        ``code_length``. Built-in tokens have their handlers called in order
        to consume inline arguments; tokens defined by the stream are printed
        by name.

        Raises ValueError for a token found in neither table; the message is
        also printed and names the offset at which the token starts.
        '''
        while self.pos < self.code_length:
            tok_pos = self.pos  # start of this token, for error reporting
            tok_num = self.next_toknum()
            if tok_num == 0:
                print('0x00 ; end0')
                break

            self.insert_newline(tok_num)

            if tok_num in self.builtin_dict:
                dict_entry = self.builtin_dict[tok_num]
                print("0x%X ; %s" % (tok_num, dict_entry[0]))
                for handler in dict_entry[1]:
                    getattr(self, handler)()
            elif tok_num in self.user_dict:  # check user dictionary
                print("0x%X ; %s" % (tok_num, self.user_dict[tok_num]))
            else:
                msg = "Token %X not found at offset 0x%X!" % (tok_num, tok_pos)
                print(msg)
                raise ValueError(msg)
|