Add OpenFirmware detokenizer.

This commit is contained in:
Maxim Poliakovski 2021-08-07 22:00:29 +02:00
parent b23fd9d47b
commit bb39f6de07
3 changed files with 581 additions and 0 deletions

335
detok.py Normal file
View File

@ -0,0 +1,335 @@
'''
FCode DeTokenizer in pure Python.
Author: Max Poliakovski 2019-2021
'''
import struct
FORTH_WORDS = {
0x10 : ('b(lit)', ['num32']),
0x11 : ("b(')", ['fcode_num']),
0x12 : ('b(")', ['p_string']),
0x13 : ('bbranch', ['offset']),
0x14 : ('b?branch', ['offset']),
0x15 : ('b(loop)', ['offset']),
0x16 : ('b(+loop)', ['offset']),
0x17 : ('b(do)', ['offset']),
0x18 : ('b(?do)', ['offset']),
0x19 : ('i', []),
0x1A : ('j', []),
0x1B : ('b(leave)', []),
0x1C : ('b(of)', ['offset']),
0x1D : ('execute', []),
0x1E : ('+', []),
0x1F : ('-', []),
0x20 : ('*', []),
0x21 : ('/', []),
0x22 : ('mod', []),
0x23 : ('and', []),
0x24 : ('or', []),
0x25 : ('xor', []),
0x26 : ('invert', []),
0x27 : ('lshift', []),
0x28 : ('rshift', []),
0x29 : ('>>a', []),
0x2A : ('/mod', []),
0x2B : ('u/mod', []),
0x2C : ('negate', []),
0x2D : ('abs', []),
0x2E : ('min', []),
0x2F : ('max', []),
0x30 : ('>r', []),
0x31 : ('r>', []),
0x32 : ('r@', []),
0x33 : ('exit', []),
0x34 : ('0=', []),
0x35 : ('0<>', []),
0x36 : ('0<', []),
0x37 : ('0<=', []),
0x38 : ('0>', []),
0x39 : ('0>=', []),
0x3A : ('<', []),
0x3B : ('>', []),
0x3C : ('=', []),
0x3D : ('<>', []),
0x3E : ('u>', []),
0x3F : ('u<=', []),
0x40 : ('u<', []),
0x41 : ('u>=', []),
0x42 : ('>=', []),
0x43 : ('<=', []),
0x44 : ('between', []),
0x45 : ('within', []),
0x46 : ('drop', []),
0x47 : ('dup', []),
0x48 : ('over', []),
0x49 : ('swap', []),
0x4A : ('rot', []),
0x4B : ('-rot', []),
0x4C : ('tuck', []),
0x4D : ('nip', []),
0x4E : ('pick', []),
0x4F : ('roll', []),
0x50 : ('?dup', []),
0x51 : ('depth', []),
0x52 : ('2drop', []),
0x53 : ('2dup', []),
0x54 : ('2over', []),
0x55 : ('2swap', []),
0x57 : ('2/', []),
0x59 : ('2*', []),
0x5C : ('/l', []),
0x5E : ('ca+', []),
0x5F : ('wa+', []),
0x60 : ('la+', []),
0x61 : ('na+', []),
0x62 : ('char+', []),
0x63 : ('wa1+', []),
0x64 : ('la1+', []),
0x65 : ('cell+', []),
0x68 : ('/l*', []),
0x69 : ('cells', []),
0x6A : ('on', []),
0x6B : ('off', []),
0x6C : ('+!', []),
0x6D : ('@', []),
0x6E : ('l@', []),
0x6F : ('w@', []),
0x71 : ('c@', []),
0x72 : ('!', []),
0x73 : ('l!', []),
0x74 : ('w!', []),
0x75 : ('c!', []),
0x76 : ('2@', []),
0x77 : ('2!', []),
0x78 : ('move', []),
0x79 : ('fill', []),
0x7A : ('comp', []),
0x7C : ('lwsplit', []),
0x7D : ('wljoin', []),
0x7E : ('lbsplit', []),
0x7F : ('bljoin', []),
0x80 : ('wbflip', []),
0x83 : ('pack', []),
0x84 : ('count', []),
0x85 : ('body>', []),
0x86 : ('>body', []),
0x89 : ('unloop', []),
0x8B : ('alloc-mem', []),
0x8C : ('free-mem', []),
0x8D : ('key?', []),
0x8E : ('key', []),
0x90 : ('type', []),
0x92 : ('cr', []),
0x9D : ('.', []),
0xA0 : ('base', []),
0xA4 : ('-1', []),
0xA5 : ('0', []),
0xA6 : ('1', []),
0xA7 : ('2', []),
0xA8 : ('3', []),
0xA9 : ('bl', []),
0xAA : ('bs', []),
0xAB : ('bell', []),
0xAC : ('bounds', []),
0xAD : ('here', []),
0xAE : ('aligned', []),
0xAF : ('wbsplit', []),
0xB0 : ('bwjoin', []),
0xB1 : ('b(<mark)', []),
0xB2 : ('b(>resolve)', []),
0xB5 : ('new-token', ['unnamed_tok']),
0xB7 : ('b(:)', []),
0xB8 : ('b(value)', ['line_break']),
0xB9 : ('b(variable)', ['line_break']),
0xBA : ('b(constant)', ['line_break']),
0xBB : ('b(create)', ['line_break']),
0xBC : ('b(defer)', ['line_break']),
0xBD : ('b(buffer:)', ['line_break']),
0xBE : ('b(field)', ['line_break']),
0xC0 : ('instance', []),
0xC2 : ('b(;)', ['line_break']),
0xC3 : ('b(to)', ['fcode_num']),
0xC4 : ('b(case)', []),
0xC5 : ('b(endcase)', []),
0xC6 : ('b(endof)', ['offset']),
0xCA : ('external-token', ['named_tok']),
0xD0 : ('c,', []),
0xD1 : ('w,', []),
0xD2 : ('l,', []),
0xD3 : (',', []),
0xD4 : ('um*', []),
0xD5 : ('um/mod', []),
0xD8 : ('d+', []),
0xD9 : ('d-', []),
0xDA : ('get-token', []),
0xDB : ('set-token', []),
0xDC : ('state', []),
0xF1 : ('start1', ['fcode_hdr', 'offset16']),
0xFD : ('version1', ['fcode_hdr', 'offset8']),
0x102 : ('my-address', []),
0x103 : ('my-space', []),
0x110 : ('property', []),
0x111 : ('encode-int', []),
0x112 : ('encode+', []),
0x113 : ('encode-phys', []),
0x114 : ('encode-string', []),
0x115 : ('encode-bytes', []),
0x119 : ('model', []),
0x11A : ('device-type', []),
0x11C : ('is-install', []),
0x11D : ('is-remove', []),
0x125 : ('get-msecs', []),
0x126 : ('ms', []),
0x128 : ('decode-phys', []),
0x150 : ('#lines', []),
0x15A : ('erase-screen', []),
0x166 : ('window-left', []),
0x16A : ('default-font', []),
0x16B : ('set-font', []),
0x16C : ('char-height', []),
0x16D : ('char-width', []),
0x18B : ('fb8-install', []),
0x201 : ('device-name', []),
0x203 : ('my-self', []),
0x207 : ('find-method', []),
0x209 : ('$call-parent', []),
0x20A : ('my-parent', []),
0x20B : ('ihandle>phandle', []),
0x216 : ('abort', []),
0x21A : ('get-my-property', []),
0x21B : ('decode-int', []),
0x21D : ('get-inherited-property', []),
0x21E : ('delete-property', []),
0x226 : ('lwflip', []),
0x227 : ('lbflip', []),
0x230 : ('rb@', []),
0x231 : ('rb!', []),
0x232 : ('rw@', []),
0x233 : ('rw!', []),
0x234 : ('rl@', []),
0x235 : ('rl!', [])
}
class DeTokenizer():
def __init__(self, code_stream, code_len, pos = 0):
self.pos = pos
self.code_stream = code_stream
self.code_length = code_len
self.offset_bits = 8
self.builtin_dict = FORTH_WORDS
self.user_dict = {}
self.new_line = False
def reinit(self, code_stream, code_len, pos = 0):
self.pos = pos
self.code_stream = code_stream
self.code_length = code_len
self.offset_bits = 8
self.new_line = False
def next_toknum(self):
tok_num = self.code_stream[self.pos]
self.pos += 1
if tok_num > 0 and tok_num <= 0xF:
tok_num = (tok_num << 8) | self.code_stream[self.pos]
self.pos += 1
return tok_num
def fcode_hdr(self):
fcode_hdr = struct.unpack('>BHL', self.code_stream[self.pos:self.pos+7])
self.pos += 7
print("FCode header:")
print("- format = 0x%X" % fcode_hdr[0])
print("- checksum = 0x%X" % fcode_hdr[1])
print("- prog_len = 0x%X\n" % fcode_hdr[2])
def offset8(self):
self.offset_bits = 8
def offset16(self):
self.offset_bits = 16
def offset(self):
if self.offset_bits == 8:
val = self.code_stream[self.pos]
self.pos += 1
elif self.offset_bits == 16:
val = (self.code_stream[self.pos] << 8) | (self.code_stream[self.pos+1])
self.pos += 2
sign = 1 << (self.offset_bits - 1)
offset = (val & (sign - 1)) - (val & sign)
print("- offset: %d" % offset)
def num32(self):
num = (self.code_stream[self.pos] << 24) | \
(self.code_stream[self.pos+1] << 16) | \
(self.code_stream[self.pos+2] << 8) | \
self.code_stream[self.pos+3]
self.pos += 4
print("- number: 0x%X" % num)
def fcode_num(self):
num = self.next_toknum()
print("- FCode #: 0x%X" % num)
return num
def unnamed_tok(self):
tok_num = self.fcode_num()
tok_name = 'unnamed_' + format(tok_num, 'x')
#if tok_num not in self.user_dict:
self.user_dict[tok_num] = tok_name
def named_tok(self):
tok_name = self.p_string()
tok_num = self.fcode_num()
#if tok_num not in self.user_dict:
self.user_dict[tok_num] = tok_name
def p_string(self):
len = self.code_stream[self.pos]
self.pos += 1
try:
str = struct.unpack('%ds' % len, \
self.code_stream[self.pos:self.pos+len])[0].decode('utf-8')
self.pos += len
print('- String: " %s"' % str)
return str
except UnicodeDecodeError: # Forth string may contain non-printable chars!
bytes = struct.unpack('%dB' % len, \
self.code_stream[self.pos:self.pos+len])
self.pos += len
print(' '.join(format(x, '02x') for x in bytes))
return bytes
def line_break(self):
print("")
self.new_line = True
def insert_newline(self, tok_num):
if tok_num == 0xCA or tok_num == 0xB5:
if not self.new_line:
self.line_break()
else:
self.new_line = False
def decode_stream(self):
while self.pos < self.code_length:
tok_num = self.next_toknum()
if tok_num == 0:
print('0x00 ; end0')
break
self.insert_newline(tok_num)
if tok_num in self.builtin_dict:
dict_entry = self.builtin_dict[tok_num]
print("0x%X ; %s" % (tok_num, dict_entry[0]))
for fun in dict_entry[1]:
fun_obj = getattr(self, fun)
fun_obj()
elif tok_num in self.user_dict: # check user dictionary
print("0x%X ; %s" % (tok_num, self.user_dict[tok_num]))
else:
print("Token %X not found at offset 0x%X!" % (tok_num, self.pos))
crash()

128
detok_of.py Normal file
View File

@ -0,0 +1,128 @@
'''
DeTokenizer for Apple OpenFirmware.
Author: Max Poliakovski 2019-2021
'''
import struct
from argparse import ArgumentParser
from extractdict import parse_coff_container, scan_forth_dict, print_dict
from detok import DeTokenizer
def get_fcode_prog(infile):
# try to get FCode program header
fpos = infile.tell()
fcode_hdr = struct.unpack('>BBHL', infile.read(8))
infile.seek(fpos)
if fcode_hdr[0] != 0xFD and fcode_hdr[0] != 0xF1:
#print("Unsupported FCode header function 0x%X" % fcode_hdr[0])
return (0,0)
prog_len = fcode_hdr[3]
prog_stream = infile.read(prog_len)
return (prog_stream, prog_len)
def decode_package_header(infile):
pkg_hdr = struct.unpack('>LHHLL', infile.read(16))
print("Device package header:")
print("----------------------")
print("Next package offset: %X" % pkg_hdr[0])
print("Device ID: %X" % pkg_hdr[1])
print("Vendor ID: %X" % pkg_hdr[2])
print("Device class: %X" % pkg_hdr[3])
print("Package header size: %X" % pkg_hdr[4])
return (pkg_hdr[0], pkg_hdr[4])
def populate_user_dict(src_dict, dst_dict):
for tok_num, word in src_dict.items():
if tok_num >= 0x100:
dst_dict[tok_num] = word['name']
# add Apple specific FCodes for managing stack frames
for i in range(0,9):
dst_dict[0x407 + i] = '(pushlocals_%s)' % i
for i in range(0,8):
dst_dict[0x410 + i] = '(local@%s)' % i
dst_dict[0x418 + i] = '(local!%s)' % i
def main():
parser = ArgumentParser()
parser.add_argument('--rom_path', type=str,
dest='rom_path',
help='path to ROM file to process',
metavar='ROM_PATH', required=True)
parser.add_argument('--offset', type=lambda x: int(x,0),
dest='of_offset',
help='offset to OF container (autodetect attempt if omitted)',
metavar='OF_OFFSET', required=True)
opts = parser.parse_args()
with open(opts.rom_path, 'rb') as infile:
pos, size = parse_coff_container(infile, opts.of_offset);
if size == 0:
print("No valid OF binary found at offset %X" % opts.of_offset)
exit(1)
print("pos = 0x%X, size = 0x%X" % (pos, size))
dict = scan_forth_dict(infile, opts.of_offset + pos, pos + size)
#print_dict(dict)
print("Detokenizing main OF package...")
print("-------------------------------\n")
infile.seek(opts.of_offset + pos + 8)
prog_offset = struct.unpack('>L', infile.read(4))[0]
print("FCode program offset: %X" % (prog_offset + pos))
infile.seek(opts.of_offset + prog_offset + pos)
prog_stream, prog_size = get_fcode_prog(infile)
detokenizer = DeTokenizer(prog_stream, prog_size)
populate_user_dict(dict, detokenizer.user_dict)
detokenizer.decode_stream()
print("\nDetokenizing device packages...")
print("-------------------------------\n")
infile.seek(opts.of_offset + pos + 0x40)
pkg_offset = struct.unpack('>L', infile.read(4))[0] + pos + opts.of_offset
print("Last OF device package offset: %X" % (pkg_offset))
prev_pkg_offset = pkg_offset
while True:
print("\n")
infile.seek(pkg_offset)
next_pkg_offset, hdr_size = decode_package_header(infile)
prog_stream, prog_size = get_fcode_prog(infile)
if prog_size == 0:
prog_size = prev_pkg_offset - pkg_offset - hdr_size
#print("Headerless FCode program size: %X" % prog_size)
#print("File pos: %X" % infile.tell())
prog_stream = infile.read(prog_size)
print("\nDetokenizing package at offset %X...\n" % pkg_offset)
detokenizer.reinit(prog_stream, prog_size)
detokenizer.decode_stream()
# navigate to previous package or exit if there is no more packages
if next_pkg_offset == 0:
break
prev_pkg_offset = pkg_offset
pkg_offset = (pkg_offset + next_pkg_offset) & 0xFFFFFFFF
if __name__ == '__main__':
main()

118
extractdict.py Normal file
View File

@ -0,0 +1,118 @@
'''
Dictionary extraction script for Apple OpenFirmware.
Author: Max Poliakovski 2019-2021
'''
import os
import struct
from argparse import ArgumentParser
class OFWordHeader:
def __init__(self, infile, pos):
infile.seek(pos)
# get common fields
hdr = struct.unpack('>iBBH', infile.read(8))
self.prev = hdr[0]
self.flags = hdr[1]
self.type = hdr[2]
self.tok_num = hdr[3]
if self.flags & 0x20: # bit 5 means nameless word
# generate artificial name
self.name = 'unnamed_' + format(self.tok_num, 'x')
else:
len = struct.unpack('B', infile.read(1))[0]
self.name = struct.unpack('%ds' % len,
infile.read(len))[0].decode('utf-8')
self.pos = pos
def parse_coff_container(infile, cont_offset):
infile.seek(cont_offset)
# read COFF header
coff_hdr = struct.unpack('>HHL', infile.read(8))
n_sections = coff_hdr[1]
# COFF magic and at least one section are required
if coff_hdr[0] != 0x1DF or n_sections < 1:
print("No valid COFF header found at offset %X" % cont_offset)
return (0, 0)
if coff_hdr[2] == 0x47617279:
print("Detected Macintosh OldWorld OF binary...")
infile.seek(cont_offset + 20) # rewind to sections array
# search for executable code section
for sect in range(n_sections):
sect_desc = struct.unpack('>8sLLLLLLHHL', infile.read(40))
sect_name = sect_desc[0].decode('utf-8').strip('\x00')
if sect_name == '.text':
return (sect_desc[4], sect_desc[3])
return (0, 0)
def scan_forth_dict(infile, pos, end_pos):
# try offset at code_section[0x48] that usually points
# to the header of the last word (cold-load)
infile.seek(pos + 0x48)
dict_last_offset = struct.unpack('>L', infile.read(4))[0]
if (dict_last_offset + 20) >= end_pos:
return 0
word = OFWordHeader(infile, dict_last_offset + pos)
if word.name == 'cold-load':
print("cold-load found at offset %X" % word.pos)
else:
print('Scanning for cold-load not implemented yet')
return 0
print('\n')
forth_dict = {}
word_pos = dict_last_offset + pos
while 1:
forth_dict[word.tok_num] = {'name' : word.name, 'type' : word.type, 'pos' : word.pos}
if word.prev >= 0:
return forth_dict
word_pos += word.prev
del word
word = OFWordHeader(infile, word_pos)
def print_dict(dict):
for tok_num, word in dict.items():
print("Word: %04X, name: %s, type: %02X, offset = %08X" % (tok_num, word['name'], word['type'], word['pos']))
def main():
parser = ArgumentParser()
parser.add_argument('--rom_path', type=str,
dest='rom_path',
help='path to ROM file to process',
metavar='ROM_PATH', required=True)
parser.add_argument('--offset', type=lambda x: int(x,0),
dest='of_offset',
help='offset to OF container (autodetect attempt if omitted)',
metavar='OF_OFFSET', required=True)
opts = parser.parse_args()
with open(opts.rom_path, 'rb') as infile:
pos, size = parse_coff_container(infile, opts.of_offset);
if size == 0:
print("No valid OF binary found at offset %X" % opts.of_offset)
exit(1)
dict = scan_forth_dict(infile, opts.of_offset + pos, pos + size)
print_dict(dict)
if __name__ == '__main__':
main()