mirror of
https://github.com/elliotnunn/powermac-rom.git
synced 2024-05-28 22:41:35 +00:00
324 lines
9.5 KiB
Python
324 lines
9.5 KiB
Python
#!/usr/bin/env python3
|
|
|
|
def label(x):
    """Print each whitespace-separated name in *x* on a line of its own."""
    # One newline-terminated line per name; nothing at all for blank input.
    print(''.join(name + '\n' for name in x.split()), end='')
|
|
|
|
def directive(x, *args):
    """
    Print one instruction/directive line: two tabs, the mnemonic *x*, then
    the comma-separated operands (if any).

    Mnemonics shorter than four characters get two tabs before the operands
    instead of one, so the operand column lines up.
    """
    operands = ', '.join(map(str, args))
    if not operands:
        print('\t\t' + x)
        return

    gap = '\t\t' if len(x) < 4 else '\t'
    print('\t\t' + x + gap + operands)
|
|
|
|
def equ(k, v):
    """Print a tab-separated `k equ v` line."""
    print('\t'.join((str(k), 'equ', str(v))))
|
|
|
|
def cmt(*args):
    """Print *args* as a single comment line introduced by a semicolon."""
    fields = [';']
    fields.extend(str(arg) for arg in args)
    print(' '.join(fields))
|
|
|
|
def r(x):
    """Return the register name formed by prefixing *x* with 'r'."""
    return 'r%s' % (x,)
|
|
|
|
def v(x):
    """Return the register name formed by prefixing *x* with 'v'."""
    return 'v%s' % (x,)
|
|
|
|
def f(x):
    """Return the register name formed by prefixing *x* with 'f'."""
    return 'f%s' % (x,)
|
|
|
|
def normlshift(x):
    """Wrap the shift amount *x* into the range 0..31 (modulo 32)."""
    return x % 32
|
|
|
|
|
|
def sequence_aligned_loadstores(n, ralign):
    """
    How do I load/store n bytes, with their RHS aligned to an 8-boundary
    modulo ralign, while using only naturally aligned instructions?

    Return a string of access widths, one digit per access, in the order
    they are performed: e.g. '8', '4', '22', '121'. An n of 0 gives ''.

    Raises ValueError if n is negative (the chunking loop could otherwise
    never reach zero and would spin forever).
    """
    if n < 0:
        raise ValueError('n must be non-negative, got %r' % (n,))

    # (assumes some cheeky cleverness!)
    remaining = n
    widths = []
    while remaining:
        # Offset of the first not-yet-covered byte, relative to an 8-boundary.
        ofs = ralign - remaining

        # Widest access that offset is naturally aligned for: the lowest set
        # bit among 1/2/4, or a full 8 when none of those bits is set.
        g = next((bit for bit in (1, 2, 4) if ofs & bit), 8)

        # Never cover more bytes than are left.
        while g > remaining:
            g >>= 1

        widths.append(str(g))
        remaining -= g
    return ''.join(widths)
|
|
|
|
|
|
def permutations_of_aligned_loadstores():
    """
    Collect, as a set, every distinct result of sequence_aligned_loadstores
    for n of 1, 2, 3 ... 8 combined with ralign of 0, 1, 2 ... 7.
    """
    return {
        sequence_aligned_loadstores(length, rhsalign)
        for length in range(1, 9)
        for rhsalign in range(8)
    }
|
|
|
|
# Computed once at import time; list_perms_ending_with() scans this set.
PERMUTATIONS_OF_ALIGNED_LOADSTORES = permutations_of_aligned_loadstores()
|
|
|
|
|
|
def list_perms_ending_with(x):
    """
    Recursive generator tuned to help build the final load/store lists.

    Yields x itself, then (depth-first) every longer string obtained by
    prepending, one digit at a time, a digit that precedes the suffix x in
    some member of PERMUTATIONS_OF_ALIGNED_LOADSTORES. Candidate digits are
    visited in sorted order.
    """
    yield x

    # Digits that appear immediately before the suffix x in any permutation.
    preceding_digits = {
        perm[-len(x) - 1]
        for perm in PERMUTATIONS_OF_ALIGNED_LOADSTORES
        if len(perm) > len(x) and perm.endswith(x)
    }

    for digit in sorted(preceding_digits):
        for longer in list_perms_ending_with(digit + x):
            yield longer
|
|
|
|
|
|
def final_load_list():
    """
    Big waterfall of loads!

    Walks list_perms_ending_with for the enders '8', '4', '2', '1' in that
    order, leaves out the sequences '2' and '22', and returns the collected
    sequences in reverse order.
    """
    skipped = ('2', '22')
    collected = [
        seq
        for ender in '8421'
        for seq in list_perms_ending_with(ender)
        if seq not in skipped
    ]
    return collected[::-1]
|
|
|
|
# Computed once at import time; MRAlignLoads() emits its routines in this order.
FINAL_LOAD_LIST = final_load_list()
|
|
|
|
def final_store_list():
    """
    Big waterfall of stores!

    Walks list_perms_ending_with for the enders '8', '4', '2', '1' in that
    order and returns every sequence found, in reverse order. Unlike the
    load list, no sequence is left out.
    """
    collected = [
        seq
        for ender in '8421'
        for seq in list_perms_ending_with(ender)
    ]
    return collected[::-1]
|
|
|
|
# Computed once at import time; MRAlignStores() emits its routines in this order.
FINAL_STORE_LIST = final_store_list()
|
|
|
|
|
|
################################################################
|
|
# serious-er part of file. codegen functions only!
|
|
################################################################
|
|
|
|
def MRAlignDispatchTable():
    """
    Print the MRAlignDispatchTable: one `dc.w` entry per combination of
    access length ('vector', then 1..8), direction (stores before loads)
    and right-hand alignment (0..7).

    Per the original author's note, going through this dispatch table
    (which an int handler does) is the only route to access MRAlignLoads.
    """
    directive('align', 10)
    label('MRAlignDispatchTable')

    vector_targets = {'s': 'MRStoreVector', 'l': 'MRLoadVector'}
    integer_prefixes = {'s': 'MRStore', 'l': 'MRLoad'}

    for howlong in ['vector'] + list(range(1, 9)):
        cmt(howlong, 'stores/loads')

        for ldst, ralign in ((d, a) for d in 'sl' for a in range(8)):
            if howlong == 'vector':
                target = vector_targets[ldst]
            else:
                # Integer load/store: routine name carries the access sequence.
                target = integer_prefixes[ldst] + sequence_aligned_loadstores(howlong, ralign)

            directive('dc.w', '%s - FDP - (* - MRAlignDispatchTable)' % target)
|
|
|
|
|
|
# The table at the very end of the FDP, full of vector instructions!
|
|
# called from FDP_0554, which itself comes from the halfwit table, which seems to serve major_0x02ccc
|
|
def MRVectorAlignDispatchTable():
    """
    Print one labelled array per vector load/store opcode.

    Each array is labelled '<OPCODE>Array' (opcode upper-cased) and holds,
    for each of the 32 vector registers v0..v31, the opcode applied to that
    register (operands `vN, 0, r23`) followed by a branch to the opcode's
    follow-up destination.
    """
    # (first instruction's opcode, branch target of the second instruction)
    pairs = [
        ('lvx', 'MRExecuted'),
        ('lvebx', 'FDP_0DA0'),
        ('lvehx', 'FDP_0DA0'),
        ('lvewx', 'FDP_0DA0'),
        ('stvx', 'MRExecuted'),
        ('stvebx', 'FDP_104C'),
        ('stvehx', 'FDP_1058'),
        ('stvewx', 'FDP_1064'),
    ]

    for firstinst_opcode, secondinst_dest in pairs:
        # Was `firstint_opcode`, an undefined name that raised NameError.
        label(firstinst_opcode.upper() + 'Array')

        for i in range(32):
            directive(firstinst_opcode, v(i), 0, 'r23')
            # NOTE(review): branch emitted once per register, giving
            # two-instruction entries - confirm against the expected assembly.
            directive('b', secondinst_dest)
|
|
|
|
|
|
def MRAlignLoads():
    """
    Print an 'MRLoad<sizes>' routine for every entry of FINAL_LOAD_LIST.

    Each routine performs the first access named in its size string, then
    hands over to the code that handles the remaining accesses (by falling
    through, branching, or inlining a final word load) or branches to
    MRExecuted when nothing remains. A blank line follows each routine.
    """
    waterfall = FINAL_LOAD_LIST
    load_opcode = {1: 'lbz', 2: 'lhz', 4: 'lwz'}

    for wi, sizes in enumerate(waterfall):
        label('MRLoad' + sizes)

        widths = [int(digit) for digit in sizes]
        this_size = widths[0]
        remain_size = sum(widths[1:])
        total_size = this_size + remain_size
        rest = sizes[1:]

        # PART 1: load a number of bytes equal to the first element in "sizes"

        if sizes == '8':  # special case
            directive('lwz', 'mrLow', '-8(mrBase)')
            directive('lwz', 'mrHigh', '-4(mrBase)')

        elif sizes == '44':  # special case: no need for scratch register
            directive('lwz', 'mrHigh', '-%d(mrBase)' % total_size)
            directive('subi', 'mrCtr', 'mrCtr', 2 * this_size)

        elif sizes == '4':  # special case: emulate lwarx if asked
            directive('bc', 'BO_IF', 23, '@atomic')
            directive('lwz', 'mrLow', '-4(mrBase)')
            directive('b', 'MRExecuted')
            label('@atomic')
            directive('li', 'mrScratch', -4)
            directive('lwarx', 'mrScratch', 'mrBase')

        else:  # use an intermediate scratch register then bit-hack onwards
            directive(load_opcode[this_size], 'mrScratch', '-%d(mrBase)' % total_size)
            if rest:
                directive('subi', 'mrCtr', 'mrCtr', 2 * this_size)

            for regexponent, regname in ((4, 'mrHigh'), (0, 'mrLow')):
                # Skip a register that none of the loaded bytes land in.
                if regexponent >= remain_size + this_size or remain_size >= regexponent + 4:
                    continue

                lshift = (regexponent - remain_size) * 8

                # One 0xFF per loaded byte that falls inside this register.
                first_byte = remain_size - regexponent
                mask = 0
                for byte in range(first_byte, first_byte + this_size):
                    if 0 <= byte < 4:
                        mask |= 0xFF << (8 * byte)

                directive('rlwimi', regname, 'mrScratch', normlshift(lshift), '0x%08X' % mask)

        # PART 2: jump somewhere that will do the rest of the loads in "sizes"

        falls_through = wi + 1 < len(waterfall) and waterfall[wi + 1] == rest

        if rest == '4':  # special case: inline an lwz instead of jumping to 'MRLoad4'
            directive('lwz', 'mrLow', '-4(mrBase)')
            directive('b', 'MRExecuted')
        elif remain_size == 0:  # finished executing
            directive('b', 'MRExecuted')
        elif not falls_through:
            directive('b', 'MRLoad' + rest)
        # otherwise the next routine in the waterfall is the one we need

        print()
|
|
|
|
def MRAlignStores():
    """
    Print an 'MRStore<sizes>' routine for every entry of FINAL_STORE_LIST.

    Each routine performs the first access named in its size string, then
    hands over to the code that handles the remaining accesses (by falling
    through, branching, or inlining a single final store) or branches to
    MRExecuted when nothing remains. A blank line follows each routine.
    """
    waterfall = FINAL_STORE_LIST
    store_opcode = {1: 'stb', 2: 'sth', 4: 'stw', 8: 'no way'}

    for wi, sizes in enumerate(waterfall):
        label('MRStore' + sizes)

        widths = [int(digit) for digit in sizes]
        this_size = widths[0]
        remain_size = sum(widths[1:])
        total_size = this_size + remain_size
        rest = sizes[1:]

        # PART 1: store a number of bytes equal to the first element in "sizes"

        inst = store_opcode[this_size]

        if sizes == '8':  # special case
            directive('stw', 'mrLow', '-8(mrBase)')
            directive('stw', 'mrHigh', '-4(mrBase)')

        elif remain_size == 4:  # straight store!
            directive(inst, 'mrHigh', '-%d(mrBase)' % total_size)
            if rest:
                directive('subi', 'mrCtr', 'mrCtr', 2 * this_size)

        elif sizes == '4':  # special case: emulate stwcx. if asked
            directive('bc', 'BO_IF', 23, '@atomic')
            directive('stw', 'mrLow', '-4(mrBase)')
            directive('b', 'MRExecuted')
            label('@atomic')
            directive('li', 'mrScratch', -4)
            directive('stwcx.', 'mrScratch', 'mrBase')
            directive('isync')
            directive('mfcr', 'mrScratch')
            directive('rlwimi', 'r13', 'mrScratch', 0, '0xFF000000')

        else:  # arrange intermediate register then store it
            mask = (1 << (8 * this_size)) - 1
            # The first contributing register initialises the scratch
            # (rlwinm); any later one is inserted into it (rlwimi).
            fiddler = 'rlwinm'

            for regexponent, regname in ((0, 'mrLow'), (4, 'mrHigh')):
                # Skip a register that none of the stored bytes come from.
                if regexponent >= remain_size + this_size or remain_size >= regexponent + 4:
                    continue

                lshift = (regexponent - remain_size) * 8
                directive(fiddler, 'mrScratch', regname, normlshift(lshift), '0x%08X' % mask)
                fiddler = 'rlwimi'

            directive(inst, 'mrScratch', '-%d(mrBase)' % total_size)
            if rest:
                directive('subi', 'mrCtr', 'mrCtr', 2 * this_size)

        # PART 2: jump somewhere that will do the rest of the stores in "sizes"

        # Falling through is fine unless the next routine is the special-cased
        # 'MRStore4'; in that case the final store is inlined below instead.
        falls_through = (
            wi + 1 < len(waterfall)
            and waterfall[wi + 1] == rest
            and rest != '4'
        )

        if not falls_through:
            if len(rest) == 1:  # special case: inline a single store instead of jumping
                directive(store_opcode[remain_size], 'mrLow', '-%d(mrBase)' % remain_size)
                directive('b', 'MRExecuted')
            elif remain_size == 0:  # finished executing
                directive('b', 'MRExecuted')
            else:
                directive('b', 'MRStore' + rest)

        print()
|
|
|
|
|
|
|
|
# Runs at import/execution time: prints the MRStore* routines to stdout.
MRAlignStores()
|