#!/usr/bin/env python3
"""Code generator that prints assembly source for misaligned load/store helpers.

Everything is written to stdout.  The first half of the file works out which
sequences of naturally aligned 1/2/4/8-byte accesses are needed; the second
half turns those sequences into labelled instruction "waterfalls" (the
mnemonics emitted -- lwz, stw, rlwimi, lvx, ... -- are PowerPC).

Running (or importing) the module currently emits the store waterfall only,
via the ``MRAlignStores()`` call at the bottom of the file.
"""


def label(x):
    """Print each whitespace-separated word of *x* on its own line, as a label."""
    for name in x.split():
        print(name)


def directive(x, *args):
    """Print one instruction or assembler directive.

    The mnemonic *x* is indented by two tabs.  Operands (if any) are converted
    with ``str`` and joined with ', '; they are separated from the mnemonic by
    one tab, or by two tabs when the mnemonic is shorter than four characters
    so that the operand column stays aligned.
    """
    argstr = ', '.join(str(arg) for arg in args)
    if argstr:
        argstr = '\t' + argstr
        if len(x) < 4:
            argstr = '\t' + argstr
    print('\t\t' + x + argstr)


def equ(k, v):
    """Print an ``equ`` line binding the symbol *k* to the value *v*."""
    print(str(k) + '\tequ\t' + str(v))


def cmt(*args):
    """Print a comment line: ';' followed by the space-separated *args*."""
    print(';', *args)


def r(x):
    """Return the register name 'r<x>'."""
    return 'r' + str(x)


def v(x):
    """Return the register name 'v<x>'."""
    return 'v' + str(x)


def f(x):
    """Return the register name 'f<x>'."""
    return 'f' + str(x)


def normlshift(x):
    """Normalise a left-rotate amount into the range 0..31."""
    return x % 32


def sequence_aligned_loadstores(n, ralign):
    """
    How do I load/store n bytes, with their RHS aligned to an 8-boundary
    modulo ralign, while using only naturally aligned instructions?

    Return a string with one digit per access, in order, like this:
    '8', '4', '22', '121'

    Raises ValueError if n is negative (the loop below could never finish).
    """
    # (assumes some cheeky cleverness!)
    if n < 0:
        raise ValueError('n must be non-negative, got %r' % (n,))

    pieces = []
    remaining = n
    while remaining:
        # Offset of the next byte to access; its lowest set bit gives the
        # largest naturally aligned access that can start there.
        ofs = ralign - remaining
        if ofs & 1:
            g = 1
        elif ofs & 2:
            g = 2
        elif ofs & 4:
            g = 4
        else:
            g = 8

        # Never access more bytes than are still wanted.
        while g > remaining:
            g >>= 1

        pieces.append(str(g))
        remaining -= g

    return ''.join(pieces)


def permutations_of_aligned_loadstores():
    """
    Get a set containing every permutation of sequence_aligned_loadstores
    for n of 1, 2, 3 ... 8 and ralign of 0, 1, 2 ... 7.
    """
    return {
        sequence_aligned_loadstores(length, rhsalign)
        for length in range(1, 9)
        for rhsalign in range(8)
    }


PERMUTATIONS_OF_ALIGNED_LOADSTORES = permutations_of_aligned_loadstores()


def list_perms_ending_with(x):
    """
    Recursive function tuned to help with final_load_list/final_store_list

    Yields x itself, then (depth first, in sorted order of the prepended
    digit) every longer string ending in x that is a suffix of some member of
    PERMUTATIONS_OF_ALIGNED_LOADSTORES.
    """
    yield x

    # Digits that appear immediately before the suffix x in any known sequence.
    preceding = {
        seq[-len(x) - 1]
        for seq in PERMUTATIONS_OF_ALIGNED_LOADSTORES
        if seq.endswith(x) and len(seq) > len(x)
    }

    for nextlet in sorted(preceding):
        yield from list_perms_ending_with(nextlet + x)


def final_load_list():
    """
    Big waterfall of loads!

    Returns the sequences in reverse generation order, so that a sequence can
    be followed directly by its own tail.  '2' and '22' are left out.
    """
    the_list = [
        seq
        for ender in '8421'
        for seq in list_perms_ending_with(ender)
        if seq not in ('2', '22')
    ]
    the_list.reverse()
    return the_list


FINAL_LOAD_LIST = final_load_list()


def final_store_list():
    """
    Big waterfall of stores!

    Same ordering as final_load_list, but nothing is left out.
    """
    the_list = [
        seq
        for ender in '8421'
        for seq in list_perms_ending_with(ender)
    ]
    the_list.reverse()
    return the_list


FINAL_STORE_LIST = final_store_list()


################################################################
# serious-er part of file. codegen functions only!
################################################################

# Instruction used for a single access of the given size in bytes.
_LOAD_INST = {1: 'lbz', 2: 'lhz', 4: 'lwz'}
_STORE_INST = {1: 'stb', 2: 'sth', 4: 'stw', 8: 'no way'}


def MRAlignDispatchTable():
    """
    Going through this dispatch table (which an int handler does) is the only
    route to access MRAlignLoads

    Emits, for 'vector' and for each length 1..8, eight store entries followed
    by eight load entries (one per ralign), each a dc.w offset to the routine
    that handles that case.
    """
    directive('align', 10)
    label('MRAlignDispatchTable')

    hwtab_sizes = ['vector', 1, 2, 3, 4, 5, 6, 7, 8]

    for howlong in hwtab_sizes:
        cmt(howlong, 'stores/loads')

        for ldst in 'sl':
            for ralign in range(8):
                if howlong == 'vector':
                    target = 'MRStoreVector' if ldst == 's' else 'MRLoadVector'
                else:
                    # integer load/store
                    target = 'MRStore' if ldst == 's' else 'MRLoad'
                    target += sequence_aligned_loadstores(howlong, ralign)

                directive('dc.w', '%s - FDP - (* - MRAlignDispatchTable)' % target)


# The table at the very end of the FDP, full of vector instructions!
# called from FDP_0554, which itself comes from the halfwit table, which seems to serve major_0x02ccc
def MRVectorAlignDispatchTable():
    """
    Emit one labelled 32-entry array per vector load/store opcode.

    Each entry is the opcode applied to v0..v31 (operands 'vN, 0, r23')
    followed by a branch to that opcode's continuation label.
    """
    pairs = [
        ('lvx', 'MRExecuted'),
        ('lvebx', 'FDP_0DA0'),
        ('lvehx', 'FDP_0DA0'),
        ('lvewx', 'FDP_0DA0'),
        ('stvx', 'MRExecuted'),
        ('stvebx', 'FDP_104C'),
        ('stvehx', 'FDP_1058'),
        ('stvewx', 'FDP_1064'),
    ]

    for firstinst_opcode, secondinst_dest in pairs:
        # The original referenced a misspelt name here (firstint_opcode),
        # which raised NameError as soon as the function was called.
        label(firstinst_opcode.upper() + 'Array')

        for i in range(32):
            directive(firstinst_opcode, v(i), 0, 'r23')
            directive('b', secondinst_dest)


def MRAlignLoads():
    """
    Emit the 'MRLoad<sizes>' routines, one per entry of FINAL_LOAD_LIST.

    Each routine performs the first access named in <sizes>, merges the result
    into mrHigh/mrLow, and then either finishes (b MRExecuted), falls through
    into the next routine when that routine handles the remaining accesses, or
    branches to the routine that does.
    """
    waterfall = FINAL_LOAD_LIST

    for wi, sizes in enumerate(waterfall):
        label('MRLoad' + sizes)

        sizes_as_list = [int(digit) for digit in sizes]
        this_size = sizes_as_list[0]
        total_size = sum(sizes_as_list)
        remain_size = total_size - this_size
        rest = sizes[1:]

        # PART 1: load a number of bytes equal to the first element in "sizes"

        if sizes == '8':
            # special case
            directive('lwz', 'mrLow', '-8(mrBase)')
            directive('lwz', 'mrHigh', '-4(mrBase)')

        elif sizes == '44':
            # special case: no need for scratch register
            directive('lwz', 'mrHigh', '-%d(mrBase)' % total_size)
            directive('subi', 'mrCtr', 'mrCtr', 2 * this_size)

        elif sizes == '4':
            # special case: emulate lwarx if asked
            directive('bc', 'BO_IF', 23, '@atomic')
            directive('lwz', 'mrLow', '-4(mrBase)')
            directive('b', 'MRExecuted')
            label('@atomic')
            directive('li', 'mrScratch', -4)
            directive('lwarx', 'mrScratch', 'mrBase')

        else:
            # use an intermediate scratch register then bit-hack onwards
            directive(_LOAD_INST[this_size], 'mrScratch', '-%d(mrBase)' % total_size)

            if len(sizes) > 1:
                directive('subi', 'mrCtr', 'mrCtr', 2 * this_size)

            # "exponent" = byte position counted from the right-hand end of the
            # 8-byte mrHigh:mrLow pair; the chunk just loaded starts here.
            thisexponent = remain_size

            for regexponent, regname in [(4, 'mrHigh'), (0, 'mrLow')]:
                # Skip a register that the chunk does not overlap at all.
                if regexponent >= thisexponent + this_size:
                    continue
                if thisexponent >= regexponent + 4:
                    continue

                # NOTE(review): the value sits in the low bytes of mrScratch
                # and rlwimi rotates left, so (thisexponent - regexponent) * 8
                # looks like the expected amount; the sign used here matches
                # the store path.  The two only differ when the byte offset is
                # odd -- confirm against the reference binary.
                lshift = (regexponent - thisexponent) * 8

                # Select only the bytes of this register covered by the chunk.
                mask = 0
                for exponent in range(thisexponent, thisexponent + this_size):
                    byte_in_reg = exponent - regexponent
                    if 0 <= byte_in_reg < 4:
                        mask |= 0xFF << (8 * byte_in_reg)

                directive('rlwimi', regname, 'mrScratch', normlshift(lshift), '0x%08X' % mask)

        # PART 2: jump somewhere that will do the rest of the loads in "sizes"

        if rest == '4':
            # special case: inline an lwz instead of jumping to 'MRLoad4'
            directive('lwz', 'mrLow', '-4(mrBase)')
            directive('b', 'MRExecuted')
        elif remain_size == 0:
            # finished executing
            directive('b', 'MRExecuted')
        elif wi + 1 < len(waterfall) and waterfall[wi + 1] == rest:
            # fall through
            pass
        else:
            directive('b', 'MRLoad' + rest)

        print()


def MRAlignStores():
    """
    Emit the 'MRStore<sizes>' routines, one per entry of FINAL_STORE_LIST.

    Each routine performs the first access named in <sizes> and then either
    falls through into the next routine, inlines a final single store,
    finishes (b MRExecuted), or branches to the routine handling the rest.
    """
    waterfall = FINAL_STORE_LIST

    for wi, sizes in enumerate(waterfall):
        label('MRStore' + sizes)

        sizes_as_list = [int(digit) for digit in sizes]
        this_size = sizes_as_list[0]
        total_size = sum(sizes_as_list)
        remain_size = total_size - this_size
        rest = sizes[1:]

        # PART 1: store a number of bytes equal to the first element in "sizes"

        inst = _STORE_INST[this_size]

        if sizes == '8':
            # special case
            directive('stw', 'mrLow', '-8(mrBase)')
            directive('stw', 'mrHigh', '-4(mrBase)')

        elif remain_size == 4:
            # straight store!
            directive(inst, 'mrHigh', '-%d(mrBase)' % total_size)
            if len(sizes) > 1:
                directive('subi', 'mrCtr', 'mrCtr', 2 * this_size)

        elif sizes == '4':
            # special case: emulate stwcx. if asked
            directive('bc', 'BO_IF', 23, '@atomic')
            directive('stw', 'mrLow', '-4(mrBase)')
            directive('b', 'MRExecuted')
            label('@atomic')
            directive('li', 'mrScratch', -4)
            directive('stwcx.', 'mrScratch', 'mrBase')
            directive('isync')
            directive('mfcr', 'mrScratch')
            directive('rlwimi', 'r13', 'mrScratch', 0, '0xFF000000')

        else:
            # arrange intermediate register then store it
            thisexponent = remain_size

            # The first register touched initialises mrScratch (rlwinm); a
            # second one, if any, is merged into it (rlwimi).
            fiddler = 'rlwinm'
            for regexponent, regname in [(0, 'mrLow'), (4, 'mrHigh')]:
                # Skip a register that the chunk does not overlap at all.
                if regexponent >= thisexponent + this_size:
                    continue
                if thisexponent >= regexponent + 4:
                    continue

                lshift = (regexponent - thisexponent) * 8
                # NOTE(review): the same whole-chunk mask is used for both
                # registers, so when a chunk straddles mrLow and mrHigh the
                # rlwimi would replace everything the rlwinm produced --
                # confirm this is intended.
                mask = (1 << (8 * this_size)) - 1
                directive(fiddler, 'mrScratch', regname, normlshift(lshift), '0x%08X' % mask)
                fiddler = 'rlwimi'

            directive(inst, 'mrScratch', '-%d(mrBase)' % total_size)
            if len(sizes) > 1:
                directive('subi', 'mrCtr', 'mrCtr', 2 * this_size)

        # PART 2: jump somewhere that will do the rest of the stores in "sizes"

        if wi + 1 < len(waterfall) and waterfall[wi + 1] == rest and rest != '4':
            # fall through
            # but beware the special case!
            pass
        elif len(rest) == 1:
            # special case: inline a single store instead of jumping
            directive(_STORE_INST[remain_size], 'mrLow', '-%d(mrBase)' % remain_size)
            directive('b', 'MRExecuted')
        elif remain_size == 0:
            # finished executing
            directive('b', 'MRExecuted')
        else:
            directive('b', 'MRStore' + rest)

        print()


# Script entry point: emit the store waterfall on stdout.
MRAlignStores()