powermac-rom/pdp.py

#!/usr/bin/env python3

def label(x):
    """
    Print each whitespace-separated name found in x on a line of its own.

    Nothing at all is printed when x is empty or contains only whitespace.
    """
    names = x.split()
    if names:
        # one print call; the joined text ends with print's own newline
        print('\n'.join(names))

def directive(x, *args):
    """
    Print one instruction/directive line.

    The line is two tabs, the mnemonic x, a gap, then every operand in args
    converted with str() and joined by ', '. The gap is one tab, or two tabs
    when the mnemonic is shorter than four characters (presumably to keep
    operands in the same column). When the joined operand text is empty,
    only the indented mnemonic is printed.
    """
    operands = ', '.join(map(str, args))
    if not operands:
        print('\t\t' + x)
        return

    gap = '\t\t' if len(x) < 4 else '\t'
    print('\t\t' + x + gap + operands)

def equ(k, v):
    """
    Print an equate line: str(k), a tab, the word 'equ', a tab, str(v).
    """
    fields = (str(k), 'equ', str(v))
    print('\t'.join(fields))

def cmt(*args):
    """
    Print a comment line: a ';' followed by the arguments, each converted
    with str(), all separated by single spaces.
    """
    words = [';']
    words.extend(str(a) for a in args)
    print(' '.join(words))

def r(x):
    """
    Return str(x) prefixed with 'r', e.g. r(3) -> 'r3'.
    """
    return 'r%s' % (x,)

def v(x):
    """
    Return str(x) prefixed with 'v', e.g. v(31) -> 'v31'.
    """
    return 'v%s' % (x,)

def f(x):
    """
    Return str(x) prefixed with 'f', e.g. f(1) -> 'f1'.
    """
    return 'f%s' % (x,)

def normlshift(x):
    """
    Reduce an integer left-shift amount x to the equivalent amount in 0..31.

    The callers in this file pass byte distances times 8 (possibly negative)
    and use the result as the shift operand of rlwinm/rlwimi directives.

    Python's % always returns a result with the sign of the divisor, so a
    single modulo gives the same answer as repeatedly adding or subtracting
    32, in constant time.
    """
    return x % 32


def sequence_aligned_loadstores(n, ralign):
    """
    Split an n-byte access whose right-hand (end) offset is ralign, taken
    modulo 8, into pieces of 1, 2, 4 or 8 bytes that are each naturally
    aligned.

    Pieces are picked from the left. Each piece is as large as the alignment
    of its start offset allows (capped at 8), then halved until it fits in
    the bytes still outstanding.

    Return the piece sizes as a string of digits in left-to-right order,
    like '8', '4', '22' or '121'. n == 0 gives ''.
    """
    pieces = []
    left = n
    while left:
        start = ralign - left
        low_bits = start & 7
        # The lowest set bit among bits 0-2 is the start offset's natural
        # alignment; if none is set the offset is 8-aligned.
        size = (low_bits & -low_bits) if low_bits else 8
        while size > left:
            size >>= 1
        pieces.append(str(size))
        left -= size
    return ''.join(pieces)


def permutations_of_aligned_loadstores():
    """
    Collect, as a set of strings, the result of sequence_aligned_loadstores
    for every length n in 1..8 combined with every ralign in 0..7.
    """
    return {
        sequence_aligned_loadstores(length, rhsalign)
        for length in range(1, 9)
        for rhsalign in range(8)
    }

# Set of distinct size-sequence strings, computed once when this module is
# loaded; list_perms_ending_with() scans it.
PERMUTATIONS_OF_ALIGNED_LOADSTORES = permutations_of_aligned_loadstores()


def list_perms_ending_with(x):
    """
    Generator used by final_load_list / final_store_list.

    Yield x itself, then look through PERMUTATIONS_OF_ALIGNED_LOADSTORES for
    strictly longer entries that end with x. For each distinct character
    found immediately before that suffix, taken in sorted order, recurse on
    that character prepended to x. The overall result is a depth-first,
    parent-before-children walk over ever longer suffixes.
    """
    yield x

    suffix_len = len(x)
    preceding = {
        seq[-suffix_len - 1]
        for seq in PERMUTATIONS_OF_ALIGNED_LOADSTORES
        if len(seq) > suffix_len and seq.endswith(x)
    }

    for lead in sorted(preceding):
        yield from list_perms_ending_with(lead + x)


def final_load_list():
    """
    Big waterfall of loads!

    Walk list_perms_ending_with for each final piece size '8', '4', '2', '1'
    in that order, drop the sequences '2' and '22', and return the collected
    sequences in reverse order. Reversal puts every sequence after the longer
    sequences that end with it, which is the order MRAlignLoads relies on
    for its fall-through check.

    NOTE(review): '2' and '22' are presumably provided by hand-written code
    elsewhere -- the reason for skipping them is not visible here.
    """
    skipped = ('2', '22')
    ordered = []

    for ender in '8421':
        ordered.extend(
            seq for seq in list_perms_ending_with(ender) if seq not in skipped
        )

    ordered.reverse()
    return ordered

# Ordered list of load size sequences, built once when this module is loaded;
# MRAlignLoads() emits one labelled routine per entry.
FINAL_LOAD_LIST = final_load_list()

def final_store_list():
    """
    Big waterfall of stores!

    Walk list_perms_ending_with for each final piece size '8', '4', '2', '1'
    in that order and return everything it yields, in reverse order. Unlike
    the load list, no sequence is filtered out. Reversal puts every sequence
    after the longer sequences that end with it, which is the order
    MRAlignStores relies on for its fall-through check.
    """
    ordered = []

    for ender in '8421':
        ordered.extend(list_perms_ending_with(ender))

    ordered.reverse()
    return ordered

# Ordered list of store size sequences, built once when this module is loaded;
# MRAlignStores() emits one labelled routine per entry.
FINAL_STORE_LIST = final_store_list()


################################################################
# serious-er part of file. codegen functions only!
################################################################

def MRAlignDispatchTable():
    """
    Print the MRAlignDispatchTable: an 'align 10' directive, the table
    label, then nine groups of sixteen dc.w entries.

    The groups are 'vector' followed by the byte counts 1..8, each announced
    by a comment line. Within a group come eight store entries (ralign 0..7)
    and then eight load entries. Vector entries point at MRStoreVector /
    MRLoadVector; integer entries point at MRStore<sizes> / MRLoad<sizes>,
    where <sizes> is sequence_aligned_loadstores(byte count, ralign).

    Per the original author's note, going through this table (which an int
    handler does) is the only route to the MRAlignLoads routines.
    """
    directive('align', 10)
    label('MRAlignDispatchTable')

    # stores are emitted before loads inside every group
    routine_stems = ('MRStore', 'MRLoad')
    group_kinds = ['vector'] + list(range(1, 9))

    for howlong in group_kinds:
        cmt(howlong, 'stores/loads')

        for stem in routine_stems:
            for ralign in range(8):
                if howlong == 'vector':
                    suffix = 'Vector'
                else:  # integer load/store
                    suffix = sequence_aligned_loadstores(howlong, ralign)

                directive('dc.w', '%s - FDP - (* - MRAlignDispatchTable)' % (stem + suffix))


# The table at the very end of the FDP, full of vector instructions!
# called from FDP_0554, which itself comes from the halfwit table, which seems to serve major_0x02ccc
def MRVectorAlignDispatchTable():
    """
    Print eight labelled arrays of vector load/store stubs.

    For every (opcode, destination) pair below, print the label
    '<OPCODE>Array' and then, for each of v0..v31, two lines: the vector
    instruction with operands 'vN, 0, r23' and a branch to the destination.
    """
    pairs = [
        ('lvx',    'MRExecuted'),
        ('lvebx',  'FDP_0DA0'),
        ('lvehx',  'FDP_0DA0'),
        ('lvewx',  'FDP_0DA0'),
        ('stvx',   'MRExecuted'),
        ('stvebx', 'FDP_104C'),
        ('stvehx', 'FDP_1058'),
        ('stvewx', 'FDP_1064'),
    ]

    for firstinst_opcode, secondinst_dest in pairs:
        # The label is derived from the loop variable; the original spelled
        # it 'firstint_opcode', an undefined name that raised NameError.
        label(firstinst_opcode.upper() + 'Array')
        for i in range(32):
            directive(firstinst_opcode, v(i), 0, 'r23')
            directive('b', secondinst_dest)


def MRAlignLoads():
    """
    Print the 'MRLoad<sizes>' routines, one per entry of FINAL_LOAD_LIST.

    Each routine performs the load for the first digit of its size sequence
    (at a negative offset from mrBase) and then reaches the routine for the
    remaining digits by falling through, by branching, or -- when nothing is
    left -- by branching to MRExecuted.

    The generic path models the value being assembled as eight byte
    positions: 0-3 are the byte lanes of mrLow, 4-7 those of mrHigh, and a
    piece followed by `remain_size` further bytes occupies positions
    remain_size .. remain_size + this_size - 1.
    """
    waterfall = FINAL_LOAD_LIST

    for wi, sizes in enumerate(waterfall):
        label('MRLoad' + sizes)

        sizes_as_list = [int(x) for x in sizes]
        this_size = sizes_as_list[0]
        total_size = sum(sizes_as_list)
        remain_size = sum(sizes_as_list[1:])


        # PART 1: load a number of bytes equal to the first element in "sizes"

        if sizes == '8': # special case
            # NOTE(review): here mrLow is paired with -8 and mrHigh with -4,
            # the reverse of the '44' path below (mrHigh at -8, mrLow at -4)
            # -- confirm which is intended.
            directive('lwz', 'mrLow', '-8(mrBase)')
            directive('lwz', 'mrHigh', '-4(mrBase)')

        elif sizes == '44': # special case: no need for scratch register
            directive('lwz', 'mrHigh', '-%d(mrBase)' % total_size)
            directive('subi', 'mrCtr', 'mrCtr', 2 * this_size)

        elif sizes == '4': # special case: emulate lwarx if asked
            directive('bc', 'BO_IF', 23, '@atomic')
            directive('lwz', 'mrLow', '-4(mrBase)')
            directive('b', 'MRExecuted')
            label('@atomic')
            directive('li', 'mrScratch', -4)
            # NOTE(review): lwarx normally takes three operands (rD, rA, rB);
            # only two are emitted here -- confirm against the target source.
            directive('lwarx', 'mrScratch', 'mrBase')

        else: # use an intermediate scratch register then bit-hack onwards
            inst = {1: 'lbz', 2: 'lhz', 4: 'lwz'}[this_size]

            directive(inst, 'mrScratch', '-%d(mrBase)' % total_size)
            if len(sizes) > 1: directive('subi', 'mrCtr', 'mrCtr', 2 * this_size)

            for regexponent, regname in [(4,'mrHigh'), (0,'mrLow')]:
                thisexponent = remain_size
                # skip a register whose four byte positions do not overlap the piece
                if regexponent >= thisexponent + this_size: continue
                if thisexponent >= regexponent + 4: continue

                # rlwimi rotates its source LEFT and then inserts under the
                # mask. The loaded piece sits in the low bytes of mrScratch
                # and belongs (thisexponent - regexponent) byte lanes higher
                # in the destination, so that distance is the rotate amount.
                # The original used the negated distance, which only matches
                # the mask when the distance is 0 or 2 lanes.
                lshift = (thisexponent - regexponent) * 8

                # mask = the lanes of this register that the piece covers
                mask = 0
                for i in range(thisexponent, thisexponent + this_size):
                    i -= regexponent
                    if not 0 <= i < 4: continue
                    mask |= 0xFF << (8 * i)

                directive('rlwimi', regname, 'mrScratch', normlshift(lshift), '0x%08X' % mask)


        # PART 2: jump somewhere that will do the rest of the loads in "sizes"

        if sizes[1:] == '4': # special case: inline an lwz instead of jumping to 'MRLoad4'
            directive('lwz', 'mrLow', '-4(mrBase)')
            directive('b', 'MRExecuted')

        elif remain_size == 0: # finished executing
            directive('b', 'MRExecuted')

        elif wi + 1 < len(waterfall) and waterfall[wi+1] == sizes[1:]: # fall through
            pass

        else:
            directive('b', 'MRLoad' + sizes[1:])

        print()

def MRAlignStores():
    """
    Print the 'MRStore<sizes>' routines, one per entry of FINAL_STORE_LIST.

    Each routine performs the store for the first digit of its size sequence
    (at a negative offset from mrBase) and then reaches the code for the
    remaining digits by falling through, by an inlined final store, or by
    branching; when nothing is left it branches to MRExecuted.

    The generic path models the value being stored as eight byte positions:
    0-3 are the byte lanes of mrLow, 4-7 those of mrHigh, and a piece
    followed by `remain_size` further bytes occupies positions
    remain_size .. remain_size + this_size - 1.
    """
    waterfall = FINAL_STORE_LIST

    for wi, sizes in enumerate(waterfall):
        label('MRStore' + sizes)

        sizes_as_list = [int(x) for x in sizes]
        this_size = sizes_as_list[0]
        total_size = sum(sizes_as_list)
        remain_size = sum(sizes_as_list[1:])


        # PART 1: store a number of bytes equal to the first element in "sizes"

        inst = {1: 'stb', 2: 'sth', 4: 'stw', 8: 'no way'}[this_size]

        if sizes == '8': # special case
            # NOTE(review): here mrLow is paired with -8 and mrHigh with -4,
            # the reverse of the two-word path (mrHigh first, mrLow at -4)
            # -- confirm which is intended.
            directive('stw', 'mrLow', '-8(mrBase)')
            directive('stw', 'mrHigh', '-4(mrBase)')

        elif remain_size == 4: # straight store! the piece is the low end of mrHigh
            directive(inst, 'mrHigh', '-%d(mrBase)' % total_size)
            if len(sizes) > 1: directive('subi', 'mrCtr', 'mrCtr', 2 * this_size)

        elif sizes == '4': # special case: emulate stwcx. if asked
            directive('bc', 'BO_IF', 23, '@atomic')
            directive('stw', 'mrLow', '-4(mrBase)')
            directive('b', 'MRExecuted')
            label('@atomic')
            directive('li', 'mrScratch', -4)
            # NOTE(review): stwcx. normally takes three operands (rS, rA, rB);
            # only two are emitted here -- confirm against the target source.
            directive('stwcx.', 'mrScratch', 'mrBase')
            directive('isync')
            directive('mfcr', 'mrScratch')
            directive('rlwimi', 'r13', 'mrScratch', 0, '0xFF000000')

        else: # arrange intermediate register then store it
            fiddler = 'rlwinm'
            piece_mask = (1 << (8 * this_size)) - 1

            for regexponent, regname in [(0,'mrLow'), (4,'mrHigh')]:
                thisexponent = remain_size
                # skip a register whose four byte positions do not overlap the piece
                if regexponent >= thisexponent + this_size: continue
                if thisexponent >= regexponent + 4: continue

                # rotate the register so the piece's bytes land in the low
                # lanes of mrScratch
                lshift = (regexponent - thisexponent) * 8

                if fiddler == 'rlwinm':
                    # first contributor: take the whole piece width
                    mask = piece_mask
                else:
                    # The piece straddles both registers. Insert only the
                    # mrScratch lanes that come from this register; the
                    # original reused the whole-piece mask here, which
                    # overwrote the bytes the preceding rlwinm had gathered.
                    mask = 0
                    for i in range(thisexponent, thisexponent + this_size):
                        if not regexponent <= i < regexponent + 4: continue
                        mask |= 0xFF << (8 * (i - thisexponent))

                directive(fiddler, 'mrScratch', regname, normlshift(lshift), '0x%08X' % mask)
                fiddler = 'rlwimi'

            directive(inst, 'mrScratch', '-%d(mrBase)' % total_size)
            if len(sizes) > 1: directive('subi', 'mrCtr', 'mrCtr', 2 * this_size)


        # PART 2: jump somewhere that will do the rest of the stores in "sizes"

        if wi + 1 < len(waterfall) and waterfall[wi+1] == sizes[1:] and sizes[1:] != '4': # fall through
            # but beware the special case! ('4' is never fallen into; it is
            # handled by the inlined store below instead)
            pass

        elif len(sizes[1:]) == 1: # special case: inline a single store instead of jumping
            inst = {1: 'stb', 2: 'sth', 4: 'stw', 8: 'no way'}[remain_size]
            directive(inst, 'mrLow', '-%d(mrBase)' % remain_size)
            directive('b', 'MRExecuted')

        elif remain_size == 0: # finished executing
            directive('b', 'MRExecuted')

        else:
            directive('b', 'MRStore' + sizes[1:])

        print()


# Executed when the module is run or imported: prints the store routines.
# This line invokes only MRAlignStores; no other codegen function is called here.
MRAlignStores()