tcc-65816/816-opt.py

#!/usr/bin/python

import sys
import re
import os

#import hotshot
#prof = hotshot.Profile('816-opt.prof')
#prof.start()

verbose = True
if os.getenv('OPT816_QUIET'): verbose = False

# open the assembler file and put lines in array text
text_raw = open(sys.argv[1],'r').readlines()
text = []
for l in text_raw:
  if not l.startswith(';'): text += [l.strip()]

# find .bss section symbols
bss = []
bsson = False
for l in text:
  if l == '.ramsection ".bss" bank $7e slot 2':
    bsson = True
    continue
  if l == '.ends':
    bsson = False
  if bsson:
    bss += [l.split(' ')[0]]
#print 'bss',bss

# checks if the line alters the control flow
def is_control(line):
  if len(line) > 0 and line[0] in 'jb+-' or line.endswith(':'): return True
  return False

def changes_accu(line):
  if (line[2] == 'a' and not line[:3] in ['pha','sta']) or (len(line) == 5 and line.endswith(' a')): return True
  else: return False

totalopt = 0	# total number of optimizations performed
opted = -1	# have we optimized in this pass?
opass = 0	# optimization pass counter
storetopseudo = re.compile('st([axyz]).b tcc__([rf][0-9]*h?)$')
storexytopseudo = re.compile('st([xy]).b tcc__([rf][0-9]*h?)$')
storeatopseudo = re.compile('sta.b tcc__([rf][0-9]*h?)$')
while opted:
  opass += 1
  if verbose: sys.stderr.write('optimization pass ' + str(opass) + ': ')
  opted = 0	# no optimizations performed
  text_opt = [] # optimized code array, will be filled in during this pass
  i = 0
  while i < len(text):
    if text[i].startswith('st'):
      # stores (accu/x/y/zero) to pseudo-registers
      r = storetopseudo.match(text[i])
      if r:
        # eliminate redundant stores
        doopt = False
        for j in range(i+1, min(len(text),i+30)):
          r1 = re.match('st([axyz]).b tcc__' + r.groups()[1] + '$', text[j])
          if r1:
            doopt = True	# another store to the same pregister
            break
          if text[j].startswith('jsr.l ') and not text[j].startswith('jsr.l tcc__'):
            doopt = True	# before function call (will be clobbered anyway)
            break
          # cases in which we don't pursue optimization further
          if is_control(text[j]) or ('tcc__' + r.groups()[1]) in text[j]: break # branch or other use of the preg
          if r.groups()[1].endswith('h') and ('[tcc__' + r.groups()[1].rstrip('h')) in text[j]: break # use as a pointer
        if doopt:
          i += 1 # skip redundant store
          opted += 1
          continue

      # stores (x/y) to pseudo-registers
      r = storexytopseudo.match(text[i])
      if r:
        # store hwreg to preg, push preg, function call -> push hwreg, function call
        if text[i+1] == 'pei (tcc__' + r.groups()[1] + ')' and text[i+2].startswith('jsr.l '):
          text_opt += ['ph' + r.groups()[0]]
          i += 2
          opted += 1
          continue
        # store hwreg to preg, push preg -> store hwreg to preg, push hwreg (shorter)
        if text[i+1] == 'pei (tcc__' + r.groups()[1] + ')':
          text_opt += [text[i]]
          text_opt += ['ph' + r.groups()[0]]
          i += 2
          opted += 1
          continue
        # store hwreg to preg, load hwreg from preg -> store hwreg to preg, transfer hwreg/hwreg (shorter)
        if text[i+1] == 'lda.b tcc__' + r.groups()[1] or text[i+1] == 'lda.b tcc__' + r.groups()[1] + " ; DON'T OPTIMIZE":
          text_opt += [text[i]]
          text_opt += ['t' + r.groups()[0] + 'a'] # FIXME: shouldn't this be marked as DON'T OPTIMIZE again?
          i += 2
          opted += 1
          continue

      # stores (accu only) to pseudo-registers
      r = storeatopseudo.match(text[i])
      if r:
        #sys.stderr.write('looking for lda.b tcc__r' + r.groups()[0] + ' in ' + text[i+1] + '\n')
        # store preg followed by load preg
        if text[i+1] == 'lda.b tcc__' + r.groups()[0]:
          #sys.stderr.write('found!\n')
          text_opt += [text[i]]	# keep store
          i += 2 # omit load
          opted += 1
          continue
        # store preg followed by load preg with ldx/ldy in between
        if (text[i+1].startswith('ldx') or text[i+1].startswith('ldy')) and text[i+2] == 'lda.b tcc__' + r.groups()[0]:
          text_opt += [text[i]]	# keep store
          text_opt += [text[i+1]]
          i += 3 # omit load
          opted += 1
          continue
        # store accu to preg, push preg, function call -> push accu, function call
        if text[i+1] == 'pei (tcc__' + r.groups()[0] + ')' and text[i+2].startswith('jsr.l '):
          text_opt += ['pha']
          i += 2
          opted += 1
          continue
        # store accu to preg, push preg -> store accu to preg, push accu (shorter)
        if text[i+1] == 'pei (tcc__' + r.groups()[0] + ')':
          text_opt += [text[i]]
          text_opt += ['pha']
          i += 2
          opted += 1
          continue
        # store accu to preg1, push preg2, push preg1 -> store accu to preg1, push preg2, push accu
        elif text[i+1].startswith('pei ') and text[i+2] == 'pei (tcc__' + r.groups()[0] + ')':
          text_opt += [text[i+1]]
          text_opt += [text[i]]
          text_opt += ['pha']
          i += 3
          opted += 1
          continue

        # convert incs/decs on pregs incs/decs on hwregs
        cont = False
        for crem in 'inc','dec':
          if text[i+1] == crem + '.b tcc__' + r.groups()[0]:
            # store to preg followed by crement on preg
            if text[i+2] == crem + '.b tcc__' + r.groups()[0] and text[i+3].startswith('lda'):
                # store to preg followed by two crements on preg
                # increment the accu first, then store it to preg
                text_opt += [crem + ' a',crem + ' a','sta.b tcc__' + r.groups()[0]]
                # a subsequent load can be omitted (the right value is already in the accu)
                if text[i+3] == 'lda.b tcc__' + r.groups()[0]: i += 4
                else: i += 3
                opted += 1
                cont = True
                break
            elif text[i+2].startswith('lda'): #text[i+2] == 'lda.b tcc__' + r.groups()[0]:
              # same thing with only one crement (FIXME: there should be a more clever way to do this...)
              text_opt += [crem + ' a','sta.b tcc__' + r.groups()[0]]
              if text[i+2] == 'lda.b tcc__' + r.groups()[0]: i += 3
              else: i += 2
              opted += 1
              cont = True
              break
        if cont: continue

        r1 = re.match('lda.b tcc__([rf][0-9]*)',text[i+1])
        if r1:
          #sys.stderr.write('t '+text[i+2][:3]+'\n')
          if text[i+2][:3] in ['and','ora']:
            # store to preg1, load from preg2, and/or preg1 -> store to preg1, and/or preg2
            #sys.stderr.write('found in line ' + str(i) + '!\n')
            if text[i+2][3:] == '.b tcc__' + r.groups()[0]:
              text_opt += [text[i]] # store
              text_opt += [text[i+2][:3] + '.b tcc__' + r1.groups()[0]]
              i += 3
              opted += 1
              continue

        # store to preg, switch to 8 bits, load from preg => skip the load
        if text[i+1] == 'sep #$20' and text[i+2] == 'lda.b tcc__' + r.groups()[0]:
          text_opt += [text[i]]
          text_opt += [text[i+1]]
          i += 3 # skip load
          opted += 1
          continue

        # two stores to preg without control flow or other uses of preg => skip first store
        if not is_control(text[i+1]) and not ('tcc__' + r.groups()[0]) in text[i+1]:
          if text[i+2] == text[i]:
            text_opt += [text[i+1]]
            text_opt += [text[i+2]]
            i += 3 # skip first store
            opted += 1
            continue

        # store hwreg to preg, load hwreg from preg -> store hwreg to preg, transfer hwreg/hwreg (shorter)
        r1 = re.match('ld([xy]).b tcc__' + r.groups()[0], text[i+1])
        if r1:
          text_opt += [text[i]]
          text_opt += ['ta' + r1.groups()[0]]
          i += 2
          opted += 1
          continue

        # store accu to preg then load accu from preg, with something in-between that does not alter
        # control flow or touch accu or preg => skip load
        if not (is_control(text[i+1]) or changes_accu(text[i+1]) or 'tcc__' + r.groups()[0] in text[i+1]):
          if text[i+2] == 'lda.b tcc__' + r.groups()[0]:
            text_opt += [text[i]]
            text_opt += [text[i+1]]
            i += 3 # skip load
            opted += 1
            continue

        # store preg1, clc, load preg2, add preg1 -> store preg1, clc, add preg2
        if text[i+1] == 'clc':
          r1 = re.match('lda.b tcc__(r[0-9]*)', text[i+2])
          if r1 and text[i+3] == 'adc.b tcc__' + r.groups()[0]:
            text_opt += [text[i]]
            text_opt += [text[i+1]]
            text_opt += ['adc.b tcc__' + r1.groups()[0]]
            i += 4 # skip load
            opted += 1
            continue

        # store accu to preg, asl preg => asl accu, store accu to preg
        # FIXME: is this safe? can we rely on code not making assumptions about the contents of the accu
        # after the shift?
        if text[i+1] == 'asl.b tcc__' + r.groups()[0]:
          text_opt += ['asl a']
          text_opt += [text[i]]
          i += 2
          opted += 1
          continue

      r = re.match('sta (.*),s$', text[i])
      if r:
        if text[i+1] == 'lda ' + r.groups()[0] + ',s':
          text_opt += [text[i]]
          i += 2 # omit load
          opted += 1
          continue
    # end startswith('st')

    if text[i].startswith('ld'):
      r = re.match('ldx #0', text[i])
      if r:
        r1 = re.match('lda.l (.*),x$', text[i+1])
        if r1 and not text[i+3].endswith(',x'):
          text_opt += ['lda.l ' + r1.groups()[0]]
          i += 2
          opted += 1
          continue
        elif r1:
          text_opt += ['lda.l ' + r1.groups()[0]]
          text_opt += [text[i+2]]
          text_opt += [text[i+3].replace(',x','')]
          i += 4
          opted += 1
          continue

      if text[i].startswith('lda.w #') and \
         text[i+1] == 'sta.b tcc__r9' and \
         text[i+2].startswith('lda.w #') and \
         text[i+3] == 'sta.b tcc__r9h' and \
         text[i+4] == 'sep #$20' and \
         text[i+5].startswith('lda.b ') and \
         text[i+6] == 'sta.b [tcc__r9]' and \
         text[i+7] == 'rep #$20':
        text_opt += ['sep #$20']
        text_opt += [text[i+5]]
        text_opt += ['sta.l ' + str(int(text[i+2][7:]) * 65536 + int(text[i][7:]))]
        text_opt += ['rep #$20']
        i += 8
        opted += 1
        #sys.stderr.write('7')
        continue

      if text[i] == 'lda.w #0':
        if text[i+1].startswith('sta.b ') and text[i+2].startswith('lda'):
          text_opt += [text[i+1].replace('sta.','stz.')]
          i += 2
          opted += 1
          continue
      elif text[i].startswith('lda.w #'):
        if text[i+1] == 'sep #$20' and text[i+2].startswith('sta ') and text[i+3] == 'rep #$20' and text[i+4].startswith('lda'):
          text_opt += ['sep #$20', text[i].replace('lda.w', 'lda.b'), text[i+2], text[i+3]]
          i += 4
          opted += 1
          continue

      if text[i].startswith('lda.b') and not is_control(text[i+1]) and not 'a' in text[i+1] and text[i+2].startswith('lda.b'):
        text_opt += [text[i+1],text[i+2]]
        i += 3
        opted += 1
        continue

      # don't write preg high back to stack if it hasn't been updated
      if text[i+1].endswith('h') and text[i+1].startswith('sta.b tcc__r') and text[i].startswith('lda ') and text[i].endswith(',s'):
        #sys.stderr.write('checking lines\n')
        #sys.stderr.write(text[i] + '\n' + text[i+1] + '\n')
        local = text[i][4:]
        reg = text[i+1][6:]
        # lda stack ; store high preg ; ... ; load high preg ; sta stack
        j = i + 2
        while j < len(text) - 2 and not is_control(text[j]) and not reg in text[j]:
          j += 1
        if text[j] == 'lda.b ' + reg and text[j+1] == 'sta ' + local:
          while i < j:
            text_opt += [text[i]]
            i += 1
          i += 2 # skip load high preg ; sta stack
          opted += 1
          continue

      # reorder copying of 32-bit value to preg if it looks as if that could
      # allow further optimization
      # looking for
      #   lda something
      #   sta.b tcc_rX
      #   lda something
      #   sta.b tcc_rYh
      #   ...tcc_rX...
      if text[i].startswith('lda') and text[i+1].startswith('sta.b tcc__r'):
        reg = text[i+1][6:]
        if not reg.endswith('h') and \
           text[i+2].startswith('lda') and not text[i+2].endswith(reg) and \
           text[i+3].startswith('sta.b tcc__r') and text[i+3].endswith('h') and \
           text[i+4].endswith(reg):
          text_opt += [text[i+2], text[i+3]]
          text_opt += [text[i], text[i+1]]
          i += 4
          # this is not an optimization per se, so we don't count it
          continue

      # compare optimizations inspired by optimore
      # These opts simplify compare operations, which are monstrous because
      # they have to take the long long case into account.
      # We try to detect those cases by checking if a tya follows the
      # comparison (not sure if this is reliable, but it passes the test suite)
      if text[i] == 'ldx #1' and \
         text[i+1].startswith('lda.b tcc__') and \
         text[i+2] == 'sec' and \
         text[i+3].startswith('sbc #') and \
         text[i+4] == 'tay' and \
         text[i+5] == 'beq +' and \
         text[i+6] == 'dex' and \
         text[i+7] == '+' and \
         text[i+8].startswith('stx.b tcc__') and \
         text[i+9] == 'txa' and \
         text[i+10] == 'bne +' and \
         text[i+11].startswith('brl ') and \
         text[i+12] == '+' and \
         text[i+13] != 'tya':
        text_opt += [text[i+1]]
        text_opt += ['cmp #' + text[i+3][5:]]
        text_opt += [text[i+5]]
        text_opt += [text[i+11]]	# brl
        text_opt += [text[i+12]]	# +
        i += 13
        opted += 1
        #sys.stderr.write('1')
        continue

      if text[i] == 'ldx #1' and \
         text[i+1] == 'sec' and \
         text[i+2].startswith('sbc #') and \
         text[i+3] == 'tay' and \
         text[i+4] == 'beq +' and \
         text[i+5] == 'dex' and \
         text[i+6] == '+' and \
         text[i+7].startswith('stx.b tcc__') and \
         text[i+8] == 'txa' and \
         text[i+9] == 'bne +' and \
         text[i+10].startswith('brl ') and \
         text[i+11] == '+' and \
         text[i+12] != 'tya':
        text_opt += ['cmp #' + text[i+2][5:]]
        text_opt += [text[i+4]]
        text_opt += [text[i+10]]	# brl
        text_opt += [text[i+11]]	# +
        i += 12
        opted += 1
        #sys.stderr.write('2')
        continue

      if text[i] == 'ldx #1' and \
         text[i+1].startswith('lda.b tcc__r') and \
         text[i+2] == 'sec' and \
         text[i+3].startswith('sbc.b tcc__r') and \
         text[i+4] == 'tay' and \
         text[i+5] == 'beq +' and \
         text[i+6] == 'bcs ++' and \
         text[i+7] == '+ dex' and \
         text[i+8] == '++' and \
         text[i+9].startswith('stx.b tcc__r') and \
         text[i+10] == 'txa' and \
         text[i+11] == 'bne +' and \
         text[i+12].startswith('brl ') and \
         text[i+13] == '+' and \
         text[i+14] != 'tya':
        text_opt += [text[i+1]]
        text_opt += ['cmp.b ' + text[i+3][6:]]
        text_opt += [text[i+5]]
        text_opt += ['bcc +']
        text_opt += ['brl ++']
        text_opt += ['+']
        text_opt += [text[i+12]]
        text_opt += ['++']
        i += 14
        opted += 1
        #sys.stderr.write('3')
        continue

      if text[i] == 'ldx #1' and \
         text[i+1] == 'sec' and \
         text[i+2].startswith('sbc.w #') and \
         text[i+3] == 'tay' and \
         text[i+4] == 'bvc +' and \
         text[i+5] == 'eor #$8000' and \
         text[i+6] == '+' and \
         text[i+7] == 'bmi +++' and \
         text[i+8] == '++' and \
         text[i+9] == 'dex' and \
         text[i+10] == '+++' and \
         text[i+11].startswith('stx.b tcc__r') and \
         text[i+12] == 'txa' and \
         text[i+13] == 'bne +' and \
         text[i+14].startswith('brl ') and \
         text[i+15] == '+' and \
         text[i+16] != 'tya':
        text_opt += [text[i+1]]
        text_opt += [text[i+2]]
        text_opt += [text[i+4]]
        text_opt += ['eor #$8000']
        text_opt += ['+']
        text_opt += ['bmi +']
        text_opt += [text[i+14]]
        text_opt += ['+']
        i += 16
        opted += 1
        #sys.stderr.write('4')
        continue

      if text[i] == 'ldx #1' and \
         text[i+1].startswith('lda.b tcc__r') and \
         text[i+2] == 'sec' and \
         text[i+3].startswith('sbc.b tcc__r') and \
         text[i+4] == 'tay' and \
         text[i+5] == 'bvc +' and \
         text[i+6] == 'eor #$8000' and \
         text[i+7] == '+' and \
         text[i+8] == 'bmi +++' and \
         text[i+9] == '++' and \
         text[i+10] == 'dex' and \
         text[i+11] == '+++' and \
         text[i+12].startswith('stx.b tcc__r') and \
         text[i+13] == 'txa' and \
         text[i+14] == 'bne +' and \
         text[i+15].startswith('brl ') and \
         text[i+16] == '+' and \
         text[i+17] != 'tya':
        text_opt += [text[i+1]]
        text_opt += [text[i+2]]
        text_opt += [text[i+3]]
        text_opt += [text[i+5]]
        text_opt += [text[i+6]]
        text_opt += ['+']
        text_opt += ['bmi +']
        text_opt += [text[i+15]]
        text_opt += ['+']
        i += 17
        opted += 1
        #sys.stderr.write('5')
        continue

      if text[i] == 'ldx #1' and \
         text[i+1] == 'sec' and \
         text[i+2].startswith('sbc.b tcc__r') and \
         text[i+3] == 'tay' and \
         text[i+4] == 'bvc +' and \
         text[i+5] == 'eor #$8000' and \
         text[i+6] == '+' and \
         text[i+7] == 'bmi +++' and \
         text[i+8] == '++' and \
         text[i+9] == 'dex' and \
         text[i+10] == '+++' and \
         text[i+11].startswith('stx.b tcc__r') and \
         text[i+12] == 'txa' and \
         text[i+13] == 'bne +' and \
         text[i+14].startswith('brl ') and \
         text[i+15] == '+' and \
         text[i+16] != 'tya':
        text_opt += [text[i+1]]
        text_opt += [text[i+2]]
        text_opt += [text[i+4]]
        text_opt += [text[i+5]]
        text_opt += ['+']
        text_opt += ['bmi +']
        text_opt += [text[i+14]]
        text_opt += ['+']
        i += 16
        opted += 1
        #sys.stderr.write('6')
        continue

    # end startswith('ld')

    if text[i] == 'rep #$20' and text[i+1] == 'sep #$20':
      i += 2
      opted += 1
      continue

    if text[i] == 'sep #$20' and text[i+1].startswith('lda #') and text[i+2] == 'pha' and text[i+3].startswith('lda #') and text[i+4] == 'pha':
      text_opt += ['pea.w (' + text[i+1].split('#')[1] + ' * 256 + ' + text[i+3].split('#')[1] + ')']
      text_opt += [text[i]]
      i += 5
      opted += 1
      continue

    r = re.match('adc #(.*)$',text[i])
    if r:
      r1 = re.match('sta.b (tcc__[fr][0-9]*)$', text[i+1])
      if r1:
        if text[i+2] == 'inc.b ' + r1.groups()[0] and text[i+3] == 'inc.b ' + r1.groups()[0]:
          text_opt += ['adc #' + r.groups()[0] + ' + 2']
          text_opt += [text[i+1]]
          i += 4
          opted += 1
          continue

    if text[i][:6] in ['lda.l ','sta.l ']:
      cont = False
      for b in bss:
        if text[i][2:].startswith('a.l ' + b + ' '):
          text_opt += [text[i].replace('lda.l','lda.w').replace('sta.l','sta.w')]
          i += 1
          opted += 1
          cont = True
          break
      if cont: continue

    if text[i].startswith('jmp.w ') or text[i].startswith('bra __'):
      j = i + 1
      cont = False
      while j < len(text) and text[j].endswith(':'):
        if text[i].endswith(text[j][:-1]):
          # redundant branch, discard it
          i += 1
          opted += 1
          cont = True
          break
        j += 1
      if cont: continue

    if text[i].startswith('jmp.w '):
      # worst case is a 4-byte instruction, so if the jump target is closer
      # than 32 instructions, we can safely substitute a branch
      label = text[i][6:] + ':'
      cont = False
      for lpos in range(max(0, i - 32), min(len(text), i + 32)):
        if text[lpos] == label:
          text_opt += [text[i].replace('jmp.w','bra')]
          i += 1
          opted += 1
          cont = True
          break
      if cont: continue

    text_opt += [text[i]]
    i += 1
  text = text_opt
  if verbose: sys.stderr.write(str(opted) + ' optimizations performed\n')
  totalopt += opted

for l in text_opt: print l
if verbose: sys.stderr.write(str(totalopt) + ' optimizations performed in total\n')

#prof.stop()