TOC guesser: use reloc opcodes to ID real TVecs

Reloc table opcodes are used to filter out coincidental TVector-like
things.

This problem was identified while debugging XTOC glue that didn't make
sense. Such glue, whose TOC offset matches no known import, is now ignored too.
This commit is contained in:
Elliot Nunn 2019-10-18 13:51:53 +08:00
parent cf5af0f5f1
commit e8a3012b41
1 changed file with 16 additions and 3 deletions

View File

@ -484,6 +484,7 @@ def dump_lowlevel(basepath):
for i in range(runLength):
relocations.append(dict(section=sectionIndex, offset=relocAddress, to=('section', sectionC))); relocAddress += 4
relocations.append(dict(section=sectionIndex, offset=relocAddress, to=('section', sectionD))); relocAddress += 4
if 'code' in sectionC and 'data' in sectionD: relocations[-2]['likelytv'] = 1
relocAddress += 4
elif subopcode == 0b0011: # RelocTVector8
@ -491,6 +492,7 @@ def dump_lowlevel(basepath):
for i in range(runLength):
relocations.append(dict(section=sectionIndex, offset=relocAddress, to=('section', sectionC))); relocAddress += 4
relocations.append(dict(section=sectionIndex, offset=relocAddress, to=('section', sectionD))); relocAddress += 4
if 'code' in sectionC and 'data' in sectionD: relocations[-2]['likelytv'] = 1
elif subopcode == 0b0100: # RelocVTable8
#print('RelocVTable8 runLength=%d' % (runLength))
@ -634,6 +636,7 @@ def dump_highlevel(basepath):
# Relocations in lookup-able form
relocs = read_python(basepath, 'ldump', 'relocations.txt')
likelytv = set((rl['section'], rl['offset']) for rl in relocs if rl.get('likelytv', False))
relocs = {(rl['section'], rl['offset']): rl['to'] for rl in relocs}
# The base of the TOC is not guaranteed to be the base of the data section... what is the TOC of our exported funcs?
@ -654,11 +657,16 @@ def dump_highlevel(basepath):
break
# Sometimes we need to fall back on an educated guess based on our apparent tvectors
# When we export even a single TVector, the TOC can be easily found as
# above. But some fragments, e.g. native sifters (nifts) and some USB
# code, only export some sort of dispatch table in which TVector pointers
# are difficult to identify. So we scan the entire relocation table to
# find things that look like TVectors, then try to identify a consensus
# among the real-looking TVectors.
if not table_of_contents:
guesses = []
for (reloc_sec, reloc_offset), (reloc_kind, reloc_targ_section) in relocs.items():
if 'data' in reloc_sec and reloc_kind == 'section' and 'code' in reloc_targ_section:
if 'data' in reloc_sec and reloc_kind == 'section' and 'code' in reloc_targ_section and (reloc_sec, reloc_offset) in likelytv:
toc_reloc_kind, toc_reloc_targ_section = relocs.get((reloc_sec, reloc_offset+4), (None, None))
if toc_reloc_kind == 'section' and 'data' in toc_reloc_targ_section:
secdata = read_bin(basepath, reloc_sec)
@ -726,7 +734,12 @@ def dump_highlevel(basepath):
if a != b and b != 0xFF: break
else:
toc_ofs, = struct.unpack_from('>h', code, ofs+2)
codelocs_xtocglue.append(dict(section=sec['filename'], offset=ofs, function=toc_imports[toc_ofs]))
try:
codelocs_xtocglue.append(dict(section=sec['filename'], offset=ofs, function=toc_imports[toc_ofs]))
except KeyError:
# The glue points inwards. This is quite rare, so just ignore it
pass
codelocs_xtocglue.sort(key=lambda dct: tuple(dct.values()))
write_python(codelocs_xtocglue, basepath, 'hdump', 'codelocs-xtocglue.txt')