mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-13 09:33:50 +00:00
Revert "[llvm.py] Implement interface to enhanced disassembler"
Chris Lattner says the edis interface is going away. It doesn't make sense to land something that will go away in the near future. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@152508 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
5a08cf4d34
commit
309a2c4e3b
@ -1,564 +0,0 @@
|
||||
#===- disassembler.py - Python LLVM Bindings -----------------*- python -*--===#
|
||||
#
|
||||
# The LLVM Compiler Infrastructure
|
||||
#
|
||||
# This file is distributed under the University of Illinois Open Source
|
||||
# License. See LICENSE.TXT for details.
|
||||
#
|
||||
#===------------------------------------------------------------------------===#
|
||||
|
||||
from abc import ABCMeta
|
||||
from abc import abstractmethod
|
||||
|
||||
from ctypes import CFUNCTYPE
|
||||
from ctypes import POINTER
|
||||
from ctypes import byref
|
||||
from ctypes import c_char_p
|
||||
from ctypes import c_int
|
||||
from ctypes import c_ubyte
|
||||
from ctypes import c_uint64
|
||||
from ctypes import c_uint
|
||||
from ctypes import c_void_p
|
||||
from ctypes import memmove
|
||||
|
||||
from .common import CachedProperty
|
||||
from .common import LLVMObject
|
||||
from .common import c_object_p
|
||||
from .common import get_library
|
||||
|
||||
# Public names of this module.
__all__ = [
    'DisassemblerByteArraySource',
    'DisassemblerFileSource',
    'DisassemblerSource',
    'Disassembler',
    'Instruction',
    'Operand',
    'Token',
]

# ctypes callback prototypes, keyed by a short name. The dict starts out
# empty; the 'byte_reader' and 'register_reader' entries are filled in at the
# bottom of the module, before register_library() looks them up.
callbacks = dict()
|
||||
|
||||
class DisassemblerSource:
    """Abstract base class describing the input of a disassembler.

    Every input handed to the disassembler has to implement this interface.

    In short, a source is a read-only sequence of bytes with a finite
    length.
    """
    __metaclass__ = ABCMeta

    @abstractmethod
    def __len__(self):
        """Return how many bytes are available for input."""

    @abstractmethod
    def get_byte(self, address):
        """Return the byte located at the given address."""

    @abstractmethod
    def start_address(self):
        """Return the address at which to start fetching bytes, as a long."""
|
||||
|
||||
class DisassemblerByteArraySource(DisassemblerSource):
    """Disassembler input backed by an in-memory byte array."""

    def __init__(self, b):
        """Wrap the sequence ``b``; it is stored by reference, not copied."""
        self._array = b

    def __len__(self):
        """Return the length of the wrapped sequence."""
        data = self._array
        return len(data)

    def get_byte(self, address):
        """Return the element of the wrapped sequence at index ``address``."""
        # Addresses start at 0 (see start_address), so the address is used
        # as the index without any translation.
        data = self._array
        return data[address]

    def start_address(self):
        """Return 0: the first element of the sequence is address 0."""
        return 0
|
||||
|
||||
class DisassemblerFileSource(DisassemblerSource):
    """A disassembler source for file segments.

    This allows you to feed in segments of a file into a Disassembler.
    """

    def __init__(self, filename, start_offset, length=None, end_offset=None,
                 start_address=None):
        """Create a new source from a file.

        A source begins at a specified byte offset and can be defined in
        terms of its byte length or its end byte offset.

        Arguments:

        filename -- path of the file to read; it is opened in binary mode.
        start_offset -- byte offset in the file at which the segment begins.
        length -- number of bytes in the segment. When given, it is used and
            end_offset is ignored.
        end_offset -- byte offset at which the segment ends. The segment
            holds end_offset - start_offset bytes, so the byte at end_offset
            itself is not part of it.
        start_address -- address reported for the first byte of the segment.
            Defaults to 0.

        Raises ValueError if neither length nor end_offset is given, or if
        the resulting byte count is negative.
        """
        if length is None and end_offset is None:
            raise ValueError('One of length or end_offset must be defined.')

        self._start_address = 0 if start_address is None else start_address

        count = length if length is not None else end_offset - start_offset
        if count < 0:
            # A negative size makes read() return everything up to the end
            # of the file, which would silently produce a segment the caller
            # never asked for.
            raise ValueError('Segment length must not be negative.')

        with open(filename, 'rb') as fh:
            fh.seek(start_offset)

            # A short read (segment runs past the end of the file) is not
            # treated as an error: __len__ reports the bytes actually read.
            self._buf = fh.read(count)

    def __len__(self):
        """Return the number of bytes that were actually read."""
        return len(self._buf)

    def get_byte(self, address):
        """Return the byte at ``address``.

        ``address`` is expressed in the address space that begins at
        start_address(). Raises IndexError when it falls outside the
        segment.
        """
        index = address - self._start_address
        # Reject addresses below the start explicitly: a negative index would
        # otherwise wrap around and return a byte from the end of the buffer.
        if index < 0 or index >= len(self._buf):
            raise IndexError('Address %d is outside of the source.' % address)

        return self._buf[index]

    def start_address(self):
        """Return the address assigned to the first byte of the segment."""
        return self._start_address
|
||||
|
||||
class Disassembler(LLVMObject):
    """Interface to LLVM's enhanced disassembler.

    The API is slightly different from the C API in that we tightly couple a
    disassembler instance to an input source. This saves an extra level of
    abstraction and makes the Python implementation easier.
    """

    # Values accepted by the ``syntax`` argument of the constructor.
    SYNTAX_X86_INTEL = 0
    SYNTAX_X86_ATT = 1
    SYNTAX_ARM_UAL = 2

    def __init__(self, triple, source, syntax=0):
        """Create a new disassembler instance.

        Arguments:

        triple -- str target type (e.g. x86_64-apple-darwin10)
        source -- DisassemblerSource instance to be fed into this disassembler.
        syntax -- The assembly syntax to use. One of the SYNTAX_* class
            constants. e.g. Disassembler.SYNTAX_X86_INTEL

        Raises Exception if EDGetDisassembler returns a non-zero code.
        """
        assert isinstance(source, DisassemblerSource)

        # EDGetDisassembler stores the disassembler reference into ptr.
        ptr = c_object_p()
        result = lib.EDGetDisassembler(byref(ptr), c_char_p(triple),
                                       c_int(syntax))
        if result != 0:
            raise Exception('Non-0 return code.')

        LLVMObject.__init__(self, ptr)

        self._source = source

    def get_instructions(self):
        """Obtain the instructions from the input.

        This is a generator for Instruction instances.

        By default, this will return instructions for the entire source which
        has been defined. It does this by querying the source's start_address()
        method and continues to request instructions until len(source) is
        exhausted.

        Raises Exception when EDCreateInsts does not report exactly one
        created instruction.
        """

        # We currently obtain 1 instruction at a time because it is easiest.

        # This serves as our EDByteReaderCallback. It is a proxy between C and
        # the Python DisassemblerSource.
        def byte_reader(dest, address, arg):
            try:
                byte = self._source.get_byte(address)
                memmove(dest, byte, 1)

                return 0
            except Exception:
                # Any failure to produce the byte is reported to the C side
                # through the return code. Only Exception is caught so that
                # KeyboardInterrupt and SystemExit are not silently turned
                # into a read error.
                return -1

        address = self._source.start_address()
        end_address = address + len(self._source)

        # Bound to a local so the ctypes callback object stays referenced for
        # as long as this generator is running.
        cb = callbacks['byte_reader'](byte_reader)
        while address < end_address:
            ptr = c_object_p()

            result = lib.EDCreateInsts(byref(ptr), c_uint(1), self, cb,
                                       address, c_void_p(None))

            # One instruction was requested, so anything but 1 is a failure.
            if result != 1:
                raise Exception('Error obtaining instruction at address %d' %
                                address)

            instruction = Instruction(ptr, self)
            yield instruction

            # Continue right after the instruction that was just decoded.
            address += instruction.byte_size
|
||||
|
||||
|
||||
class Instruction(LLVMObject):
    """Represents an individual instruction.

    Instruction instances are obtained from Disassembler.get_instructions().

    The C functions used here signal failure either through a non-zero
    status code or through a -1 result; both cases are turned into a plain
    Exception.
    """
    def __init__(self, ptr, disassembler):
        """Create a new instruction.

        Instructions are created from within this module. You should have no
        need to call this from outside this module.
        """
        assert isinstance(ptr, c_object_p)
        assert isinstance(disassembler, Disassembler)

        LLVMObject.__init__(self, ptr, disposer=lib.EDReleaseInst)

        # Not read anywhere in this class; presumably held so that the
        # disassembler outlives its instructions -- TODO confirm.
        self._disassembler = disassembler

    def __str__(self):
        """Return the string produced by EDGetInstString."""
        s = c_char_p(None)
        result = lib.EDGetInstString(byref(s), self)
        if result != 0:
            raise Exception('Non-0 return code.')

        return s.value

    @CachedProperty
    def byte_size(self):
        """The value reported by EDInstByteSize for this instruction."""
        result = lib.EDInstByteSize(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result

    @CachedProperty
    def id(self):
        """The unsigned value written by EDInstID for this instruction."""
        i = c_uint()
        result = lib.EDInstID(byref(i), self)
        if result != 0:
            raise Exception('Non-0 return code.')

        return i.value

    @CachedProperty
    def is_branch(self):
        """True when EDInstIsBranch returns a positive value."""
        result = lib.EDInstIsBranch(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def is_move(self):
        """True when EDInstIsMove returns a positive value."""
        result = lib.EDInstIsMove(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def branch_target_id(self):
        """The value reported by EDBranchTargetID for this instruction."""
        result = lib.EDBranchTargetID(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result

    @CachedProperty
    def move_source_id(self):
        """The value reported by EDMoveSourceID for this instruction."""
        result = lib.EDMoveSourceID(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result

    @CachedProperty
    def move_target_id(self):
        """The value reported by EDMoveTargetID for this instruction."""
        result = lib.EDMoveTargetID(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result

    def get_tokens(self):
        """Obtain the tokens in this instruction.

        This is a generator for Token instances.
        """
        count = lib.EDNumTokens(self)
        if count == -1:
            raise Exception('Error code returned.')

        for i in range(count):
            ptr = c_object_p()
            result = lib.EDGetToken(byref(ptr), self, c_int(i))
            if result != 0:
                raise Exception('Non-0 return code.')

            yield Token(ptr, self)

    def get_operands(self):
        """Obtain the operands in this instruction.

        This is a generator for Operand instances.
        """
        count = lib.EDNumOperands(self)
        if count == -1:
            raise Exception('Error code returned.')

        for i in range(count):
            ptr = c_object_p()
            result = lib.EDGetOperand(byref(ptr), self, c_int(i))
            if result != 0:
                raise Exception('Non-0 return code.')

            yield Operand(ptr, self)
|
||||
|
||||
class Token(LLVMObject):
    """Represents a token of an instruction.

    Token instances are obtained from Instruction.get_tokens().

    The C functions used here signal failure either through a non-zero
    status code or through a -1 result; both cases are turned into a plain
    Exception.
    """
    def __init__(self, ptr, instruction):
        """Create a new token from the reference ``ptr``."""
        assert isinstance(ptr, c_object_p)
        assert isinstance(instruction, Instruction)

        LLVMObject.__init__(self, ptr)

        # Not read anywhere in this class; presumably held so that the
        # instruction outlives its tokens -- TODO confirm.
        self._instruction = instruction

    def __str__(self):
        """Return the string produced by EDGetTokenString."""
        s = c_char_p(None)
        result = lib.EDGetTokenString(byref(s), self)
        if result != 0:
            raise Exception('Non-0 return code.')

        return s.value

    @CachedProperty
    def operand_index(self):
        """The value reported by EDOperandIndexForToken for this token."""
        result = lib.EDOperandIndexForToken(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result

    @CachedProperty
    def is_whitespace(self):
        """True when EDTokenIsWhitespace returns a positive value."""
        result = lib.EDTokenIsWhitespace(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def is_punctuation(self):
        """True when EDTokenIsPunctuation returns a positive value."""
        result = lib.EDTokenIsPunctuation(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def is_opcode(self):
        """True when EDTokenIsOpcode returns a positive value."""
        result = lib.EDTokenIsOpcode(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def is_literal(self):
        """True when EDTokenIsLiteral returns a positive value."""
        result = lib.EDTokenIsLiteral(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def is_register(self):
        """True when EDTokenIsRegister returns a positive value."""
        result = lib.EDTokenIsRegister(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def is_negative_literal(self):
        """True when EDTokenIsNegativeLiteral returns a positive value."""
        result = lib.EDTokenIsNegativeLiteral(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def absolute_value(self):
        """The integer written by EDLiteralTokenAbsoluteValue."""
        value = c_uint64()
        result = lib.EDLiteralTokenAbsoluteValue(byref(value), self)
        if result != 0:
            raise Exception('Non-0 return code.')

        # Hand back the plain integer rather than the ctypes wrapper, the
        # same way Instruction.id does.
        return value.value

    @CachedProperty
    def register_value(self):
        """The integer written by EDRegisterTokenValue."""
        value = c_uint()
        result = lib.EDRegisterTokenValue(byref(value), self)
        if result != 0:
            raise Exception('Non-0 return code.')

        # Hand back the plain integer rather than the ctypes wrapper, the
        # same way Instruction.id does.
        return value.value
|
||||
|
||||
class Operand(LLVMObject):
    """Represents an operand in an instruction.

    Operand instances are obtained from Instruction.get_operands().

    The C functions used here signal failure either through a non-zero
    status code or through a -1 result; both cases are turned into a plain
    Exception.

    FIXME support register evaluation.
    """
    def __init__(self, ptr, instruction):
        """Create a new operand from the reference ``ptr``."""
        assert isinstance(ptr, c_object_p)
        assert isinstance(instruction, Instruction)

        LLVMObject.__init__(self, ptr)

        # Not read anywhere in this class; presumably held so that the
        # instruction outlives its operands -- TODO confirm.
        self._instruction = instruction

    @CachedProperty
    def is_register(self):
        """True when EDOperandIsRegister returns a positive value."""
        result = lib.EDOperandIsRegister(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def is_immediate(self):
        """True when EDOperandIsImmediate returns a positive value."""
        result = lib.EDOperandIsImmediate(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def is_memory(self):
        """True when EDOperandIsMemory returns a positive value."""
        result = lib.EDOperandIsMemory(self)
        if result == -1:
            raise Exception('Error code returned.')

        return result > 0

    @CachedProperty
    def register_value(self):
        """The integer written by EDRegisterOperandValue."""
        value = c_uint()
        result = lib.EDRegisterOperandValue(byref(value), self)
        if result != 0:
            raise Exception('Non-0 return code.')

        # Hand back the plain integer rather than the ctypes wrapper, the
        # same way Instruction.id does.
        return value.value

    @CachedProperty
    def immediate_value(self):
        """The integer written by EDImmediateOperandValue."""
        value = c_uint64()
        result = lib.EDImmediateOperandValue(byref(value), self)
        if result != 0:
            raise Exception('Non-0 return code.')

        # Hand back the plain integer rather than the ctypes wrapper, the
        # same way Instruction.id does.
        return value.value
|
||||
|
||||
def register_library(library):
    """Declare the ctypes prototypes of the enhanced disassembler functions.

    For every ED* function used by this module, the argument types and the
    return type are set on the corresponding attribute of ``library``.

    Arguments:

    library -- object exposing the ED* functions as attributes (the value
        returned by get_library() in this module).
    """
    def declare(name, restype, *argtypes):
        # Set the complete prototype of a single function.
        function = getattr(library, name)
        function.argtypes = list(argtypes)
        function.restype = restype

    # Disassembler level.
    declare('EDGetDisassembler', c_int, POINTER(c_object_p), c_char_p, c_int)
    declare('EDGetRegisterName', c_int, POINTER(c_char_p), Disassembler,
            c_uint)
    declare('EDRegisterIsStackPointer', c_int, Disassembler, c_uint)
    declare('EDRegisterIsProgramCounter', c_int, Disassembler, c_uint)
    declare('EDCreateInsts', c_uint, POINTER(c_object_p), c_uint,
            Disassembler, callbacks['byte_reader'], c_uint64, c_void_p)

    # Instruction level.
    # EDReleaseInst returns void in the C API, hence no result type.
    declare('EDReleaseInst', None, Instruction)
    declare('EDInstByteSize', c_int, Instruction)
    declare('EDGetInstString', c_int, POINTER(c_char_p), Instruction)
    declare('EDInstID', c_int, POINTER(c_uint), Instruction)
    declare('EDInstIsBranch', c_int, Instruction)
    declare('EDInstIsMove', c_int, Instruction)
    declare('EDBranchTargetID', c_int, Instruction)
    declare('EDMoveSourceID', c_int, Instruction)
    declare('EDMoveTargetID', c_int, Instruction)

    # Token level.
    declare('EDNumTokens', c_int, Instruction)
    declare('EDGetToken', c_int, POINTER(c_object_p), Instruction, c_int)
    declare('EDGetTokenString', c_int, POINTER(c_char_p), Token)
    declare('EDOperandIndexForToken', c_int, Token)
    declare('EDTokenIsWhitespace', c_int, Token)
    declare('EDTokenIsPunctuation', c_int, Token)
    declare('EDTokenIsOpcode', c_int, Token)
    declare('EDTokenIsLiteral', c_int, Token)
    declare('EDTokenIsRegister', c_int, Token)
    declare('EDTokenIsNegativeLiteral', c_int, Token)
    declare('EDLiteralTokenAbsoluteValue', c_int, POINTER(c_uint64), Token)
    declare('EDRegisterTokenValue', c_int, POINTER(c_uint), Token)

    # Operand level.
    declare('EDNumOperands', c_int, Instruction)
    declare('EDGetOperand', c_int, POINTER(c_object_p), Instruction, c_int)
    declare('EDOperandIsRegister', c_int, Operand)
    declare('EDOperandIsImmediate', c_int, Operand)
    declare('EDOperandIsMemory', c_int, Operand)
    declare('EDRegisterOperandValue', c_int, POINTER(c_uint), Operand)
    declare('EDImmediateOperandValue', c_int, POINTER(c_uint64), Operand)
    # The result of EDEvaluateOperand is written through its first argument,
    # so it has to be declared as a pointer like the other output arguments.
    declare('EDEvaluateOperand', c_int, POINTER(c_uint64), Operand,
            callbacks['register_reader'], c_void_p)
|
||||
|
||||
# Enhanced disassembler.
# Both callback prototypes return an int and take an output pointer, an
# integer and an opaque pointer. They have to exist before register_library()
# runs because it looks both of them up.
callbacks.update({
    'byte_reader': CFUNCTYPE(c_int, POINTER(c_ubyte), c_uint64, c_void_p),
    'register_reader': CFUNCTYPE(c_int, POINTER(c_uint64), c_uint,
                                 c_void_p),
})

# Obtain the library once at import time and declare the prototypes on it.
lib = get_library()
register_library(lib)
|
@ -1,62 +0,0 @@
|
||||
from unittest import expectedFailure
|
||||
from unittest import skip
|
||||
|
||||
from .base import TestBase
|
||||
from ..disassembler import DisassemblerByteArraySource
|
||||
from ..disassembler import DisassemblerFileSource
|
||||
from ..disassembler import Disassembler
|
||||
from ..object import ObjectFile
|
||||
|
||||
class TestDisassembler(TestBase):
    """Tests for the Disassembler bindings."""

    def test_simple(self):
        """Disassemble one three-byte instruction and inspect the result."""
        raw_bytes = '\x67\xe3\x81' # jcxz -127
        target = 'i686-apple-darwin9'

        disassembler = Disassembler(target,
                                    DisassemblerByteArraySource(raw_bytes))
        decoded = list(disassembler.get_instructions())

        self.assertEqual(len(decoded), 1)

        # Properties of the instruction itself.
        inst = decoded[0]
        self.assertEqual(str(inst), '\tjcxz\t-127\n')
        self.assertEqual(inst.byte_size, 3)
        self.assertEqual(inst.id, 1032)
        self.assertTrue(inst.is_branch)
        self.assertFalse(inst.is_move)
        self.assertEqual(inst.branch_target_id, 0)

        # The first token is the opcode, the second one is whitespace.
        tokens = list(inst.get_tokens())
        self.assertEqual(len(tokens), 4)
        opcode = tokens[0]
        self.assertEqual(str(opcode), 'jcxz')
        self.assertFalse(opcode.is_whitespace)
        self.assertFalse(opcode.is_punctuation)
        self.assertTrue(opcode.is_opcode)
        self.assertFalse(opcode.is_literal)
        self.assertFalse(opcode.is_register)

        self.assertTrue(tokens[1].is_whitespace)

        operands = list(inst.get_operands())
        self.assertEqual(len(operands), 1)

        # TODO implement operand tests

    @skip('This test is horribly broken and probably not even correct.')
    def test_read_instructions(self):
        """Print the instructions of every symbol in the test binary."""
        filename = self.get_test_binary()
        object_file = ObjectFile(filename=filename)

        for symbol in object_file.get_symbols():
            # Read the three attributes in a fixed order before use.
            address = symbol.address
            offset = symbol.file_offset
            size = symbol.size

            source = DisassemblerFileSource(filename, offset, length=size,
                                            start_address=address)

            disassembler = Disassembler('x86-generic-gnu-linux', source)
            for instruction in disassembler.get_instructions():
                print(instruction)
|
Loading…
x
Reference in New Issue
Block a user