Adding support for printing operands symbolically to llvm's public 'C'

disassembler API.  Hooked this up to the ARM target so that tools such as Darwin's
otool(1) can now print things like branch targets symbolically, for example this:
  blx _puts
instead of this:
  blx #-36
And even print the expression encoded in the Mach-O relocation entries for
things like this:
  movt r0, :upper16:((_foo-_bar)+1234)


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129284 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Kevin Enderby 2011-04-11 18:08:50 +00:00
parent 0fb215a154
commit bd3327654b
8 changed files with 212 additions and 9 deletions

View File

@ -47,6 +47,49 @@ typedef int (*LLVMOpInfoCallback)(void *DisInfo,
int TagType,
void *TagBuf);
/**
* The initial support in LLVM MC for the most general form of a relocatable
* expression is "AddSymbol - SubtractSymbol + Offset". For some Darwin targets
* this full form is encoded in the relocation information so that AddSymbol and
* SubtractSymbol can be link edited independent of each other. Many other
* platforms only allow a relocatable expression of the form AddSymbol + Offset
* to be encoded.
*
* The LLVMOpInfoCallback() for the TagType value of 1 uses the struct
* LLVMOpInfo1. The value of the relocatable expression for the operand,
* including any PC adjustment, is passed in to the call back in the Value
* field. The symbolic information about the operand is returned using all
* the fields of the structure with the Offset of the relocatable expression
* returned in the Value field. It is possible that some symbols in the
* relocatable expression were assembly temporary symbols, for example
* "Ldata - LpicBase + constant", and only the Values of the symbols without
* symbol names are present in the relocation information. The VariantKind
* type is one of the Target specific #defines below and is used to print
* operands like "_foo@GOT", ":lower16:_foo", etc.
*/
/**
 * One symbol slot of a relocatable expression, used for both the AddSymbol
 * and the SubtractSymbol of struct LLVMOpInfo1. A slot is only meaningful
 * when Present is non-zero; it then carries either a symbol name or, when the
 * name is NULL, just the symbol's value.
 */
struct LLVMOpInfoSymbol1 {
uint64_t Present; /* 1 if this symbol is present */
char *Name; /* symbol name if not NULL */
uint64_t Value; /* symbol value if name is NULL */
};
/**
 * The operand information exchanged with LLVMOpInfoCallback() when TagType
 * is 1. It describes an expression of the form
 * "AddSymbol - SubtractSymbol + Value".
 */
struct LLVMOpInfo1 {
/* The symbol that is added in the expression, if present. */
struct LLVMOpInfoSymbol1 AddSymbol;
/* The symbol that is subtracted in the expression, if present. */
struct LLVMOpInfoSymbol1 SubtractSymbol;
/* On input, the value of the operand's expression including any PC
 * adjustment; on return, the constant Offset of the expression. */
uint64_t Value;
/* One of the LLVMDisassembler_VariantKind_* #defines. */
uint64_t VariantKind;
};
/**
* The operand VariantKinds for symbolic disassembly.
*/
#define LLVMDisassembler_VariantKind_None 0 /* all targets */
/**
* The ARM target VariantKinds.
*/
#define LLVMDisassembler_VariantKind_ARM_HI16 1 /* :upper16: */
#define LLVMDisassembler_VariantKind_ARM_LO16 2 /* :lower16: */
/**
* The type for the symbol lookup function. This may be called by the
* disassembler for such things like adding a comment for a PC plus a constant

View File

@ -10,12 +10,14 @@
#define MCDISASSEMBLER_H
#include "llvm/Support/DataTypes.h"
#include "llvm-c/Disassembler.h"
namespace llvm {
class MCInst;
class MemoryObject;
class raw_ostream;
class MCContext;
struct EDInstInfo;
@ -24,7 +26,7 @@ struct EDInstInfo;
class MCDisassembler {
public:
/// Constructor - Performs initial setup for the disassembler.
MCDisassembler() {}
MCDisassembler() : GetOpInfo(0), DisInfo(0), Ctx(0) {}
virtual ~MCDisassembler();
@ -53,6 +55,30 @@ public:
/// each MCInst opcode this disassembler returns.
/// NULL if there is no info for this target.
virtual EDInstInfo *getEDInfo() const { return (EDInstInfo*)0; }
private:
//
// Hooks for symbolic disassembly via the public 'C' interface.
//
// The function to get the symbolic information for operands.
LLVMOpInfoCallback GetOpInfo;
// The pointer to the block of symbolic information for above call back.
void *DisInfo;
// The assembly context for creating symbols and MCExprs in place of
// immediate operands when there is symbolic information.
MCContext *Ctx;
public:
/// setupForSymbolicDisassembly - Record the hooks used for symbolic
/// disassembly through the public 'C' interface.
///
/// \param getOpInfo - the callback queried for symbolic operand information.
/// \param disInfo   - the opaque block of information stored for that callback.
/// \param ctx       - the assembly context used to create symbols and MCExprs.
///
/// The three values are stored as-is; nothing is validated or copied here.
void setupForSymbolicDisassembly(LLVMOpInfoCallback getOpInfo,
void *disInfo,
MCContext *ctx) {
GetOpInfo = getOpInfo;
DisInfo = disInfo;
Ctx = ctx;
}
// Accessors for the symbolic disassembly hooks. Each returns the value last
// stored by setupForSymbolicDisassembly(), or 0 if it was never called.

/// getLLVMOpInfoCallback - The callback used to get symbolic operand info.
LLVMOpInfoCallback getLLVMOpInfoCallback() const { return GetOpInfo; }
/// getDisInfoBlock - The opaque information block stored for the callback.
void *getDisInfoBlock() const { return DisInfo; }
/// getMCContext - The context used to create symbols and MCExprs.
MCContext *getMCContext() const { return Ctx; }
};
} // namespace llvm

View File

@ -77,8 +77,9 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
assert(Ctx && "Unable to create MCContext!");
// Set up disassembler.
const MCDisassembler *DisAsm = TheTarget->createMCDisassembler();
MCDisassembler *DisAsm = TheTarget->createMCDisassembler();
assert(DisAsm && "Unable to create disassembler!");
DisAsm->setupForSymbolicDisassembly(GetOpInfo, DisInfo, Ctx);
// Set up the instruction printer.
int AsmPrinterVariant = MAI->getAssemblerDialect();

View File

@ -69,7 +69,7 @@ private:
public:
LLVMDisasmContext(std::string tripleName, void *disInfo, int tagType,
LLVMOpInfoCallback getOpInfo,
LLVMOpInfoCallback getOpInfo,
LLVMSymbolLookupCallback symbolLookUp,
const Target *theTarget, const MCAsmInfo *mAI,
llvm::TargetMachine *tM, const TargetAsmInfo *tai,

View File

@ -422,6 +422,10 @@ bool ARMDisassembler::getInstruction(MCInst &MI,
if (!Builder)
return false;
Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(),
getDisInfoBlock(), getMCContext(),
Address);
if (!Builder->Build(MI, insn))
return false;
@ -504,6 +508,10 @@ bool ThumbDisassembler::getInstruction(MCInst &MI,
Builder->SetSession(const_cast<Session *>(&SO));
Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(),
getDisInfoBlock(), getMCContext(),
Address);
if (!Builder->Build(MI, insn))
return false;

View File

@ -17,6 +17,7 @@
#include "ARMDisassemblerCore.h"
#include "ARMAddressingModes.h"
#include "ARMMCExpr.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@ -1066,7 +1067,8 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// We have an imm16 = imm4:imm12 (imm4=Inst{19:16}, imm12 = Inst{11:0}).
assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form");
unsigned Imm16 = slice(insn, 19, 16) << 12 | slice(insn, 11, 0);
MI.addOperand(MCOperand::CreateImm(Imm16));
if (!B->tryAddingSymbolicOperand(Imm16, 4, MI))
MI.addOperand(MCOperand::CreateImm(Imm16));
++OpIdx;
} else {
// We have a reg/imm form.
@ -3628,3 +3630,80 @@ ARMBasicMCBuilder *llvm::CreateMCBuilder(unsigned Opcode, ARMFormat Format) {
return new ARMBasicMCBuilder(Opcode, Format,
ARMInsts[Opcode].getNumOperands());
}
/// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic
/// operand in place of the immediate Value in the MCInst. The immediate
/// Value has had any PC adjustment made by the caller. If the getOpInfo()
/// function was set as part of the setupBuilderForSymbolicDisassembly() call
/// then that function is called to get any symbolic information at the
/// builder's Address for this instrution. If that returns non-zero then the
/// symbolic information is returns is used to create an MCExpr and that is
/// added as an operand to the MCInst. This function returns true if it adds
/// an operand to the MCInst and false otherwise.
bool ARMBasicMCBuilder::tryAddingSymbolicOperand(uint64_t Value,
uint64_t InstSize,
MCInst &MI) {
if (!GetOpInfo)
return false;
struct LLVMOpInfo1 SymbolicOp;
SymbolicOp.Value = Value;
if (!GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp))
return false;
const MCExpr *Add = NULL;
if (SymbolicOp.AddSymbol.Present) {
if (SymbolicOp.AddSymbol.Name) {
StringRef Name(SymbolicOp.AddSymbol.Name);
MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
Add = MCSymbolRefExpr::Create(Sym, *Ctx);
} else {
Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx);
}
}
const MCExpr *Sub = NULL;
if (SymbolicOp.SubtractSymbol.Present) {
if (SymbolicOp.SubtractSymbol.Name) {
StringRef Name(SymbolicOp.SubtractSymbol.Name);
MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
} else {
Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx);
}
}
const MCExpr *Off = NULL;
if (SymbolicOp.Value != 0)
Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
const MCExpr *Expr;
if (Sub) {
const MCExpr *LHS;
if (Add)
LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
else
LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
if (Off != 0)
Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
else
Expr = LHS;
} else if (Add) {
if (Off != 0)
Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
else
Expr = Add;
} else
Expr = Off;
if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16)
MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx)));
else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16)
MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx)));
else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None)
MI.addOperand(MCOperand::CreateExpr(Expr));
else
assert("bad SymbolicOp.VariantKind");
return true;
}

View File

@ -22,12 +22,17 @@
#define ARMDISASSEMBLERCORE_H
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm-c/Disassembler.h"
#include "ARMBaseInstrInfo.h"
#include "ARMRegisterInfo.h"
#include "ARMDisassembler.h"
namespace llvm {
class MCContext;
class ARMUtils {
public:
@ -202,7 +207,7 @@ private:
public:
ARMBasicMCBuilder(ARMBasicMCBuilder &B)
: Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm),
SP(B.SP) {
SP(B.SP), GetOpInfo(0), DisInfo(0), Ctx(0) {
Err = 0;
}
@ -261,6 +266,44 @@ private:
assert(SP);
return slice(SP->ITState, 7, 4);
}
private:
//
// Hooks for symbolic disassembly via the public 'C' interface.
//
// The function to get the symbolic information for operands.
LLVMOpInfoCallback GetOpInfo;
// The pointer to the block of symbolic information for above call back.
void *DisInfo;
// The assembly context for creating symbols and MCExprs in place of
// immediate operands when there is symbolic information.
MCContext *Ctx;
// The address of the instruction being disassembled.
uint64_t Address;
public:
/// setupBuilderForSymbolicDisassembly - Record the symbolic disassembly hooks
/// and the address of the instruction about to be built.
///
/// \param getOpInfo - the callback queried for symbolic operand information.
/// \param disInfo   - the opaque block of information passed to that callback.
/// \param ctx       - the assembly context used to create symbols and MCExprs.
/// \param address   - the address of the instruction being disassembled.
///
/// The values are stored as-is for later use by tryAddingSymbolicOperand().
void setupBuilderForSymbolicDisassembly(LLVMOpInfoCallback getOpInfo,
void *disInfo, MCContext *ctx,
uint64_t address) {
GetOpInfo = getOpInfo;
DisInfo = disInfo;
Ctx = ctx;
Address = address;
}
/// getBuilderAddress - The instruction address last stored by
/// setupBuilderForSymbolicDisassembly().
uint64_t getBuilderAddress() const { return Address; }
/// tryAddingSymbolicOperand - Tries to add a symbolic operand in place of the
/// immediate Value in the MCInst. The immediate Value has had any PC
/// adjustment made by the caller. If the getOpInfo() function was set as part
/// of the setupBuilderForSymbolicDisassembly() call then that function is
/// called to get any symbolic information at the builder's Address for this
/// instruction. If that returns non-zero then the symbolic information it
/// returns is used to create an MCExpr and that is added as an operand to the
/// MCInst. This function returns true if it adds an operand to the MCInst and
/// false otherwise.
bool tryAddingSymbolicOperand(uint64_t Value, uint64_t InstSize, MCInst &MI);
};
} // namespace llvm

View File

@ -1570,9 +1570,10 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
if (Opcode == ARM::t2ADDri12 || Opcode == ARM::t2SUBri12
|| Opcode == ARM::t2LEApcrel)
MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn)));
else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16)
MI.addOperand(MCOperand::CreateImm(getImm16(insn)));
else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) {
else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) {
if (!B->tryAddingSymbolicOperand(getImm16(insn), 4, MI))
MI.addOperand(MCOperand::CreateImm(getImm16(insn)));
} else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) {
uint32_t mask = 0;
if (getBitfieldInvMask(insn, mask))
MI.addOperand(MCOperand::CreateImm(mask));
@ -1756,7 +1757,9 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode,
Offset = decodeImm32_BLX(insn);
break;
}
MI.addOperand(MCOperand::CreateImm(Offset));
if (!B->tryAddingSymbolicOperand(Offset + B->getBuilderAddress() + 4, 4, MI))
MI.addOperand(MCOperand::CreateImm(Offset));
// This is an increment as some predicate operands may have been added first.
NumOpsAdded += 1;