mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-28 06:32:09 +00:00
Sketch out a CFG reconstruction mode for llvm-objdump.
- Not great yet, but it's a start. - Requires an object file with a symbol table. (I really want to fix this, but it'll need a whole new algorithm) - ELF and COFF won't work at the moment due to libObject shortcomings. To try it out run $ llvm-objdump -d --cfg foo.o This will create a graphviz file for every symbol in the object file's text section containing a CFG. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@135608 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
a4d0bd84f7
commit
685a2501b2
113
tools/llvm-objdump/MCFunction.cpp
Normal file
113
tools/llvm-objdump/MCFunction.cpp
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
//===-- MCFunction.cpp ----------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file defines the algorithm to break down a region of machine code
|
||||||
|
// into basic blocks and try to reconstruct a CFG from it.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "MCFunction.h"
|
||||||
|
#include "llvm/ADT/STLExtras.h"
|
||||||
|
#include "llvm/MC/MCDisassembler.h"
|
||||||
|
#include "llvm/MC/MCInst.h"
|
||||||
|
#include "llvm/MC/MCInstPrinter.h"
|
||||||
|
#include "llvm/MC/MCInstrDesc.h"
|
||||||
|
#include "llvm/MC/MCInstrInfo.h"
|
||||||
|
#include "llvm/Support/MemoryObject.h"
|
||||||
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
#include "llvm/Support/system_error.h"
|
||||||
|
#include <set>
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
MCFunction
|
||||||
|
MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
|
||||||
|
const MemoryObject &Region, uint64_t Start,
|
||||||
|
uint64_t End, const MCInstrInfo *InstrInfo,
|
||||||
|
raw_ostream &DebugOut) {
|
||||||
|
std::set<uint64_t> Splits;
|
||||||
|
Splits.insert(Start);
|
||||||
|
std::vector<MCDecodedInst> Instructions;
|
||||||
|
uint64_t Size;
|
||||||
|
|
||||||
|
// Disassemble code and gather basic block split points.
|
||||||
|
for (uint64_t Index = Start; Index < End; Index += Size) {
|
||||||
|
MCInst Inst;
|
||||||
|
|
||||||
|
if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut)) {
|
||||||
|
const MCInstrDesc &Desc = InstrInfo->get(Inst.getOpcode());
|
||||||
|
if (Desc.isBranch()) {
|
||||||
|
if (Desc.OpInfo[0].OperandType == MCOI::OPERAND_PCREL) {
|
||||||
|
int64_t Imm = Inst.getOperand(0).getImm();
|
||||||
|
// FIXME: Distinguish relocations from nop jumps.
|
||||||
|
if (Imm != 0) {
|
||||||
|
assert(Index+Imm+Size < End && "Branch out of function.");
|
||||||
|
Splits.insert(Index+Imm+Size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Splits.insert(Index+Size);
|
||||||
|
}
|
||||||
|
|
||||||
|
Instructions.push_back(MCDecodedInst(Index, Size, Inst));
|
||||||
|
} else {
|
||||||
|
errs() << "warning: invalid instruction encoding\n";
|
||||||
|
if (Size == 0)
|
||||||
|
Size = 1; // skip illegible bytes
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
MCFunction f(Name);
|
||||||
|
|
||||||
|
// Create basic blocks.
|
||||||
|
unsigned ii = 0, ie = Instructions.size();
|
||||||
|
for (std::set<uint64_t>::iterator spi = Splits.begin(),
|
||||||
|
spe = Splits.end(); spi != spe; ++spi) {
|
||||||
|
MCBasicBlock BB;
|
||||||
|
uint64_t BlockEnd = llvm::next(spi) == spe ? End : *llvm::next(spi);
|
||||||
|
// Add instructions to the BB.
|
||||||
|
for (; ii != ie; ++ii) {
|
||||||
|
if (Instructions[ii].Address < *spi ||
|
||||||
|
Instructions[ii].Address >= BlockEnd)
|
||||||
|
break;
|
||||||
|
BB.addInst(Instructions[ii]);
|
||||||
|
}
|
||||||
|
f.addBlock(*spi, BB);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate successors of each block.
|
||||||
|
for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
|
||||||
|
MCBasicBlock &BB = i->second;
|
||||||
|
if (BB.getInsts().empty()) continue;
|
||||||
|
const MCDecodedInst &Inst = BB.getInsts().back();
|
||||||
|
const MCInstrDesc &Desc = InstrInfo->get(Inst.Inst.getOpcode());
|
||||||
|
|
||||||
|
if (Desc.isBranch()) {
|
||||||
|
// PCRel branch, we know the destination.
|
||||||
|
if (Desc.OpInfo[0].OperandType == MCOI::OPERAND_PCREL) {
|
||||||
|
int64_t Imm = Inst.Inst.getOperand(0).getImm();
|
||||||
|
if (Imm != 0)
|
||||||
|
BB.addSucc(&f.getBlockAtAddress(Inst.Address+Inst.Size+Imm));
|
||||||
|
// Conditional branches can also fall through to the next block.
|
||||||
|
if (Desc.isConditionalBranch() && llvm::next(i) != e)
|
||||||
|
BB.addSucc(&next(i)->second);
|
||||||
|
} else {
|
||||||
|
// Indirect branch. Bail and add all blocks of the function as a
|
||||||
|
// successor.
|
||||||
|
for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i)
|
||||||
|
BB.addSucc(&i->second);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// No branch. Fall through to the next block.
|
||||||
|
if (!Desc.isReturn() && next(i) != e)
|
||||||
|
BB.addSucc(&next(i)->second);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return f;
|
||||||
|
}
|
88
tools/llvm-objdump/MCFunction.h
Normal file
88
tools/llvm-objdump/MCFunction.h
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
//===-- MCFunction.h ------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
// This file defines the data structures to hold a CFG reconstructed from
|
||||||
|
// machine code.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "llvm/ADT/ArrayRef.h"
|
||||||
|
#include "llvm/ADT/SmallPtrSet.h"
|
||||||
|
#include "llvm/MC/MCInst.h"
|
||||||
|
#include <map>
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
class MCDisassembler;
|
||||||
|
class MCInstrInfo;
|
||||||
|
class MemoryObject;
|
||||||
|
class raw_ostream;
|
||||||
|
|
||||||
|
/// MCDecodedInst - Small container to hold an MCInst and associated info like
|
||||||
|
/// address and size.
|
||||||
|
struct MCDecodedInst {
|
||||||
|
uint64_t Address;
|
||||||
|
uint64_t Size;
|
||||||
|
MCInst Inst;
|
||||||
|
|
||||||
|
MCDecodedInst(uint64_t Address, uint64_t Size, MCInst Inst)
|
||||||
|
: Address(Address), Size(Size), Inst(Inst) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
/// MCBasicBlock - Consists of multiple MCDecodedInsts and a list of successing
|
||||||
|
/// MCBasicBlocks.
|
||||||
|
class MCBasicBlock {
|
||||||
|
SmallVector<MCDecodedInst, 8> Insts;
|
||||||
|
typedef SmallPtrSet<MCBasicBlock*, 8> SetTy;
|
||||||
|
SetTy Succs;
|
||||||
|
public:
|
||||||
|
ArrayRef<MCDecodedInst> getInsts() const { return Insts; }
|
||||||
|
|
||||||
|
typedef SetTy::const_iterator succ_iterator;
|
||||||
|
succ_iterator succ_begin() const { return Succs.begin(); }
|
||||||
|
succ_iterator succ_end() const { return Succs.end(); }
|
||||||
|
|
||||||
|
void addInst(const MCDecodedInst &Inst) { Insts.push_back(Inst); }
|
||||||
|
void addSucc(MCBasicBlock *BB) { Succs.insert(BB); }
|
||||||
|
};
|
||||||
|
|
||||||
|
/// MCFunction - Represents a named function in machine code, containing
|
||||||
|
/// multiple MCBasicBlocks.
|
||||||
|
class MCFunction {
|
||||||
|
const StringRef Name;
|
||||||
|
// Keep BBs sorted by address.
|
||||||
|
typedef std::map<uint64_t, MCBasicBlock> MapTy;
|
||||||
|
MapTy Blocks;
|
||||||
|
public:
|
||||||
|
MCFunction(StringRef Name) : Name(Name) {}
|
||||||
|
|
||||||
|
// Create an MCFunction from a region of binary machine code.
|
||||||
|
static MCFunction
|
||||||
|
createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
|
||||||
|
const MemoryObject &Region, uint64_t Start, uint64_t End,
|
||||||
|
const MCInstrInfo *InstrInfo, raw_ostream &DebugOut);
|
||||||
|
|
||||||
|
typedef MapTy::iterator iterator;
|
||||||
|
iterator begin() { return Blocks.begin(); }
|
||||||
|
iterator end() { return Blocks.end(); }
|
||||||
|
|
||||||
|
StringRef getName() const { return Name; }
|
||||||
|
|
||||||
|
MCBasicBlock &addBlock(uint64_t Address, const MCBasicBlock &BB) {
|
||||||
|
assert(!Blocks.count(Address) && "Already a BB at address.");
|
||||||
|
return Blocks[Address] = BB;
|
||||||
|
}
|
||||||
|
|
||||||
|
MCBasicBlock &getBlockAtAddress(uint64_t Address) {
|
||||||
|
assert(Blocks.count(Address) && "No BB at address.");
|
||||||
|
return Blocks[Address];
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
@ -13,6 +13,7 @@
|
|||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "MCFunction.h"
|
||||||
#include "llvm/Object/ObjectFile.h"
|
#include "llvm/Object/ObjectFile.h"
|
||||||
#include "llvm/ADT/OwningPtr.h"
|
#include "llvm/ADT/OwningPtr.h"
|
||||||
#include "llvm/ADT/Triple.h"
|
#include "llvm/ADT/Triple.h"
|
||||||
@ -21,6 +22,8 @@
|
|||||||
#include "llvm/MC/MCDisassembler.h"
|
#include "llvm/MC/MCDisassembler.h"
|
||||||
#include "llvm/MC/MCInst.h"
|
#include "llvm/MC/MCInst.h"
|
||||||
#include "llvm/MC/MCInstPrinter.h"
|
#include "llvm/MC/MCInstPrinter.h"
|
||||||
|
#include "llvm/MC/MCInstrDesc.h"
|
||||||
|
#include "llvm/MC/MCInstrInfo.h"
|
||||||
#include "llvm/Support/CommandLine.h"
|
#include "llvm/Support/CommandLine.h"
|
||||||
#include "llvm/Support/Debug.h"
|
#include "llvm/Support/Debug.h"
|
||||||
#include "llvm/Support/Format.h"
|
#include "llvm/Support/Format.h"
|
||||||
@ -52,6 +55,10 @@ namespace {
|
|||||||
Disassembled("d", cl::desc("Alias for --disassemble"),
|
Disassembled("d", cl::desc("Alias for --disassemble"),
|
||||||
cl::aliasopt(Disassemble));
|
cl::aliasopt(Disassemble));
|
||||||
|
|
||||||
|
// --cfg: emit one graphviz file per symbol containing its reconstructed CFG.
// The trailing space after "and" matters: the two literals are concatenated
// into a single help string.
cl::opt<bool>
CFG("cfg", cl::desc("Create a CFG for every symbol in the object file and "
                    "write it to a graphviz file"));
|
||||||
|
|
||||||
cl::opt<std::string>
|
cl::opt<std::string>
|
||||||
TripleName("triple", cl::desc("Target triple to disassemble for, "
|
TripleName("triple", cl::desc("Target triple to disassemble for, "
|
||||||
"see -version for available targets"));
|
"see -version for available targets"));
|
||||||
@ -156,6 +163,7 @@ static void DisassembleInput(const StringRef &Filename) {
|
|||||||
// GetTarget prints out stuff.
|
// GetTarget prints out stuff.
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
const MCInstrInfo *InstrInfo = TheTarget->createMCInstrInfo();
|
||||||
|
|
||||||
outs() << '\n';
|
outs() << '\n';
|
||||||
outs() << Filename
|
outs() << Filename
|
||||||
@ -233,15 +241,14 @@ static void DisassembleInput(const StringRef &Filename) {
|
|||||||
uint64_t End = si == se-1 ? SectSize : Symbols[si + 1].first - 1;
|
uint64_t End = si == se-1 ? SectSize : Symbols[si + 1].first - 1;
|
||||||
outs() << '\n' << Symbols[si].second << ":\n";
|
outs() << '\n' << Symbols[si].second << ":\n";
|
||||||
|
|
||||||
for (Index = Start; Index < End; Index += Size) {
|
|
||||||
MCInst Inst;
|
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
|
raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
|
||||||
#else
|
#else
|
||||||
raw_ostream &DebugOut = nulls();
|
raw_ostream &DebugOut = nulls();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
for (Index = Start; Index < End; Index += Size) {
|
||||||
|
MCInst Inst;
|
||||||
if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, DebugOut)) {
|
if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, DebugOut)) {
|
||||||
uint64_t addr;
|
uint64_t addr;
|
||||||
if (error(i->getAddress(addr))) break;
|
if (error(i->getAddress(addr))) break;
|
||||||
@ -255,6 +262,36 @@ static void DisassembleInput(const StringRef &Filename) {
|
|||||||
Size = 1; // skip illegible bytes
|
Size = 1; // skip illegible bytes
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (CFG) {
|
||||||
|
MCFunction f =
|
||||||
|
MCFunction::createFunctionFromMC(Symbols[si].second, DisAsm.get(),
|
||||||
|
memoryObject, Start, End, InstrInfo,
|
||||||
|
DebugOut);
|
||||||
|
|
||||||
|
// Start a new dot file.
|
||||||
|
std::string Error;
|
||||||
|
raw_fd_ostream Out((f.getName().str() + ".dot").c_str(), Error);
|
||||||
|
|
||||||
|
Out << "digraph " << f.getName() << " {\n";
|
||||||
|
Out << "graph [ rankdir = \"LR\" ];\n";
|
||||||
|
for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
|
||||||
|
Out << '"' << (uintptr_t)&i->second << "\" [ label=\"<a>";
|
||||||
|
// Print instructions.
|
||||||
|
for (unsigned ii = 0, ie = i->second.getInsts().size(); ii != ie;
|
||||||
|
++ii) {
|
||||||
|
IP->printInst(&i->second.getInsts()[ii].Inst, Out);
|
||||||
|
Out << '|';
|
||||||
|
}
|
||||||
|
Out << "<o>\" shape=\"record\" ];\n";
|
||||||
|
|
||||||
|
// Add edges.
|
||||||
|
for (MCBasicBlock::succ_iterator si = i->second.succ_begin(),
|
||||||
|
se = i->second.succ_end(); si != se; ++si)
|
||||||
|
Out << (uintptr_t)&i->second << ":o -> " << (uintptr_t)*si <<":a\n";
|
||||||
|
}
|
||||||
|
Out << "}\n";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -271,6 +308,7 @@ int main(int argc, char **argv) {
|
|||||||
llvm::InitializeAllTargets();
|
llvm::InitializeAllTargets();
|
||||||
llvm::InitializeAllMCAsmInfos();
|
llvm::InitializeAllMCAsmInfos();
|
||||||
llvm::InitializeAllMCCodeGenInfos();
|
llvm::InitializeAllMCCodeGenInfos();
|
||||||
|
llvm::InitializeAllMCInstrInfos();
|
||||||
llvm::InitializeAllAsmPrinters();
|
llvm::InitializeAllAsmPrinters();
|
||||||
llvm::InitializeAllAsmParsers();
|
llvm::InitializeAllAsmParsers();
|
||||||
llvm::InitializeAllDisassemblers();
|
llvm::InitializeAllDisassemblers();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user