MC CFG: Support disassembly at arbitrary addresses in MCObjectDisassembler.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188889 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Ahmed Bougacha 2013-08-21 07:28:55 +00:00
parent 05a81020d9
commit f176482752
2 changed files with 236 additions and 7 deletions

View File

@ -16,8 +16,11 @@
#define LLVM_MC_MCOBJECTDISASSEMBLER_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/MemoryObject.h"
#include <vector>
namespace llvm {
@ -55,6 +58,19 @@ public:
MCModule *buildEmptyModule();
typedef std::vector<uint64_t> AddressSetTy;
/// \name Create a new MCFunction.
MCFunction *createFunction(MCModule *Module, uint64_t BeginAddr,
AddressSetTy &CallTargets,
AddressSetTy &TailCallTargets);
/// \brief Set the region on which to fallback if disassembly was requested
/// somewhere not accessible in the object file.
/// This is used for dynamic disassembly (see RawMemoryObject).
void setFallbackRegion(OwningPtr<MemoryObject> &Region) {
FallbackRegion.reset(Region.take());
}
/// \brief Set the symbolizer to use to get information on external functions.
/// Note that this isn't used to do instruction-level symbolization (that is,
/// plugged into MCDisassembler), but to symbolize function call targets.
@ -96,6 +112,16 @@ protected:
const MCInstrAnalysis &MIA;
MCObjectSymbolizer *MOS;
/// \brief The fallback memory region, outside the object file.
OwningPtr<MemoryObject> FallbackRegion;
/// \brief Return a memory region suitable for reading starting at \p Addr.
/// In most cases, this returns a StringRefMemoryObject backed by the
/// containing section. When no section was found, this returns the
/// FallbackRegion, if it is suitable.
/// If it is not, or if there is no fallback region, this returns 0.
MemoryObject *getRegionFor(uint64_t Addr);
private:
/// \brief Fill \p Module by creating an atom for each section.
/// This could be made much smarter, using information like symbols, but also
@ -108,6 +134,10 @@ private:
/// When the CFG is built, contiguous instructions that were previously in a
/// single MCTextAtom will be split in multiple basic block atoms.
void buildCFG(MCModule *Module);
MCBasicBlock *getBBAt(MCModule *Module, MCFunction *MCFN, uint64_t BeginAddr,
AddressSetTy &CallTargets,
AddressSetTy &TailCallTargets);
};
class MCMachOObjectDisassembler : public MCObjectDisassembler {

View File

@ -27,7 +27,6 @@
#include "llvm/Support/StringRefMemoryObject.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <vector>
using namespace llvm;
using namespace object;
@ -62,6 +61,11 @@ ArrayRef<uint64_t> MCObjectDisassembler::getStaticExitFunctions() {
return ArrayRef<uint64_t>();
}
MemoryObject *MCObjectDisassembler::getRegionFor(uint64_t Addr) {
// FIXME: Keep track of object sections.
return FallbackRegion.get();
}
uint64_t MCObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) {
return Addr;
}
@ -156,6 +160,7 @@ namespace {
MCBasicBlock *BB;
BBInfoSetTy Succs;
BBInfoSetTy Preds;
MCObjectDisassembler::AddressSetTy SuccAddrs;
BBInfo() : Atom(0), BB(0) {}
@ -166,10 +171,14 @@ namespace {
};
}
static void RemoveDupsFromAddressVector(MCObjectDisassembler::AddressSetTy &V) {
std::sort(V.begin(), V.end());
V.erase(std::unique(V.begin(), V.end()), V.end());
}
void MCObjectDisassembler::buildCFG(MCModule *Module) {
typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
BBInfoByAddrTy BBInfos;
typedef std::vector<uint64_t> AddressSetTy;
AddressSetTy Splits;
AddressSetTy Calls;
@ -213,11 +222,8 @@ void MCObjectDisassembler::buildCFG(MCModule *Module) {
}
}
std::sort(Splits.begin(), Splits.end());
Splits.erase(std::unique(Splits.begin(), Splits.end()), Splits.end());
std::sort(Calls.begin(), Calls.end());
Calls.erase(std::unique(Calls.begin(), Calls.end()), Calls.end());
RemoveDupsFromAddressVector(Splits);
RemoveDupsFromAddressVector(Calls);
// Split text atoms into basic block atoms.
for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end();
@ -296,6 +302,199 @@ void MCObjectDisassembler::buildCFG(MCModule *Module) {
}
}
// Basic idea of the disassembly + discovery:
//
// start with the wanted address, insert it in the worklist
// while worklist not empty, take next address in the worklist:
// - check if atom exists there
// - if middle of atom:
// - split basic blocks referencing the atom
// - look for an already encountered BBInfo (using a map<atom, bbinfo>)
// - if there is, split it (new one, fallthrough, move succs, etc..)
// - if start of atom: nothing else to do
// - if no atom: create new atom and new bbinfo
// - look at the last instruction in the atom, add succs to worklist
// for all elements in the worklist:
// - create basic block, update preds/succs, etc..
//
MCBasicBlock *MCObjectDisassembler::getBBAt(MCModule *Module, MCFunction *MCFN,
uint64_t BBBeginAddr,
AddressSetTy &CallTargets,
AddressSetTy &TailCallTargets) {
typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
typedef SmallSetVector<uint64_t, 16> AddrWorklistTy;
BBInfoByAddrTy BBInfos;
AddrWorklistTy Worklist;
Worklist.insert(BBBeginAddr);
for (size_t wi = 0; wi < Worklist.size(); ++wi) {
const uint64_t BeginAddr = Worklist[wi];
BBInfo *BBI = &BBInfos[BeginAddr];
MCTextAtom *&TA = BBI->Atom;
assert(!TA && "Discovered basic block already has an associated atom!");
// Look for an atom at BeginAddr.
if (MCAtom *A = Module->findAtomContaining(BeginAddr)) {
// FIXME: We don't care about mixed atoms, see above.
TA = cast<MCTextAtom>(A);
// The found atom doesn't begin at BeginAddr, we have to split it.
if (TA->getBeginAddr() != BeginAddr) {
// FIXME: Handle overlapping atoms: middle-starting instructions, etc..
MCTextAtom *NewTA = TA->split(BeginAddr);
// Look for an already encountered basic block that needs splitting
BBInfoByAddrTy::iterator It = BBInfos.find(TA->getBeginAddr());
if (It != BBInfos.end() && It->second.Atom) {
BBI->SuccAddrs = It->second.SuccAddrs;
It->second.SuccAddrs.clear();
It->second.SuccAddrs.push_back(BeginAddr);
}
TA = NewTA;
}
BBI->Atom = TA;
} else {
// If we didn't find an atom, then we have to disassemble to create one!
MemoryObject *Region = getRegionFor(BeginAddr);
if (!Region)
llvm_unreachable(("Couldn't find suitable region for disassembly at " +
utostr(BeginAddr)).c_str());
uint64_t InstSize;
uint64_t EndAddr = Region->getBase() + Region->getExtent();
// We want to stop before the next atom and have a fallthrough to it.
if (MCTextAtom *NextAtom =
cast_or_null<MCTextAtom>(Module->findFirstAtomAfter(BeginAddr)))
EndAddr = std::min(EndAddr, NextAtom->getBeginAddr());
for (uint64_t Addr = BeginAddr; Addr < EndAddr; Addr += InstSize) {
MCInst Inst;
if (Dis.getInstruction(Inst, InstSize, *Region, Addr, nulls(),
nulls())) {
if (!TA)
TA = Module->createTextAtom(Addr, Addr);
TA->addInst(Inst, InstSize);
} else {
// We don't care about splitting mixed atoms either.
llvm_unreachable("Couldn't disassemble instruction in atom.");
}
uint64_t BranchTarget;
if (MIA.evaluateBranch(Inst, Addr, InstSize, BranchTarget)) {
if (MIA.isCall(Inst))
CallTargets.push_back(BranchTarget);
}
if (MIA.isTerminator(Inst))
break;
}
BBI->Atom = TA;
}
assert(TA && "Couldn't disassemble atom, none was created!");
assert(TA->begin() != TA->end() && "Empty atom!");
MemoryObject *Region = getRegionFor(TA->getBeginAddr());
assert(Region && "Couldn't find region for already disassembled code!");
uint64_t EndRegion = Region->getBase() + Region->getExtent();
// Now we have a basic block atom, add successors.
// Add the fallthrough block.
if ((MIA.isConditionalBranch(TA->back().Inst) ||
!MIA.isTerminator(TA->back().Inst)) &&
(TA->getEndAddr() + 1 < EndRegion)) {
BBI->SuccAddrs.push_back(TA->getEndAddr() + 1);
Worklist.insert(TA->getEndAddr() + 1);
}
// If the terminator is a branch, add the target block.
if (MIA.isBranch(TA->back().Inst)) {
uint64_t BranchTarget;
if (MIA.evaluateBranch(TA->back().Inst, TA->back().Address,
TA->back().Size, BranchTarget)) {
StringRef ExtFnName;
if (MOS)
ExtFnName =
MOS->findExternalFunctionAt(getOriginalLoadAddr(BranchTarget));
if (!ExtFnName.empty()) {
TailCallTargets.push_back(BranchTarget);
CallTargets.push_back(BranchTarget);
} else {
BBI->SuccAddrs.push_back(BranchTarget);
Worklist.insert(BranchTarget);
}
}
}
}
for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) {
const uint64_t BeginAddr = Worklist[wi];
BBInfo *BBI = &BBInfos[BeginAddr];
assert(BBI->Atom && "Found a basic block without an associated atom!");
// Look for a basic block at BeginAddr.
BBI->BB = MCFN->find(BeginAddr);
if (BBI->BB) {
// FIXME: check that the succs/preds are the same
continue;
}
// If there was none, we have to create one from the atom.
BBI->BB = &MCFN->createBlock(*BBI->Atom);
}
for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) {
const uint64_t BeginAddr = Worklist[wi];
BBInfo *BBI = &BBInfos[BeginAddr];
MCBasicBlock *BB = BBI->BB;
RemoveDupsFromAddressVector(BBI->SuccAddrs);
for (AddressSetTy::const_iterator SI = BBI->SuccAddrs.begin(),
SE = BBI->SuccAddrs.end();
SE != SE; ++SI) {
MCBasicBlock *Succ = BBInfos[*SI].BB;
BB->addSuccessor(Succ);
Succ->addPredecessor(BB);
}
}
assert(BBInfos[Worklist[0]].BB &&
"No basic block created at requested address?");
return BBInfos[Worklist[0]].BB;
}
MCFunction *
MCObjectDisassembler::createFunction(MCModule *Module, uint64_t BeginAddr,
AddressSetTy &CallTargets,
AddressSetTy &TailCallTargets) {
// First, check if this is an external function.
StringRef ExtFnName;
if (MOS)
ExtFnName = MOS->findExternalFunctionAt(getOriginalLoadAddr(BeginAddr));
if (!ExtFnName.empty())
return Module->createFunction(ExtFnName);
// If it's not, look for an existing function.
for (MCModule::func_iterator FI = Module->func_begin(),
FE = Module->func_end();
FI != FE; ++FI) {
if ((*FI)->empty())
continue;
// FIXME: MCModule should provide a findFunctionByAddr()
if ((*FI)->getEntryBlock()->getInsts()->getBeginAddr() == BeginAddr)
return *FI;
}
// Finally, just create a new one.
MCFunction *MCFN = Module->createFunction("");
getBBAt(Module, MCFN, BeginAddr, CallTargets, TailCallTargets);
return MCFN;
}
// MachO MCObjectDisassembler implementation.
MCMachOObjectDisassembler::MCMachOObjectDisassembler(