llvm-objdump: factor code better, add comments.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@140153 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Benjamin Kramer 2011-09-20 17:53:01 +00:00
parent 9d1a3dea15
commit a894c8e344
3 changed files with 183 additions and 159 deletions

View File

@ -47,44 +47,39 @@ MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
while (!WorkList.empty()) {
uint64_t Index = WorkList.pop_back_val();
if (VisitedInsts.find(Index) != VisitedInsts.end())
continue;
continue; // Already visited this location.
for (;Index < End; Index += Size) {
MCInst Inst;
VisitedInsts.insert(Index);
MCInst Inst;
if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut, nulls())){
Instructions.push_back(MCDecodedInst(Index, Size, Inst));
if (Ana->isBranch(Inst)) {
uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
if (targ != -1ULL && targ == Index+Size) {
Instructions.push_back(MCDecodedInst(Index, Size, Inst));
VisitedInsts.insert(Index);
continue;
}
if (targ != -1ULL && targ == Index+Size)
continue; // Skip nop jumps.
// If we could determine the branch target, make a note to start a
// new basic block there and add the target to the worklist.
if (targ != -1ULL) {
Splits.insert(targ);
WorkList.push_back(targ);
WorkList.push_back(Index+Size);
}
Splits.insert(Index+Size);
Instructions.push_back(MCDecodedInst(Index, Size, Inst));
VisitedInsts.insert(Index);
break;
} else if (Ana->isReturn(Inst)) {
// Return instruction. This basic block ends here.
Splits.insert(Index+Size);
Instructions.push_back(MCDecodedInst(Index, Size, Inst));
VisitedInsts.insert(Index);
break;
} else if (Ana->isCall(Inst)) {
uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
if (targ != -1ULL && targ != Index+Size) {
// Add the call to the call list if the destination is known.
if (targ != -1ULL && targ != Index+Size)
Calls.push_back(targ);
}
}
Instructions.push_back(MCDecodedInst(Index, Size, Inst));
VisitedInsts.insert(Index);
} else {
VisitedInsts.insert(Index);
errs().write_hex(Index) << ": warning: invalid instruction encoding\n";
if (Size == 0)
Size = 1; // skip illegible bytes
@ -93,9 +88,10 @@ MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
}
}
// Make sure the instruction list is sorted.
std::sort(Instructions.begin(), Instructions.end());
// Create basic blocks.
// Create basic blocks.
unsigned ii = 0, ie = Instructions.size();
for (std::set<uint64_t>::iterator spi = Splits.begin(),
spe = llvm::prior(Splits.end()); spi != spe; ++spi) {
@ -115,7 +111,7 @@ MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
// Calculate successors of each block.
for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
MCBasicBlock &BB = i->second;
MCBasicBlock &BB = const_cast<MCBasicBlock&>(i->second);
if (BB.getInsts().empty()) continue;
const MCDecodedInst &Inst = BB.getInsts().back();

View File

@ -83,9 +83,9 @@ public:
const MCInstrAnalysis *Ana, raw_ostream &DebugOut,
SmallVectorImpl<uint64_t> &Calls);
typedef MapTy::iterator iterator;
iterator begin() { return Blocks.begin(); }
iterator end() { return Blocks.end(); }
typedef MapTy::const_iterator iterator;
iterator begin() const { return Blocks.begin(); }
iterator end() const { return Blocks.end(); }
StringRef getName() const { return Name; }

View File

@ -94,6 +94,29 @@ struct Symbol {
bool operator<(const Symbol &RHS) const { return Value < RHS.Value; }
};
// Build a Section record from a Mach-O section header.
//
// T is any handle that exposes the header fields through operator->; the
// callers in this file pass the in-memory structs read for 32-bit and 64-bit
// sections, so one template serves both layouts.
template <typename T>
static Section copySection(const T &Sect) {
  Section Result;
  Result.Address = Sect->Address;
  Result.Size = Sect->Size;
  Result.Offset = Sect->Offset;
  Result.NumRelocs = Sect->NumRelocationTableEntries;
  Result.RelocTableOffset = Sect->RelocationTableOffset;
  // The name is copied verbatim as a fixed 16-byte field.
  memcpy(Result.Name, Sect->Name, 16);
  return Result;
}
// Build a Symbol record from a symbol table entry.
//
// T is any handle that exposes StringIndex, SectionIndex and Value through
// operator->; the callers in this file pass both the 32-bit and the 64-bit
// symbol table entry structs.
template <typename T>
static Symbol copySymbol(const T &STE) {
  Symbol Result;
  Result.Value = STE->Value;
  Result.SectionIndex = STE->SectionIndex;
  Result.StringIndex = STE->StringIndex;
  return Result;
}
// Print additional information about an address, if available.
static void DumpAddress(uint64_t Address, ArrayRef<Section> Sections,
MachOObject *MachOObj, raw_ostream &OS) {
for (unsigned i = 0; i != Sections.size(); ++i) {
@ -102,14 +125,86 @@ static void DumpAddress(uint64_t Address, ArrayRef<Section> Sections,
Sections[i].Address + Sections[i].Size > Address) {
StringRef bytes = MachOObj->getData(Sections[i].Offset,
Sections[i].Size);
// Print constant strings.
if (!strcmp(Sections[i].Name, "__cstring"))
OS << '"' << bytes.substr(addr, bytes.find('\0', addr)) << '"';
// Print constant CFStrings.
if (!strcmp(Sections[i].Name, "__cfstring"))
OS << "@\"" << bytes.substr(addr, bytes.find('\0', addr)) << '"';
}
}
}
// Maps an address to the MCFunction created for it. An entry may hold a null
// pointer: call targets are recorded with a null value first and a function
// is created for them later.
typedef std::map<uint64_t, MCFunction*> FunctionMapTy;
// Storage for the MCFunction objects that FunctionMapTy entries point to.
typedef SmallVector<MCFunction, 16> FunctionListTy;
// Disassemble the range [Start, End) of Object into an MCFunction called Name,
// store it in Functions and register it in FunctionMap under Address.
//
// Every call target discovered while building the function is also entered
// into FunctionMap with a null MCFunction pointer, unless the map already has
// an entry for that address, so that the caller can create functions for the
// callees afterwards.
//
// NOTE(review): FunctionMap keeps the address of an element of Functions.
// FunctionListTy is a SmallVector, so a later push_back that has to grow the
// vector would presumably leave earlier map entries dangling -- confirm and
// consider a container with stable element addresses.
static void createMCFunctionAndSaveCalls(StringRef Name,
                                         const MCDisassembler *DisAsm,
                                         MemoryObject &Object, uint64_t Start,
                                         uint64_t End,
                                         MCInstrAnalysis *InstrAnalysis,
                                         uint64_t Address,
                                         raw_ostream &DebugOut,
                                         FunctionMapTy &FunctionMap,
                                         FunctionListTy &Functions) {
  SmallVector<uint64_t, 16> Callees;

  // Build the function and remember where it lives.
  Functions.push_back(
    MCFunction::createFunctionFromMC(Name, DisAsm, Object, Start, End,
                                     InstrAnalysis, DebugOut, Callees));
  FunctionMap[Address] = &Functions.back();

  // Reserve a (still empty) slot for each callee; insert() leaves entries
  // that already exist untouched.
  for (SmallVector<uint64_t, 16>::iterator CI = Callees.begin(),
       CE = Callees.end(); CI != CE; ++CI)
    FunctionMap.insert(
      FunctionMapTy::value_type(*CI, static_cast<MCFunction*>(0)));
}
// Write a graphviz file for the CFG inside an MCFunction.
static void emitDOTFile(const char *FileName, const MCFunction &f,
MCInstPrinter *IP) {
// Start a new dot file.
std::string Error;
raw_fd_ostream Out(FileName, Error);
if (!Error.empty()) {
errs() << "llvm-objdump: warning: " << Error << '\n';
return;
}
Out << "digraph " << f.getName() << " {\n";
Out << "graph [ rankdir = \"LR\" ];\n";
for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
bool hasPreds = false;
// Only print blocks that have predecessors.
// FIXME: Slow.
for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe;
++pi)
if (pi->second.contains(i->first)) {
hasPreds = true;
break;
}
if (!hasPreds && i != f.begin())
continue;
Out << '"' << i->first << "\" [ label=\"<a>";
// Print instructions.
for (unsigned ii = 0, ie = i->second.getInsts().size(); ii != ie;
++ii) {
// Escape special chars and print the instruction in mnemonic form.
std::string Str;
raw_string_ostream OS(Str);
IP->printInst(&i->second.getInsts()[ii].Inst, OS, "");
Out << DOT::EscapeString(OS.str()) << '|';
}
Out << "<o>\" shape=\"record\" ];\n";
// Add edges.
for (MCBasicBlock::succ_iterator si = i->second.succ_begin(),
se = i->second.succ_end(); si != se; ++si)
Out << i->first << ":o -> " << *si <<":a\n";
}
Out << "}\n";
}
void llvm::DisassembleInputMachO(StringRef Filename) {
OwningPtr<MemoryBuffer> Buff;
@ -131,44 +226,28 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
// Set up disassembler.
OwningPtr<const MCAsmInfo> AsmInfo(TheTarget->createMCAsmInfo(TripleName));
if (!AsmInfo) {
errs() << "error: no assembly info for target " << TripleName << "\n";
return;
}
OwningPtr<const MCSubtargetInfo>
STI(TheTarget->createMCSubtargetInfo(TripleName, "", ""));
if (!STI) {
errs() << "error: no subtarget info for target " << TripleName << "\n";
return;
}
OwningPtr<const MCDisassembler> DisAsm(TheTarget->createMCDisassembler(*STI));
if (!DisAsm) {
errs() << "error: no disassembler for target " << TripleName << "\n";
return;
}
int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
OwningPtr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
AsmPrinterVariant, *AsmInfo, *STI));
if (!IP) {
errs() << "error: no instruction printer for target " << TripleName << '\n';
AsmPrinterVariant, *AsmInfo, *STI));
if (!InstrAnalysis || !AsmInfo || !STI || !DisAsm || !IP) {
errs() << "error: couldn't initialize disassmbler for target "
<< TripleName << '\n';
return;
}
outs() << '\n';
outs() << Filename << ":\n\n";
outs() << '\n' << Filename << ":\n\n";
const macho::Header &Header = MachOObj->getHeader();
const MachOObject::LoadCommandInfo *SymtabLCI = 0;
// First, find the symbol table segment.
for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
const MachOObject::LoadCommandInfo &LCI = MachOObj->getLoadCommandInfo(i);
switch (LCI.Command.Type) {
case macho::LCT_Symtab:
if (LCI.Command.Type == macho::LCT_Symtab) {
SymtabLCI = &LCI;
break;
}
@ -184,34 +263,24 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
std::vector<Symbol> UnsortedSymbols; // FIXME: duplication
SmallVector<uint64_t, 8> FoundFns;
// Make a list of all symbols in the object file.
for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
const MachOObject::LoadCommandInfo &LCI = MachOObj->getLoadCommandInfo(i);
if (LCI.Command.Type == macho::LCT_Segment) {
InMemoryStruct<macho::SegmentLoadCommand> SegmentLC;
MachOObj->ReadSegmentLoadCommand(LCI, SegmentLC);
// Store the sections in this segment.
for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) {
InMemoryStruct<macho::Section> Sect;
MachOObj->ReadSection(LCI, SectNum, Sect);
Sections.push_back(copySection(Sect));
Section S;
memcpy(S.Name, Sect->Name, 16);
S.Address = Sect->Address;
S.Size = Sect->Size;
S.Offset = Sect->Offset;
S.NumRelocs = Sect->NumRelocationTableEntries;
S.RelocTableOffset = Sect->RelocationTableOffset;
Sections.push_back(S);
// Store the symbols in this section.
for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
InMemoryStruct<macho::SymbolTableEntry> STE;
MachOObj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE);
Symbol S;
S.StringIndex = STE->StringIndex;
S.SectionIndex = STE->SectionIndex;
S.Value = STE->Value;
Symbols.push_back(S);
Symbols.push_back(copySymbol(STE));
UnsortedSymbols.push_back(Symbols.back());
}
}
@ -219,32 +288,24 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
InMemoryStruct<macho::Segment64LoadCommand> Segment64LC;
MachOObj->ReadSegment64LoadCommand(LCI, Segment64LC);
for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) {
// Store the sections in this segment.
for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections;
++SectNum) {
InMemoryStruct<macho::Section64> Sect64;
MachOObj->ReadSection64(LCI, SectNum, Sect64);
Sections.push_back(copySection(Sect64));
Section S;
memcpy(S.Name, Sect64->Name, 16);
S.Address = Sect64->Address;
S.Size = Sect64->Size;
S.Offset = Sect64->Offset;
S.NumRelocs = Sect64->NumRelocationTableEntries;
S.RelocTableOffset = Sect64->RelocationTableOffset;
Sections.push_back(S);
// Store the symbols in this section.
for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
InMemoryStruct<macho::Symbol64TableEntry> STE;
MachOObj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE);
Symbol S;
S.StringIndex = STE->StringIndex;
S.SectionIndex = STE->SectionIndex;
S.Value = STE->Value;
Symbols.push_back(S);
Symbols.push_back(copySymbol(STE));
UnsortedSymbols.push_back(Symbols.back());
}
}
} else if (LCI.Command.Type == macho::LCT_FunctionStarts) {
// We found a function starts segment, parse the addresses for later
// consumption.
InMemoryStruct<macho::LinkeditDataLoadCommand> LLC;
MachOObj->ReadLinkeditDataLoadCommand(LCI, LLC);
@ -252,7 +313,6 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
}
}
std::map<uint64_t, MCFunction*> FunctionMap;
// Sort the symbols by address, just in case they didn't come in that way.
array_pod_sort(Symbols.begin(), Symbols.end());
@ -263,12 +323,14 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
raw_ostream &DebugOut = nulls();
#endif
SmallVector<MCFunction, 16> Functions;
FunctionMapTy FunctionMap;
FunctionListTy Functions;
for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) {
if (strcmp(Sections[SectIdx].Name, "__text"))
continue;
continue; // Skip non-text sections
// Insert the functions from the function starts segment into our map.
uint64_t VMAddr = Sections[SectIdx].Address - Sections[SectIdx].Offset;
for (unsigned i = 0, e = FoundFns.size(); i != e; ++i)
FunctionMap.insert(std::make_pair(FoundFns[i]+VMAddr, (MCFunction*)0));
@ -278,6 +340,7 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
StringRefMemoryObject memoryObject(Bytes);
bool symbolTableWorked = false;
// Parse relocations.
std::vector<std::pair<uint64_t, uint32_t> > Relocs;
for (unsigned j = 0; j != Sections[SectIdx].NumRelocs; ++j) {
InMemoryStruct<macho::RelocationEntry> RE;
@ -286,11 +349,16 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
}
array_pod_sort(Relocs.begin(), Relocs.end());
// Disassemble symbol by symbol.
for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) {
// Make sure the symbol is defined in this section.
if ((unsigned)Symbols[SymIdx].SectionIndex - 1 != SectIdx)
continue;
// Start at the address of the symbol relative to the section's address.
uint64_t Start = Symbols[SymIdx].Value - Sections[SectIdx].Address;
// Stop disassembling either at the beginning of the next symbol or at
// the end of the section.
uint64_t End = (SymIdx+1 == Symbols.size() ||
Symbols[SymIdx].SectionIndex != Symbols[SymIdx+1].SectionIndex) ?
Sections[SectIdx].Size :
@ -303,6 +371,7 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
symbolTableWorked = true;
if (!CFG) {
// Normal disassembly, print addresses, bytes and mnemonic form.
outs() << MachOObj->getStringAtIndex(Symbols[SymIdx].StringIndex)
<< ":\n";
for (uint64_t Index = Start; Index < End; Index += Size) {
@ -322,42 +391,27 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
}
} else {
// Create CFG and use it for disassembly.
SmallVector<uint64_t, 16> Calls;
MCFunction f =
MCFunction::createFunctionFromMC(
MachOObj->getStringAtIndex(Symbols[SymIdx].StringIndex),
DisAsm.get(),
memoryObject, Start, End,
InstrAnalysis.get(), DebugOut,
Calls);
Functions.push_back(f);
FunctionMap[Start] = &Functions.back();
for (unsigned i = 0, e = Calls.size(); i != e; ++i)
FunctionMap.insert(std::make_pair(Calls[i], (MCFunction*)0));
createMCFunctionAndSaveCalls(
MachOObj->getStringAtIndex(Symbols[SymIdx].StringIndex),
DisAsm.get(), memoryObject, Start, End, InstrAnalysis.get(),
Start, DebugOut, FunctionMap, Functions);
}
}
if (CFG) {
if (!symbolTableWorked) {
// Create CFG and use it for disassembly.
SmallVector<uint64_t, 16> Calls;
MCFunction f =
MCFunction::createFunctionFromMC("__TEXT", DisAsm.get(),
memoryObject, 0, Sections[SectIdx].Size,
InstrAnalysis.get(), DebugOut,
Calls);
Functions.push_back(f);
FunctionMap[Sections[SectIdx].Offset] = &Functions.back();
for (unsigned i = 0, e = Calls.size(); i != e; ++i)
FunctionMap.insert(std::make_pair(Calls[i], (MCFunction*)0));
// Reading the symbol table didn't work, create a big __TEXT symbol.
createMCFunctionAndSaveCalls("__TEXT", DisAsm.get(), memoryObject,
0, Sections[SectIdx].Size,
InstrAnalysis.get(),
Sections[SectIdx].Offset, DebugOut,
FunctionMap, Functions);
}
for (std::map<uint64_t, MCFunction*>::iterator mi = FunctionMap.begin(),
me = FunctionMap.end(); mi != me; ++mi)
if (mi->second == 0) {
// Create functions for the remaining callees we have gathered,
// but we didn't find a name for them.
SmallVector<uint64_t, 16> Calls;
MCFunction f =
MCFunction::createFunctionFromMC("unknown", DisAsm.get(),
@ -367,10 +421,11 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
Calls);
Functions.push_back(f);
mi->second = &Functions.back();
for (unsigned i = 0, e = Calls.size(); i != e; ++i)
if (FunctionMap.insert(std::make_pair(Calls[i], (MCFunction*)0))
.second)
for (unsigned i = 0, e = Calls.size(); i != e; ++i) {
std::pair<uint64_t, MCFunction*> p(Calls[i], (MCFunction*)0);
if (FunctionMap.insert(p).second)
mi = FunctionMap.begin();
}
}
DenseSet<uint64_t> PrintedBlocks;
@ -378,10 +433,13 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
MCFunction &f = Functions[ffi];
for (MCFunction::iterator fi = f.begin(), fe = f.end(); fi != fe; ++fi){
if (!PrintedBlocks.insert(fi->first).second)
continue;
continue; // We already printed this block.
// We assume a block has predecessors when it's the first block after
// a symbol.
bool hasPreds = FunctionMap.find(fi->first) != FunctionMap.end();
// Only print blocks that have predecessors.
// See if this block has predecessors.
// FIXME: Slow.
for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe;
++pi)
@ -390,8 +448,8 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
break;
}
// Data block.
if (!hasPreds && fi != f.begin()) {
// No predecessors, this is a data block. Print as .byte directives.
if (!hasPreds) {
uint64_t End = llvm::next(fi) == fe ? Sections[SectIdx].Size :
llvm::next(fi)->first;
outs() << "# " << End-fi->first << " bytes of data:\n";
@ -403,23 +461,31 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
continue;
}
if (fi->second.contains(fi->first))
if (fi->second.contains(fi->first)) // Print a header for simple loops
outs() << "# Loop begin:\n";
// Walk over the instructions and print them.
for (unsigned ii = 0, ie = fi->second.getInsts().size(); ii != ie;
++ii) {
const MCDecodedInst &Inst = fi->second.getInsts()[ii];
// If there's a symbol at this address, print its name.
if (FunctionMap.find(Sections[SectIdx].Address + Inst.Address) !=
FunctionMap.end())
outs() << FunctionMap[Sections[SectIdx].Address + Inst.Address]->
getName() << ":\n";
outs() << format("%8llx:\t", Sections[SectIdx].Address +
Inst.Address);
DumpBytes(StringRef(Bytes.data() + Inst.Address, Inst.Size));
// Simple loops.
if (fi->second.contains(fi->first))
if (fi->second.contains(fi->first)) // Indent simple loops.
outs() << '\t';
IP->printInst(&Inst.Inst, outs(), "");
// Look for relocations inside this instruction; if there is one,
// print its target and additional information if available.
for (unsigned j = 0; j != Relocs.size(); ++j)
if (Relocs[j].first >= Sections[SectIdx].Address + Inst.Address &&
Relocs[j].first < Sections[SectIdx].Address + Inst.Address +
@ -431,6 +497,9 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
DumpAddress(UnsortedSymbols[Relocs[j].second].Value, Sections,
MachOObj.get(), outs());
}
// If this instruction contains an address, see if we can evaluate
// it and print additional information.
uint64_t targ = InstrAnalysis->evaluateBranch(Inst.Inst,
Inst.Address,
Inst.Size);
@ -441,48 +510,7 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
}
}
// Start a new dot file.
std::string Error;
raw_fd_ostream Out((f.getName().str() + ".dot").c_str(), Error);
if (!Error.empty()) {
errs() << "llvm-objdump: warning: " << Error << '\n';
continue;
}
Out << "digraph " << f.getName() << " {\n";
Out << "graph [ rankdir = \"LR\" ];\n";
for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
bool hasPreds = false;
// Only print blocks that have predecessors.
// FIXME: Slow.
for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe;
++pi)
if (pi->second.contains(i->first)) {
hasPreds = true;
break;
}
if (!hasPreds && i != f.begin())
continue;
Out << '"' << i->first << "\" [ label=\"<a>";
// Print instructions.
for (unsigned ii = 0, ie = i->second.getInsts().size(); ii != ie;
++ii) {
// Escape special chars and print the instruction in mnemonic form.
std::string Str;
raw_string_ostream OS(Str);
IP->printInst(&i->second.getInsts()[ii].Inst, OS, "");
Out << DOT::EscapeString(OS.str()) << '|';
}
Out << "<o>\" shape=\"record\" ];\n";
// Add edges.
for (MCBasicBlock::succ_iterator si = i->second.succ_begin(),
se = i->second.succ_end(); si != se; ++si)
Out << i->first << ":o -> " << *si <<":a\n";
}
Out << "}\n";
emitDOTFile((f.getName().str() + ".dot").c_str(), f, IP.get());
}
}
}