From 0b8b771e9f2f251460a6f200c45efe9d55640d60 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer <benny.kra@googlemail.com> Date: Mon, 19 Sep 2011 17:56:04 +0000 Subject: [PATCH] Add a MachO-specific "mode" to llvm-objdump, that, if enabled, gathers additional information that are only available on MachO. - It can take FunctionStarts from a binary to find entry points more accurately. - Symbol offsets in executables are correct now. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@140028 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm-objdump/CMakeLists.txt | 1 + tools/llvm-objdump/MCFunction.cpp | 105 +++--- tools/llvm-objdump/MCFunction.h | 38 ++- tools/llvm-objdump/MachODump.cpp | 489 ++++++++++++++++++++++++++++ tools/llvm-objdump/llvm-objdump.cpp | 208 +++--------- tools/llvm-objdump/llvm-objdump.h | 47 +++ 6 files changed, 677 insertions(+), 211 deletions(-) create mode 100644 tools/llvm-objdump/MachODump.cpp create mode 100644 tools/llvm-objdump/llvm-objdump.h diff --git a/tools/llvm-objdump/CMakeLists.txt b/tools/llvm-objdump/CMakeLists.txt index 4c7ab20b6c6..db788646cfe 100644 --- a/tools/llvm-objdump/CMakeLists.txt +++ b/tools/llvm-objdump/CMakeLists.txt @@ -8,5 +8,6 @@ set(LLVM_LINK_COMPONENTS add_llvm_tool(llvm-objdump llvm-objdump.cpp + MachODump.cpp MCFunction.cpp ) diff --git a/tools/llvm-objdump/MCFunction.cpp b/tools/llvm-objdump/MCFunction.cpp index 5f1649694df..03390c2b586 100644 --- a/tools/llvm-objdump/MCFunction.cpp +++ b/tools/llvm-objdump/MCFunction.cpp @@ -30,48 +30,77 @@ MCFunction MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm, const MemoryObject &Region, uint64_t Start, uint64_t End, const MCInstrAnalysis *Ana, - raw_ostream &DebugOut) { + raw_ostream &DebugOut, + SmallVectorImpl<uint64_t> &Calls) { + std::vector<MCDecodedInst> Instructions; std::set<uint64_t> Splits; Splits.insert(Start); - std::vector<MCDecodedInst> Instructions; uint64_t Size; - // Disassemble code and gather basic block split points. - for (uint64_t Index = Start; Index < End; Index += Size) { - MCInst Inst; - - if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut, nulls())) { - if (Ana->isBranch(Inst)) { - uint64_t targ = Ana->evaluateBranch(Inst, Index, Size); - // FIXME: Distinguish relocations from nop jumps. - if (targ != -1ULL && (targ == Index+Size || targ >= End)) { - Instructions.push_back(MCDecodedInst(Index, Size, Inst)); - continue; // Skip branches that leave the function. - } - if (targ != -1ULL) - Splits.insert(targ); - Splits.insert(Index+Size); - } else if (Ana->isReturn(Inst)) { - Splits.insert(Index+Size); - } - - Instructions.push_back(MCDecodedInst(Index, Size, Inst)); - } else { - errs() << "warning: invalid instruction encoding\n"; - if (Size == 0) - Size = 1; // skip illegible bytes - } - - } - MCFunction f(Name); - // Create basic blocks. + { + DenseSet<uint64_t> VisitedInsts; + SmallVector<uint64_t, 16> WorkList; + WorkList.push_back(Start); + // Disassemble code and gather basic block split points. + while (!WorkList.empty()) { + uint64_t Index = WorkList.pop_back_val(); + if (VisitedInsts.find(Index) != VisitedInsts.end()) + continue; + + for (;Index < End; Index += Size) { + MCInst Inst; + + if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut, nulls())){ + if (Ana->isBranch(Inst)) { + uint64_t targ = Ana->evaluateBranch(Inst, Index, Size); + if (targ != -1ULL && targ == Index+Size) { + Instructions.push_back(MCDecodedInst(Index, Size, Inst)); + VisitedInsts.insert(Index); + continue; + } + if (targ != -1ULL) { + Splits.insert(targ); + WorkList.push_back(targ); + WorkList.push_back(Index+Size); + } + Splits.insert(Index+Size); + Instructions.push_back(MCDecodedInst(Index, Size, Inst)); + VisitedInsts.insert(Index); + break; + } else if (Ana->isReturn(Inst)) { + Splits.insert(Index+Size); + Instructions.push_back(MCDecodedInst(Index, Size, Inst)); + VisitedInsts.insert(Index); + break; + } else if (Ana->isCall(Inst)) { + uint64_t targ = Ana->evaluateBranch(Inst, Index, Size); + if (targ != -1ULL && targ != Index+Size) { + Calls.push_back(targ); + } + } + + Instructions.push_back(MCDecodedInst(Index, Size, Inst)); + VisitedInsts.insert(Index); + } else { + VisitedInsts.insert(Index); + errs().write_hex(Index) << ": warning: invalid instruction encoding\n"; + if (Size == 0) + Size = 1; // skip illegible bytes + } + } + } + } + + std::sort(Instructions.begin(), Instructions.end()); + + // Create basic blocks. unsigned ii = 0, ie = Instructions.size(); for (std::set<uint64_t>::iterator spi = Splits.begin(), - spe = Splits.end(); spi != spe; ++spi) { + spe = llvm::prior(Splits.end()); spi != spe; ++spi) { MCBasicBlock BB; - uint64_t BlockEnd = llvm::next(spi) == spe ? End : *llvm::next(spi); + uint64_t BlockEnd = *llvm::next(spi); // Add instructions to the BB. for (; ii != ie; ++ii) { if (Instructions[ii].Address < *spi || @@ -82,6 +111,8 @@ MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm, f.addBlock(*spi, BB); } + std::sort(f.Blocks.begin(), f.Blocks.end()); + // Calculate successors of each block. for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) { MCBasicBlock &BB = i->second; @@ -94,16 +125,16 @@ MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm, // Indirect branch. Bail and add all blocks of the function as a // successor. for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) - BB.addSucc(&i->second); + BB.addSucc(i->first); } else if (targ != Inst.Address+Inst.Size) - BB.addSucc(&f.getBlockAtAddress(targ)); + BB.addSucc(targ); // Conditional branches can also fall through to the next block. if (Ana->isConditionalBranch(Inst.Inst) && llvm::next(i) != e) - BB.addSucc(&llvm::next(i)->second); + BB.addSucc(llvm::next(i)->first); } else { // No branch. Fall through to the next block. if (!Ana->isReturn(Inst.Inst) && llvm::next(i) != e) - BB.addSucc(&llvm::next(i)->second); + BB.addSucc(llvm::next(i)->first); } } diff --git a/tools/llvm-objdump/MCFunction.h b/tools/llvm-objdump/MCFunction.h index 023ca391830..4677d91bb77 100644 --- a/tools/llvm-objdump/MCFunction.h +++ b/tools/llvm-objdump/MCFunction.h @@ -12,8 +12,11 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_OBJECTDUMP_MCFUNCTION_H +#define LLVM_OBJECTDUMP_MCFUNCTION_H + #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/MC/MCInst.h" #include <map> @@ -31,15 +34,20 @@ struct MCDecodedInst { uint64_t Size; MCInst Inst; + MCDecodedInst() {} MCDecodedInst(uint64_t Address, uint64_t Size, MCInst Inst) : Address(Address), Size(Size), Inst(Inst) {} + + bool operator<(const MCDecodedInst &RHS) const { + return Address < RHS.Address; + } }; /// MCBasicBlock - Consists of multiple MCDecodedInsts and a list of successing /// MCBasicBlocks. class MCBasicBlock { - SmallVector<MCDecodedInst, 8> Insts; - typedef SmallPtrSet<MCBasicBlock*, 8> SetTy; + std::vector<MCDecodedInst> Insts; + typedef DenseSet<uint64_t> SetTy; SetTy Succs; public: ArrayRef<MCDecodedInst> getInsts() const { return Insts; } @@ -48,10 +56,14 @@ public: succ_iterator succ_begin() const { return Succs.begin(); } succ_iterator succ_end() const { return Succs.end(); } - bool contains(MCBasicBlock *BB) const { return Succs.count(BB); } + bool contains(uint64_t Addr) const { return Succs.count(Addr); } void addInst(const MCDecodedInst &Inst) { Insts.push_back(Inst); } - void addSucc(MCBasicBlock *BB) { Succs.insert(BB); } + void addSucc(uint64_t Addr) { Succs.insert(Addr); } + + bool operator<(const MCBasicBlock &RHS) const { + return Insts.size() < RHS.Insts.size(); + } }; /// MCFunction - Represents a named function in machine code, containing @@ -59,7 +71,7 @@ public: class MCFunction { const StringRef Name; // Keep BBs sorted by address. - typedef std::map<uint64_t, MCBasicBlock> MapTy; + typedef std::vector<std::pair<uint64_t, MCBasicBlock> > MapTy; MapTy Blocks; public: MCFunction(StringRef Name) : Name(Name) {} @@ -68,7 +80,8 @@ public: static MCFunction createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm, const MemoryObject &Region, uint64_t Start, uint64_t End, - const MCInstrAnalysis *Ana, raw_ostream &DebugOut); + const MCInstrAnalysis *Ana, raw_ostream &DebugOut, + SmallVectorImpl<uint64_t> &Calls); typedef MapTy::iterator iterator; iterator begin() { return Blocks.begin(); } @@ -77,14 +90,11 @@ public: StringRef getName() const { return Name; } MCBasicBlock &addBlock(uint64_t Address, const MCBasicBlock &BB) { - assert(!Blocks.count(Address) && "Already a BB at address."); - return Blocks[Address] = BB; - } - - MCBasicBlock &getBlockAtAddress(uint64_t Address) { - assert(Blocks.count(Address) && "No BB at address."); - return Blocks[Address]; + Blocks.push_back(std::make_pair(Address, BB)); + return Blocks.back().second; } }; } + +#endif diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp new file mode 100644 index 00000000000..49687d1ceae --- /dev/null +++ b/tools/llvm-objdump/MachODump.cpp @@ -0,0 +1,489 @@ +//===-- MachODump.cpp - Object file dumping utility for llvm --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MachO-specific dumper for llvm-objdump. +// +//===----------------------------------------------------------------------===// + +#include "llvm-objdump.h" +#include "MCFunction.h" +#include "llvm/Support/MachO.h" +#include "llvm/Object/MachOObject.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" +#include <algorithm> +#include <cstring> +using namespace llvm; +using namespace object; + +static cl::opt<bool> + CFG("cfg", cl::desc("Create a CFG for every symbol in the object file and" + "write it to a graphviz file (MachO-only)")); + +static const Target *GetTarget(const MachOObject *MachOObj) { + // Figure out the target triple. + llvm::Triple TT("unknown-unknown-unknown"); + switch (MachOObj->getHeader().CPUType) { + case llvm::MachO::CPUTypeI386: + TT.setArch(Triple::ArchType(Triple::x86)); + break; + case llvm::MachO::CPUTypeX86_64: + TT.setArch(Triple::ArchType(Triple::x86_64)); + break; + case llvm::MachO::CPUTypeARM: + TT.setArch(Triple::ArchType(Triple::arm)); + break; + case llvm::MachO::CPUTypePowerPC: + TT.setArch(Triple::ArchType(Triple::ppc)); + break; + case llvm::MachO::CPUTypePowerPC64: + TT.setArch(Triple::ArchType(Triple::ppc64)); + break; + } + + TripleName = TT.str(); + + // Get the target specific parser. + std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); + if (TheTarget) + return TheTarget; + + errs() << "llvm-objdump: error: unable to get target for '" << TripleName + << "', see --version and --triple.\n"; + return 0; +} + +struct Section { + char Name[16]; + uint64_t Address; + uint64_t Size; + uint32_t Offset; + uint32_t NumRelocs; + uint64_t RelocTableOffset; +}; + +struct Symbol { + uint64_t Value; + uint32_t StringIndex; + uint8_t SectionIndex; + bool operator<(const Symbol &RHS) const { return Value < RHS.Value; } +}; + +static void DumpAddress(uint64_t Address, ArrayRef<Section> Sections, + MachOObject *MachOObj, raw_ostream &OS) { + for (unsigned i = 0; i != Sections.size(); ++i) { + uint64_t addr = Address-Sections[i].Address; + if (Sections[i].Address <= Address && + Sections[i].Address + Sections[i].Size > Address) { + StringRef bytes = MachOObj->getData(Sections[i].Offset, + Sections[i].Size); + if (!strcmp(Sections[i].Name, "__cstring")) + OS << '"' << bytes.substr(addr, bytes.find('\0', addr)) << '"'; + if (!strcmp(Sections[i].Name, "__cfstring")) + OS << "@\"" << bytes.substr(addr, bytes.find('\0', addr)) << '"'; + } + } +} + +void llvm::DisassembleInputMachO(StringRef Filename) { + OwningPtr<MemoryBuffer> Buff; + + if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) { + errs() << "llvm-objdump: " << Filename << ": " << ec.message() << "\n"; + return; + } + + OwningPtr<MachOObject> MachOObj(MachOObject::LoadFromBuffer(Buff.take())); + + const Target *TheTarget = GetTarget(MachOObj.get()); + if (!TheTarget) { + // GetTarget prints out stuff. + return; + } + const MCInstrInfo *InstrInfo = TheTarget->createMCInstrInfo(); + OwningPtr<MCInstrAnalysis> + InstrAnalysis(TheTarget->createMCInstrAnalysis(InstrInfo)); + + // Set up disassembler. + OwningPtr<const MCAsmInfo> AsmInfo(TheTarget->createMCAsmInfo(TripleName)); + + if (!AsmInfo) { + errs() << "error: no assembly info for target " << TripleName << "\n"; + return; + } + + OwningPtr<const MCSubtargetInfo> + STI(TheTarget->createMCSubtargetInfo(TripleName, "", "")); + + if (!STI) { + errs() << "error: no subtarget info for target " << TripleName << "\n"; + return; + } + + OwningPtr<const MCDisassembler> DisAsm(TheTarget->createMCDisassembler(*STI)); + if (!DisAsm) { + errs() << "error: no disassembler for target " << TripleName << "\n"; + return; + } + + int AsmPrinterVariant = AsmInfo->getAssemblerDialect(); + OwningPtr<MCInstPrinter> IP(TheTarget->createMCInstPrinter( + AsmPrinterVariant, *AsmInfo, *STI)); + if (!IP) { + errs() << "error: no instruction printer for target " << TripleName << '\n'; + return; + } + + outs() << '\n'; + outs() << Filename << ":\n\n"; + + const macho::Header &Header = MachOObj->getHeader(); + + const MachOObject::LoadCommandInfo *SymtabLCI = 0; + for (unsigned i = 0; i != Header.NumLoadCommands; ++i) { + const MachOObject::LoadCommandInfo &LCI = MachOObj->getLoadCommandInfo(i); + switch (LCI.Command.Type) { + case macho::LCT_Symtab: + SymtabLCI = &LCI; + break; + } + } + + // Read and register the symbol table data. + InMemoryStruct<macho::SymtabLoadCommand> SymtabLC; + MachOObj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC); + MachOObj->RegisterStringTable(*SymtabLC); + + std::vector<Section> Sections; + std::vector<Symbol> Symbols; + std::vector<Symbol> UnsortedSymbols; // FIXME: duplication + SmallVector<uint64_t, 8> FoundFns; + + for (unsigned i = 0; i != Header.NumLoadCommands; ++i) { + const MachOObject::LoadCommandInfo &LCI = MachOObj->getLoadCommandInfo(i); + if (LCI.Command.Type == macho::LCT_Segment) { + InMemoryStruct<macho::SegmentLoadCommand> SegmentLC; + MachOObj->ReadSegmentLoadCommand(LCI, SegmentLC); + + for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) { + InMemoryStruct<macho::Section> Sect; + MachOObj->ReadSection(LCI, SectNum, Sect); + + Section S; + memcpy(S.Name, Sect->Name, 16); + S.Address = Sect->Address; + S.Size = Sect->Size; + S.Offset = Sect->Offset; + S.NumRelocs = Sect->NumRelocationTableEntries; + S.RelocTableOffset = Sect->RelocationTableOffset; + Sections.push_back(S); + + for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) { + InMemoryStruct<macho::SymbolTableEntry> STE; + MachOObj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE); + + Symbol S; + S.StringIndex = STE->StringIndex; + S.SectionIndex = STE->SectionIndex; + S.Value = STE->Value; + Symbols.push_back(S); + UnsortedSymbols.push_back(Symbols.back()); + } + } + } else if (LCI.Command.Type == macho::LCT_Segment64) { + InMemoryStruct<macho::Segment64LoadCommand> Segment64LC; + MachOObj->ReadSegment64LoadCommand(LCI, Segment64LC); + + for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) { + InMemoryStruct<macho::Section64> Sect64; + MachOObj->ReadSection64(LCI, SectNum, Sect64); + + Section S; + memcpy(S.Name, Sect64->Name, 16); + S.Address = Sect64->Address; + S.Size = Sect64->Size; + S.Offset = Sect64->Offset; + S.NumRelocs = Sect64->NumRelocationTableEntries; + S.RelocTableOffset = Sect64->RelocationTableOffset; + Sections.push_back(S); + + for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) { + InMemoryStruct<macho::Symbol64TableEntry> STE; + MachOObj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE); + + Symbol S; + S.StringIndex = STE->StringIndex; + S.SectionIndex = STE->SectionIndex; + S.Value = STE->Value; + Symbols.push_back(S); + UnsortedSymbols.push_back(Symbols.back()); + } + } + } else if (LCI.Command.Type == macho::LCT_FunctionStarts) { + InMemoryStruct<macho::LinkeditDataLoadCommand> LLC; + MachOObj->ReadLinkeditDataLoadCommand(LCI, LLC); + + MachOObj->ReadULEB128s(LLC->DataOffset, FoundFns); + } + } + + std::map<uint64_t, MCFunction*> FunctionMap; + + // Sort the symbols by address, just in case they didn't come in that way. + array_pod_sort(Symbols.begin(), Symbols.end()); + +#ifndef NDEBUG + raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls(); +#else + raw_ostream &DebugOut = nulls(); +#endif + + SmallVector<MCFunction, 16> Functions; + + for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) { + if (strcmp(Sections[SectIdx].Name, "__text")) + continue; + + uint64_t VMAddr = Sections[SectIdx].Address - Sections[SectIdx].Offset; + for (unsigned i = 0, e = FoundFns.size(); i != e; ++i) + FunctionMap.insert(std::pair<uint64_t,MCFunction*>(FoundFns[i]+VMAddr,0)); + + StringRef Bytes = MachOObj->getData(Sections[SectIdx].Offset, + Sections[SectIdx].Size); + StringRefMemoryObject memoryObject(Bytes); + bool symbolTableWorked = false; + + std::vector<std::pair<uint64_t, uint32_t> > Relocs; + for (unsigned j = 0; j != Sections[SectIdx].NumRelocs; ++j) { + InMemoryStruct<macho::RelocationEntry> RE; + MachOObj->ReadRelocationEntry(Sections[SectIdx].RelocTableOffset, j, RE); + Relocs.push_back(std::make_pair(RE->Word0, RE->Word1 & 0xffffff)); + } + array_pod_sort(Relocs.begin(), Relocs.end()); + + for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) { + if ((unsigned)Symbols[SymIdx].SectionIndex - 1 != SectIdx) + continue; + + uint64_t Start = Symbols[SymIdx].Value - Sections[SectIdx].Address; + uint64_t End = (SymIdx+1 == Symbols.size() || + Symbols[SymIdx].SectionIndex != Symbols[SymIdx+1].SectionIndex) ? + Sections[SectIdx].Size : + Symbols[SymIdx+1].Value - Sections[SectIdx].Address; + uint64_t Size; + + if (Start >= End) + continue; + + symbolTableWorked = true; + + if (!CFG) { + outs() << MachOObj->getStringAtIndex(Symbols[SymIdx].StringIndex) + << ":\n"; + for (uint64_t Index = Start; Index < End; Index += Size) { + MCInst Inst; + + if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, + DebugOut, nulls())) { + outs() << format("%8llx:\t", Sections[SectIdx].Address + Index); + DumpBytes(StringRef(Bytes.data() + Index, Size)); + IP->printInst(&Inst, outs(), ""); + outs() << "\n"; + } else { + errs() << "llvm-objdump: warning: invalid instruction encoding\n"; + if (Size == 0) + Size = 1; // skip illegible bytes + } + } + } else { + // Create CFG and use it for disassembly. + SmallVector<uint64_t, 16> Calls; + MCFunction f = + MCFunction::createFunctionFromMC( + MachOObj->getStringAtIndex(Symbols[SymIdx].StringIndex), + DisAsm.get(), + memoryObject, Start, End, + InstrAnalysis.get(), DebugOut, + Calls); + + Functions.push_back(f); + FunctionMap[Start] = &Functions.back(); + + for (unsigned i = 0, e = Calls.size(); i != e; ++i) + FunctionMap.insert(std::pair<uint64_t, MCFunction*>(Calls[i], 0)); + } + } + + if (CFG) { + if (!symbolTableWorked) { + // Create CFG and use it for disassembly. + SmallVector<uint64_t, 16> Calls; + MCFunction f = + MCFunction::createFunctionFromMC("__TEXT", DisAsm.get(), + memoryObject, 0, Sections[SectIdx].Size, + InstrAnalysis.get(), DebugOut, + Calls); + + Functions.push_back(f); + FunctionMap[Sections[SectIdx].Offset] = &Functions.back(); + + for (unsigned i = 0, e = Calls.size(); i != e; ++i) + FunctionMap.insert(std::pair<uint64_t, MCFunction*>(Calls[i], 0)); + } + for (std::map<uint64_t, MCFunction*>::iterator mi = FunctionMap.begin(), + me = FunctionMap.end(); mi != me; ++mi) + if (mi->second == 0) { + SmallVector<uint64_t, 16> Calls; + MCFunction f = + MCFunction::createFunctionFromMC("unknown", DisAsm.get(), + memoryObject, mi->first, + Sections[SectIdx].Size, + InstrAnalysis.get(), DebugOut, + Calls); + Functions.push_back(f); + mi->second = &Functions.back(); + for (unsigned i = 0, e = Calls.size(); i != e; ++i) + if (FunctionMap.insert(std::pair<uint64_t, MCFunction*>(Calls[i],0)) + .second) + mi = FunctionMap.begin(); + } + + DenseSet<uint64_t> PrintedBlocks; + for (unsigned ffi = 0, ffe = Functions.size(); ffi != ffe; ++ffi) { + MCFunction &f = Functions[ffi]; + for (MCFunction::iterator fi = f.begin(), fe = f.end(); fi != fe; ++fi){ + if (!PrintedBlocks.insert(fi->first).second) + continue; + bool hasPreds = FunctionMap.find(fi->first) != FunctionMap.end(); + + // Only print blocks that have predecessors. + // FIXME: Slow. + for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe; + ++pi) + if (pi->second.contains(fi->first)) { + hasPreds = true; + break; + } + + // Data block. + if (!hasPreds && fi != f.begin()) { + uint64_t End = llvm::next(fi) == fe ? Sections[SectIdx].Size : + llvm::next(fi)->first; + outs() << "# " << End-fi->first << " bytes of data:\n"; + for (unsigned pos = fi->first; pos != End; ++pos) { + outs() << format("%8x:\t", Sections[SectIdx].Address + pos); + DumpBytes(StringRef(Bytes.data() + pos, 1)); + outs() << format("\t.byte 0x%02x\n", (uint8_t)Bytes[pos]); + } + continue; + } + + if (fi->second.contains(fi->first)) + outs() << "# Loop begin:\n"; + + for (unsigned ii = 0, ie = fi->second.getInsts().size(); ii != ie; + ++ii) { + const MCDecodedInst &Inst = fi->second.getInsts()[ii]; + if (FunctionMap.find(Sections[SectIdx].Address + Inst.Address) != + FunctionMap.end()) + outs() << FunctionMap[Sections[SectIdx].Address + Inst.Address]-> + getName() << ":\n"; + outs() << format("%8llx:\t", Sections[SectIdx].Address + + Inst.Address); + DumpBytes(StringRef(Bytes.data() + Inst.Address, Inst.Size)); + // Simple loops. + if (fi->second.contains(fi->first)) + outs() << '\t'; + IP->printInst(&Inst.Inst, outs(), ""); + for (unsigned j = 0; j != Relocs.size(); ++j) + if (Relocs[j].first >= Sections[SectIdx].Address + Inst.Address && + Relocs[j].first < Sections[SectIdx].Address + Inst.Address + + Inst.Size) { + outs() << "\t# " + << MachOObj->getStringAtIndex( + UnsortedSymbols[Relocs[j].second].StringIndex) + << ' '; + DumpAddress(UnsortedSymbols[Relocs[j].second].Value, Sections, + MachOObj.get(), outs()); + } + uint64_t targ = InstrAnalysis->evaluateBranch(Inst.Inst, + Inst.Address, + Inst.Size); + if (targ != -1ULL) + DumpAddress(targ, Sections, MachOObj.get(), outs()); + + outs() << '\n'; + } + } + + // Start a new dot file. + std::string Error; + raw_fd_ostream Out((f.getName().str() + ".dot").c_str(), Error); + if (!Error.empty()) { + errs() << "llvm-objdump: warning: " << Error << '\n'; + continue; + } + + Out << "digraph " << f.getName() << " {\n"; + Out << "graph [ rankdir = \"LR\" ];\n"; + for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) { + bool hasPreds = false; + // Only print blocks that have predecessors. + // FIXME: Slow. + for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe; + ++pi) + if (pi->second.contains(i->first)) { + hasPreds = true; + break; + } + + if (!hasPreds && i != f.begin()) + continue; + + Out << '"' << i->first << "\" [ label=\"<a>"; + // Print instructions. + for (unsigned ii = 0, ie = i->second.getInsts().size(); ii != ie; + ++ii) { + // Escape special chars and print the instruction in mnemonic form. + std::string Str; + raw_string_ostream OS(Str); + IP->printInst(&i->second.getInsts()[ii].Inst, OS, ""); + Out << DOT::EscapeString(OS.str()) << '|'; + } + Out << "<o>\" shape=\"record\" ];\n"; + + // Add edges. + for (MCBasicBlock::succ_iterator si = i->second.succ_begin(), + se = i->second.succ_end(); si != se; ++si) + Out << i->first << ":o -> " << *si <<":a\n"; + } + Out << "}\n"; + } + } + } +} diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp index 4cfd4f4164a..78824e9e067 100644 --- a/tools/llvm-objdump/llvm-objdump.cpp +++ b/tools/llvm-objdump/llvm-objdump.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm-objdump.h" #include "MCFunction.h" #include "llvm/Object/ObjectFile.h" #include "llvm/ADT/OwningPtr.h" @@ -46,39 +47,37 @@ using namespace llvm; using namespace object; -namespace { - cl::list<std::string> - InputFilenames(cl::Positional, cl::desc("<input object files>"), - cl::ZeroOrMore); +static cl::list<std::string> +InputFilenames(cl::Positional, cl::desc("<input object files>"),cl::ZeroOrMore); - cl::opt<bool> - Disassemble("disassemble", - cl::desc("Display assembler mnemonics for the machine instructions")); - cl::alias - Disassembled("d", cl::desc("Alias for --disassemble"), - cl::aliasopt(Disassemble)); +static cl::opt<bool> +Disassemble("disassemble", + cl::desc("Display assembler mnemonics for the machine instructions")); +static cl::alias +Disassembled("d", cl::desc("Alias for --disassemble"), + cl::aliasopt(Disassemble)); - cl::opt<bool> - CFG("cfg", cl::desc("Create a CFG for every symbol in the object file and" - "write it to a graphviz file")); +static cl::opt<bool> +MachO("macho", cl::desc("Use MachO specific object file parser")); +static cl::alias +MachOm("m", cl::desc("Alias for --macho"), cl::aliasopt(MachO)); - cl::opt<std::string> - TripleName("triple", cl::desc("Target triple to disassemble for, " +cl::opt<std::string> +llvm::TripleName("triple", cl::desc("Target triple to disassemble for, " + "see -version for available targets")); + +cl::opt<std::string> +llvm::ArchName("arch", cl::desc("Target arch to disassemble for, " "see -version for available targets")); - cl::opt<std::string> - ArchName("arch", cl::desc("Target arch to disassemble for, " - "see -version for available targets")); +static StringRef ToolName; - StringRef ToolName; +static bool error(error_code ec) { + if (!ec) return false; - bool error(error_code ec) { - if (!ec) return false; - - outs() << ToolName << ": error reading file: " << ec.message() << ".\n"; - outs().flush(); - return true; - } + outs() << ToolName << ": error reading file: " << ec.message() << ".\n"; + outs().flush(); + return true; } static const Target *GetTarget(const ObjectFile *Obj = NULL) { @@ -106,27 +105,8 @@ static const Target *GetTarget(const ObjectFile *Obj = NULL) { return 0; } -namespace { -class StringRefMemoryObject : public MemoryObject { -private: - StringRef Bytes; -public: - StringRefMemoryObject(StringRef bytes) : Bytes(bytes) {} - - uint64_t getBase() const { return 0; } - uint64_t getExtent() const { return Bytes.size(); } - - int readByte(uint64_t Addr, uint8_t *Byte) const { - if (Addr >= getExtent()) - return -1; - *Byte = Bytes[Addr]; - return 0; - } -}; -} - -static void DumpBytes(StringRef bytes) { - static char hex_rep[] = "0123456789abcdef"; +void llvm::DumpBytes(StringRef bytes) { + static const char hex_rep[] = "0123456789abcdef"; // FIXME: The real way to do this is to figure out the longest instruction // and align to that size before printing. I'll fix this when I get // around to outputting relocations. @@ -151,7 +131,7 @@ static void DumpBytes(StringRef bytes) { outs() << output; } -static void DisassembleInput(const StringRef &Filename) { +void llvm::DisassembleInputLibObject(StringRef Filename) { OwningPtr<MemoryBuffer> Buff; if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) { @@ -259,118 +239,22 @@ static void DisassembleInput(const StringRef &Filename) { raw_ostream &DebugOut = nulls(); #endif - if (!CFG) { - for (Index = Start; Index < End; Index += Size) { - MCInst Inst; + for (Index = Start; Index < End; Index += Size) { + MCInst Inst; - if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, - DebugOut, nulls())) { - uint64_t addr; - if (error(i->getAddress(addr))) break; - outs() << format("%8x:\t", addr + Index); - DumpBytes(StringRef(Bytes.data() + Index, Size)); - IP->printInst(&Inst, outs(), ""); - outs() << "\n"; - } else { - errs() << ToolName << ": warning: invalid instruction encoding\n"; - if (Size == 0) - Size = 1; // skip illegible bytes - } + if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, + DebugOut, nulls())) { + uint64_t addr; + if (error(i->getAddress(addr))) break; + outs() << format("%8x:\t", addr + Index); + DumpBytes(StringRef(Bytes.data() + Index, Size)); + IP->printInst(&Inst, outs(), ""); + outs() << "\n"; + } else { + errs() << ToolName << ": warning: invalid instruction encoding\n"; + if (Size == 0) + Size = 1; // skip illegible bytes } - - } else { - // Create CFG and use it for disassembly. - MCFunction f = - MCFunction::createFunctionFromMC(Symbols[si].second, DisAsm.get(), - memoryObject, Start, End, - InstrAnalysis.get(), DebugOut); - - for (MCFunction::iterator fi = f.begin(), fe = f.end(); fi != fe; ++fi){ - bool hasPreds = false; - // Only print blocks that have predecessors. - // FIXME: Slow. - for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe; - ++pi) - if (pi->second.contains(&fi->second)) { - hasPreds = true; - break; - } - - // Data block. - if (!hasPreds && fi != f.begin()) { - uint64_t End = llvm::next(fi) == fe ? SectSize : - llvm::next(fi)->first; - uint64_t addr; - if (error(i->getAddress(addr))) break; - outs() << "# " << End-fi->first << " bytes of data:\n"; - for (unsigned pos = fi->first; pos != End; ++pos) { - outs() << format("%8x:\t", addr + pos); - DumpBytes(StringRef(Bytes.data() + pos, 1)); - outs() << format("\t.byte 0x%02x\n", (uint8_t)Bytes[pos]); - } - continue; - } - - if (fi->second.contains(&fi->second)) - outs() << "# Loop begin:\n"; - - for (unsigned ii = 0, ie = fi->second.getInsts().size(); ii != ie; - ++ii) { - uint64_t addr; - if (error(i->getAddress(addr))) break; - const MCDecodedInst &Inst = fi->second.getInsts()[ii]; - outs() << format("%8x:\t", addr + Inst.Address); - DumpBytes(StringRef(Bytes.data() + Inst.Address, Inst.Size)); - // Simple loops. - if (fi->second.contains(&fi->second)) - outs() << '\t'; - IP->printInst(&Inst.Inst, outs(), ""); - outs() << '\n'; - } - } - - // Start a new dot file. - std::string Error; - raw_fd_ostream Out((f.getName().str() + ".dot").c_str(), Error); - if (!Error.empty()) { - errs() << ToolName << ": warning: " << Error << '\n'; - continue; - } - - Out << "digraph " << f.getName() << " {\n"; - Out << "graph [ rankdir = \"LR\" ];\n"; - for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) { - bool hasPreds = false; - // Only print blocks that have predecessors. - // FIXME: Slow. - for (MCFunction::iterator pi = f.begin(), pe = f.end(); pi != pe; - ++pi) - if (pi->second.contains(&i->second)) { - hasPreds = true; - break; - } - - if (!hasPreds && i != f.begin()) - continue; - - Out << '"' << (uintptr_t)&i->second << "\" [ label=\"<a>"; - // Print instructions. - for (unsigned ii = 0, ie = i->second.getInsts().size(); ii != ie; - ++ii) { - // Escape special chars and print the instruction in mnemonic form. - std::string Str; - raw_string_ostream OS(Str); - IP->printInst(&i->second.getInsts()[ii].Inst, OS, ""); - Out << DOT::EscapeString(OS.str()) << '|'; - } - Out << "<o>\" shape=\"record\" ];\n"; - - // Add edges. - for (MCBasicBlock::succ_iterator si = i->second.succ_begin(), - se = i->second.succ_end(); si != se; ++si) - Out << (uintptr_t)&i->second << ":o -> " << (uintptr_t)*si <<":a\n"; - } - Out << "}\n"; } } } @@ -404,8 +288,12 @@ int main(int argc, char **argv) { return 2; } - std::for_each(InputFilenames.begin(), InputFilenames.end(), - DisassembleInput); + if (MachO) + std::for_each(InputFilenames.begin(), InputFilenames.end(), + DisassembleInputMachO); + else + std::for_each(InputFilenames.begin(), InputFilenames.end(), + DisassembleInputLibObject); return 0; } diff --git a/tools/llvm-objdump/llvm-objdump.h b/tools/llvm-objdump/llvm-objdump.h new file mode 100644 index 00000000000..63108d56ecd --- /dev/null +++ b/tools/llvm-objdump/llvm-objdump.h @@ -0,0 +1,47 @@ +//===-- llvm-objdump.h ----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJDUMP_H +#define LLVM_OBJDUMP_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/MemoryObject.h" + +namespace llvm { + +extern cl::opt<std::string> TripleName; +extern cl::opt<std::string> ArchName; + +// Various helper functions. +void DumpBytes(StringRef bytes); +void DisassembleInputLibObject(StringRef Filename); +void DisassembleInputMachO(StringRef Filename); + +class StringRefMemoryObject : public MemoryObject { +private: + StringRef Bytes; +public: + StringRefMemoryObject(StringRef bytes) : Bytes(bytes) {} + + uint64_t getBase() const { return 0; } + uint64_t getExtent() const { return Bytes.size(); } + + int readByte(uint64_t Addr, uint8_t *Byte) const { + if (Addr >= getExtent()) + return -1; + *Byte = Bytes[Addr]; + return 0; + } +}; + +} + +#endif