From 8175bc3d3bc8567a90b9dd50d68c1423baffd63b Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Fri, 28 Jun 2013 08:15:40 +0000 Subject: [PATCH] llvm-symbolizer: add support for Mach-O universal binaries git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185137 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/CommandGuide/llvm-symbolizer.rst | 16 ++++ test/DebugInfo/Inputs/macho-universal | Bin 0 -> 16660 bytes test/DebugInfo/Inputs/macho-universal.cc | 10 ++ test/DebugInfo/llvm-symbolizer.test | 18 +++- tools/llvm-symbolizer/LLVMSymbolize.cpp | 109 ++++++++++++++-------- tools/llvm-symbolizer/LLVMSymbolize.h | 28 +++++- tools/llvm-symbolizer/llvm-symbolizer.cpp | 6 +- 7 files changed, 143 insertions(+), 44 deletions(-) create mode 100755 test/DebugInfo/Inputs/macho-universal create mode 100644 test/DebugInfo/Inputs/macho-universal.cc diff --git a/docs/CommandGuide/llvm-symbolizer.rst b/docs/CommandGuide/llvm-symbolizer.rst index 73babb1e5c5..e03be9b1987 100644 --- a/docs/CommandGuide/llvm-symbolizer.rst +++ b/docs/CommandGuide/llvm-symbolizer.rst @@ -22,6 +22,8 @@ EXAMPLE a.out 0x4004f4 /tmp/b.out 0x400528 /tmp/c.so 0x710 + /tmp/mach_universal_binary:i386 0x1f84 + /tmp/mach_universal_binary:x86_64 0x100000f24 $ llvm-symbolizer < addr.txt main /tmp/a.cc:4 @@ -38,6 +40,12 @@ EXAMPLE main /tmp/source.cc:8 + _main + /tmp/source_i386.cc:8 + + _main + /tmp/source_x86_64.cc:8 + OPTIONS ------- @@ -59,6 +67,14 @@ OPTIONS If a source code location is in an inlined function, prints all the inlnied frames. Defaults to true. +.. option:: -default-arch + + If a binary contains object files for multiple architectures (e.g. it is a + Mach-O universal binary), symbolize the object file for a given architecture. + You can also specify architecture by writing ``binary_name:arch_name`` in the + input (see example above). If architecture is not specified in either way, + address will not be symbolized. Defaults to empty string. + EXIT STATUS ----------- diff --git a/test/DebugInfo/Inputs/macho-universal b/test/DebugInfo/Inputs/macho-universal new file mode 100755 index 0000000000000000000000000000000000000000..a161441802ddb8d2f66b4ce2e32ce8fbe36a308e GIT binary patch literal 16660 zcmeHN-)me&6rQ`;n#Hco7Ggx_v!|%ZfgosDAOc+mnQzdict7o}9pQL4A9RF6_AnEOZZNkV&}SfjbWKU~8gA_xcqf`A|(2nYg# zfFK|U2m*qDARq_`0#6bGe>}YZ2qfPD$*;zcINARt7@w)MDdQl%S*X^zlkd3a-LoH< z<(3cL4t5mAISxmyC*6-GTiwlhUL2Ls<^#kYC{i+_b=g}ghX#gi@%m5<)-%=xcHGW6 z##J?HYF#N^E|&7TSSoysjrMq%Fdi|W(C#K$d$YQ*Fg5Q5UhVO&hw)~^Ahd05o#Jlc z|K7y=@4A@@iyiQ3!7apV)GP)MH{sNTJ&eEaXr~L>*AQnMYy4jrfc^-Z_%$#d$G8`o zgGPneHc>CDc z(cBTNAtj{}PaA#N+FXOYhx(9rPMF!Cr)8z+-r z%ekHvY&b3FKD@G6_U4CAMbEteFXlOpPXgmhp`A6!D>W3cFJUes2nYg#fFK|U2m*qD zARq_`0=tL6xvanDFE{JU%};NUzxa&{Hw%TraNnV;=2GrwYxh#O+$L>*xsmlN>-FWv z-Cs90HnO$V)Po4P|T#?8lRk1&Qi)rC3_Rs zu|vMYJ94;{V*S*~eW}l5Z$0x=c#eJD+|2-SkAdb;>oYGwTRVSqkH2c{gzOZ%LFRM9?UY-RW#?W0hPHaC?9(+|ti zdV9&*pg|t07t57tJ&h*o?AZGc`COmfo+TQy%!R~X6kI`d4fkUjDedwI^eBWYS!QYh? zb?>ilJM%Ak>$b0cBE-)2)yGZJTYYufbI3qM5D)|e0YN|z5CjAPK|l}?1O$QIM&O*k zmaDr7-)+|2<{q-e+G-p>G1&ZlfxlL}H&Azz_0tLa8-vpg-+fSb8-D!A-Jj9NudNPr z|B(R+F-aHotB{a!-r+|ESZ$TZ@BiOn|Gx?zeP=MO{?EQY`6PS(d> %t.input RUN: echo "%p/Inputs/dwarfdump-test4.elf-x86-64 0x62c" >> %t.input RUN: echo "%p/Inputs/dwarfdump-inl-test.elf-x86-64 0x710" >> %t.input RUN: echo "\"%p/Inputs/dwarfdump-test3.elf-x86-64 space\" 0x633" >> %t.input +RUN: echo "%p/Inputs/macho-universal 0x1f84" >> %t.input +RUN: echo "%p/Inputs/macho-universal:i386 0x1f67" >> %t.input +RUN: echo "%p/Inputs/macho-universal:x86_64 0x100000f05" >> %t.input -RUN: llvm-symbolizer --functions --inlining --demangle=false < %t.input \ -RUN: | FileCheck %s +RUN: llvm-symbolizer --functions --inlining --demangle=false \ +RUN: --default-arch=i386 < %t.input | FileCheck %s REQUIRES: shell @@ -29,5 +32,16 @@ CHECK-NEXT: dwarfdump-inl-test.cc: CHECK: _Z3do1v CHECK-NEXT: dwarfdump-test3-decl.h:7 +CHECK: _main +CHECK: __Z3inci +CHECK: __Z3inci + RUN: echo "unexisting-file 0x1234" > %t.input2 RUN: llvm-symbolizer < %t.input2 + +RUN: echo "%p/Inputs/macho-universal 0x1f84" > %t.input3 +RUN: llvm-symbolizer < %t.input3 | FileCheck %s --check-prefix=UNKNOWN-ARCH + +UNKNOWN-ARCH-NOT: main +UNKNOWN-ARCH: ?? +UNKNOWN-ARCH-NOT: main diff --git a/tools/llvm-symbolizer/LLVMSymbolize.cpp b/tools/llvm-symbolizer/LLVMSymbolize.cpp index 7fccedf1c4e..74e9843c434 100644 --- a/tools/llvm-symbolizer/LLVMSymbolize.cpp +++ b/tools/llvm-symbolizer/LLVMSymbolize.cpp @@ -198,23 +198,10 @@ std::string LLVMSymbolizer::symbolizeData(const std::string &ModuleName, void LLVMSymbolizer::flush() { DeleteContainerSeconds(Modules); + DeleteContainerPointers(ParsedBinariesAndObjects); } -// Returns true if the object endianness is known. -static bool getObjectEndianness(const ObjectFile *Obj, bool &IsLittleEndian) { - // FIXME: Implement this when libLLVMObject allows to do it easily. - IsLittleEndian = true; - return true; -} - -static ObjectFile *getObjectFile(const std::string &Path) { - OwningPtr Buff; - if (error(MemoryBuffer::getFile(Path, Buff))) - return 0; - return ObjectFile::createObjectFile(Buff.take()); -} - -static std::string getDarwinDWARFResourceForModule(const std::string &Path) { +static std::string getDarwinDWARFResourceForPath(const std::string &Path) { StringRef Basename = sys::path::filename(Path); const std::string &DSymDirectory = Path + ".dSYM"; SmallString<16> ResourceName = StringRef(DSymDirectory); @@ -223,39 +210,85 @@ static std::string getDarwinDWARFResourceForModule(const std::string &Path) { return ResourceName.str(); } +LLVMSymbolizer::BinaryPair +LLVMSymbolizer::getOrCreateBinary(const std::string &Path) { + BinaryMapTy::iterator I = BinaryForPath.find(Path); + if (I != BinaryForPath.end()) + return I->second; + Binary *Bin = 0; + Binary *DbgBin = 0; + OwningPtr ParsedBinary; + OwningPtr ParsedDbgBinary; + if (!error(createBinary(Path, ParsedBinary))) { + // Check if it's a universal binary. + Bin = ParsedBinary.take(); + ParsedBinariesAndObjects.push_back(Bin); + if (Bin->isMachO() || Bin->isMachOUniversalBinary()) { + // On Darwin we may find DWARF in separate object file in + // resource directory. + const std::string &ResourcePath = + getDarwinDWARFResourceForPath(Path); + bool ResourceFileExists = false; + if (!sys::fs::exists(ResourcePath, ResourceFileExists) && + ResourceFileExists && + !error(createBinary(ResourcePath, ParsedDbgBinary))) { + DbgBin = ParsedDbgBinary.take(); + ParsedBinariesAndObjects.push_back(DbgBin); + } + } + } + if (DbgBin == 0) + DbgBin = Bin; + BinaryPair Res = std::make_pair(Bin, DbgBin); + BinaryForPath[Path] = Res; + return Res; +} + +ObjectFile * +LLVMSymbolizer::getObjectFileFromBinary(Binary *Bin, const std::string &ArchName) { + if (Bin == 0) + return 0; + ObjectFile *Res = 0; + if (MachOUniversalBinary *UB = dyn_cast(Bin)) { + ObjectFileForArchMapTy::iterator I = ObjectFileForArch.find( + std::make_pair(UB, ArchName)); + if (I != ObjectFileForArch.end()) + return I->second; + OwningPtr ParsedObj; + if (!UB->getObjectForArch(Triple(ArchName).getArch(), ParsedObj)) { + Res = ParsedObj.take(); + ParsedBinariesAndObjects.push_back(Res); + } + ObjectFileForArch[std::make_pair(UB, ArchName)] = Res; + } else if (Bin->isObject()) { + Res = cast(Bin); + } + return Res; +} + ModuleInfo * LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) { ModuleMapTy::iterator I = Modules.find(ModuleName); if (I != Modules.end()) return I->second; + std::string BinaryName = ModuleName; + std::string ArchName = Opts.DefaultArch; + size_t ColonPos = ModuleName.find(':'); + if (ColonPos != std::string::npos) { + BinaryName = ModuleName.substr(0, ColonPos); + ArchName = ModuleName.substr(ColonPos + 1); + } + BinaryPair Binaries = getOrCreateBinary(BinaryName); + ObjectFile *Obj = getObjectFileFromBinary(Binaries.first, ArchName); + ObjectFile *DbgObj = getObjectFileFromBinary(Binaries.second, ArchName); - ObjectFile *Obj = getObjectFile(ModuleName); if (Obj == 0) { - // Module name doesn't point to a valid object file. + // Failed to find valid object file. Modules.insert(make_pair(ModuleName, (ModuleInfo *)0)); return 0; } - - DIContext *Context = 0; - bool IsLittleEndian; - if (getObjectEndianness(Obj, IsLittleEndian)) { - // On Darwin we may find DWARF in separate object file in - // resource directory. - ObjectFile *DbgObj = Obj; - if (isa(Obj)) { - const std::string &ResourceName = - getDarwinDWARFResourceForModule(ModuleName); - bool ResourceFileExists = false; - if (!sys::fs::exists(ResourceName, ResourceFileExists) && - ResourceFileExists) { - if (ObjectFile *ResourceObj = getObjectFile(ResourceName)) - DbgObj = ResourceObj; - } - } - Context = DIContext::getDWARFContext(DbgObj); - assert(Context); - } - + DIContext *Context = DIContext::getDWARFContext(DbgObj); + assert(Context); ModuleInfo *Info = new ModuleInfo(Obj, Context); Modules.insert(make_pair(ModuleName, Info)); return Info; diff --git a/tools/llvm-symbolizer/LLVMSymbolize.h b/tools/llvm-symbolizer/LLVMSymbolize.h index 188331bfa67..c7f87b1429c 100644 --- a/tools/llvm-symbolizer/LLVMSymbolize.h +++ b/tools/llvm-symbolizer/LLVMSymbolize.h @@ -14,7 +14,9 @@ #define LLVM_SYMBOLIZE_H #include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/DIContext.h" +#include "llvm/Object/MachOUniversal.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/MemoryBuffer.h" #include @@ -35,10 +37,13 @@ public: bool PrintFunctions : 1; bool PrintInlining : 1; bool Demangle : 1; + std::string DefaultArch; Options(bool UseSymbolTable = true, bool PrintFunctions = true, - bool PrintInlining = true, bool Demangle = true) + bool PrintInlining = true, bool Demangle = true, + std::string DefaultArch = "") : UseSymbolTable(UseSymbolTable), PrintFunctions(PrintFunctions), - PrintInlining(PrintInlining), Demangle(Demangle) { + PrintInlining(PrintInlining), Demangle(Demangle), + DefaultArch(DefaultArch) { } }; @@ -52,12 +57,29 @@ public: symbolizeData(const std::string &ModuleName, uint64_t ModuleOffset); void flush(); private: + typedef std::pair BinaryPair; + ModuleInfo *getOrCreateModuleInfo(const std::string &ModuleName); + /// \brief Returns pair of pointers to binary and debug binary. + BinaryPair getOrCreateBinary(const std::string &Path); + /// \brief Returns a parsed object file for a given architecture in a + /// universal binary (or the binary itself if it is an object file). + ObjectFile *getObjectFileFromBinary(Binary *Bin, const std::string &ArchName); + std::string printDILineInfo(DILineInfo LineInfo) const; void DemangleName(std::string &Name) const; + // Owns all the parsed binaries and object files. + SmallVector ParsedBinariesAndObjects; + // Owns module info objects. typedef std::map ModuleMapTy; ModuleMapTy Modules; + typedef std::map BinaryMapTy; + BinaryMapTy BinaryForPath; + typedef std::map, ObjectFile *> + ObjectFileForArchMapTy; + ObjectFileForArchMapTy ObjectFileForArch; + Options Opts; static const char kBadString[]; }; @@ -77,7 +99,7 @@ private: bool getNameFromSymbolTable(SymbolRef::Type Type, uint64_t Address, std::string &Name, uint64_t &Addr, uint64_t &Size) const; - OwningPtr Module; + ObjectFile *Module; OwningPtr DebugInfoContext; struct SymbolDesc { diff --git a/tools/llvm-symbolizer/llvm-symbolizer.cpp b/tools/llvm-symbolizer/llvm-symbolizer.cpp index 0cafffaf712..c32e9494ea3 100644 --- a/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -47,6 +47,10 @@ ClPrintInlining("inlining", cl::init(true), static cl::opt ClDemangle("demangle", cl::init(true), cl::desc("Demangle function names")); +static cl::opt ClDefaultArch("default-arch", cl::init(""), + cl::desc("Default architecture " + "(for multi-arch objects)")); + static bool parseCommand(bool &IsData, std::string &ModuleName, uint64_t &ModuleOffset) { const char *kDataCmd = "DATA "; @@ -102,7 +106,7 @@ int main(int argc, char **argv) { cl::ParseCommandLineOptions(argc, argv, "llvm symbolizer for compiler-rt\n"); LLVMSymbolizer::Options Opts(ClUseSymbolTable, ClPrintFunctions, - ClPrintInlining, ClDemangle); + ClPrintInlining, ClDemangle, ClDefaultArch); LLVMSymbolizer Symbolizer(Opts); bool IsData = false;