From 0602444f70ea5d578817f68d7003d34c35522cff Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Sat, 31 Jan 2015 00:37:11 +0000 Subject: [PATCH] Add the -section option to llvm-objdump used with -macho that takes the argument segname,sectname to specify a Mach-O section to print. The printing is based on the section type or section attributes. The printing of the module initialization and termination section types is printed with this change. Printing of other section types will be added next. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227649 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../X86/Inputs/dylibModInit.macho-x86_64 | Bin 0 -> 8256 bytes .../tools/llvm-objdump/X86/macho-section.test | 4 + tools/llvm-objdump/MachODump.cpp | 224 ++++++++++++++++-- tools/llvm-objdump/llvm-objdump.cpp | 3 +- tools/llvm-objdump/llvm-objdump.h | 1 + 5 files changed, 207 insertions(+), 25 deletions(-) create mode 100755 test/tools/llvm-objdump/X86/Inputs/dylibModInit.macho-x86_64 create mode 100644 test/tools/llvm-objdump/X86/macho-section.test diff --git a/test/tools/llvm-objdump/X86/Inputs/dylibModInit.macho-x86_64 b/test/tools/llvm-objdump/X86/Inputs/dylibModInit.macho-x86_64 new file mode 100755 index 0000000000000000000000000000000000000000..a39424aab312178e255795e662fc696132657fef GIT binary patch literal 8256 zcmeI1v1`*&5XbLVYZKGd{-8zFO8E^Uy0jUDW^A;D7DZzl!9@XI=s!gM$utaCz_U-S2Ym^Gk+%@8geeKXXLUDI%H? zEs8!D$Sh`F};pq9CYKjLAN)RvrdS`>L9%xV27b#rGA=S$|@+Bpq@Jv|7A z(GN}LZu_v=-s5Jw(UFDG`pTMbOCN$g6($&&GuNMTWB*aB?iNP#^)+8#pM&l9Hj*kc z!R?JG^G2ms2_ihNF(bGkuOQEaxz)iYn<+QC?I&aNQx z?y&Z7ZR|X8-+xg8u&OWoF#<-w2p9n)U<8bS z5ikNq;NKFszxC?N!w)Ba#M|h3E@^4uh+Y7_>vb8SYub-sj=ruaJwiO_n4(4TL89Xw zijLwaF6Qzlk{ta8^7el@L2sT(gJrcCp}-C@ciHQOTH`{jerp_0!F|H7y%<- z1dMY literal 0 HcmV?d00001 diff --git a/test/tools/llvm-objdump/X86/macho-section.test b/test/tools/llvm-objdump/X86/macho-section.test new file mode 100644 index 00000000000..720b9a402e1 --- /dev/null +++ b/test/tools/llvm-objdump/X86/macho-section.test @@ -0,0 +1,4 @@ +// RUN: llvm-objdump -m -section __DATA,__mod_init_func %p/Inputs/dylibModInit.macho-x86_64 | FileCheck %s -check-prefix=MODINIT + +MODINIT: Contents of (__DATA,__mod_init_func) section +MODINIT: 0x0000000000001000 0x0000000000000f30 _libinit diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp index c996fc26ef2..042ac2dd8a5 100644 --- a/tools/llvm-objdump/MachODump.cpp +++ b/tools/llvm-objdump/MachODump.cpp @@ -91,6 +91,11 @@ cl::opt cl::desc("Print the linker optimization hints for " "Mach-O objects (requires -macho)")); +cl::list + llvm::DumpSections("section", + cl::desc("Prints the specified segment,section for " + "Mach-O objects (requires -macho)")); + static cl::list ArchFlags("arch", cl::desc("architecture(s) from a Mach-O file to dump"), cl::ZeroOrMore); @@ -499,6 +504,190 @@ static void PrintLinkOptHints(MachOObjectFile *O) { } } +typedef DenseMap SymbolAddressMap; + +static void CreateSymbolAddressMap(MachOObjectFile *O, + SymbolAddressMap *AddrMap) { + // Create a map of symbol addresses to symbol names. + for (const SymbolRef &Symbol : O->symbols()) { + SymbolRef::Type ST; + Symbol.getType(ST); + if (ST == SymbolRef::ST_Function || ST == SymbolRef::ST_Data || + ST == SymbolRef::ST_Other) { + uint64_t Address; + Symbol.getAddress(Address); + StringRef SymName; + Symbol.getName(SymName); + (*AddrMap)[Address] = SymName; + } + } +} + +// GuessSymbolName is passed the address of what might be a symbol and a +// pointer to the SymbolAddressMap. It returns the name of a symbol +// with that address or nullptr if no symbol is found with that address. +static const char *GuessSymbolName(uint64_t value, SymbolAddressMap *AddrMap) { + const char *SymbolName = nullptr; + // A DenseMap can't lookup up some values. + if (value != 0xffffffffffffffffULL && value != 0xfffffffffffffffeULL) { + StringRef name = AddrMap->lookup(value); + if (!name.empty()) + SymbolName = name.data(); + } + return SymbolName; +} + +static void DumpInitTermPointerSection(MachOObjectFile *O, const char *sect, + uint32_t sect_size, uint64_t sect_addr, + SymbolAddressMap *AddrMap, + bool verbose) { + uint32_t stride; + if (O->is64Bit()) + stride = sizeof(uint64_t); + else + stride = sizeof(uint32_t); + for (uint32_t i = 0; i < sect_size; i += stride) { + const char *SymbolName = nullptr; + if (O->is64Bit()) { + outs() << format("0x%016" PRIx64, sect_addr + i * stride) << " "; + uint64_t pointer_value; + memcpy(&pointer_value, sect + i, stride); + if (O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(pointer_value); + outs() << format("0x%016" PRIx64, pointer_value); + if (verbose) + SymbolName = GuessSymbolName(pointer_value, AddrMap); + } else { + outs() << format("0x%08" PRIx64, sect_addr + i * stride) << " "; + uint32_t pointer_value; + memcpy(&pointer_value, sect + i, stride); + if (O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(pointer_value); + outs() << format("0x%08" PRIx32, pointer_value); + if (verbose) + SymbolName = GuessSymbolName(pointer_value, AddrMap); + } + if (SymbolName) + outs() << " " << SymbolName; + outs() << "\n"; + } +} + +static void DumpRawSectionContents(MachOObjectFile *O, const char *sect, + uint32_t size, uint64_t addr) { + uint32_t cputype = O->getHeader().cputype; + if (cputype == MachO::CPU_TYPE_I386 || cputype == MachO::CPU_TYPE_X86_64) { + uint32_t j; + for (uint32_t i = 0; i < size; i += j, addr += j) { + if (O->is64Bit()) + outs() << format("%016" PRIx64, addr) << "\t"; + else + outs() << format("%08" PRIx64, sect) << "\t"; + for (j = 0; j < 16 && i + j < size; j++) { + uint8_t byte_word = *(sect + i + j); + outs() << format("%02" PRIx32, (uint32_t)byte_word) << " "; + } + outs() << "\n"; + } + } else { + uint32_t j; + for (uint32_t i = 0; i < size; i += j, addr += j) { + if (O->is64Bit()) + outs() << format("%016" PRIx64, addr) << "\t"; + else + outs() << format("%08" PRIx64, sect) << "\t"; + for (j = 0; j < 4 * sizeof(int32_t) && i + j < size; + j += sizeof(int32_t)) { + if (i + j + sizeof(int32_t) < size) { + uint32_t long_word; + memcpy(&long_word, sect + i + j, sizeof(int32_t)); + if (O->isLittleEndian() != sys::IsLittleEndianHost) + sys::swapByteOrder(long_word); + outs() << format("%08" PRIx32, long_word) << " "; + } else { + for (uint32_t k = 0; i + j + k < size; k++) { + uint8_t byte_word = *(sect + i + j); + outs() << format("%02" PRIx32, (uint32_t)byte_word) << " "; + } + } + } + outs() << "\n"; + } + } +} + +static void DumpSectionContents(MachOObjectFile *O, bool verbose) { + SymbolAddressMap AddrMap; + if (verbose) + CreateSymbolAddressMap(O, &AddrMap); + + for (unsigned i = 0; i < DumpSections.size(); ++i) { + StringRef DumpSection = DumpSections[i]; + std::pair DumpSegSectName; + DumpSegSectName = DumpSection.split(','); + StringRef DumpSegName, DumpSectName; + if (DumpSegSectName.second.size()) { + DumpSegName = DumpSegSectName.first; + DumpSectName = DumpSegSectName.second; + } else { + DumpSegName = ""; + DumpSectName = DumpSegSectName.first; + } + for (const SectionRef &Section : O->sections()) { + StringRef SectName; + Section.getName(SectName); + DataRefImpl Ref = Section.getRawDataRefImpl(); + StringRef SegName = O->getSectionFinalSegmentName(Ref); + if ((DumpSegName.empty() || SegName == DumpSegName) && + (SectName == DumpSectName)) { + outs() << "Contents of (" << SegName << "," << SectName + << ") section\n"; + uint32_t section_type; + if (O->is64Bit()) { + const MachO::section_64 Sec = O->getSection64(Ref); + section_type = Sec.flags & MachO::SECTION_TYPE; + + } else { + const MachO::section Sec = O->getSection(Ref); + section_type = Sec.flags & MachO::SECTION_TYPE; + } + + StringRef BytesStr; + Section.getContents(BytesStr); + const char *sect = reinterpret_cast(BytesStr.data()); + uint32_t sect_size = BytesStr.size(); + uint64_t sect_addr = Section.getAddress(); + + if (verbose) { + switch (section_type) { + case MachO::S_REGULAR: + DumpRawSectionContents(O, sect, sect_size, sect_addr); + break; + case MachO::S_ZEROFILL: + outs() << "zerofill section and has no contents in the file\n"; + break; + case MachO::S_MOD_INIT_FUNC_POINTERS: + case MachO::S_MOD_TERM_FUNC_POINTERS: + DumpInitTermPointerSection(O, sect, sect_size, sect_addr, &AddrMap, + verbose); + break; + default: + outs() << "Unknown section type (" + << format("0x%08" PRIx32, section_type) << ")\n"; + DumpRawSectionContents(O, sect, sect_size, sect_addr); + break; + } + } else { + if (section_type == MachO::S_ZEROFILL) + outs() << "zerofill section and has no contents in the file\n"; + else + DumpRawSectionContents(O, sect, sect_size, sect_addr); + } + } + } + } +} + // checkMachOAndArchFlags() checks to see if the ObjectFile is a Mach-O file // and if it is and there is a list of architecture flags is specified then // check to make sure this Mach-O file is one of those architectures or all @@ -546,7 +735,8 @@ static void ProcessMachO(StringRef Filename, MachOObjectFile *MachOOF, // info. And don't print it otherwise like in the case of printing the // UniversalHeaders or ArchiveHeaders. if (Disassemble || PrivateHeaders || ExportsTrie || Rebase || Bind || - LazyBind || WeakBind || IndirectSymbols || DataInCode || LinkOptHints) { + LazyBind || WeakBind || IndirectSymbols || DataInCode || LinkOptHints || + DumpSections.size() != 0) { outs() << Filename; if (!ArchiveMemberName.empty()) outs() << '(' << ArchiveMemberName << ')'; @@ -569,6 +759,8 @@ static void ProcessMachO(StringRef Filename, MachOObjectFile *MachOOF, PrintSectionHeaders(MachOOF); if (SectionContents) PrintSectionContents(MachOOF); + if (DumpSections.size() != 0) + DumpSectionContents(MachOOF, true); if (SymbolTable) PrintSymbolTable(MachOOF); if (UnwindInfo) @@ -1045,7 +1237,6 @@ void llvm::ParseInputMachO(StringRef Filename) { << "Unrecognized file type.\n"; } -typedef DenseMap SymbolAddressMap; typedef std::pair BindInfoEntry; typedef std::vector BindTable; typedef BindTable::iterator bind_table_iterator; @@ -1066,21 +1257,6 @@ struct DisassembleInfo { BindTable *bindtable; }; -// GuessSymbolName is passed the address of what might be a symbol and a -// pointer to the DisassembleInfo struct. It returns the name of a symbol -// with that address or nullptr if no symbol is found with that address. -static const char *GuessSymbolName(uint64_t value, - struct DisassembleInfo *info) { - const char *SymbolName = nullptr; - // A DenseMap can't lookup up some values. - if (value != 0xffffffffffffffffULL && value != 0xfffffffffffffffeULL) { - StringRef name = info->AddrMap->lookup(value); - if (!name.empty()) - SymbolName = name.data(); - } - return SymbolName; -} - // SymbolizerGetOpInfo() is the operand information call back function. // This is called to get the symbolic information for operand(s) of an // instruction when it is being done. This routine does this from @@ -1171,8 +1347,8 @@ int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, } if (reloc_found && (r_type == MachO::GENERIC_RELOC_SECTDIFF || r_type == MachO::GENERIC_RELOC_LOCAL_SECTDIFF)) { - const char *add = GuessSymbolName(r_value, info); - const char *sub = GuessSymbolName(pair_r_value, info); + const char *add = GuessSymbolName(r_value, info->AddrMap); + const char *sub = GuessSymbolName(pair_r_value, info->AddrMap); uint32_t offset = value - (r_value - pair_r_value); op_info->AddSymbol.Present = 1; if (add != nullptr) @@ -1373,8 +1549,8 @@ int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_HI16; else op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_LO16; - const char *add = GuessSymbolName(r_value, info); - const char *sub = GuessSymbolName(pair_r_value, info); + const char *add = GuessSymbolName(r_value, info->AddrMap); + const char *sub = GuessSymbolName(pair_r_value, info->AddrMap); int32_t offset = value - (r_value - pair_r_value); op_info->AddSymbol.Present = 1; if (add != nullptr) @@ -1403,7 +1579,7 @@ int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, op_info->VariantKind = LLVMDisassembler_VariantKind_ARM_LO16; } } - const char *add = GuessSymbolName(value, info); + const char *add = GuessSymbolName(value, info->AddrMap); if (add != nullptr) { op_info->AddSymbol.Name = add; return 1; @@ -1864,7 +2040,7 @@ const char *get_symbol_64(uint32_t sect_offset, SectionRef S, // // NOTE: need add passing the ReferenceValue to this routine. Then that code // would simply be this: - // SymbolName = GuessSymbolName(ReferenceValue, info); + // SymbolName = GuessSymbolName(ReferenceValue, info->AddrMap); return SymbolName; } @@ -2207,7 +2383,7 @@ const char *SymbolizerSymbolLookUp(void *DisInfo, uint64_t ReferenceValue, return nullptr; } - const char *SymbolName = GuessSymbolName(ReferenceValue, info); + const char *SymbolName = GuessSymbolName(ReferenceValue, info->AddrMap); if (*ReferenceType == LLVMDisassembler_ReferenceType_In_Branch) { *ReferenceName = GuessIndirectSymbol(ReferenceValue, info); diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp index 4935f98eca1..6e62aaac235 100644 --- a/tools/llvm-objdump/llvm-objdump.cpp +++ b/tools/llvm-objdump/llvm-objdump.cpp @@ -906,7 +906,8 @@ int main(int argc, char **argv) { && !(ArchiveHeaders && MachOOpt) && !(IndirectSymbols && MachOOpt) && !(DataInCode && MachOOpt) - && !(LinkOptHints && MachOOpt)) { + && !(LinkOptHints && MachOOpt) + && !(DumpSections.size() != 0 && MachOOpt)) { cl::PrintHelpMessage(); return 2; } diff --git a/tools/llvm-objdump/llvm-objdump.h b/tools/llvm-objdump/llvm-objdump.h index 402632daea7..19f842f381d 100644 --- a/tools/llvm-objdump/llvm-objdump.h +++ b/tools/llvm-objdump/llvm-objdump.h @@ -26,6 +26,7 @@ extern cl::opt TripleName; extern cl::opt ArchName; extern cl::opt MCPU; extern cl::list MAttrs; +extern cl::list DumpSections; extern cl::opt Disassemble; extern cl::opt NoShowRawInsn; extern cl::opt PrivateHeaders;