diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h index dddc9c1c8c6..29f737571fb 100644 --- a/include/llvm/Object/MachO.h +++ b/include/llvm/Object/MachO.h @@ -100,6 +100,42 @@ private: }; typedef content_iterator export_iterator; +/// MachORebaseEntry encapsulates the current state in the decompression of +/// rebasing opcodes. This allows you to iterate through the compressed table of +/// rebasing using: +/// for (const llvm::object::MachORebaseEntry &Entry : Obj->rebaseTable()) { +/// } +class MachORebaseEntry { +public: + MachORebaseEntry(ArrayRef opcodes, bool is64Bit); + + uint32_t segmentIndex() const; + uint64_t segmentOffset() const; + StringRef typeName() const; + + bool operator==(const MachORebaseEntry &) const; + + void moveNext(); + +private: + friend class MachOObjectFile; + void moveToFirst(); + void moveToEnd(); + uint64_t readULEB128(); + + ArrayRef Opcodes; + const uint8_t *Ptr; + uint64_t SegmentOffset; + uint32_t SegmentIndex; + uint64_t RemainingLoopCount; + uint64_t AdvanceAmount; + uint8_t RebaseType; + uint8_t PointerSize; + bool Malformed; + bool Done; +}; +typedef content_iterator rebase_iterator; + class MachOObjectFile : public ObjectFile { public: struct LoadCommandInfo { @@ -202,6 +238,13 @@ public: /// For use examining a trie not in a MachOObjectFile. static iterator_range exports(ArrayRef Trie); + /// For use iterating over all rebase table entries. + iterator_range rebaseTable() const; + + /// For use examining rebase opcodes not in a MachOObjectFile. + static iterator_range rebaseTable(ArrayRef Opcodes, + bool is64); + // In a MachO file, sections have a segment name. This is used in the .o // files. They have a single segment, but this field specifies which segment // a section should be put in in the final object. diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index c6815ea27d2..7e7e8c49315 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/Host.h" #include "llvm/Support/LEB128.h" @@ -1688,6 +1689,177 @@ iterator_range MachOObjectFile::exports() const { } +MachORebaseEntry::MachORebaseEntry(ArrayRef Bytes, bool is64Bit) + : Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), SegmentIndex(0), + RemainingLoopCount(0), AdvanceAmount(0), RebaseType(0), + PointerSize(is64Bit ? 8 : 4), Malformed(false), Done(false) {} + +void MachORebaseEntry::moveToFirst() { + Ptr = Opcodes.begin(); + moveNext(); +} + +void MachORebaseEntry::moveToEnd() { + Ptr = Opcodes.end(); + RemainingLoopCount = 0; + Done = true; +} + +void MachORebaseEntry::moveNext() { + // If in the middle of some loop, move to next rebasing in loop. + SegmentOffset += AdvanceAmount; + if (RemainingLoopCount) { + --RemainingLoopCount; + return; + } + if (Ptr == Opcodes.end()) { + Done = true; + return; + } + bool More = true; + while (More && !Malformed) { + // Parse next opcode and set up next loop. + uint8_t Byte = *Ptr++; + uint8_t ImmValue = Byte & MachO::REBASE_IMMEDIATE_MASK; + uint8_t Opcode = Byte & MachO::REBASE_OPCODE_MASK; + switch (Opcode) { + case MachO::REBASE_OPCODE_DONE: + More = false; + Done = true; + moveToEnd(); + DEBUG_WITH_TYPE("mach-o-rebase", llvm::dbgs() << "REBASE_OPCODE_DONE\n"); + break; + case MachO::REBASE_OPCODE_SET_TYPE_IMM: + RebaseType = ImmValue; + DEBUG_WITH_TYPE( + "mach-o-rebase", + llvm::dbgs() << "REBASE_OPCODE_SET_TYPE_IMM: " + << "RebaseType=" << (int) RebaseType << "\n"); + break; + case MachO::REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: + SegmentIndex = ImmValue; + SegmentOffset = readULEB128(); + DEBUG_WITH_TYPE( + "mach-o-rebase", + llvm::dbgs() << "REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB: " + << "SegmentIndex=" << SegmentIndex << ", " + << format("SegmentOffset=0x%06X", SegmentOffset) + << "\n"); + break; + case MachO::REBASE_OPCODE_ADD_ADDR_ULEB: + SegmentOffset += readULEB128(); + DEBUG_WITH_TYPE("mach-o-rebase", + llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); + break; + case MachO::REBASE_OPCODE_ADD_ADDR_IMM_SCALED: + SegmentOffset += ImmValue * PointerSize; + DEBUG_WITH_TYPE("mach-o-rebase", + llvm::dbgs() << "REBASE_OPCODE_ADD_ADDR_IMM_SCALED: " + << format("SegmentOffset=0x%06X", + SegmentOffset) << "\n"); + break; + case MachO::REBASE_OPCODE_DO_REBASE_IMM_TIMES: + AdvanceAmount = PointerSize; + RemainingLoopCount = ImmValue - 1; + DEBUG_WITH_TYPE( + "mach-o-rebase", + llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_IMM_TIMES: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES: + AdvanceAmount = PointerSize; + RemainingLoopCount = readULEB128() - 1; + DEBUG_WITH_TYPE( + "mach-o-rebase", + llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + case MachO::REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: + AdvanceAmount = readULEB128() + PointerSize; + RemainingLoopCount = 0; + DEBUG_WITH_TYPE( + "mach-o-rebase", + llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + case MachO::REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: + RemainingLoopCount = readULEB128() - 1; + AdvanceAmount = readULEB128() + PointerSize; + DEBUG_WITH_TYPE( + "mach-o-rebase", + llvm::dbgs() << "REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB: " + << format("SegmentOffset=0x%06X", SegmentOffset) + << ", AdvanceAmount=" << AdvanceAmount + << ", RemainingLoopCount=" << RemainingLoopCount + << "\n"); + return; + default: + Malformed = true; + } + } +} + +uint64_t MachORebaseEntry::readULEB128() { + unsigned Count; + uint64_t Result = decodeULEB128(Ptr, &Count); + Ptr += Count; + if (Ptr > Opcodes.end()) { + Ptr = Opcodes.end(); + Malformed = true; + } + return Result; +} + +uint32_t MachORebaseEntry::segmentIndex() const { return SegmentIndex; } + +uint64_t MachORebaseEntry::segmentOffset() const { return SegmentOffset; } + +StringRef MachORebaseEntry::typeName() const { + switch (RebaseType) { + case MachO::REBASE_TYPE_POINTER: + return "pointer"; + case MachO::REBASE_TYPE_TEXT_ABSOLUTE32: + return "text abs32"; + case MachO::REBASE_TYPE_TEXT_PCREL32: + return "text rel32"; + } + return "unknown"; +} + +bool MachORebaseEntry::operator==(const MachORebaseEntry &Other) const { + assert(Opcodes == Other.Opcodes && "compare iterators of different files"); + return (Ptr == Other.Ptr) && + (RemainingLoopCount == Other.RemainingLoopCount) && + (Done == Other.Done); +} + +iterator_range +MachOObjectFile::rebaseTable(ArrayRef Opcodes, bool is64) { + MachORebaseEntry Start(Opcodes, is64); + Start.moveToFirst(); + + MachORebaseEntry Finish(Opcodes, is64); + Finish.moveToEnd(); + + return iterator_range(rebase_iterator(Start), + rebase_iterator(Finish)); +} + +iterator_range MachOObjectFile::rebaseTable() const { + return rebaseTable(getDyldInfoRebaseOpcodes(), is64Bit()); +} + StringRef MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const { ArrayRef Raw = getSectionRawFinalSegmentName(Sec); diff --git a/test/tools/llvm-objdump/Inputs/rebase.macho-x86_64 b/test/tools/llvm-objdump/Inputs/rebase.macho-x86_64 new file mode 100755 index 00000000000..05062d82483 Binary files /dev/null and b/test/tools/llvm-objdump/Inputs/rebase.macho-x86_64 differ diff --git a/test/tools/llvm-objdump/macho-rebase.test b/test/tools/llvm-objdump/macho-rebase.test new file mode 100644 index 00000000000..98e7f32f7d3 --- /dev/null +++ b/test/tools/llvm-objdump/macho-rebase.test @@ -0,0 +1,15 @@ +# RUN: llvm-objdump -macho -rebase -arch x86_64 \ +# RUN: %p/Inputs/rebase.macho-x86_64 2>/dev/null | FileCheck %s + + +# CHECK: segment section address type +# CHECK: __DATA __data 0x00001010 pointer +# CHECK: __DATA __data 0x00001028 pointer +# CHECK: __DATA __data 0x00001030 pointer +# CHECK: __DATA __data 0x00001038 pointer +# CHECK: __DATA __data 0x00001040 pointer +# CHECK: __DATA __data 0x00001258 pointer +# CHECK: __DATA __mystuff 0x00001278 pointer +# CHECK: __DATA __mystuff 0x00001288 pointer +# CHECK: __DATA __mystuff 0x00001298 pointer +# CHECK: __DATA __mystuff 0x000012A8 pointer diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp index 34933eb7071..debe33e9cfa 100644 --- a/tools/llvm-objdump/MachODump.cpp +++ b/tools/llvm-objdump/MachODump.cpp @@ -2146,3 +2146,109 @@ void llvm::printMachOExportsTrie(const object::MachOObjectFile *Obj) { outs() << "\n"; } } + + +//===----------------------------------------------------------------------===// +// rebase table dumping +//===----------------------------------------------------------------------===// + +namespace { +class SegInfo { +public: + SegInfo(const object::MachOObjectFile *Obj); + + StringRef segmentName(uint32_t SegIndex); + StringRef sectionName(uint32_t SegIndex, uint64_t SegOffset); + uint64_t address(uint32_t SegIndex, uint64_t SegOffset); + +private: + struct SectionInfo { + uint64_t Address; + uint64_t Size; + StringRef SectionName; + StringRef SegmentName; + uint64_t OffsetInSegment; + uint64_t SegmentStartAddress; + uint32_t SegmentIndex; + }; + const SectionInfo &findSection(uint32_t SegIndex, uint64_t SegOffset); + SmallVector Sections; +}; +} + +SegInfo::SegInfo(const object::MachOObjectFile *Obj) { + // Build table of sections so segIndex/offset pairs can be translated. + uint32_t CurSegIndex = 0; + StringRef CurSegName; + uint64_t CurSegAddress; + for (const SectionRef &Section : Obj->sections()) { + SectionInfo Info; + if (error(Section.getName(Info.SectionName))) + return; + if (error(Section.getAddress(Info.Address))) + return; + if (error(Section.getSize(Info.Size))) + return; + Info.SegmentName = + Obj->getSectionFinalSegmentName(Section.getRawDataRefImpl()); + if (!Info.SegmentName.equals(CurSegName)) { + ++CurSegIndex; + CurSegName = Info.SegmentName; + CurSegAddress = Info.Address; + } + Info.SegmentIndex = CurSegIndex - 1; + Info.OffsetInSegment = Info.Address - CurSegAddress; + Info.SegmentStartAddress = CurSegAddress; + Sections.push_back(Info); + } +} + +StringRef SegInfo::segmentName(uint32_t SegIndex) { + for (const SectionInfo &SI : Sections) { + if (SI.SegmentIndex == SegIndex) + return SI.SegmentName; + } + llvm_unreachable("invalid segIndex"); +} + +const SegInfo::SectionInfo &SegInfo::findSection(uint32_t SegIndex, + uint64_t OffsetInSeg) { + for (const SectionInfo &SI : Sections) { + if (SI.SegmentIndex != SegIndex) + continue; + if (SI.OffsetInSegment > OffsetInSeg) + continue; + if (OffsetInSeg >= (SI.OffsetInSegment + SI.Size)) + continue; + return SI; + } + llvm_unreachable("segIndex and offset not in any section"); +} + +StringRef SegInfo::sectionName(uint32_t SegIndex, uint64_t OffsetInSeg) { + return findSection(SegIndex, OffsetInSeg).SectionName; +} + +uint64_t SegInfo::address(uint32_t SegIndex, uint64_t OffsetInSeg) { + const SectionInfo &SI = findSection(SegIndex, OffsetInSeg); + return SI.SegmentStartAddress + OffsetInSeg; +} + +void llvm::printMachORebaseTable(const object::MachOObjectFile *Obj) { + // Build table of sections so names can used in final output. + SegInfo sectionTable(Obj); + + outs() << "segment section address type\n"; + for (const llvm::object::MachORebaseEntry &Entry : Obj->rebaseTable()) { + uint32_t SegIndex = Entry.segmentIndex(); + uint64_t OffsetInSeg = Entry.segmentOffset(); + StringRef SegmentName = sectionTable.segmentName(SegIndex); + StringRef SectionName = sectionTable.sectionName(SegIndex, OffsetInSeg); + uint64_t Address = sectionTable.address(SegIndex, OffsetInSeg); + + // Table lines look like: __DATA __nl_symbol_ptr 0x0000F00C pointer + outs() << format("%-8s %-18s 0x%08X %s\n", SegmentName.str().c_str(), + SectionName.str().c_str(), Address, + Entry.typeName().str().c_str()); + } +} diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp index 7debbe9a961..2b70167d135 100644 --- a/tools/llvm-objdump/llvm-objdump.cpp +++ b/tools/llvm-objdump/llvm-objdump.cpp @@ -81,6 +81,9 @@ SymbolTable("t", cl::desc("Display the symbol table")); static cl::opt ExportsTrie("exports-trie", cl::desc("Display mach-o exported symbols")); +static cl::opt +Rebase("rebase", cl::desc("Display mach-o rebasing info")); + static cl::opt MachOOpt("macho", cl::desc("Use MachO specific object file parser")); static cl::alias @@ -720,6 +723,18 @@ static void printExportsTrie(const ObjectFile *o) { } } +static void printRebaseTable(const ObjectFile *o) { + outs() << "Rebase table:\n"; + if (const MachOObjectFile *MachO = dyn_cast(o)) + printMachORebaseTable(MachO); + else { + errs() << "This operation is only currently supported " + "for Mach-O executable files.\n"; + return; + } +} + + static void printPrivateFileHeader(const ObjectFile *o) { if (o->isELF()) { printELFFileHeader(o); @@ -751,6 +766,8 @@ static void DumpObject(const ObjectFile *o) { printPrivateFileHeader(o); if (ExportsTrie) printExportsTrie(o); + if (Rebase) + printRebaseTable(o); } /// @brief Dump each object file in \a a; @@ -833,7 +850,8 @@ int main(int argc, char **argv) { && !SymbolTable && !UnwindInfo && !PrivateHeaders - && !ExportsTrie) { + && !ExportsTrie + && !Rebase) { cl::PrintHelpMessage(); return 2; } diff --git a/tools/llvm-objdump/llvm-objdump.h b/tools/llvm-objdump/llvm-objdump.h index c259042c39d..a328e635ee0 100644 --- a/tools/llvm-objdump/llvm-objdump.h +++ b/tools/llvm-objdump/llvm-objdump.h @@ -36,6 +36,7 @@ void DisassembleInputMachO(StringRef Filename); void printCOFFUnwindInfo(const object::COFFObjectFile* o); void printMachOUnwindInfo(const object::MachOObjectFile* o); void printMachOExportsTrie(const object::MachOObjectFile* o); +void printMachORebaseTable(const object::MachOObjectFile* o); void printELFFileHeader(const object::ObjectFile *o); void printCOFFFileHeader(const object::ObjectFile *o); void printMachOFileHeader(const object::ObjectFile *o);