diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h
index f3ba8ef7572..1b9faaa9fd9 100644
--- a/include/llvm/Object/MachO.h
+++ b/include/llvm/Object/MachO.h
@@ -25,6 +25,31 @@
 namespace llvm {
 namespace object {
+/// DiceRef - A value type class that represents a single entry in the
+/// data in code table of a Mach-O object file.
+class DiceRef {
+  DataRefImpl DicePimpl;
+  const ObjectFile *OwningObject;
+
+public:
+  DiceRef() : OwningObject(NULL) { }
+
+  DiceRef(DataRefImpl DiceP, const ObjectFile *Owner);
+
+  bool operator==(const DiceRef &Other) const;
+  bool operator<(const DiceRef &Other) const;
+  /// Sets Result to the entry that follows this one in the table.
+  error_code getNext(DiceRef &Result) const;
+  /// Copy the offset, length or kind field of the entry into Result.
+  error_code getOffset(uint32_t &Result) const;
+  error_code getLength(uint16_t &Result) const;
+  error_code getKind(uint16_t &Result) const;
+
+  DataRefImpl getRawDataRefImpl() const;
+  const ObjectFile *getObjectFile() const;
+};
+typedef content_iterator<DiceRef> dice_iterator;
+
 class MachOObjectFile : public ObjectFile {
 public:
   struct LoadCommandInfo {
@@ -108,6 +133,9 @@ public:
   relocation_iterator getSectionRelBegin(unsigned Index) const;
   relocation_iterator getSectionRelEnd(unsigned Index) const;
+  dice_iterator begin_dices() const;
+  dice_iterator end_dices() const;
+
   // In a MachO file, sections have a segment name. This is used in the .o
   // files. They have a single segment, but this field specifies which segment
   // a section should be put in in the final object.
@@ -152,6 +180,7 @@ public:
   getLinkerOptionsLoadCommand(const LoadCommandInfo &L) const;
   macho::RelocationEntry getRelocation(DataRefImpl Rel) const;
+  macho::DataInCodeTableEntry getDice(DataRefImpl Rel) const;
   macho::Header getHeader() const;
   macho::Header64Ext getHeader64Ext() const;
   macho::IndirectSymbolTableEntry
@@ -161,6 +190,7 @@ public:
                                                     unsigned Index) const;
   macho::SymtabLoadCommand getSymtabLoadCommand() const;
   macho::DysymtabLoadCommand getDysymtabLoadCommand() const;
+  macho::LinkeditDataLoadCommand getDataInCodeLoadCommand() const;
   StringRef getStringTableData() const;
   bool is64Bit() const;
@@ -175,8 +205,66 @@ private:
   SectionList Sections;
   const char *SymtabLoadCmd;
   const char *DysymtabLoadCmd;
+  const char *DataInCodeLoadCmd;
 };
+/// DiceRef inline method definitions.
+inline DiceRef::DiceRef(DataRefImpl DiceP, const ObjectFile *Owner)
+  : DicePimpl(DiceP) , OwningObject(Owner) {}
+
+inline bool DiceRef::operator==(const DiceRef &Other) const {
+  return DicePimpl == Other.DicePimpl;
+}
+
+inline bool DiceRef::operator<(const DiceRef &Other) const {
+  return DicePimpl < Other.DicePimpl;
+}
+// DicePimpl.p is used as the address of a table entry; step it by one entry.
+inline error_code DiceRef::getNext(DiceRef &Result) const {
+  DataRefImpl Rel = DicePimpl;
+  const macho::DataInCodeTableEntry *P =
+    reinterpret_cast<const macho::DataInCodeTableEntry *>(Rel.p);
+  Rel.p = reinterpret_cast<uintptr_t>(P + 1);
+  Result = DiceRef(Rel, OwningObject);
+  return object_error::success;
+}
+
+// Since a Mach-O data in code reference, a DiceRef, can only be created when
+// the OwningObject ObjectFile is a MachOObjectFile a static_cast<> is used for
+// the methods that get the values of the fields of the reference.
+
+inline error_code DiceRef::getOffset(uint32_t &Result) const {
+  const MachOObjectFile *MachOOF =
+    static_cast<const MachOObjectFile *>(OwningObject);
+  macho::DataInCodeTableEntry Dice = MachOOF->getDice(DicePimpl);
+  Result = Dice.Offset;
+  return object_error::success;
+}
+// Copies the entry's Length field into Result.
+inline error_code DiceRef::getLength(uint16_t &Result) const {
+  const MachOObjectFile *MachOOF =
+    static_cast<const MachOObjectFile *>(OwningObject);
+  macho::DataInCodeTableEntry Dice = MachOOF->getDice(DicePimpl);
+  Result = Dice.Length;
+  return object_error::success;
+}
+// Copies the entry's Kind field into Result.
+inline error_code DiceRef::getKind(uint16_t &Result) const {
+  const MachOObjectFile *MachOOF =
+    static_cast<const MachOObjectFile *>(OwningObject);
+  macho::DataInCodeTableEntry Dice = MachOOF->getDice(DicePimpl);
+  Result = Dice.Kind;
+  return object_error::success;
+}
+
+inline DataRefImpl DiceRef::getRawDataRefImpl() const {
+  return DicePimpl;
+}
+
+inline const ObjectFile *DiceRef::getObjectFile() const {
+  return OwningObject;
+}
+
 }
 }
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index bd5ea57c1f5..e62b5a48190 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -414,7 +414,7 @@ MachOObjectFile::MachOObjectFile(MemoryBuffer *Object,
                                  bool IsLittleEndian, bool Is64bits,
                                  error_code &ec)
     : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object),
-      SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL) {
+      SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL), DataInCodeLoadCmd(NULL) {
   uint32_t LoadCommandCount = this->getHeader().NumLoadCommands;
   macho::LoadCommandType SegmentLoadType = is64Bit() ?
macho::LCT_Segment64 : macho::LCT_Segment;
@@ -427,6 +427,9 @@ MachOObjectFile::MachOObjectFile(MemoryBuffer *Object,
     } else if (Load.C.Type == macho::LCT_Dysymtab) {
       assert(!DysymtabLoadCmd && "Multiple dynamic symbol tables");
       DysymtabLoadCmd = Load.Ptr;
+    } else if (Load.C.Type == macho::LCT_DataInCode) {
+      assert(!DataInCodeLoadCmd && "Multiple data in code tables");
+      DataInCodeLoadCmd = Load.Ptr;
     } else if (Load.C.Type == SegmentLoadType) {
       uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load);
       for (unsigned J = 0; J < NumSections; ++J) {
@@ -1328,6 +1331,27 @@ relocation_iterator MachOObjectFile::getSectionRelEnd(unsigned Index) const {
   return getSectionRelEnd(DRI);
 }
+dice_iterator MachOObjectFile::begin_dices() const {
+  DataRefImpl DRI;
+  if (!DataInCodeLoadCmd)
+    return dice_iterator(DiceRef(DRI, this));
+  // The first entry is located at file offset DataOffset.
+  macho::LinkeditDataLoadCommand DicLC = getDataInCodeLoadCommand();
+  DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, DicLC.DataOffset));
+  return dice_iterator(DiceRef(DRI, this));
+}
+
+dice_iterator MachOObjectFile::end_dices() const {
+  DataRefImpl DRI;
+  if (!DataInCodeLoadCmd)
+    return dice_iterator(DiceRef(DRI, this));
+  // One past the last entry: file offset DataOffset + DataSize.
+  macho::LinkeditDataLoadCommand DicLC = getDataInCodeLoadCommand();
+  unsigned Offset = DicLC.DataOffset + DicLC.DataSize;
+  DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset));
+  return dice_iterator(DiceRef(DRI, this));
+}
+
 StringRef
 MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const {
   ArrayRef Raw = getSectionRawFinalSegmentName(Sec);
@@ -1492,6 +1516,12 @@ MachOObjectFile::getRelocation(DataRefImpl Rel) const {
   return getStruct(this, P);
 }
+macho::DataInCodeTableEntry
+MachOObjectFile::getDice(DataRefImpl Rel) const {
+  const char *P = reinterpret_cast<const char *>(Rel.p);
+  return getStruct<macho::DataInCodeTableEntry>(this, P);
+}
+
 macho::Header MachOObjectFile::getHeader() const {
   return getStruct(this, getPtr(this, 0));
 }
@@ -1524,6 +1554,20 @@ macho::DysymtabLoadCommand MachOObjectFile::getDysymtabLoadCommand() const {
   return getStruct(this,
DysymtabLoadCmd);
 }
+macho::LinkeditDataLoadCommand
+MachOObjectFile::getDataInCodeLoadCommand() const {
+  if (DataInCodeLoadCmd)
+    return getStruct<macho::LinkeditDataLoadCommand>(this, DataInCodeLoadCmd);
+
+  // If there is no DataInCodeLoadCmd return one with a zero offset and size.
+  macho::LinkeditDataLoadCommand Cmd;
+  Cmd.Type = macho::LCT_DataInCode;
+  Cmd.Size = macho::LinkeditLoadCommandSize;
+  Cmd.DataOffset = 0;
+  Cmd.DataSize = 0;
+  return Cmd;
+}
+
 StringRef MachOObjectFile::getStringTableData() const {
   macho::SymtabLoadCommand S = getSymtabLoadCommand();
   return getData().substr(S.StringTableOffset, S.StringTableSize);
diff --git a/test/Object/Inputs/macho-data-in-code.macho-thumbv7 b/test/Object/Inputs/macho-data-in-code.macho-thumbv7
new file mode 100644
index 00000000000..57649302dd9
Binary files /dev/null and b/test/Object/Inputs/macho-data-in-code.macho-thumbv7 differ
diff --git a/test/Object/X86/macho-data-in-code.test b/test/Object/X86/macho-data-in-code.test
new file mode 100644
index 00000000000..dca084c2cab
--- /dev/null
+++ b/test/Object/X86/macho-data-in-code.test
@@ -0,0 +1,7 @@
+RUN: llvm-objdump -triple thumbv7-apple-iOS -disassemble %p/../Inputs/macho-data-in-code.macho-thumbv7 -macho | FileCheck %s
+
+CHECK: 12: 80 bd pop {r7, pc}
+
+CHECK: 14: 38 00 00 00 .long 56 @ KIND_DATA
+CHECK: 16: 00 00 movs r0, r0
+
diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp
index 1ee3e42dab7..27e1623d451 100644
--- a/tools/llvm-objdump/MachODump.cpp
+++ b/tools/llvm-objdump/MachODump.cpp
@@ -87,12 +87,73 @@ struct SymbolSorter {
   }
 };
+// Types for the sorted data in code table that is built before disassembly
+// and the predicate function used to look entries up by address.
+typedef std::pair<uint64_t, DiceRef> DiceTableEntry; // (address, entry)
+typedef std::vector<DiceTableEntry> DiceTable;
+typedef DiceTable::iterator dice_table_iterator;
+// Predicate for std::search: entries match when their addresses are equal.
+static bool
+compareDiceTableEntries(const DiceTableEntry &i,
+                        const DiceTableEntry &j) {
+  return i.first == j.first;
+}
+// Print Size bytes as a little-endian data directive tagged with Kind.
+static void DumpDataInCode(const char *bytes, uint64_t Size,
+                           unsigned short Kind) {
+  uint64_t Value; // assembled from unsigned bytes to avoid sign extension
+  const unsigned char *B = reinterpret_cast<const unsigned char *>(bytes);
+  switch (Kind) {
+  case macho::Data:
+    switch (Size) {
+    case 4:
+      Value = static_cast<uint32_t>(B[3]) << 24 |
+              static_cast<uint32_t>(B[2]) << 16 |
+              static_cast<uint32_t>(B[1]) << 8 |
+              static_cast<uint32_t>(B[0]);
+      outs() << "\t.long " << Value;
+      break;
+    case 2:
+      Value = static_cast<uint32_t>(B[1]) << 8 |
+              static_cast<uint32_t>(B[0]);
+      outs() << "\t.short " << Value;
+      break;
+    case 1:
+      Value = B[0];
+      outs() << "\t.byte " << Value;
+      break;
+    }
+    outs() << "\t@ KIND_DATA\n";
+    break;
+  case macho::JumpTable8:
+    Value = B[0];
+    outs() << "\t.byte " << Value << "\t@ KIND_JUMP_TABLE8\n";
+    break;
+  case macho::JumpTable16:
+    Value = static_cast<uint32_t>(B[1]) << 8 |
+            static_cast<uint32_t>(B[0]);
+    outs() << "\t.short " << Value << "\t@ KIND_JUMP_TABLE16\n";
+    break;
+  case macho::JumpTable32:
+    Value = static_cast<uint32_t>(B[3]) << 24 |
+            static_cast<uint32_t>(B[2]) << 16 |
+            static_cast<uint32_t>(B[1]) << 8 |
+            static_cast<uint32_t>(B[0]);
+    outs() << "\t.long " << Value << "\t@ KIND_JUMP_TABLE32\n";
+    break;
+  default:
+    outs() << "\t@ data in code kind = " << Kind << "\n";
+    break;
+  }
+}
+
 static void
 getSectionsAndSymbols(const macho::Header Header,
                       MachOObjectFile *MachOObj,
                       std::vector &Sections,
                       std::vector &Symbols,
-                      SmallVectorImpl &FoundFns) {
+                      SmallVectorImpl &FoundFns,
+                      uint64_t &BaseSegmentAddress) {
   error_code ec;
   for (symbol_iterator SI = MachOObj->begin_symbols(),
        SE = MachOObj->end_symbols(); SI != SE; SI.increment(ec))
@@ -108,6 +169,7 @@ getSectionsAndSymbols(const macho::Header Header,
   MachOObjectFile::LoadCommandInfo Command =
     MachOObj->getFirstLoadCommandInfo();
+  bool BaseSegmentAddressSet = false;
   for (unsigned i = 0; ; ++i) {
     if (Command.C.Type == macho::LCT_FunctionStarts) {
       // We found a function starts segment, parse the addresses for later
@@ -117,6 +179,15 @@ getSectionsAndSymbols(const
macho::Header Header,
       MachOObj->ReadULEB128s(LLC.DataOffset, FoundFns);
     }
+    else if (Command.C.Type == macho::LCT_Segment) {
+      macho::SegmentLoadCommand SLC =
+        MachOObj->getSegmentLoadCommand(Command);
+      StringRef SegName = SLC.Name;
+      if(!BaseSegmentAddressSet && SegName != "__PAGEZERO") {
+        BaseSegmentAddressSet = true;
+        BaseSegmentAddress = SLC.VMAddress;
+      }
+    }
     if (i == Header.NumLoadCommands - 1)
       break;
@@ -184,14 +255,32 @@ static void DisassembleInputMachO2(StringRef Filename,
   std::vector Sections;
   std::vector Symbols;
   SmallVector FoundFns;
+  uint64_t BaseSegmentAddress = 0; // stays 0 unless an LCT_Segment sets it
-  getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns);
+  getSectionsAndSymbols(Header, MachOOF, Sections, Symbols, FoundFns,
+                        BaseSegmentAddress);
   // Make a copy of the unsorted symbol list. FIXME: duplication
   std::vector UnsortedSymbols(Symbols);
   // Sort the symbols by address, just in case they didn't come in that way.
   std::sort(Symbols.begin(), Symbols.end(), SymbolSorter());
+  // Build a data in code table that is sorted by the address of each entry.
+  uint64_t BaseAddress = 0;
+  if (Header.FileType == macho::HFT_Object)
+    Sections[0].getAddress(BaseAddress);
+  else
+    BaseAddress = BaseSegmentAddress;
+  DiceTable Dices;
+  error_code ec;
+  for (dice_iterator DI = MachOOF->begin_dices(), DE = MachOOF->end_dices();
+       DI != DE; DI.increment(ec)){
+    uint32_t Offset;
+    DI->getOffset(Offset);
+    Dices.push_back(std::make_pair(BaseAddress + Offset, *DI));
+  }
+  array_pod_sort(Dices.begin(), Dices.end());
+
 #ifndef NDEBUG
   raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
 #else
@@ -309,12 +398,29 @@ static void DisassembleInputMachO2(StringRef Filename,
       for (uint64_t Index = Start; Index < End; Index += Size) {
         MCInst Inst;
+        uint64_t SectAddress = 0;
+        Sections[SectIdx].getAddress(SectAddress);
+        outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
+
+        // Check the data in code table here to see if this is data not an
+        // instruction to be disassembled.
+        DiceTable Dice; // one-element needle for the std::search below
+        Dice.push_back(std::make_pair(SectAddress + Index, DiceRef())); // only .first is compared
+        dice_table_iterator DTI = std::search(Dices.begin(), Dices.end(),
+                                              Dice.begin(), Dice.end(),
+                                              compareDiceTableEntries);
+        if (DTI != Dices.end()){ // an entry exists at this address
+          uint16_t Length;
+          DTI->second.getLength(Length);
+          DumpBytes(StringRef(Bytes.data() + Index, Length));
+          uint16_t Kind;
+          DTI->second.getKind(Kind);
+          DumpDataInCode(Bytes.data() + Index, Length, Kind);
+          continue; // NOTE(review): Size is not set to Length here, so Index += Size reuses the old Size - confirm
+        }
+
         if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
                                    DebugOut, nulls())) {
-          uint64_t SectAddress = 0;
-          Sections[SectIdx].getAddress(SectAddress);
-          outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
-
           DumpBytes(StringRef(Bytes.data() + Index, Size));
           IP->printInst(&Inst, outs(), "");
diff --git a/tools/macho-dump/macho-dump.cpp b/tools/macho-dump/macho-dump.cpp
index 88fd4529ab4..897a785f41b 100644
--- a/tools/macho-dump/macho-dump.cpp
+++ b/tools/macho-dump/macho-dump.cpp
@@ -292,7 +292,7 @@ DumpDataInCodeDataCommand(const MachOObjectFile &Obj,
          << " ('datasize', " << LLC.DataSize << ")\n"
          << " ('_data_regions', [\n";
-  unsigned NumRegions = LLC.DataSize / 8;
+  unsigned NumRegions = LLC.DataSize / sizeof(macho::DataInCodeTableEntry);
   for (unsigned i = 0; i < NumRegions; ++i) {
     macho::DataInCodeTableEntry DICE =
       Obj.getDataInCodeTableEntry(LLC.DataOffset, i);