Update llvm-objdump's Mach-O symbolizer code for Objective-C references.

This prints disassembly comments for Objective-C references to CFStrings,
Selectors, Classes and method calls.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@220500 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Kevin Enderby 2014-10-23 19:37:31 +00:00
parent a023f50940
commit a1afbd6421
5 changed files with 628 additions and 48 deletions

View File

@ -2345,11 +2345,47 @@ MachOObjectFile::getDataInCodeTableEntry(uint32_t DataOffset,
}
// Returns the LC_SYMTAB load command of this object file. When the file has
// no such command (SymtabLoadCmd is null) a synthesized command is returned
// instead, with cmd/cmdsize filled in and every offset and count set to zero.
MachO::symtab_command MachOObjectFile::getSymtabLoadCommand() const {
  // Only read the command out of the file when it is actually present; an
  // unconditional read here would go through a null SymtabLoadCmd and would
  // make the fallback below unreachable.
  if (SymtabLoadCmd)
    return getStruct<MachO::symtab_command>(this, SymtabLoadCmd);

  // If there is no SymtabLoadCmd return a load command with zero'ed fields.
  MachO::symtab_command Cmd;
  Cmd.cmd = MachO::LC_SYMTAB;
  Cmd.cmdsize = sizeof(MachO::symtab_command);
  Cmd.symoff = 0;
  Cmd.nsyms = 0;
  Cmd.stroff = 0;
  Cmd.strsize = 0;
  return Cmd;
}
// Returns the LC_DYSYMTAB load command of this object file. When the file has
// no such command (DysymtabLoadCmd is null) a synthesized command is returned
// instead, with cmd/cmdsize filled in and every index, offset and count set
// to zero.
MachO::dysymtab_command MachOObjectFile::getDysymtabLoadCommand() const {
  // Only read the command out of the file when it is actually present; an
  // unconditional read here would go through a null DysymtabLoadCmd and would
  // make the fallback below unreachable.
  if (DysymtabLoadCmd)
    return getStruct<MachO::dysymtab_command>(this, DysymtabLoadCmd);

  // If there is no DysymtabLoadCmd return a load command with zero'ed fields.
  MachO::dysymtab_command Cmd;
  Cmd.cmd = MachO::LC_DYSYMTAB;
  Cmd.cmdsize = sizeof(MachO::dysymtab_command);
  Cmd.ilocalsym = 0;
  Cmd.nlocalsym = 0;
  Cmd.iextdefsym = 0;
  Cmd.nextdefsym = 0;
  Cmd.iundefsym = 0;
  Cmd.nundefsym = 0;
  Cmd.tocoff = 0;
  Cmd.ntoc = 0;
  Cmd.modtaboff = 0;
  Cmd.nmodtab = 0;
  Cmd.extrefsymoff = 0;
  Cmd.nextrefsyms = 0;
  Cmd.indirectsymoff = 0;
  Cmd.nindirectsyms = 0;
  Cmd.extreloff = 0;
  Cmd.nextrel = 0;
  Cmd.locreloff = 0;
  Cmd.nlocrel = 0;
  return Cmd;
}
MachO::linkedit_data_command

Binary file not shown.

View File

@ -1,8 +1,25 @@
// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/hello.obj.macho-x86_64 | FileCheck %s -check-prefix=OBJ
// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/hello.exe.macho-x86_64 | FileCheck %s -check-prefix=EXE
// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/ObjC.obj.macho-x86_64 | FileCheck %s -check-prefix=ObjC-OBJ
// RUN: llvm-objdump -d -m -no-show-raw-insn -full-leading-addr -print-imm-hex %p/Inputs/ObjC.exe.macho-x86_64 | FileCheck %s -check-prefix=ObjC-EXE
OBJ: 0000000000000008 leaq L_.str(%rip), %rax ## literal pool for: "Hello world\n"
OBJ: 0000000000000026 callq _printf
EXE: 0000000100000f38 leaq 0x4f(%rip), %rax ## literal pool for: "Hello world\n"
EXE: 0000000100000f56 callq 0x100000f6c ## symbol stub for: _printf
ObjC-OBJ: 0000000000000008 leaq 0xb1(%rip), %rax ## Objc cfstring ref: @"The current date and time is: %@"
ObjC-OBJ: 0000000000000016 movq 0x4b(%rip), %rcx ## Objc class ref: NSObject
ObjC-OBJ: 000000000000001d movq 0x64(%rip), %rsi ## Objc selector ref: new
ObjC-OBJ: 0000000000000034 movq 0x35(%rip), %rax ## Objc class ref: NSDate
ObjC-OBJ: 000000000000003b movq 0x4e(%rip), %rsi ## Objc selector ref: date
ObjC-EXE: 0000000100000ee8 leaq 0x159(%rip), %rax ## Objc cfstring ref: @"The current date and time is: %@"
ObjC-EXE: 0000000100000ef6 movq 0x13b(%rip), %rcx ## Objc class ref: _OBJC_CLASS_$_NSObject
ObjC-EXE: 0000000100000efd movq 0x124(%rip), %rsi ## Objc selector ref: new
ObjC-EXE: 0000000100000f0b callq 0x100000f4a ## Objc message: +[NSObject new]
ObjC-EXE: 0000000100000f14 movq 0x125(%rip), %rax ## Objc class ref: _OBJC_CLASS_$_NSDate
ObjC-EXE: 0000000100000f1b movq 0x10e(%rip), %rsi ## Objc selector ref: date
ObjC-EXE: 0000000100000f25 callq 0x100000f4a ## Objc message: +[NSDate date]
ObjC-EXE: 0000000100000f33 callq 0x100000f44 ## symbol stub for: _NSLog

View File

@ -235,6 +235,9 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
}
// Lookup from an address to the name of the symbol at that address.
using SymbolAddressMap = DenseMap<uint64_t, StringRef>;
// One dyld bind record: the bound address paired with its symbol name (the
// name may be nullptr when the bind entry carries an empty name).
using BindInfoEntry = std::pair<uint64_t, const char *>;
// All bind records of a Mach-O file, in the order they were read.
using BindTable = std::vector<BindInfoEntry>;
using bind_table_iterator = BindTable::iterator;
// The block of info used by the Symbolizer call backs.
struct DisassembleInfo {
@ -242,6 +245,11 @@ struct DisassembleInfo {
MachOObjectFile *O;
SectionRef S;
SymbolAddressMap *AddrMap;
std::vector<SectionRef> *Sections;
const char *class_name;
const char *selector_name;
char *method;
BindTable *BindTable;
};
// SymbolizerGetOpInfo() is the operand information call back function.
@ -342,7 +350,7 @@ int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
// TODO:
// Second search the external relocation entries of a fully linked image
// (if any) for an entry that matches this segment offset.
//uint64_t seg_offset = (Pc + Offset);
// uint64_t seg_offset = (Pc + Offset);
return 0;
} else if (Arch == Triple::arm) {
return 0;
@ -445,7 +453,7 @@ static const char *GuessIndirectSymbol(uint64_t ReferenceValue,
uint32_t index = Sec.reserved1 + (ReferenceValue - Sec.addr) / stride;
if (index < Dysymtab.nindirectsyms) {
uint32_t indirect_symbol =
info->O->getIndirectSymbolTableEntry(Dysymtab, index);
info->O->getIndirectSymbolTableEntry(Dysymtab, index);
if (indirect_symbol < Symtab.nsyms) {
symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol);
SymbolRef Symbol = *Sym;
@ -479,7 +487,7 @@ static const char *GuessIndirectSymbol(uint64_t ReferenceValue,
uint32_t index = Sec.reserved1 + (ReferenceValue - Sec.addr) / stride;
if (index < Dysymtab.nindirectsyms) {
uint32_t indirect_symbol =
info->O->getIndirectSymbolTableEntry(Dysymtab, index);
info->O->getIndirectSymbolTableEntry(Dysymtab, index);
if (indirect_symbol < Symtab.nsyms) {
symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol);
SymbolRef Symbol = *Sym;
@ -500,6 +508,401 @@ static const char *GuessIndirectSymbol(uint64_t ReferenceValue,
return nullptr;
}
// method_reference() checks whether *ReferenceName names one of the
// Objective-C message dispatch routines, "_objc_msgSend" or
// "_objc_msgSendSuper2". If it does, and a selector name was saved in
// info->selector_name, a method call string is malloc'ed and assembled from
// the saved names:
//   "+[<class_name> <selector_name>]"  _objc_msgSend with a saved class_name
//   "-[%rdi <selector_name>]"          _objc_msgSend without a class_name
//   "-[[%rdi super] <selector_name>]"  _objc_msgSendSuper2
// Any string previously held in info->method is free'ed and replaced by the
// new one, and info->class_name is reset to nullptr. On success *ReferenceName
// is pointed at the new string and *ReferenceType is set to
// LLVMDisassembler_ReferenceType_Out_Objc_Message; if the allocation fails
// info->method ends up nullptr and both outputs are left alone. When the name
// is not a dispatch routine, or no selector was saved, nothing is modified.
static void method_reference(struct DisassembleInfo *info,
                             uint64_t *ReferenceType,
                             const char **ReferenceName) {
  const char *Callee = *ReferenceName;
  if (Callee == nullptr)
    return;

  const bool IsMsgSend = strcmp(Callee, "_objc_msgSend") == 0;
  if (!IsMsgSend && strcmp(Callee, "_objc_msgSendSuper2") != 0)
    return;

  const char *Selector = info->selector_name;
  if (Selector == nullptr)
    return;

  // Pick the text that goes in front of the selector: a fixed lead-in and,
  // for a message to a known class, the class name plus a separating space.
  const char *LeadIn;
  const char *Receiver = "";
  const char *Separator = "";
  if (!IsMsgSend) {
    LeadIn = "-[[%rdi super] ";
  } else if (info->class_name != nullptr) {
    LeadIn = "+[";
    Receiver = info->class_name;
    Separator = " ";
  } else {
    LeadIn = "-[%rdi ";
  }

  // The previous method string is released before its replacement is built.
  if (info->method != nullptr)
    free(info->method);

  // Room for every piece, the closing "]" and the terminating NUL.
  const size_t Needed = strlen(LeadIn) + strlen(Receiver) +
                        strlen(Separator) + strlen(Selector) + 2;
  char *Text = (char *)malloc(Needed);
  info->method = Text;
  if (Text != nullptr) {
    strcpy(Text, LeadIn);
    strcat(Text, Receiver);
    strcat(Text, Separator);
    strcat(Text, Selector);
    strcat(Text, "]");
    *ReferenceName = Text;
    *ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Message;
  }

  // The saved class name is consumed by this call whether or not the string
  // could be allocated.
  info->class_name = nullptr;
}
// GuessPointerPointer() is passed the address of what might be a pointer to
// a reference to an Objective-C class, selector, message ref or cfstring.
//
// The LC_SEGMENT_64 load commands are walked looking for a section named
// __objc_selrefs, __objc_classrefs, __objc_superrefs, __objc_msgrefs or
// __cfstring whose address range contains ReferenceValue. For the first such
// section the 8-byte value stored at ReferenceValue is read from the file
// (byte swapped when the file's endianness differs from the host's) and
// returned, and the matching boolean is set:
//   selref    for __objc_selrefs
//   classref  for __objc_classrefs and __objc_superrefs
//   msgref    for __objc_msgrefs, in which case the value returned is the
//             8-byte word that follows the one at ReferenceValue
//   cfstring  for __cfstring
// If no section matches, or the value to read does not lie completely inside
// the file, zero is returned and all the booleans are false.
static uint64_t GuessPointerPointer(uint64_t ReferenceValue,
                                    struct DisassembleInfo *info,
                                    bool &classref, bool &selref, bool &msgref,
                                    bool &cfstring) {
  classref = false;
  selref = false;
  msgref = false;
  cfstring = false;

  uint32_t LoadCommandCount = info->O->getHeader().ncmds;
  // The loop below stops at index LoadCommandCount - 1, which wraps around
  // when there are no load commands at all, so handle that case up front.
  if (LoadCommandCount == 0)
    return 0;

  MachOObjectFile::LoadCommandInfo Load = info->O->getFirstLoadCommandInfo();
  for (unsigned I = 0;; ++I) {
    if (Load.C.cmd == MachO::LC_SEGMENT_64) {
      MachO::segment_command_64 Seg = info->O->getSegment64LoadCommand(Load);
      for (unsigned J = 0; J < Seg.nsects; ++J) {
        MachO::section_64 Sec = info->O->getSection64(Load, J);

        // Classify the section once; sectname is a fixed 16 byte field that
        // need not be NUL terminated, hence strncmp().
        const bool IsSelRefs =
            strncmp(Sec.sectname, "__objc_selrefs", 16) == 0;
        const bool IsClassRefs =
            strncmp(Sec.sectname, "__objc_classrefs", 16) == 0 ||
            strncmp(Sec.sectname, "__objc_superrefs", 16) == 0;
        const bool IsMsgRefs =
            strncmp(Sec.sectname, "__objc_msgrefs", 16) == 0;
        const bool IsCFString = strncmp(Sec.sectname, "__cfstring", 16) == 0;
        if (!IsSelRefs && !IsClassRefs && !IsMsgRefs && !IsCFString)
          continue;
        if (ReferenceValue < Sec.addr ||
            ReferenceValue >= Sec.addr + Sec.size)
          continue;

        uint64_t sect_offset = ReferenceValue - Sec.addr;
        uint64_t object_offset = Sec.offset + sect_offset;
        StringRef MachOContents = info->O->getData();
        uint64_t object_size = MachOContents.size();
        const char *object_addr = (const char *)MachOContents.data();

        // All 8 bytes of the pointer must be inside the file, not just its
        // first byte, or the memcpy() below reads past the end of the buffer.
        if (object_size < sizeof(uint64_t) ||
            object_offset > object_size - sizeof(uint64_t))
          return 0;

        uint64_t pointer_value;
        memcpy(&pointer_value, object_addr + object_offset, sizeof(uint64_t));
        if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
          sys::swapByteOrder(pointer_value);

        if (IsSelRefs)
          selref = true;
        else if (IsClassRefs)
          classref = true;
        else if (IsMsgRefs && ReferenceValue + 8 < Sec.addr + Sec.size &&
                 // The second word of the message ref must also be fully
                 // inside the file before it is read.
                 object_offset + 8 <= object_size - sizeof(uint64_t)) {
          msgref = true;
          memcpy(&pointer_value, object_addr + object_offset + 8,
                 sizeof(uint64_t));
          if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
            sys::swapByteOrder(pointer_value);
        } else if (IsCFString)
          cfstring = true;
        return pointer_value;
      }
    }
    // TODO: Look for LC_SEGMENT for 32-bit Mach-O files.
    if (I == LoadCommandCount - 1)
      break;
    else
      Load = info->O->getNextLoadCommandInfo(Load);
  }
  return 0;
}
// get_pointer_64 returns a pointer to the bytes in the object file at the
// Address from a section in the Mach-O file. And indirectly returns the
// offset into the section, number of bytes left in the section past the offset
// and which section is was being referenced. If the Address is not in a
// section nullptr is returned.
const char *get_pointer_64(uint64_t Address, uint32_t &offset, uint32_t &left,
SectionRef &S, DisassembleInfo *info) {
offset = 0;
left = 0;
S = SectionRef();
for (unsigned SectIdx = 0; SectIdx != info->Sections->size(); SectIdx++) {
uint64_t SectAddress = ((*(info->Sections))[SectIdx]).getAddress();
uint64_t SectSize = ((*(info->Sections))[SectIdx]).getSize();
if (Address >= SectAddress && Address < SectAddress + SectSize) {
S = (*(info->Sections))[SectIdx];
offset = Address - SectAddress;
left = SectSize - offset;
StringRef SectContents;
((*(info->Sections))[SectIdx]).getContents(SectContents);
return SectContents.data() + offset;
}
}
return nullptr;
}
// get_symbol_64() returns the name of a symbol (or nullptr) and the address of
// the symbol indirectly through n_value. Based on the relocation information
// for the specified section offset in the specified section reference.
const char *get_symbol_64(uint32_t sect_offset, SectionRef S,
DisassembleInfo *info, uint64_t &n_value) {
n_value = 0;
if (info->verbose == false)
return nullptr;
// See if there is an external relocation entry at the sect_offset.
bool reloc_found = false;
DataRefImpl Rel;
MachO::any_relocation_info RE;
bool isExtern = false;
SymbolRef Symbol;
for (const RelocationRef &Reloc : S.relocations()) {
uint64_t RelocOffset;
Reloc.getOffset(RelocOffset);
if (RelocOffset == sect_offset) {
Rel = Reloc.getRawDataRefImpl();
RE = info->O->getRelocation(Rel);
if (info->O->isRelocationScattered(RE))
continue;
isExtern = info->O->getPlainRelocationExternal(RE);
if (isExtern) {
symbol_iterator RelocSym = Reloc.getSymbol();
Symbol = *RelocSym;
}
reloc_found = true;
break;
}
}
// If there is an external relocation entry for a symbol in this section
// at this section_offset then use that symbol's value for the n_value
// and return its name.
const char *SymbolName = nullptr;
if (reloc_found && isExtern) {
Symbol.getAddress(n_value);
StringRef name;
Symbol.getName(name);
if (!name.empty()) {
SymbolName = name.data();
return SymbolName;
}
}
// TODO: For fully linked images, look through the external relocation
// entries off the dynamic symtab command. For these the r_offset is from the
// start of the first writeable segment in the Mach-O file. So the offset
// to this section from that segment is passed to this routine by the caller,
// as the database_offset. Which is the difference of the section's starting
// address and the first writable segment.
//
// NOTE: need add passing the database_offset to this routine.
// TODO: We did not find an external relocation entry so look up the
// ReferenceValue as an address of a symbol and if found return that symbol's
// name.
//
// NOTE: need add passing the ReferenceValue to this routine. Then that code
// would simply be this:
//
// if (ReferenceValue != 0xffffffffffffffffLLU &&
// ReferenceValue != 0xfffffffffffffffeLLU) {
// StringRef name = info->AddrMap->lookup(ReferenceValue);
// if (!name.empty())
// SymbolName = name.data();
// }
return SymbolName;
}
// These are structs in the Objective-C meta data that are read to produce the
// comments for disassembly. While these are part of the ABI, there are no
// public definitions of them. So they are here and not in
// include/llvm/Support/MachO.h .
// The cfstring object in a 64-bit Mach-O file.
//
// get_objc2_64bit_cfstring_name() memcpy()'s raw section bytes straight into
// this struct, so the order and size of the fields must stay exactly as they
// are. Only the characters field is consumed there; when it is zero the
// string's address is taken from a relocation entry at that field instead.
struct cfstring64_t {
uint64_t isa; // class64_t * (64-bit pointer)
uint64_t flags; // flag bits
uint64_t characters; // char * (64-bit pointer)
uint64_t length; // number of non-NULL characters in above
};
// The class object in a 64-bit Mach-O file.
//
// get_objc2_64bit_class_name() memcpy()'s raw section bytes straight into
// this struct, so the order and size of the fields must stay exactly as they
// are. Only the data field is followed there, to reach the class_ro64_t that
// holds the class name.
struct class64_t {
uint64_t isa; // class64_t * (64-bit pointer)
uint64_t superclass; // class64_t * (64-bit pointer)
uint64_t cache; // Cache (64-bit pointer)
uint64_t vtable; // IMP * (64-bit pointer)
uint64_t data; // class_ro64_t * (64-bit pointer)
};
// The structure a class64_t's data field points to in a 64-bit Mach-O file.
//
// get_objc2_64bit_class_name() memcpy()'s raw section bytes straight into
// this struct, so the order and size of the fields must stay exactly as they
// are. Only the name field is consumed there, as the address of the class
// name string.
struct class_ro64_t {
uint32_t flags;
uint32_t instanceStart;
uint32_t instanceSize;
uint32_t reserved;
uint64_t ivarLayout; // const uint8_t * (64-bit pointer)
uint64_t name; // const char * (64-bit pointer)
uint64_t baseMethods; // const method_list_t * (64-bit pointer)
uint64_t baseProtocols; // const protocol_list_t * (64-bit pointer)
uint64_t ivars; // const ivar_list_t * (64-bit pointer)
uint64_t weakIvarLayout; // const uint8_t * (64-bit pointer)
uint64_t baseProperties; // const struct objc_property_list (64-bit pointer)
};
inline void swapStruct(struct cfstring64_t &cfs) {
sys::swapByteOrder(cfs.isa);
sys::swapByteOrder(cfs.flags);
sys::swapByteOrder(cfs.characters);
sys::swapByteOrder(cfs.length);
}
inline void swapStruct(struct class64_t &c) {
sys::swapByteOrder(c.isa);
sys::swapByteOrder(c.superclass);
sys::swapByteOrder(c.cache);
sys::swapByteOrder(c.vtable);
sys::swapByteOrder(c.data);
}
inline void swapStruct(struct class_ro64_t &cro) {
sys::swapByteOrder(cro.flags);
sys::swapByteOrder(cro.instanceStart);
sys::swapByteOrder(cro.instanceSize);
sys::swapByteOrder(cro.reserved);
sys::swapByteOrder(cro.ivarLayout);
sys::swapByteOrder(cro.name);
sys::swapByteOrder(cro.baseMethods);
sys::swapByteOrder(cro.baseProtocols);
sys::swapByteOrder(cro.ivars);
sys::swapByteOrder(cro.weakIvarLayout);
sys::swapByteOrder(cro.baseProperties);
}
static const char *get_dyld_bind_info_symbolname(uint64_t ReferenceValue,
struct DisassembleInfo *info);
// get_objc2_64bit_class_name() is used for disassembly and is passed a pointer
// to an Objective-C class (pointer_value) together with the address that
// pointer was read from (ReferenceValue). It returns the class name, or
// nullptr if it cannot be determined.
//
// When pointer_value is zero, as it can be in a .o file, the relocation entry
// at ReferenceValue is consulted and the class name is taken to be whatever
// follows the last "$_" in that entry's symbol name. When pointer_value is
// non-zero it is followed through the class64_t and its class_ro64_t to the
// name string in this Mach-O file.
const char *get_objc2_64bit_class_name(uint64_t pointer_value,
                                       uint64_t ReferenceValue,
                                       struct DisassembleInfo *info) {
  const char *r;
  uint32_t offset, left;
  SectionRef S;

  // The pointer_value can be 0 in an object file and have a relocation
  // entry for the class symbol at the ReferenceValue (the address of the
  // pointer).
  if (pointer_value == 0) {
    r = get_pointer_64(ReferenceValue, offset, left, S, info);
    if (r == nullptr || left < sizeof(uint64_t))
      return nullptr;
    uint64_t n_value;
    const char *symbol_name = get_symbol_64(offset, S, info, n_value);
    if (symbol_name == nullptr)
      return nullptr;
    // strrchr() is the standard spelling of the legacy BSD rindex(), which is
    // not available on every host this tool is built for.
    const char *class_name = strrchr(symbol_name, '$');
    if (class_name != nullptr && class_name[1] == '_' && class_name[2] != '\0')
      return class_name + 2;
    else
      return nullptr;
  }

  // The case were the pointer_value is non-zero and points to a class defined
  // in this Mach-O file.
  r = get_pointer_64(pointer_value, offset, left, S, info);
  if (r == nullptr || left < sizeof(struct class64_t))
    return nullptr;
  struct class64_t c;
  memcpy(&c, r, sizeof(struct class64_t));
  if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
    swapStruct(c);
  if (c.data == 0)
    return nullptr;

  // Follow the class to its class_ro64_t, which holds the name pointer.
  r = get_pointer_64(c.data, offset, left, S, info);
  if (r == nullptr || left < sizeof(struct class_ro64_t))
    return nullptr;
  struct class_ro64_t cro;
  memcpy(&cro, r, sizeof(struct class_ro64_t));
  if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
    swapStruct(cro);
  if (cro.name == 0)
    return nullptr;

  // nullptr here when the name address is not inside any section.
  const char *name = get_pointer_64(cro.name, offset, left, S, info);
  return name;
}
// get_objc2_64bit_cfstring_name() is used for disassembly. ReferenceValue is
// the address of a cfstring64_t; the characters that cfstring points to are
// returned, or nullptr if the cfstring or its characters cannot be located in
// a section. When the characters field is zero, the address of the characters
// is taken from the symbol of a relocation entry at that field.
const char *get_objc2_64bit_cfstring_name(uint64_t ReferenceValue,
                                          struct DisassembleInfo *info) {
  uint32_t offset, left;
  SectionRef S;

  const char *Raw = get_pointer_64(ReferenceValue, offset, left, S, info);
  if (Raw == nullptr || left < sizeof(struct cfstring64_t))
    return nullptr;

  struct cfstring64_t cfs;
  memcpy(&cfs, Raw, sizeof(struct cfstring64_t));
  if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
    swapStruct(cfs);

  uint64_t CharsAddress = cfs.characters;
  if (CharsAddress == 0) {
    // Nothing stored in the field itself; fall back to the relocation entry
    // that covers it.
    uint64_t n_value;
    const char *symbol_name = get_symbol_64(
        offset + offsetof(struct cfstring64_t, characters), S, info, n_value);
    if (symbol_name == nullptr)
      return nullptr;
    CharsAddress = n_value;
  }

  return get_pointer_64(CharsAddress, offset, left, S, info);
}
// get_objc2_64bit_selref() is used for disassembly. ReferenceValue is the
// address of a pointer to an Objective-C selector reference whose stored value
// is zero, as in a .o file. If there is a relocation entry with a symbol at
// that address, the symbol's n_value (the real pointer to the selector name)
// is returned. Otherwise 0 is returned.
uint64_t get_objc2_64bit_selref(uint64_t ReferenceValue,
                                struct DisassembleInfo *info) {
  uint32_t offset, left;
  SectionRef S;

  // The address must be inside a section with room for a 64-bit pointer.
  if (get_pointer_64(ReferenceValue, offset, left, S, info) == nullptr ||
      left < sizeof(uint64_t))
    return 0;

  uint64_t n_value;
  const bool HasSymbol = get_symbol_64(offset, S, info, n_value) != nullptr;
  return HasSymbol ? n_value : 0;
}
// GuessLiteralPointer returns a string which for the item in the Mach-O file
// for the address passed in as ReferenceValue for printing as a comment with
// the instruction and also returns the corresponding type of that item
@ -509,13 +912,20 @@ static const char *GuessIndirectSymbol(uint64_t ReferenceValue,
// cstring is returned and ReferenceType is set to
// LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr .
//
// TODO: other literals such as Objective-C CFStrings refs, Selector refs,
// Message refs, Class refs and a Symbol address in a literal pool are yet
// to be done here.
// If ReferenceValue is an address of an Objective-C CFString, Selector ref or
// Class ref that name is returned and the ReferenceType is set accordingly.
//
// Lastly, literals which are Symbol address in a literal pool are looked for
// and if found the symbol name is returned and ReferenceType is set to
// LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr .
//
// If there is no item in the Mach-O file for the address passed in as
// ReferenceValue nullptr is returned and ReferenceType is unchanged.
const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC,
uint64_t *ReferenceType,
struct DisassembleInfo *info) {
// TODO: This rouine's code is only for an x86_64 Mach-O file for now.
// TODO: This routine's code and the routines it calls only work with
// x86_64 Mach-O files for now.
unsigned int Arch = info->O->getArch();
if (Arch != Triple::x86_64)
return nullptr;
@ -556,20 +966,71 @@ const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC,
}
}
// TODO: the code to look for other literals such as Objective-C CFStrings
// refs, Selector refs, Message refs, Class refs will be added here.
// Look for literals such as Objective-C CFStrings refs, Selector refs,
// Message refs and Class refs.
bool classref, selref, msgref, cfstring;
uint64_t pointer_value = GuessPointerPointer(ReferenceValue, info, classref,
selref, msgref, cfstring);
if (classref == true && pointer_value == 0) {
// Note the ReferenceValue is a pointer into the __objc_classrefs section.
// And the pointer_value in that section is typically zero as it will be
// set by dyld as part of the "bind information".
const char *name = get_dyld_bind_info_symbolname(ReferenceValue, info);
if (name != nullptr) {
*ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref;
const char *class_name = rindex(name, '$');
if (class_name != nullptr && class_name[1] == '_' &&
class_name[2] != '\0') {
info->class_name = class_name + 2;
return name;
}
}
}
const char *name = GuessCstringPointer(ReferenceValue, info);
if (name) {
// TODO: note when the code is added above for Selector refs and Message
// refs we will need check for that here and set the ReferenceType
// accordingly.
*ReferenceType = LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr;
if (classref == true) {
*ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref;
const char *name =
get_objc2_64bit_class_name(pointer_value, ReferenceValue, info);
if (name != nullptr)
info->class_name = name;
else
name = "bad class ref";
return name;
}
// TODO: look for an indirect symbol with this ReferenceValue which is in
// a literal pool.
if (cfstring == true) {
*ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref;
const char *name = get_objc2_64bit_cfstring_name(ReferenceValue, info);
return name;
}
if (selref == true && pointer_value == 0)
pointer_value = get_objc2_64bit_selref(ReferenceValue, info);
if (pointer_value != 0)
ReferenceValue = pointer_value;
const char *name = GuessCstringPointer(ReferenceValue, info);
if (name) {
if (pointer_value != 0 && selref == true) {
*ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref;
info->selector_name = name;
} else if (pointer_value != 0 && msgref == true) {
info->class_name = nullptr;
*ReferenceType = LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref;
info->selector_name = name;
} else
*ReferenceType = LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr;
return name;
}
// Lastly look for an indirect symbol with this ReferenceValue which is in
// a literal pool. If found return that symbol name.
name = GuessIndirectSymbol(ReferenceValue, info);
if (name) {
*ReferenceType = LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr;
return name;
}
return nullptr;
}
@ -584,7 +1045,7 @@ const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC,
// Out type and the ReferenceName will also be set which is added as a comment
// to the disassembled instruction.
//
// If the symbol name is a C++ mangled name then the demangled name is
// TODO: If the symbol name is a C++ mangled name then the demangled name is
// returned through ReferenceName and ReferenceType is set to
// LLVMDisassembler_ReferenceType_DeMangled_Name .
//
@ -599,7 +1060,7 @@ const char *GuessLiteralPointer(uint64_t ReferenceValue, uint64_t ReferencePC,
// ReferenceType will be LLVMDisassembler_ReferenceType_In_PCrel_Load then the
// SymbolValue is checked to be an address of literal pointer, symbol pointer,
// or an Objective-C meta data reference. If so the output ReferenceType is
// set to correspond to that as well as ReferenceName.
// set to correspond to that as well as setting the ReferenceName.
const char *SymbolizerSymbolLookUp(void *DisInfo, uint64_t ReferenceValue,
uint64_t *ReferenceType,
uint64_t ReferencePC,
@ -613,24 +1074,34 @@ const char *SymbolizerSymbolLookUp(void *DisInfo, uint64_t ReferenceValue,
}
const char *SymbolName = nullptr;
StringRef name = info->AddrMap->lookup(ReferenceValue);
if (!name.empty())
SymbolName = name.data();
if (ReferenceValue != 0xffffffffffffffffLLU &&
ReferenceValue != 0xfffffffffffffffeLLU) {
StringRef name = info->AddrMap->lookup(ReferenceValue);
if (!name.empty())
SymbolName = name.data();
}
if (*ReferenceType == LLVMDisassembler_ReferenceType_In_Branch) {
*ReferenceName = GuessIndirectSymbol(ReferenceValue, info);
if (*ReferenceName) {
method_reference(info, ReferenceType, ReferenceName);
if (*ReferenceType != LLVMDisassembler_ReferenceType_Out_Objc_Message)
*ReferenceType = LLVMDisassembler_ReferenceType_Out_SymbolStub;
} else
// TODO: if SymbolName is not nullptr see if it is a C++ name
// and demangle it.
*ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
} else if (*ReferenceType == LLVMDisassembler_ReferenceType_In_PCrel_Load) {
*ReferenceName =
GuessLiteralPointer(ReferenceValue, ReferencePC, ReferenceType, info);
if (*ReferenceName)
*ReferenceType = LLVMDisassembler_ReferenceType_Out_SymbolStub;
method_reference(info, ReferenceType, ReferenceName);
else
*ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
}
else if (*ReferenceType == LLVMDisassembler_ReferenceType_In_PCrel_Load) {
*ReferenceName = GuessLiteralPointer(ReferenceValue, ReferencePC,
ReferenceType, info);
if (*ReferenceName == nullptr)
*ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
// TODO: other types of references to be added.
} else {
// TODO: if SymbolName is not nullptr see if it is a C++ name
// and demangle it.
else {
*ReferenceName = nullptr;
*ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
}
@ -652,8 +1123,8 @@ class DisasmMemoryObject : public MemoryObject {
uint64_t Size;
uint64_t BasePC;
public:
DisasmMemoryObject(const uint8_t *bytes, uint64_t size, uint64_t basePC) :
Bytes(bytes), Size(size), BasePC(basePC) {}
DisasmMemoryObject(const uint8_t *bytes, uint64_t size, uint64_t basePC)
: Bytes(bytes), Size(size), BasePC(basePC) {}
uint64_t getBase() const override { return BasePC; }
uint64_t getExtent() const override { return Size; }
@ -917,6 +1388,11 @@ static void DisassembleInputMachO2(StringRef Filename,
SymbolizerInfo.O = MachOOF;
SymbolizerInfo.S = Sections[SectIdx];
SymbolizerInfo.AddrMap = &AddrMap;
SymbolizerInfo.Sections = &Sections;
SymbolizerInfo.class_name = nullptr;
SymbolizerInfo.selector_name = nullptr;
SymbolizerInfo.method = nullptr;
SymbolizerInfo.BindTable = nullptr;
// Disassemble symbol by symbol.
for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) {
@ -962,6 +1438,9 @@ static void DisassembleInputMachO2(StringRef Filename,
uint64_t Size;
symbolTableWorked = true;
DisasmMemoryObject SectionMemoryObject((const uint8_t *)Bytes.data() +
Start,
End - Start, SectAddress + Start);
DataRefImpl Symb = Symbols[SymIdx].getRawDataRefImpl();
bool isThumb =
@ -976,7 +1455,7 @@ static void DisassembleInputMachO2(StringRef Filename,
if (FullLeadingAddr) {
if (MachOOF->is64Bit())
outs() << format("%016" PRIx64, PC);
else
else
outs() << format("%08" PRIx64, PC);
} else {
outs() << format("%8" PRIx64 ":", PC);
@ -1006,10 +1485,10 @@ static void DisassembleInputMachO2(StringRef Filename,
bool gotInst;
if (isThumb)
gotInst = ThumbDisAsm->getInstruction(Inst, Size, MemoryObject, PC,
DebugOut, Annotations);
gotInst = ThumbDisAsm->getInstruction(Inst, Size, SectionMemoryObject,
PC, DebugOut, Annotations);
else
gotInst = DisAsm->getInstruction(Inst, Size, MemoryObject, PC,
gotInst = DisAsm->getInstruction(Inst, Size, SectionMemoryObject, PC,
DebugOut, Annotations);
if (gotInst) {
if (!NoShowRawInsn) {
@ -1036,9 +1515,16 @@ static void DisassembleInputMachO2(StringRef Filename,
}
outs() << "\n";
} else {
errs() << "llvm-objdump: warning: invalid instruction encoding\n";
if (Size == 0)
Size = 1; // skip illegible bytes
unsigned int Arch = MachOOF->getArch();
if (Arch == Triple::x86_64 || Arch == Triple::x86){
outs() << format("\t.byte 0x%02x #bad opcode\n",
*(Bytes.data() + Index) & 0xff);
Size = 1; // skip exactly one illegible byte and move on.
} else {
errs() << "llvm-objdump: warning: invalid instruction encoding\n";
if (Size == 0)
Size = 1; // skip illegible bytes
}
}
}
}
@ -1051,12 +1537,12 @@ static void DisassembleInputMachO2(StringRef Filename,
MCInst Inst;
uint64_t PC = SectAddress + Index;
if (DisAsm->getInstruction(Inst, InstSize, MemoryObject, PC,
DebugOut, nulls())) {
if (DisAsm->getInstruction(Inst, InstSize, MemoryObject, PC, DebugOut,
nulls())) {
if (FullLeadingAddr) {
if (MachOOF->is64Bit())
outs() << format("%016" PRIx64, PC);
else
else
outs() << format("%08" PRIx64, PC);
} else {
outs() << format("%8" PRIx64 ":", PC);
@ -1068,12 +1554,23 @@ static void DisassembleInputMachO2(StringRef Filename,
IP->printInst(&Inst, outs(), "");
outs() << "\n";
} else {
errs() << "llvm-objdump: warning: invalid instruction encoding\n";
if (InstSize == 0)
InstSize = 1; // skip illegible bytes
unsigned int Arch = MachOOF->getArch();
if (Arch == Triple::x86_64 || Arch == Triple::x86){
outs() << format("\t.byte 0x%02x #bad opcode\n",
*(Bytes.data() + Index) & 0xff);
InstSize = 1; // skip exactly one illegible byte and move on.
} else {
errs() << "llvm-objdump: warning: invalid instruction encoding\n";
if (InstSize == 0)
InstSize = 1; // skip illegible bytes
}
}
}
}
if (SymbolizerInfo.method != nullptr)
free(SymbolizerInfo.method);
if (SymbolizerInfo.BindTable != nullptr)
delete SymbolizerInfo.BindTable;
}
}
@ -2909,4 +3406,34 @@ void llvm::printMachOWeakBindTable(const object::MachOObjectFile *Obj) {
}
}
// get_dyld_bind_info_symbolname() is used for disassembly and passed an
// address, ReferenceValue, in the Mach-O file and looks in the dyld bind
// information for that address. If the address is found its binding symbol
// name is returned. If not nullptr is returned.
static const char *get_dyld_bind_info_symbolname(uint64_t ReferenceValue,
struct DisassembleInfo *info) {
if (info->BindTable == nullptr) {
info->BindTable = new (BindTable);
SegInfo sectionTable(info->O);
for (const llvm::object::MachOBindEntry &Entry : info->O->bindTable()) {
uint32_t SegIndex = Entry.segmentIndex();
uint64_t OffsetInSeg = Entry.segmentOffset();
uint64_t Address = sectionTable.address(SegIndex, OffsetInSeg);
const char *SymbolName = nullptr;
StringRef name = Entry.symbolName();
if (!name.empty())
SymbolName = name.data();
info->BindTable->push_back(std::make_pair(Address, SymbolName));
}
}
for (bind_table_iterator BI = info->BindTable->begin(),
BE = info->BindTable->end();
BI != BE; ++BI) {
uint64_t Address = BI->first;
if (ReferenceValue == Address) {
const char *SymbolName = BI->second;
return SymbolName;
}
}
return nullptr;
}