llvm-mc/Mach-O: Improve symbol table support:

- Honor .globl.

 - Set symbol type and section correctly ('nm' now works), and order symbols
   appropriately.

 - Take care with the string table so that the .o matches 'as' exactly (for ease
   of testing).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@79740 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Daniel Dunbar 2009-08-22 11:41:10 +00:00
parent f3d2ef0c97
commit 3edd9bb7a3
5 changed files with 338 additions and 41 deletions

View File

@ -312,6 +312,10 @@ public:
/// Offset - The offset to apply to the fragment address to form this symbol's
/// value.
uint64_t Offset;
/// IsExternal - True if this symbol is visible outside this translation
/// unit.
unsigned IsExternal : 1;
public:
// Only for use as sentinel.
@ -330,6 +334,13 @@ public:
uint64_t getOffset() const { return Offset; }
void setOffset(uint64_t Value) { Offset = Value; }
/// @}
/// @name Symbol Attributes
/// @{
/// isExternal - Return the IsExternal flag, i.e. whether this symbol is
/// visible outside this translation unit.
bool isExternal() const { return IsExternal; }
/// setExternal - Set the IsExternal flag to \p Value.
void setExternal(bool Value) { IsExternal = Value; }
/// @}
};

View File

@ -43,7 +43,7 @@ namespace llvm {
/// undefined symbols, and the special AbsolutePseudoSection value for
/// absolute symbols.
const MCSection *Section;
/// IsTemporary - True if this is an assembler temporary label, which
/// typically does not survive in the .o file's symbol table. Usually
/// "Lfoo" or ".foo".
@ -52,8 +52,7 @@ namespace llvm {
private: // MCContext creates and uniques these.
friend class MCContext;
MCSymbol(const StringRef &_Name, bool _IsTemporary)
: Name(_Name), Section(0),
IsTemporary(_IsTemporary) {}
: Name(_Name), Section(0), IsTemporary(_IsTemporary) {}
MCSymbol(const MCSymbol&); // DO NOT IMPLEMENT
void operator=(const MCSymbol&); // DO NOT IMPLEMENT
@ -61,7 +60,7 @@ namespace llvm {
/// getName - Get the symbol name.
const std::string &getName() const { return Name; }
/// @name Symbol Location Functions
/// @name Symbol Type
/// @{
/// isUndefined - Check if this symbol is undefined (i.e., implicitly defined).

View File

@ -9,6 +9,7 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
@ -50,6 +51,37 @@ class MachObjectWriter {
LCT_Dysymtab = 0xb
};
// See <mach-o/nlist.h>.
//
// SymbolTypeType - Values stored in the type bits of a symbol table entry's
// type byte. All three values fall within STF_TypeMask (0x0e).
enum SymbolTypeType {
// Used by WriteNlist32 when the symbol is undefined.
STT_Undefined = 0x00,
// Used by WriteNlist32 when the symbol is absolute.
STT_Absolute = 0x02,
// Used by WriteNlist32 for every other symbol.
STT_Section = 0x0e
};
// SymbolTypeFlags - Bit masks and flag bits for the type byte of a symbol
// table entry.
enum SymbolTypeFlags {
// If any of these bits are set, then the entry is a stab entry number (see
// <mach-o/stab.h>). Otherwise the other masks apply.
STF_StabsEntryMask = 0xe0,
// Mask covering the SymbolTypeType values.
STF_TypeMask = 0x0e,
// OR'd into the type byte by WriteNlist32 when the symbol data is external.
STF_External = 0x01,
// Not set by WriteNlist32 yet; see the "private external" FIXME there.
STF_PrivateExtern = 0x10
};
/// MachSymbolData - Helper struct for containing some precomputed information
/// on symbols.
///
/// One instance is built per symbol while the symbol table is computed and is
/// later handed to WriteNlist32.
struct MachSymbolData {
/// The assembler's data for the symbol this entry describes.
MCSymbolData *SymbolData;
/// Offset of the symbol's name within the string table.
uint64_t StringIndex;
/// 1-based index of the symbol's section; 0 for undefined and absolute
/// symbols.
uint8_t SectionIndex;
// Support lexicographic sorting.
//
// Entries compare by symbol name only, so std::sort on a vector of
// MachSymbolData orders the entries by name.
bool operator<(const MachSymbolData &RHS) const {
const std::string &Name = SymbolData->getSymbol().getName();
return Name < RHS.SymbolData->getSymbol().getName();
}
};
raw_ostream &OS;
bool IsLSB;
@ -245,44 +277,131 @@ public:
assert(OS.tell() - Start == DysymtabLoadCommandSize);
}
void WriteNlist32(uint32_t StringIndex, uint8_t Type, uint8_t Sect,
int16_t Desc, uint32_t Value) {
/// WriteNlist32 - Write one symbol table entry for \p MSD.
///
/// The type byte is STT_Undefined, STT_Absolute or STT_Section depending on
/// the symbol, with STF_External OR'd in when the symbol data is external.
/// The description and value fields are still written as 0 (see the FIXMEs).
///
/// NOTE(review): this hunk appears to interleave pre- and post-change lines
/// (two signatures, and writes of both StringIndex and MSD.StringIndex) --
/// confirm against the actual file before relying on the statement order.
void WriteNlist32(MachSymbolData &MSD) {
MCSymbol &Symbol = MSD.SymbolData->getSymbol();
uint8_t Type = 0;
// Set the N_TYPE bits. See <mach-o/nlist.h>.
//
// FIXME: Are the prebound or indirect fields possible here?
if (Symbol.isUndefined())
Type = STT_Undefined;
else if (Symbol.isAbsolute())
Type = STT_Absolute;
else
Type = STT_Section;
// FIXME: Set STAB bits.
// FIXME: Set private external bit.
// Set external bit.
if (MSD.SymbolData->isExternal())
Type |= STF_External;
// struct nlist (12 bytes)
Write32(StringIndex);
Write32(MSD.StringIndex);
Write8(Type);
Write8(Sect);
Write16(Desc);
Write32(Value);
Write8(MSD.SectionIndex);
Write16(0); // FIXME: Desc
Write32(0); // FIXME: Value
}
/// ComputeStringTable - Compute the string table, for use in the symbol
/// table.
/// ComputeSymbolTable - Compute the symbol table data
///
/// \param StringTable [out] - The string table data.
/// \param StringIndexMap [out] - Map from symbol names to offsets in the
/// string table.
void ComputeStringTable(MCAssembler &Asm, SmallString<256> &StringTable,
StringMap<uint64_t> &StringIndexMap) {
// Build the string table.
//
// FIXME: Does 'as' ever bother to compress this when we have a suffix
// match?
void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
std::vector<MachSymbolData> &LocalSymbolData,
std::vector<MachSymbolData> &ExternalSymbolData,
std::vector<MachSymbolData> &UndefinedSymbolData) {
// Build section lookup table.
DenseMap<const MCSection*, uint8_t> SectionIndexMap;
unsigned Index = 1;
for (MCAssembler::iterator it = Asm.begin(),
ie = Asm.end(); it != ie; ++it, ++Index)
SectionIndexMap[&it->getSection()] = Index;
assert(Index <= 256 && "Too many sections!");
// Index 0 is always the empty string.
StringMap<uint64_t> StringIndexMap;
StringTable += '\x00';
// Build the symbol arrays and the string table, but only for non-local
// symbols.
//
// The particular order that we collect the symbols and create the string
// table, then sort the symbols is chosen to match 'as'. Even though it
// doesn't matter for correctness, this is important for letting us diff .o
// files.
for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
ie = Asm.symbol_end(); it != ie; ++it) {
StringRef Name = it->getSymbol().getName();
uint64_t &Entry = StringIndexMap[Name];
MCSymbol &Symbol = it->getSymbol();
if (!it->isExternal())
continue;
uint64_t &Entry = StringIndexMap[Symbol.getName()];
if (!Entry) {
Entry = StringTable.size();
StringTable += Name;
StringTable += Symbol.getName();
StringTable += '\x00';
}
MachSymbolData MSD;
MSD.SymbolData = it;
MSD.StringIndex = Entry;
if (Symbol.isUndefined()) {
MSD.SectionIndex = 0;
UndefinedSymbolData.push_back(MSD);
} else if (Symbol.isAbsolute()) {
MSD.SectionIndex = 0;
ExternalSymbolData.push_back(MSD);
} else {
MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
assert(MSD.SectionIndex && "Invalid section index!");
ExternalSymbolData.push_back(MSD);
}
}
// Now add the data for local symbols.
for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
ie = Asm.symbol_end(); it != ie; ++it) {
MCSymbol &Symbol = it->getSymbol();
if (it->isExternal())
continue;
uint64_t &Entry = StringIndexMap[Symbol.getName()];
if (!Entry) {
Entry = StringTable.size();
StringTable += Symbol.getName();
StringTable += '\x00';
}
MachSymbolData MSD;
MSD.SymbolData = it;
MSD.StringIndex = Entry;
assert(!Symbol.isUndefined() && "Local symbol can not be undefined!");
if (Symbol.isAbsolute()) {
MSD.SectionIndex = 0;
LocalSymbolData.push_back(MSD);
} else {
MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
assert(MSD.SectionIndex && "Invalid section index!");
LocalSymbolData.push_back(MSD);
}
}
// External and undefined symbols are required to be in lexicographic order.
std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
// The string table is padded to a multiple of 4.
//
// FIXME: Check to see if this varies per arch.
@ -295,12 +414,15 @@ public:
// Compute symbol table information.
SmallString<256> StringTable;
StringMap<uint64_t> StringIndexMap;
std::vector<MachSymbolData> LocalSymbolData;
std::vector<MachSymbolData> ExternalSymbolData;
std::vector<MachSymbolData> UndefinedSymbolData;
unsigned NumSymbols = Asm.symbol_size();
// No symbol table command is written if there are no symbols.
if (NumSymbols)
ComputeStringTable(Asm, StringTable, StringIndexMap);
ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData,
UndefinedSymbolData);
// Compute the file offsets for all the sections in advance, so that we can
// write things out in order.
@ -350,13 +472,13 @@ public:
WriteSymtabLoadCommand(SymbolTableOffset, NumSymbols,
StringTableOffset, StringTable.size());
// FIXME: Get correct symbol indices and counts.
unsigned FirstLocalSymbol = 0;
unsigned NumLocalSymbols = NumSymbols;
unsigned FirstExternalSymbol = NumLocalSymbols;
unsigned NumExternalSymbols = 0;
unsigned FirstUndefinedSymbol = NumLocalSymbols;
unsigned NumUndefinedSymbols = 0;
unsigned NumLocalSymbols = LocalSymbolData.size();
unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
unsigned NumExternalSymbols = ExternalSymbolData.size();
unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
// FIXME: Get correct symbol indices and counts for indirect symbols.
unsigned IndirectSymbolOffset = 0;
unsigned NumIndirectSymbols = 0;
WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
@ -373,18 +495,15 @@ public:
if (NumSymbols) {
// FIXME: Check that offsets match computed ones.
// FIXME: These need to be reordered, both to segregate into categories
// as well as to order some sublists.
// FIXME: Some of these are ordered by name to help the linker.
// Write the symbol table entries.
for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
ie = Asm.symbol_end(); it != ie; ++it) {
MCSymbol &Sym = it->getSymbol();
uint64_t Index = StringIndexMap[Sym.getName()];
assert(Index && "Invalid index!");
WriteNlist32(Index, /*FIXME: Type=*/0, /*FIXME: Sect=*/0,
/*FIXME: Desc=*/0, /*FIXME: Value=*/0);
}
for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
WriteNlist32(LocalSymbolData[i]);
for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
WriteNlist32(ExternalSymbolData[i]);
for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
WriteNlist32(UndefinedSymbolData[i]);
// Write the string table.
OS << StringTable.str();
@ -427,7 +546,8 @@ MCSymbolData::MCSymbolData() : Symbol(*(MCSymbol*)0) {}
MCSymbolData::MCSymbolData(MCSymbol &_Symbol, MCFragment *_Fragment,
uint64_t _Offset, MCAssembler *A)
: Symbol(_Symbol), Fragment(_Fragment), Offset(_Offset)
: Symbol(_Symbol), Fragment(_Fragment), Offset(_Offset),
IsExternal(false)
{
if (A)
A->getSymbolList().push_back(this);

View File

@ -142,7 +142,14 @@ void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol,
/// EmitSymbolAttribute - Apply \p Attribute to \p Symbol.
///
/// MCStreamer::Global marks the symbol's data as external. Every other
/// attribute falls into the default case and hits llvm_unreachable.
void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
SymbolAttr Attribute) {
// NOTE(review): presumably the pre-change body, shown here without a diff
// marker -- confirm against the actual file.
llvm_unreachable("FIXME: Not yet implemented!");
switch (Attribute) {
default:
llvm_unreachable("FIXME: Not yet implemented!");
case MCStreamer::Global:
// Record external visibility on the symbol's assembler data.
getSymbolData(*Symbol).setExternal(true);
break;
}
}
void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {

160
test/MC/MachO/symbols-1.s Normal file
View File

@ -0,0 +1,160 @@
// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s
// The labels and .globl directives below are not in alphabetical order. The
// CHECK lines expect the local symbols in source order (B, A, C), while the
// defined-external and undefined symbols are each listed sorted by name.
// The string table is expected to hold the .globl names first, in the order
// they are declared, followed by the local names.
sym_local_B:
.globl sym_globl_def_B
.globl sym_globl_undef_B
sym_local_A:
.globl sym_globl_def_A
.globl sym_globl_undef_A
sym_local_C:
.globl sym_globl_def_C
.globl sym_globl_undef_C
// Only the sym_globl_def_* names get a label; the sym_globl_undef_* names are
// declared .globl but never defined in this file.
sym_globl_def_A:
sym_globl_def_B:
sym_globl_def_C:
.long 0
// CHECK: ('cputype', 7)
// CHECK: ('cpusubtype', 3)
// CHECK: ('filetype', 1)
// CHECK: ('num_load_commands', 1)
// CHECK: ('load_commands_size', 228)
// CHECK: ('flag', 0)
// CHECK: ('load_commands', [
// CHECK: # Load Command 0
// CHECK: (('command', 1)
// CHECK: ('size', 124)
// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
// CHECK: ('vm_addr', 0)
// CHECK: ('vm_size', 4)
// CHECK: ('file_offset', 256)
// CHECK: ('file_size', 4)
// CHECK: ('maxprot', 7)
// CHECK: ('initprot', 7)
// CHECK: ('num_sections', 1)
// CHECK: ('flags', 0)
// CHECK: ('sections', [
// CHECK: # Section 0
// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
// CHECK: ('address', 0)
// CHECK: ('size', 4)
// CHECK: ('offset', 256)
// CHECK: ('alignment', 0)
// CHECK: ('reloc_offset', 0)
// CHECK: ('num_reloc', 0)
// CHECK: ('flags', 0x80000000)
// CHECK: ('reserved1', 0)
// CHECK: ('reserved2', 0)
// CHECK: ),
// CHECK: ])
// CHECK: ),
// CHECK: # Load Command 1
// CHECK: (('command', 2)
// CHECK: ('size', 24)
// CHECK: ('symoff', 260)
// CHECK: ('nsyms', 9)
// CHECK: ('stroff', 368)
// CHECK: ('strsize', 140)
// CHECK: ('_string_data', '\x00sym_globl_def_B\x00sym_globl_undef_B\x00sym_globl_def_A\x00sym_globl_undef_A\x00sym_globl_def_C\x00sym_globl_undef_C\x00sym_local_B\x00sym_local_A\x00sym_local_C\x00\x00')
// CHECK: ('_symbols', [
// CHECK: # Symbol 0
// CHECK: (('n_strx', 103)
// CHECK: ('n_type', 0xe)
// CHECK: ('n_sect', 1)
// CHECK: ('n_desc', 0)
// CHECK: ('n_value', 0)
// CHECK: ('_string', 'sym_local_B')
// CHECK: ),
// CHECK: # Symbol 1
// CHECK: (('n_strx', 115)
// CHECK: ('n_type', 0xe)
// CHECK: ('n_sect', 1)
// CHECK: ('n_desc', 0)
// CHECK: ('n_value', 0)
// CHECK: ('_string', 'sym_local_A')
// CHECK: ),
// CHECK: # Symbol 2
// CHECK: (('n_strx', 127)
// CHECK: ('n_type', 0xe)
// CHECK: ('n_sect', 1)
// CHECK: ('n_desc', 0)
// CHECK: ('n_value', 0)
// CHECK: ('_string', 'sym_local_C')
// CHECK: ),
// CHECK: # Symbol 3
// CHECK: (('n_strx', 35)
// CHECK: ('n_type', 0xf)
// CHECK: ('n_sect', 1)
// CHECK: ('n_desc', 0)
// CHECK: ('n_value', 0)
// CHECK: ('_string', 'sym_globl_def_A')
// CHECK: ),
// CHECK: # Symbol 4
// CHECK: (('n_strx', 1)
// CHECK: ('n_type', 0xf)
// CHECK: ('n_sect', 1)
// CHECK: ('n_desc', 0)
// CHECK: ('n_value', 0)
// CHECK: ('_string', 'sym_globl_def_B')
// CHECK: ),
// CHECK: # Symbol 5
// CHECK: (('n_strx', 69)
// CHECK: ('n_type', 0xf)
// CHECK: ('n_sect', 1)
// CHECK: ('n_desc', 0)
// CHECK: ('n_value', 0)
// CHECK: ('_string', 'sym_globl_def_C')
// CHECK: ),
// CHECK: # Symbol 6
// CHECK: (('n_strx', 51)
// CHECK: ('n_type', 0x1)
// CHECK: ('n_sect', 0)
// CHECK: ('n_desc', 0)
// CHECK: ('n_value', 0)
// CHECK: ('_string', 'sym_globl_undef_A')
// CHECK: ),
// CHECK: # Symbol 7
// CHECK: (('n_strx', 17)
// CHECK: ('n_type', 0x1)
// CHECK: ('n_sect', 0)
// CHECK: ('n_desc', 0)
// CHECK: ('n_value', 0)
// CHECK: ('_string', 'sym_globl_undef_B')
// CHECK: ),
// CHECK: # Symbol 8
// CHECK: (('n_strx', 85)
// CHECK: ('n_type', 0x1)
// CHECK: ('n_sect', 0)
// CHECK: ('n_desc', 0)
// CHECK: ('n_value', 0)
// CHECK: ('_string', 'sym_globl_undef_C')
// CHECK: ),
// CHECK: ])
// CHECK: ),
// CHECK: # Load Command 2
// CHECK: (('command', 11)
// CHECK: ('size', 80)
// CHECK: ('ilocalsym', 0)
// CHECK: ('nlocalsym', 3)
// CHECK: ('iextdefsym', 3)
// CHECK: ('nextdefsym', 3)
// CHECK: ('iundefsym', 6)
// CHECK: ('nundefsym', 3)
// CHECK: ('tocoff', 0)
// CHECK: ('ntoc', 0)
// CHECK: ('modtaboff', 0)
// CHECK: ('nmodtab', 0)
// CHECK: ('extrefsymoff', 0)
// CHECK: ('nextrefsyms', 0)
// CHECK: ('indirectsymoff', 0)
// CHECK: ('nindirectsyms', 0)
// CHECK: ('extreloff', 0)
// CHECK: ('nextrel', 0)
// CHECK: ('locreloff', 0)
// CHECK: ('nlocrel', 0)
// CHECK: ('_indirect_symbols', [
// CHECK: ])
// CHECK: ),
// CHECK: ])