diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h index d4a8efe01d4..bcd678deaa3 100644 --- a/include/llvm/MC/MCAssembler.h +++ b/include/llvm/MC/MCAssembler.h @@ -312,6 +312,10 @@ public: /// Offset - The offset to apply to the fragment address to form this symbol's /// value. uint64_t Offset; + + /// IsExternal - True if this symbol is visible outside this translation + /// unit. + unsigned IsExternal : 1; public: // Only for use as sentinel. @@ -330,6 +334,13 @@ public: uint64_t getOffset() const { return Offset; } void setOffset(uint64_t Value) { Offset = Value; } + /// @} + /// @name Symbol Attributes + /// @{ + + bool isExternal() const { return IsExternal; } + void setExternal(bool Value) { IsExternal = Value; } + /// @} }; diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h index 2b570a7ad3c..122e897a92d 100644 --- a/include/llvm/MC/MCSymbol.h +++ b/include/llvm/MC/MCSymbol.h @@ -43,7 +43,7 @@ namespace llvm { /// undefined symbols, and the special AbsolutePseudoSection value for /// absolute symbols. const MCSection *Section; - + /// IsTemporary - True if this is an assembler temporary label, which /// typically does not survive in the .o file's symbol table. Usually /// "Lfoo" or ".foo". @@ -52,8 +52,7 @@ namespace llvm { private: // MCContext creates and uniques these. friend class MCContext; MCSymbol(const StringRef &_Name, bool _IsTemporary) - : Name(_Name), Section(0), - IsTemporary(_IsTemporary) {} + : Name(_Name), Section(0), IsTemporary(_IsTemporary) {} MCSymbol(const MCSymbol&); // DO NOT IMPLEMENT void operator=(const MCSymbol&); // DO NOT IMPLEMENT @@ -61,7 +60,7 @@ namespace llvm { /// getName - Get the symbol name. const std::string &getName() const { return Name; } - /// @name Symbol Location Functions + /// @name Symbol Type /// @{ /// isUndefined - Check if this symbol undefined (i.e., implicitly defined). 
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 2daa6ccbcd4..9bd704a0e2c 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -9,6 +9,7 @@ #include "llvm/MC/MCAssembler.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" @@ -50,6 +51,37 @@ class MachObjectWriter { LCT_Dysymtab = 0xb }; + // See <mach-o/nlist.h>. + enum SymbolTypeType { + STT_Undefined = 0x00, + STT_Absolute = 0x02, + STT_Section = 0x0e + }; + + enum SymbolTypeFlags { + // If any of these bits are set, then the entry is a stab entry number (see + // <mach-o/stab.h>). Otherwise the other masks apply. + STF_StabsEntryMask = 0xe0, + + STF_TypeMask = 0x0e, + STF_External = 0x01, + STF_PrivateExtern = 0x10 + }; + + /// MachSymbolData - Helper struct for containing some precomputed information + /// on symbols. + struct MachSymbolData { + MCSymbolData *SymbolData; + uint64_t StringIndex; + uint8_t SectionIndex; + + // Support lexicographic sorting. + bool operator<(const MachSymbolData &RHS) const { + const std::string &Name = SymbolData->getSymbol().getName(); + return Name < RHS.SymbolData->getSymbol().getName(); + } + }; + raw_ostream &OS; bool IsLSB; @@ -245,44 +277,131 @@ public: assert(OS.tell() - Start == DysymtabLoadCommandSize); } - void WriteNlist32(uint32_t StringIndex, uint8_t Type, uint8_t Sect, - int16_t Desc, uint32_t Value) { + void WriteNlist32(MachSymbolData &MSD) { + MCSymbol &Symbol = MSD.SymbolData->getSymbol(); + uint8_t Type = 0; + + // Set the N_TYPE bits. See <mach-o/nlist.h>. + // + // FIXME: Are the prebound or indirect fields possible here? + if (Symbol.isUndefined()) + Type = STT_Undefined; + else if (Symbol.isAbsolute()) + Type = STT_Absolute; + else + Type = STT_Section; + + // FIXME: Set STAB bits. + + // FIXME: Set private external bit. + + // Set external bit. 
+ if (MSD.SymbolData->isExternal()) + Type |= STF_External; + // struct nlist (12 bytes) - Write32(StringIndex); + Write32(MSD.StringIndex); Write8(Type); - Write8(Sect); - Write16(Desc); - Write32(Value); + Write8(MSD.SectionIndex); + Write16(0); // FIXME: Desc + Write32(0); // FIXME: Value } - /// ComputeStringTable - Compute the string table, for use in the symbol - /// table. + /// ComputeSymbolTable - Compute the symbol table data /// /// \param StringTable [out] - The string table data. /// \param StringIndexMap [out] - Map from symbol names to offsets in the /// string table. - void ComputeStringTable(MCAssembler &Asm, SmallString<256> &StringTable, - StringMap &StringIndexMap) { - // Build the string table. - // - // FIXME: Does 'as' ever bother to compress this when we have a suffix - // match? + + void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, + std::vector &LocalSymbolData, + std::vector &ExternalSymbolData, + std::vector &UndefinedSymbolData) { + // Build section lookup table. + DenseMap SectionIndexMap; + unsigned Index = 1; + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it, ++Index) + SectionIndexMap[&it->getSection()] = Index; + assert(Index <= 256 && "Too many sections!"); // Index 0 is always the empty string. + StringMap StringIndexMap; StringTable += '\x00'; + + // Build the symbol arrays and the string table, but only for non-local + // symbols. + // + // The particular order that we collect the symbols and create the string + // table, then sort the symbols is chosen to match 'as'. Even though it + // doesn't matter for correctness, this is important for letting us diff .o + // files. 
for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), ie = Asm.symbol_end(); it != ie; ++it) { - StringRef Name = it->getSymbol().getName(); - uint64_t &Entry = StringIndexMap[Name]; + MCSymbol &Symbol = it->getSymbol(); + if (!it->isExternal()) + continue; + + uint64_t &Entry = StringIndexMap[Symbol.getName()]; if (!Entry) { Entry = StringTable.size(); - StringTable += Name; + StringTable += Symbol.getName(); StringTable += '\x00'; } + + MachSymbolData MSD; + MSD.SymbolData = it; + MSD.StringIndex = Entry; + + if (Symbol.isUndefined()) { + MSD.SectionIndex = 0; + UndefinedSymbolData.push_back(MSD); + } else if (Symbol.isAbsolute()) { + MSD.SectionIndex = 0; + ExternalSymbolData.push_back(MSD); + } else { + MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); + assert(MSD.SectionIndex && "Invalid section index!"); + ExternalSymbolData.push_back(MSD); + } } + // Now add the data for local symbols. + for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), + ie = Asm.symbol_end(); it != ie; ++it) { + MCSymbol &Symbol = it->getSymbol(); + + if (it->isExternal()) + continue; + + uint64_t &Entry = StringIndexMap[Symbol.getName()]; + if (!Entry) { + Entry = StringTable.size(); + StringTable += Symbol.getName(); + StringTable += '\x00'; + } + + MachSymbolData MSD; + MSD.SymbolData = it; + MSD.StringIndex = Entry; + + assert(!Symbol.isUndefined() && "Local symbol can not be undefined!"); + if (Symbol.isAbsolute()) { + MSD.SectionIndex = 0; + LocalSymbolData.push_back(MSD); + } else { + MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); + assert(MSD.SectionIndex && "Invalid section index!"); + LocalSymbolData.push_back(MSD); + } + } + + // External and undefined symbols are required to be in lexicographic order. + std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); + std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); + // The string table is padded to a multiple of 4. 
// // FIXME: Check to see if this varies per arch. @@ -295,12 +414,15 @@ public: // Compute symbol table information. SmallString<256> StringTable; - StringMap StringIndexMap; + std::vector LocalSymbolData; + std::vector ExternalSymbolData; + std::vector UndefinedSymbolData; unsigned NumSymbols = Asm.symbol_size(); // No symbol table command is written if there are no symbols. if (NumSymbols) - ComputeStringTable(Asm, StringTable, StringIndexMap); + ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData, + UndefinedSymbolData); // Compute the file offsets for all the sections in advance, so that we can // write things out in order. @@ -350,13 +472,13 @@ public: WriteSymtabLoadCommand(SymbolTableOffset, NumSymbols, StringTableOffset, StringTable.size()); - // FIXME: Get correct symbol indices and counts. unsigned FirstLocalSymbol = 0; - unsigned NumLocalSymbols = NumSymbols; - unsigned FirstExternalSymbol = NumLocalSymbols; - unsigned NumExternalSymbols = 0; - unsigned FirstUndefinedSymbol = NumLocalSymbols; - unsigned NumUndefinedSymbols = 0; + unsigned NumLocalSymbols = LocalSymbolData.size(); + unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols; + unsigned NumExternalSymbols = ExternalSymbolData.size(); + unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols; + unsigned NumUndefinedSymbols = UndefinedSymbolData.size(); + // FIXME: Get correct symbol indices and counts for indirect symbols. unsigned IndirectSymbolOffset = 0; unsigned NumIndirectSymbols = 0; WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, @@ -373,18 +495,15 @@ public: if (NumSymbols) { // FIXME: Check that offsets match computed ones. - // FIXME: These need to be reordered, both to segregate into categories - // as well as to order some sublists. + // FIXME: Some of these are ordered by name to help the linker. // Write the symbol table entries. 
- for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), - ie = Asm.symbol_end(); it != ie; ++it) { - MCSymbol &Sym = it->getSymbol(); - uint64_t Index = StringIndexMap[Sym.getName()]; - assert(Index && "Invalid index!"); - WriteNlist32(Index, /*FIXME: Type=*/0, /*FIXME: Sect=*/0, - /*FIXME: Desc=*/0, /*FIXME: Value=*/0); - } + for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) + WriteNlist32(LocalSymbolData[i]); + for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) + WriteNlist32(ExternalSymbolData[i]); + for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) + WriteNlist32(UndefinedSymbolData[i]); // Write the string table. OS << StringTable.str(); @@ -427,7 +546,8 @@ MCSymbolData::MCSymbolData() : Symbol(*(MCSymbol*)0) {} MCSymbolData::MCSymbolData(MCSymbol &_Symbol, MCFragment *_Fragment, uint64_t _Offset, MCAssembler *A) - : Symbol(_Symbol), Fragment(_Fragment), Offset(_Offset) + : Symbol(_Symbol), Fragment(_Fragment), Offset(_Offset), + IsExternal(false) { if (A) A->getSymbolList().push_back(this); diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 0b79944933b..1a648ea5e4b 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -142,7 +142,14 @@ void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol, SymbolAttr Attribute) { - llvm_unreachable("FIXME: Not yet implemented!"); + switch (Attribute) { + default: + llvm_unreachable("FIXME: Not yet implemented!"); + + case MCStreamer::Global: + getSymbolData(*Symbol).setExternal(true); + break; + } } void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { diff --git a/test/MC/MachO/symbols-1.s b/test/MC/MachO/symbols-1.s new file mode 100644 index 00000000000..2cf1311eb32 --- /dev/null +++ b/test/MC/MachO/symbols-1.s @@ -0,0 +1,160 @@ +// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump | FileCheck %s + +sym_local_B: +.globl 
sym_globl_def_B +.globl sym_globl_undef_B +sym_local_A: +.globl sym_globl_def_A +.globl sym_globl_undef_A +sym_local_C: +.globl sym_globl_def_C +.globl sym_globl_undef_C + +sym_globl_def_A: +sym_globl_def_B: +sym_globl_def_C: + .long 0 + +// CHECK: ('cputype', 7) +// CHECK: ('cpusubtype', 3) +// CHECK: ('filetype', 1) +// CHECK: ('num_load_commands', 1) +// CHECK: ('load_commands_size', 228) +// CHECK: ('flag', 0) +// CHECK: ('load_commands', [ +// CHECK: # Load Command 0 +// CHECK: (('command', 1) +// CHECK: ('size', 124) +// CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('vm_addr', 0) +// CHECK: ('vm_size', 4) +// CHECK: ('file_offset', 256) +// CHECK: ('file_size', 4) +// CHECK: ('maxprot', 7) +// CHECK: ('initprot', 7) +// CHECK: ('num_sections', 1) +// CHECK: ('flags', 0) +// CHECK: ('sections', [ +// CHECK: # Section 0 +// CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00') +// CHECK: ('address', 0) +// CHECK: ('size', 4) +// CHECK: ('offset', 256) +// CHECK: ('alignment', 0) +// CHECK: ('reloc_offset', 0) +// CHECK: ('num_reloc', 0) +// CHECK: ('flags', 0x80000000) +// CHECK: ('reserved1', 0) +// CHECK: ('reserved2', 0) +// CHECK: ), +// CHECK: ]) +// CHECK: ), +// CHECK: # Load Command 1 +// CHECK: (('command', 2) +// CHECK: ('size', 24) +// CHECK: ('symoff', 260) +// CHECK: ('nsyms', 9) +// CHECK: ('stroff', 368) +// CHECK: ('strsize', 140) +// CHECK: ('_string_data', '\x00sym_globl_def_B\x00sym_globl_undef_B\x00sym_globl_def_A\x00sym_globl_undef_A\x00sym_globl_def_C\x00sym_globl_undef_C\x00sym_local_B\x00sym_local_A\x00sym_local_C\x00\x00') +// CHECK: ('_symbols', [ +// CHECK: # Symbol 0 +// CHECK: (('n_strx', 103) +// CHECK: ('n_type', 0xe) +// CHECK: ('n_sect', 1) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 0) +// CHECK: ('_string', 'sym_local_B') +// CHECK: ), +// CHECK: # Symbol 1 +// CHECK: 
(('n_strx', 115) +// CHECK: ('n_type', 0xe) +// CHECK: ('n_sect', 1) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 0) +// CHECK: ('_string', 'sym_local_A') +// CHECK: ), +// CHECK: # Symbol 2 +// CHECK: (('n_strx', 127) +// CHECK: ('n_type', 0xe) +// CHECK: ('n_sect', 1) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 0) +// CHECK: ('_string', 'sym_local_C') +// CHECK: ), +// CHECK: # Symbol 3 +// CHECK: (('n_strx', 35) +// CHECK: ('n_type', 0xf) +// CHECK: ('n_sect', 1) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 0) +// CHECK: ('_string', 'sym_globl_def_A') +// CHECK: ), +// CHECK: # Symbol 4 +// CHECK: (('n_strx', 1) +// CHECK: ('n_type', 0xf) +// CHECK: ('n_sect', 1) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 0) +// CHECK: ('_string', 'sym_globl_def_B') +// CHECK: ), +// CHECK: # Symbol 5 +// CHECK: (('n_strx', 69) +// CHECK: ('n_type', 0xf) +// CHECK: ('n_sect', 1) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 0) +// CHECK: ('_string', 'sym_globl_def_C') +// CHECK: ), +// CHECK: # Symbol 6 +// CHECK: (('n_strx', 51) +// CHECK: ('n_type', 0x1) +// CHECK: ('n_sect', 0) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 0) +// CHECK: ('_string', 'sym_globl_undef_A') +// CHECK: ), +// CHECK: # Symbol 7 +// CHECK: (('n_strx', 17) +// CHECK: ('n_type', 0x1) +// CHECK: ('n_sect', 0) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 0) +// CHECK: ('_string', 'sym_globl_undef_B') +// CHECK: ), +// CHECK: # Symbol 8 +// CHECK: (('n_strx', 85) +// CHECK: ('n_type', 0x1) +// CHECK: ('n_sect', 0) +// CHECK: ('n_desc', 0) +// CHECK: ('n_value', 0) +// CHECK: ('_string', 'sym_globl_undef_C') +// CHECK: ), +// CHECK: ]) +// CHECK: ), +// CHECK: # Load Command 2 +// CHECK: (('command', 11) +// CHECK: ('size', 80) +// CHECK: ('ilocalsym', 0) +// CHECK: ('nlocalsym', 3) +// CHECK: ('iextdefsym', 3) +// CHECK: ('nextdefsym', 3) +// CHECK: ('iundefsym', 6) +// CHECK: ('nundefsym', 3) +// CHECK: ('tocoff', 0) +// CHECK: ('ntoc', 0) +// CHECK: ('modtaboff', 0) +// CHECK: 
('nmodtab', 0) +// CHECK: ('extrefsymoff', 0) +// CHECK: ('nextrefsyms', 0) +// CHECK: ('indirectsymoff', 0) +// CHECK: ('nindirectsyms', 0) +// CHECK: ('extreloff', 0) +// CHECK: ('nextrel', 0) +// CHECK: ('locreloff', 0) +// CHECK: ('nlocrel', 0) +// CHECK: ('_indirect_symbols', [ +// CHECK: ]) +// CHECK: ), +// CHECK: ])