diff --git a/lib/CodeGen/MachO.h b/lib/CodeGen/MachO.h
new file mode 100644
index 00000000000..bd9bd61e9ed
--- /dev/null
+++ b/lib/CodeGen/MachO.h
@@ -0,0 +1,423 @@
+//=== MachO.h - Mach-O structures and constants -----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the structures and constants used to emit Mach-O files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MACHO_H
+#define MACHO_H
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+typedef std::vector<unsigned char> DataBuffer;
+
+/// MachOSym - This struct contains information about each symbol that is
+/// added to the logical symbol table for the module.  This is eventually
+/// turned into a real symbol table in the file.
+struct MachOSym {
+  const GlobalValue *GV;    // The global value this corresponds to.
+  std::string GVName;       // The mangled name of the global value.
+  uint32_t    n_strx;       // index into the string table
+  uint8_t     n_type;       // type flag
+  uint8_t     n_sect;       // section number or NO_SECT
+  int16_t     n_desc;       // see <mach-o/stab.h>
+  uint64_t    n_value;      // value for this symbol (or stab offset)
+
+  // Constants for the n_sect field
+  // see <mach-o/nlist.h>
+  enum { NO_SECT = 0 };     // symbol is not in any section
+
+  // Constants for the n_type field
+  // see <mach-o/nlist.h>
+  enum { N_UNDF = 0x0,  // undefined, n_sect == NO_SECT
+         N_ABS  = 0x2,  // absolute, n_sect == NO_SECT
+         N_SECT = 0xe,  // defined in section number n_sect
+         N_PBUD = 0xc,  // prebound undefined (defined in a dylib)
+         N_INDR = 0xa   // indirect
+  };
+  // The following bits are OR'd into the types above. For example, a type
+  // of 0x0f would be an external N_SECT symbol (0x0e | 0x01).
+  enum { N_EXT  = 0x01,   // external symbol bit
+         N_PEXT = 0x10    // private external symbol bit
+  };
+
+  // Constants for the n_desc field
+  // see <mach-o/nlist.h>
+  enum { REFERENCE_FLAG_UNDEFINED_NON_LAZY          = 0,
+         REFERENCE_FLAG_UNDEFINED_LAZY              = 1,
+         REFERENCE_FLAG_DEFINED                     = 2,
+         REFERENCE_FLAG_PRIVATE_DEFINED             = 3,
+         REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY  = 4,
+         REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY      = 5
+  };
+  enum { N_NO_DEAD_STRIP = 0x0020, // symbol is not to be dead stripped
+         N_WEAK_REF      = 0x0040, // symbol is weak referenced
+         N_WEAK_DEF      = 0x0080  // coalesced symbol is a weak definition
+  };
+
+  MachOSym(const GlobalValue *gv, std::string name, uint8_t sect,
+           const TargetAsmInfo *TAI);
+
+  struct SymCmp {
+    // FIXME: this does not appear to be sorting 'f' after 'F'
+    bool operator()(const MachOSym &LHS, const MachOSym &RHS) const {
+      return LHS.GVName < RHS.GVName;
+    }
+  };
+
+
+  /// PartitionByLocal - Simple boolean predicate that returns true if Sym is
+  /// a local symbol rather than an external symbol.
+
+  static inline bool PartitionByLocal(const MachOSym &Sym) {
+    return (Sym.n_type & (MachOSym::N_EXT | MachOSym::N_PEXT)) == 0;
+  }
+
+  /// PartitionByDefined - Simple boolean predicate that returns true if Sym is
+  /// defined in this module.
+
+  static inline bool PartitionByDefined(const MachOSym &Sym) {
+    // FIXME: Do N_ABS or N_INDR count as defined?
+    return (Sym.n_type & MachOSym::N_SECT) == MachOSym::N_SECT;
+  }
+
+}; // end struct MachOSym
+
+/// MachOHeader - This struct contains the header information about a
+/// specific architecture type/subtype pair that is emitted to the file.
+
+struct MachOHeader {
+  uint32_t  magic;      // mach magic number identifier
+  uint32_t  filetype;   // type of file
+  uint32_t  ncmds;      // number of load commands
+  uint32_t  sizeofcmds; // the size of all the load commands
+  uint32_t  flags;      // flags
+  uint32_t  reserved;   // 64-bit only
+
+  /// HeaderData - The actual data for the header which we are building
+  /// up for emission to the file.
+  DataBuffer HeaderData;
+
+  // Constants for the filetype field
+  // see <mach-o/loader.h> for additional info on the various types
+  enum { MH_OBJECT     = 1, // relocatable object file
+         MH_EXECUTE    = 2, // demand paged executable file
+         MH_FVMLIB     = 3, // fixed VM shared library file
+         MH_CORE       = 4, // core file
+         MH_PRELOAD    = 5, // preloaded executable file
+         MH_DYLIB      = 6, // dynamically bound shared library
+         MH_DYLINKER   = 7, // dynamic link editor
+         MH_BUNDLE     = 8, // dynamically bound bundle file
+         MH_DYLIB_STUB = 9, // shared library stub for static linking only
+         MH_DSYM       = 10 // companion file with only debug sections
+  };
+
+  // Constants for the flags field
+  enum { MH_NOUNDEFS                = 1 << 0,
+            // the object file has no undefined references
+         MH_INCRLINK                = 1 << 1,
+            // the object file is the output of an incremental link against
+            // a base file and cannot be link edited again
+         MH_DYLDLINK                = 1 << 2,
+            // the object file is input for the dynamic linker and cannot be
+            // statically link edited again.
+         MH_BINDATLOAD              = 1 << 3,
+            // the object file's undefined references are bound by the
+            // dynamic linker when loaded.
+         MH_PREBOUND                = 1 << 4,
+            // the file has its dynamic undefined references prebound
+         MH_SPLIT_SEGS              = 1 << 5,
+            // the file has its read-only and read-write segments split
+            // see <mach-o/loader.h>
+         MH_LAZY_INIT               = 1 << 6,
+            // the shared library init routine is to be run lazily via
+            // catching memory faults to its writable segments (obsolete)
+         MH_TWOLEVEL                = 1 << 7,
+            // the image is using two-level namespace bindings
+         MH_FORCE_FLAT              = 1 << 8,
+            // the executable is forcing all images to use flat namespace
+            // bindings.
+         MH_NOMULTIDEFS             = 1 << 9,
+            // this umbrella guarantees no multiple definitions of symbols
+            // in its sub-images so the two-level namespace hints can
+            // always be used.
+         MH_NOFIXPREBINDING         = 1 << 10,
+            // do not have dyld notify the prebinding agent about this
+            // executable.
+         MH_PREBINDABLE             = 1 << 11,
+            // the binary is not prebound but can have its prebinding
+            // redone.  only used when MH_PREBOUND is not set.
+         MH_ALLMODSBOUND            = 1 << 12,
+            // indicates that this binary binds to all two-level namespace
+            // modules of its dependent libraries.  Only used when
+            // MH_PREBINDABLE and MH_TWOLEVEL are both set.
+         MH_SUBSECTIONS_VIA_SYMBOLS = 1 << 13,
+            // safe to divide up the sections into sub-sections via symbols
+            // for dead code stripping.
+         MH_CANONICAL               = 1 << 14,
+            // the binary has been canonicalized via the unprebind operation
+         MH_WEAK_DEFINES            = 1 << 15,
+            // the final linked image contains external weak symbols
+         MH_BINDS_TO_WEAK           = 1 << 16,
+            // the final linked image uses weak symbols
+         MH_ALLOW_STACK_EXECUTION   = 1 << 17
+            // When this bit is set, all stacks in the task will be given
+            // stack execution privilege.  Only used in MH_EXECUTE filetype
+  };
+
+  MachOHeader() : magic(0), filetype(0), ncmds(0), sizeofcmds(0), flags(0),
+                  reserved(0) { }
+
+  /// cmdSize - This routine returns the size of the MachOHeader as written
+  /// to disk, depending on whether the destination is a 64 bit Mach-O file.
+  unsigned cmdSize(bool is64Bit) const {
+    if (is64Bit)
+      return 8 * sizeof(uint32_t);
+    else
+      return 7 * sizeof(uint32_t);
+  }
+
+  /// setMagic - This routine sets the appropriate value for the 'magic'
+  /// field based on pointer size and endianness.
+  void setMagic(bool isLittleEndian, bool is64Bit) {
+    if (isLittleEndian)
+      if (is64Bit) magic = 0xcffaedfe;
+      else         magic = 0xcefaedfe;
+    else
+      if (is64Bit) magic = 0xfeedfacf;
+      else         magic = 0xfeedface;
+  }
+
+}; // end struct MachOHeader
+
+/// MachOSegment - This struct contains the necessary information to
+/// emit the load commands for each section in the file.
+struct MachOSegment {
+  uint32_t    cmd;      // LC_SEGMENT or LC_SEGMENT_64
+  uint32_t    cmdsize;  // Total size of this struct and section commands
+  std::string segname;  // segment name
+  uint64_t    vmaddr;   // address of this segment
+  uint64_t    vmsize;   // size of this segment, may be larger than filesize
+  uint64_t    fileoff;  // offset in file
+  uint64_t    filesize; // amount to read from file
+  uint32_t    maxprot;  // maximum VM protection
+  uint32_t    initprot; // initial VM protection
+  uint32_t    nsects;   // number of sections in this segment
+  uint32_t    flags;    // flags
+
+  // The following constants are getting pulled in by one of the
+  // system headers, which creates a neat clash with the enum.
+#if !defined(VM_PROT_NONE)
+#define VM_PROT_NONE    0x00
+#endif
+#if !defined(VM_PROT_READ)
+#define VM_PROT_READ    0x01
+#endif
+#if !defined(VM_PROT_WRITE)
+#define VM_PROT_WRITE   0x02
+#endif
+#if !defined(VM_PROT_EXECUTE)
+#define VM_PROT_EXECUTE 0x04
+#endif
+#if !defined(VM_PROT_ALL)
+#define VM_PROT_ALL     0x07
+#endif
+
+  // Constants for the vm protection fields
+  // see <mach/vm_prot.h>
+  enum { SEG_VM_PROT_NONE     = VM_PROT_NONE,
+         SEG_VM_PROT_READ     = VM_PROT_READ, // read permission
+         SEG_VM_PROT_WRITE    = VM_PROT_WRITE, // write permission
+         SEG_VM_PROT_EXECUTE  = VM_PROT_EXECUTE,
+         SEG_VM_PROT_ALL      = VM_PROT_ALL
+  };
+
+  // Constants for the cmd field
+  // see <mach-o/loader.h>
+  enum { LC_SEGMENT    = 0x01,  // segment of this file to be mapped
+         LC_SEGMENT_64 = 0x19   // 64-bit segment of this file to be mapped
+  };
+
+  /// cmdSize - This routine returns the size of the MachOSegment as written
+  /// to disk, depending on whether the destination is a 64 bit Mach-O file.
+  unsigned cmdSize(bool is64Bit) const {
+    if (is64Bit)
+      return 6 * sizeof(uint32_t) + 4 * sizeof(uint64_t) + 16;
+    else
+      return 10 * sizeof(uint32_t) + 16;  // addresses only 32 bits
+  }
+
+  MachOSegment(const std::string &seg, bool is64Bit)
+    : cmd(is64Bit ? LC_SEGMENT_64 : LC_SEGMENT), cmdsize(0), segname(seg),
+      vmaddr(0), vmsize(0), fileoff(0), filesize(0), maxprot(VM_PROT_ALL),
+      initprot(VM_PROT_ALL), nsects(0), flags(0) { }
+};
+
+/// MachOSection - This struct contains information about each section in a
+/// particular segment that is emitted to the file.  This is eventually
+/// turned into the SectionCommand in the load command for a particular
+/// segment.
+
+struct MachOSection {
+  std::string  sectname; // name of this section,
+  std::string  segname;  // segment this section goes in
+  uint64_t  addr;        // memory address of this section
+  uint64_t  size;        // size in bytes of this section
+  uint32_t  offset;      // file offset of this section
+  uint32_t  align;       // section alignment (power of 2)
+  uint32_t  reloff;      // file offset of relocation entries
+  uint32_t  nreloc;      // number of relocation entries
+  uint32_t  flags;       // flags (section type and attributes)
+  uint32_t  reserved1;   // reserved (for offset or index)
+  uint32_t  reserved2;   // reserved (for count or sizeof)
+  uint32_t  reserved3;   // reserved (64 bit only)
+
+  /// A unique number for this section, which will be used to match symbols
+  /// to the correct section.
+  uint32_t Index;
+
+  /// SectionData - The actual data for this section which we are building
+  /// up for emission to the file.
+  DataBuffer SectionData;
+
+  /// RelocBuffer - A buffer to hold the mach-o relocations before we write
+  /// them out at the appropriate location in the file.
+  DataBuffer RelocBuffer;
+
+  /// Relocations - The relocations that we have encountered so far in this
+  /// section that we will need to convert to MachORelocation entries when
+  /// the file is written.
+  std::vector<MachineRelocation> Relocations;
+
+  // Constants for the section types (low 8 bits of flags field)
+  // see <mach-o/loader.h>
+  enum { S_REGULAR = 0,
+            // regular section
+         S_ZEROFILL = 1,
+            // zero fill on demand section
+         S_CSTRING_LITERALS = 2,
+            // section with only literal C strings
+         S_4BYTE_LITERALS = 3,
+            // section with only 4 byte literals
+         S_8BYTE_LITERALS = 4,
+            // section with only 8 byte literals
+         S_LITERAL_POINTERS = 5,
+            // section with only pointers to literals
+         S_NON_LAZY_SYMBOL_POINTERS = 6,
+            // section with only non-lazy symbol pointers
+         S_LAZY_SYMBOL_POINTERS = 7,
+            // section with only lazy symbol pointers
+         S_SYMBOL_STUBS = 8,
+            // section with only symbol stubs
+            // byte size of stub in the reserved2 field
+         S_MOD_INIT_FUNC_POINTERS = 9,
+            // section with only function pointers for initialization
+         S_MOD_TERM_FUNC_POINTERS = 10,
+            // section with only function pointers for termination
+         S_COALESCED = 11,
+            // section contains symbols that are coalesced
+         S_GB_ZEROFILL = 12,
+            // zero fill on demand section (that can be larger than 4GB)
+         S_INTERPOSING = 13,
+            // section with only pairs of function pointers for interposing
+         S_16BYTE_LITERALS = 14
+            // section with only 16 byte literals
+  };
+
+  // Constants for the section flags (high 24 bits of flags field)
+  // see <mach-o/loader.h>
+  enum { S_ATTR_PURE_INSTRUCTIONS   = 1U << 31,
+            // section contains only true machine instructions
+         S_ATTR_NO_TOC              = 1 << 30,
+            // section contains coalesced symbols that are not to be in a
+            // ranlib table of contents
+         S_ATTR_STRIP_STATIC_SYMS   = 1 << 29,
+            // ok to strip static symbols in this section in files with the
+            // MH_DYLDLINK flag
+         S_ATTR_NO_DEAD_STRIP       = 1 << 28,
+            // no dead stripping
+         S_ATTR_LIVE_SUPPORT        = 1 << 27,
+            // blocks are live if they reference live blocks
+         S_ATTR_SELF_MODIFYING_CODE = 1 << 26,
+            // used with i386 code stubs written on by dyld
+         S_ATTR_DEBUG               = 1 << 25,
+            // a debug section
+         S_ATTR_SOME_INSTRUCTIONS   = 1 << 10,
+            // section contains some machine instructions
+         S_ATTR_EXT_RELOC           = 1 << 9,
+            // section has external relocation entries
+         S_ATTR_LOC_RELOC           = 1 << 8
+            // section has local relocation entries
+  };
+
+  /// cmdSize - This routine returns the size of the MachOSection as written
+  /// to disk, depending on whether the destination is a 64 bit Mach-O file.
+  unsigned cmdSize(bool is64Bit) const {
+    if (is64Bit)
+      return 8 * sizeof(uint32_t) + 2 * sizeof(uint64_t) + 32;
+    else
+      return 9 * sizeof(uint32_t) + 32;  // addresses only 32 bits
+  }
+
+  MachOSection(const std::string &seg, const std::string &sect)
+    : sectname(sect), segname(seg), addr(0), size(0), offset(0), align(2),
+      reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0),
+      reserved3(0), Index(0) { }
+
+}; // end struct MachOSection
+
+  /// MachODySymTab - This struct contains information about the offsets and
+  /// sizes of the dynamic symbol table information emitted for the
+  /// LC_DYSYMTAB load command.
+  struct MachODySymTab {
+    uint32_t cmd;             // LC_DYSYMTAB
+    uint32_t cmdsize;         // sizeof( MachODySymTab )
+    uint32_t ilocalsym;       // index to local symbols
+    uint32_t nlocalsym;       // number of local symbols
+    uint32_t iextdefsym;      // index to externally defined symbols
+    uint32_t nextdefsym;      // number of externally defined symbols
+    uint32_t iundefsym;       // index to undefined symbols
+    uint32_t nundefsym;       // number of undefined symbols
+    uint32_t tocoff;          // file offset to table of contents
+    uint32_t ntoc;            // number of entries in table of contents
+    uint32_t modtaboff;       // file offset to module table
+    uint32_t nmodtab;         // number of module table entries
+    uint32_t extrefsymoff;    // offset to referenced symbol table
+    uint32_t nextrefsyms;     // number of referenced symbol table entries
+    uint32_t indirectsymoff;  // file offset to the indirect symbol table
+    uint32_t nindirectsyms;   // number of indirect symbol table entries
+    uint32_t extreloff;       // offset to external relocation entries
+    uint32_t nextrel;         // number of external relocation entries
+    uint32_t locreloff;       // offset to local relocation entries
+    uint32_t nlocrel;         // number of local relocation entries
+
+    // Constants for the cmd field
+    // see <mach-o/loader.h>
+    enum { LC_DYSYMTAB = 0x0B  // dynamic link-edit symbol table info
+    };
+
+    MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)),
+      ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0),
+      iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0),
+      nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0),
+      nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) { }
+  };
+
+} // end namespace llvm
+
+#endif
+
diff --git a/lib/CodeGen/MachOCodeEmitter.cpp b/lib/CodeGen/MachOCodeEmitter.cpp
new file mode 100644
index 00000000000..02b02de9ec3
--- /dev/null
+++ b/lib/CodeGen/MachOCodeEmitter.cpp
@@ -0,0 +1,207 @@
+//===-- MachOCodeEmitter.cpp - Target-independent Mach-O Emitter code ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MachOCodeEmitter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Support/Mangler.h"
+#include "llvm/Support/OutputBuffer.h"
+
+//===----------------------------------------------------------------------===//
+//                       MachOCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+/// startFunction - This callback is invoked when a new machine function is
+/// about to be emitted.
+
+void MachOCodeEmitter::startFunction(MachineFunction &MF) {
+  const TargetData *TD = TM.getTargetData();
+  const Function *F = MF.getFunction();
+
+  // Align the output buffer to the appropriate alignment, power of 2.
+  unsigned FnAlign = F->getAlignment();
+  unsigned TDAlign = TD->getPrefTypeAlignment(F->getType());
+  unsigned ByteAlign = std::max(FnAlign, TDAlign), Align = Log2_32(ByteAlign);
+  assert(!(ByteAlign & (ByteAlign-1)) && "Alignment is not a power of two!");
+
+  // Get the Mach-O Section that this function belongs in.
+  MachOSection *MOS = MOW.getTextSection();
+
+  // FIXME: better memory management
+  MOS->SectionData.reserve(4096);
+  BufferBegin = &MOS->SectionData[0];
+  BufferEnd = BufferBegin + MOS->SectionData.capacity();
+
+  // Upgrade the section alignment if required.
+  if (MOS->align < Align) MOS->align = Align;
+
+  // Round the size up to the correct alignment for starting the new function.
+  if ((MOS->size & ((1 << Align) - 1)) != 0) {
+    MOS->size += (1 << Align);
+    MOS->size &= ~((1 << Align) - 1);
+  }
+
+  // FIXME: Using MOS->size directly here instead of calculating it from the
+  // output buffer size (impossible because the code emitter deals only in raw
+  // bytes) forces us to manually synchronize size and write padding zero bytes
+  // to the output buffer for all non-text sections.  For text sections, we do
+  // not synchronize the output buffer, and we just blow up if anyone tries to
+  // write non-code to it.  An assert should probably be added to
+  // AddSymbolToSection to prevent calling it on the text section.
+  CurBufferPtr = BufferBegin + MOS->size;
+}
+
+/// finishFunction - This callback is invoked after the function is completely
+/// finished.
+
+bool MachOCodeEmitter::finishFunction(MachineFunction &MF) {
+
+  // Get the Mach-O Section that this function belongs in.
+  MachOSection *MOS = MOW.getTextSection();
+
+  // Get a symbol for the function to add to the symbol table
+  // FIXME: it seems like we should call something like AddSymbolToSection
+  // in startFunction rather than changing the section size and symbol n_value
+  // here.
+  const GlobalValue *FuncV = MF.getFunction();
+  MachOSym FnSym(FuncV, MOW.Mang->getValueName(FuncV), MOS->Index, TAI);
+  FnSym.n_value = MOS->size;
+  MOS->size = CurBufferPtr - BufferBegin;
+
+  // Emit constant pool to appropriate section(s)
+  emitConstantPool(MF.getConstantPool());
+
+  // Emit jump tables to appropriate section
+  emitJumpTables(MF.getJumpTableInfo());
+
+  // If we have emitted any relocations to function-specific objects such as
+  // basic blocks, constant pools entries, or jump tables, record their
+  // addresses now so that we can rewrite them with the correct addresses
+  // later.
+  for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
+    MachineRelocation &MR = Relocations[i];
+    intptr_t Addr;
+
+    if (MR.isBasicBlock()) {
+      Addr = getMachineBasicBlockAddress(MR.getBasicBlock());
+      MR.setConstantVal(MOS->Index);
+      MR.setResultPointer((void*)Addr);
+    } else if (MR.isJumpTableIndex()) {
+      Addr = getJumpTableEntryAddress(MR.getJumpTableIndex());
+      MR.setConstantVal(MOW.getJumpTableSection()->Index);
+      MR.setResultPointer((void*)Addr);
+    } else if (MR.isConstantPoolIndex()) {
+      Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex());
+      MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]);
+      MR.setResultPointer((void*)Addr);
+    } else if (MR.isGlobalValue()) {
+      // FIXME: This should be a set or something that uniques
+      MOW.PendingGlobals.push_back(MR.getGlobalValue());
+    } else {
+      assert(0 && "Unhandled relocation type");
+    }
+    MOS->Relocations.push_back(MR);
+  }
+  Relocations.clear();
+
+  // Finally, add it to the symtab.
+  MOW.SymbolTable.push_back(FnSym);
+
+  // Clear per-function data structures.
+  CPLocations.clear();
+  CPSections.clear();
+  JTLocations.clear();
+  MBBLocations.clear();
+
+  return false;
+}
+
+/// emitConstantPool - For each constant pool entry, figure out which section
+/// the constant should live in, allocate space for it, and emit it to the
+/// Section data buffer.
+void MachOCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
+  const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+  if (CP.empty()) return;
+
+  // FIXME: handle PIC codegen
+  assert(TM.getRelocationModel() != Reloc::PIC_ &&
+         "PIC codegen not yet handled for mach-o constant pools!");
+
+  // Although there is no strict necessity that I am aware of, we will do what
+  // gcc for OS X does and put each constant pool entry in a section of constant
+  // objects of a certain size.  That means that float constants go in the
+  // literal4 section, and double objects go in literal8, etc.
+  //
+  // FIXME: revisit this decision if we ever do the "stick everything into one
+  // "giant object for PIC" optimization.
+  for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+    const Type *Ty = CP[i].getType();
+    unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty);
+
+    MachOSection *Sec = MOW.getConstSection(CP[i].Val.ConstVal);
+    OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);
+
+    CPLocations.push_back(Sec->SectionData.size());
+    CPSections.push_back(Sec->Index);
+
+    // FIXME: remove when we have unified size + output buffer
+    Sec->size += Size;
+
+    // Allocate zero-filled space in the section for the constant.
+    // FIXME: need alignment?
+    // FIXME: share between here and AddSymbolToSection?
+    for (unsigned j = 0; j < Size; ++j)
+      SecDataOut.outbyte(0);
+
+    MOW.InitMem(CP[i].Val.ConstVal, &Sec->SectionData[0], CPLocations[i],
+                TM.getTargetData(), Sec->Relocations);
+  }
+}
+
+/// emitJumpTables - Emit all the jump tables for a given jump table info
+/// record to the appropriate section.
+
+void MachOCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) {
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  if (JT.empty()) return;
+
+  // FIXME: handle PIC codegen
+  assert(TM.getRelocationModel() != Reloc::PIC_ &&
+         "PIC codegen not yet handled for mach-o jump tables!");
+
+  MachOSection *Sec = MOW.getJumpTableSection();
+  unsigned TextSecIndex = MOW.getTextSection()->Index;
+  OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian);
+
+  for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+    // For each jump table, record its offset from the start of the section,
+    // reserve space for the relocations to the MBBs, and add the relocations.
+    const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+    JTLocations.push_back(Sec->SectionData.size());
+    for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
+      MachineRelocation MR(MOW.GetJTRelocation(Sec->SectionData.size(),
+                                               MBBs[mi]));
+      MR.setResultPointer((void *)JTLocations[i]);
+      MR.setConstantVal(TextSecIndex);
+      Sec->Relocations.push_back(MR);
+      SecDataOut.outaddr(0);
+    }
+  }
+  // FIXME: remove when we have unified size + output buffer
+  Sec->size = Sec->SectionData.size();
+}
+
+} // end namespace llvm
+
diff --git a/lib/CodeGen/MachOCodeEmitter.h b/lib/CodeGen/MachOCodeEmitter.h
new file mode 100644
index 00000000000..0a6e4e4d19e
--- /dev/null
+++ b/lib/CodeGen/MachOCodeEmitter.h
@@ -0,0 +1,129 @@
+//===-- MachOCodeEmitter.h - Target-independent Mach-O Emitter class ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MACHOCODEEMITTER_H
+#define MACHOCODEEMITTER_H
+
+#include "MachOWriter.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include <vector>
+
+namespace llvm {
+
+/// MachOCodeEmitter - This class is used by the MachOWriter to emit the code
+/// for functions to the Mach-O file.
+
+class MachOCodeEmitter : public MachineCodeEmitter {
+  MachOWriter &MOW;
+
+  /// Target machine description.
+  TargetMachine &TM;
+
+  /// is64Bit/isLittleEndian - This information is inferred from the target
+  /// machine directly, indicating what header values and flags to set.
+  bool is64Bit, isLittleEndian;
+
+  const TargetAsmInfo *TAI;
+
+  /// Relocations - These are the relocations that the function needs, as
+  /// emitted.
+  std::vector<MachineRelocation> Relocations;
+
+  /// CPLocations - This is a map of constant pool indices to offsets from the
+  /// start of the section for that constant pool index.
+  std::vector<uintptr_t> CPLocations;
+
+  /// CPSections - This is a map of constant pool indices to the MachOSection
+  /// containing the constant pool entry for that index.
+  std::vector<unsigned> CPSections;
+
+  /// JTLocations - This is a map of jump table indices to offsets from the
+  /// start of the section for that jump table index.
+  std::vector<uintptr_t> JTLocations;
+
+  /// MBBLocations - This vector is a mapping from MBB ID's to their address.
+  /// It is filled in by the StartMachineBasicBlock callback and queried by
+  /// the getMachineBasicBlockAddress callback.
+  std::vector<uintptr_t> MBBLocations;
+
+public:
+  MachOCodeEmitter(MachOWriter &mow) : MOW(mow), TM(MOW.TM)
+  {
+    is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+    isLittleEndian = TM.getTargetData()->isLittleEndian();
+    TAI = TM.getTargetAsmInfo();
+  }
+
+  virtual void startFunction(MachineFunction &MF);
+  virtual bool finishFunction(MachineFunction &MF);
+
+  virtual void addRelocation(const MachineRelocation &MR) {
+    Relocations.push_back(MR);
+  }
+
+  void emitConstantPool(MachineConstantPool *MCP);
+  void emitJumpTables(MachineJumpTableInfo *MJTI);
+
+  virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const {
+    assert(CPLocations.size() > Index && "CP not emitted!");
+    return CPLocations[Index];
+  }
+  virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const {
+    assert(JTLocations.size() > Index && "JT not emitted!");
+    return JTLocations[Index];
+  }
+
+  virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+    if (MBBLocations.size() <= (unsigned)MBB->getNumber())
+      MBBLocations.resize((MBB->getNumber()+1)*2);
+    MBBLocations[MBB->getNumber()] = getCurrentPCOffset();
+  }
+
+  virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+    assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
+           MBBLocations[MBB->getNumber()] && "MBB not emitted!");
+    return MBBLocations[MBB->getNumber()];
+  }
+
+  virtual uintptr_t getLabelAddress(uint64_t Label) const {
+    assert(0 && "get Label not implemented");
+    abort();
+    return 0;
+  }
+
+  virtual void emitLabel(uint64_t LabelID) {
+    assert(0 && "emit Label not implemented");
+    abort();
+  }
+
+  virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) { }
+
+  /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
+  virtual void startGVStub(const GlobalValue* F, unsigned StubSize,
+                           unsigned Alignment = 1) {
+    assert(0 && "JIT specific function called!");
+    abort();
+  }
+  virtual void startGVStub(const GlobalValue* F, void *Buffer,
+                           unsigned StubSize) {
+    assert(0 && "JIT specific function called!");
+    abort();
+  }
+  virtual void *finishGVStub(const GlobalValue* F) {
+    assert(0 && "JIT specific function called!");
+    abort();
+    return 0;
+  }
+
+}; // end class MachOCodeEmitter
+
+} // end namespace llvm
+
+#endif
+
diff --git a/lib/CodeGen/MachOWriter.cpp b/lib/CodeGen/MachOWriter.cpp index 43326272c1b..163df6994aa 100644 --- a/lib/CodeGen/MachOWriter.cpp +++ b/lib/CodeGen/MachOWriter.cpp @@ -23,6 +23,7 @@ //===----------------------------------------------------------------------===// #include "MachOWriter.h" +#include "MachOCodeEmitter.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" @@ -40,11 +41,12 @@ #include "llvm/Support/raw_ostream.h" #include #include -using namespace llvm; + +namespace llvm { /// AddMachOWriter - Concrete function to add the Mach-O writer to the function /// pass manager. -MachineCodeEmitter *llvm::AddMachOWriter(PassManagerBase &PM, +MachineCodeEmitter *AddMachOWriter(PassManagerBase &PM, raw_ostream &O, TargetMachine &TM) { MachOWriter *MOW = new MachOWriter(O, TM); @@ -52,300 +54,21 @@ MachineCodeEmitter *llvm::AddMachOWriter(PassManagerBase &PM, return &MOW->getMachineCodeEmitter(); } -//===----------------------------------------------------------------------===// -// MachOCodeEmitter Implementation -//===----------------------------------------------------------------------===// - -namespace llvm { - /// MachOCodeEmitter - This class is used by the MachOWriter to emit the code - /// for functions to the Mach-O file. - class MachOCodeEmitter : public MachineCodeEmitter { - MachOWriter &MOW; - - /// Target machine description.
- TargetMachine &TM; - - /// is64Bit/isLittleEndian - This information is inferred from the target - /// machine directly, indicating what header values and flags to set. - bool is64Bit, isLittleEndian; - - /// Relocations - These are the relocations that the function needs, as - /// emitted. - std::vector Relocations; - - /// CPLocations - This is a map of constant pool indices to offsets from the - /// start of the section for that constant pool index. - std::vector CPLocations; - - /// CPSections - This is a map of constant pool indices to the MachOSection - /// containing the constant pool entry for that index. - std::vector CPSections; - - /// JTLocations - This is a map of jump table indices to offsets from the - /// start of the section for that jump table index. - std::vector JTLocations; - - /// MBBLocations - This vector is a mapping from MBB ID's to their address. - /// It is filled in by the StartMachineBasicBlock callback and queried by - /// the getMachineBasicBlockAddress callback. 
- std::vector MBBLocations; - - public: - MachOCodeEmitter(MachOWriter &mow) : MOW(mow), TM(MOW.TM) { - is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; - isLittleEndian = TM.getTargetData()->isLittleEndian(); - } - - virtual void startFunction(MachineFunction &MF); - virtual bool finishFunction(MachineFunction &MF); - - virtual void addRelocation(const MachineRelocation &MR) { - Relocations.push_back(MR); - } - - void emitConstantPool(MachineConstantPool *MCP); - void emitJumpTables(MachineJumpTableInfo *MJTI); - - virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const { - assert(CPLocations.size() > Index && "CP not emitted!"); - return CPLocations[Index]; - } - virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const { - assert(JTLocations.size() > Index && "JT not emitted!"); - return JTLocations[Index]; - } - - virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) { - if (MBBLocations.size() <= (unsigned)MBB->getNumber()) - MBBLocations.resize((MBB->getNumber()+1)*2); - MBBLocations[MBB->getNumber()] = getCurrentPCOffset(); - } - - virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { - assert(MBBLocations.size() > (unsigned)MBB->getNumber() && - MBBLocations[MBB->getNumber()] && "MBB not emitted!"); - return MBBLocations[MBB->getNumber()]; - } - - virtual uintptr_t getLabelAddress(uint64_t Label) const { - assert(0 && "get Label not implemented"); - abort(); - return 0; - } - - virtual void emitLabel(uint64_t LabelID) { - assert(0 && "emit Label not implemented"); - abort(); - } - - - virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) { } - - /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE! 
- virtual void startGVStub(const GlobalValue* F, unsigned StubSize, - unsigned Alignment = 1) { - assert(0 && "JIT specific function called!"); - abort(); - } - virtual void startGVStub(const GlobalValue* F, void *Buffer, - unsigned StubSize) { - assert(0 && "JIT specific function called!"); - abort(); - } - virtual void *finishGVStub(const GlobalValue* F) { - assert(0 && "JIT specific function called!"); - abort(); - return 0; - } - }; -} - -/// startFunction - This callback is invoked when a new machine function is -/// about to be emitted. -void MachOCodeEmitter::startFunction(MachineFunction &MF) { - const TargetData *TD = TM.getTargetData(); - const Function *F = MF.getFunction(); - - // Align the output buffer to the appropriate alignment, power of 2. - unsigned FnAlign = F->getAlignment(); - unsigned TDAlign = TD->getPrefTypeAlignment(F->getType()); - unsigned Align = Log2_32(std::max(FnAlign, TDAlign)); - assert(!(Align & (Align-1)) && "Alignment is not a power of two!"); - - // Get the Mach-O Section that this function belongs in. - MachOWriter::MachOSection *MOS = MOW.getTextSection(); - - // FIXME: better memory management - MOS->SectionData.reserve(4096); - BufferBegin = &MOS->SectionData[0]; - BufferEnd = BufferBegin + MOS->SectionData.capacity(); - - // Upgrade the section alignment if required. - if (MOS->align < Align) MOS->align = Align; - - // Round the size up to the correct alignment for starting the new function. - if ((MOS->size & ((1 << Align) - 1)) != 0) { - MOS->size += (1 << Align); - MOS->size &= ~((1 << Align) - 1); - } - - // FIXME: Using MOS->size directly here instead of calculating it from the - // output buffer size (impossible because the code emitter deals only in raw - // bytes) forces us to manually synchronize size and write padding zero bytes - // to the output buffer for all non-text sections. For text sections, we do - // not synchonize the output buffer, and we just blow up if anyone tries to - // write non-code to it. 
An assert should probably be added to - // AddSymbolToSection to prevent calling it on the text section. - CurBufferPtr = BufferBegin + MOS->size; - - // Clear per-function data structures. - CPLocations.clear(); - CPSections.clear(); - JTLocations.clear(); - MBBLocations.clear(); -} - -/// finishFunction - This callback is invoked after the function is completely -/// finished. -bool MachOCodeEmitter::finishFunction(MachineFunction &MF) { - // Get the Mach-O Section that this function belongs in. - MachOWriter::MachOSection *MOS = MOW.getTextSection(); - - // Get a symbol for the function to add to the symbol table - // FIXME: it seems like we should call something like AddSymbolToSection - // in startFunction rather than changing the section size and symbol n_value - // here. - const GlobalValue *FuncV = MF.getFunction(); - MachOSym FnSym(FuncV, MOW.Mang->getValueName(FuncV), MOS->Index, TM); - FnSym.n_value = MOS->size; - MOS->size = CurBufferPtr - BufferBegin; - - // Emit constant pool to appropriate section(s) - emitConstantPool(MF.getConstantPool()); - - // Emit jump tables to appropriate section - emitJumpTables(MF.getJumpTableInfo()); - - // If we have emitted any relocations to function-specific objects such as - // basic blocks, constant pools entries, or jump tables, record their - // addresses now so that we can rewrite them with the correct addresses - // later. 
- for (unsigned i = 0, e = Relocations.size(); i != e; ++i) { - MachineRelocation &MR = Relocations[i]; - intptr_t Addr; - - if (MR.isBasicBlock()) { - Addr = getMachineBasicBlockAddress(MR.getBasicBlock()); - MR.setConstantVal(MOS->Index); - MR.setResultPointer((void*)Addr); - } else if (MR.isJumpTableIndex()) { - Addr = getJumpTableEntryAddress(MR.getJumpTableIndex()); - MR.setConstantVal(MOW.getJumpTableSection()->Index); - MR.setResultPointer((void*)Addr); - } else if (MR.isConstantPoolIndex()) { - Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex()); - MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]); - MR.setResultPointer((void*)Addr); - } else if (MR.isGlobalValue()) { - // FIXME: This should be a set or something that uniques - MOW.PendingGlobals.push_back(MR.getGlobalValue()); - } else { - assert(0 && "Unhandled relocation type"); - } - MOS->Relocations.push_back(MR); - } - Relocations.clear(); - - // Finally, add it to the symtab. - MOW.SymbolTable.push_back(FnSym); - return false; -} - -/// emitConstantPool - For each constant pool entry, figure out which section -/// the constant should live in, allocate space for it, and emit it to the -/// Section data buffer. -void MachOCodeEmitter::emitConstantPool(MachineConstantPool *MCP) { - const std::vector &CP = MCP->getConstants(); - if (CP.empty()) return; - - // FIXME: handle PIC codegen - assert(TM.getRelocationModel() != Reloc::PIC_ && - "PIC codegen not yet handled for mach-o jump tables!"); - - // Although there is no strict necessity that I am aware of, we will do what - // gcc for OS X does and put each constant pool entry in a section of constant - // objects of a certain size. That means that float constants go in the - // literal4 section, and double objects go in literal8, etc. - // - // FIXME: revisit this decision if we ever do the "stick everything into one - // "giant object for PIC" optimization. 
- for (unsigned i = 0, e = CP.size(); i != e; ++i) { - const Type *Ty = CP[i].getType(); - unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty); - - MachOWriter::MachOSection *Sec = MOW.getConstSection(CP[i].Val.ConstVal); - OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian); - - CPLocations.push_back(Sec->SectionData.size()); - CPSections.push_back(Sec->Index); - - // FIXME: remove when we have unified size + output buffer - Sec->size += Size; - - // Allocate space in the section for the global. - // FIXME: need alignment? - // FIXME: share between here and AddSymbolToSection? - for (unsigned j = 0; j < Size; ++j) - SecDataOut.outbyte(0); - - MOW.InitMem(CP[i].Val.ConstVal, &Sec->SectionData[0], CPLocations[i], - TM.getTargetData(), Sec->Relocations); - } -} - -/// emitJumpTables - Emit all the jump tables for a given jump table info -/// record to the appropriate section. -void MachOCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) { - const std::vector &JT = MJTI->getJumpTables(); - if (JT.empty()) return; - - // FIXME: handle PIC codegen - assert(TM.getRelocationModel() != Reloc::PIC_ && - "PIC codegen not yet handled for mach-o jump tables!"); - - MachOWriter::MachOSection *Sec = MOW.getJumpTableSection(); - unsigned TextSecIndex = MOW.getTextSection()->Index; - OutputBuffer SecDataOut(Sec->SectionData, is64Bit, isLittleEndian); - - for (unsigned i = 0, e = JT.size(); i != e; ++i) { - // For each jump table, record its offset from the start of the section, - // reserve space for the relocations to the MBBs, and add the relocations. 
- const std::vector &MBBs = JT[i].MBBs; - JTLocations.push_back(Sec->SectionData.size()); - for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) { - MachineRelocation MR(MOW.GetJTRelocation(Sec->SectionData.size(), - MBBs[mi])); - MR.setResultPointer((void *)JTLocations[i]); - MR.setConstantVal(TextSecIndex); - Sec->Relocations.push_back(MR); - SecDataOut.outaddr(0); - } - } - // FIXME: remove when we have unified size + output buffer - Sec->size = Sec->SectionData.size(); -} - //===----------------------------------------------------------------------===// // MachOWriter Implementation //===----------------------------------------------------------------------===// char MachOWriter::ID = 0; -MachOWriter::MachOWriter(raw_ostream &o, TargetMachine &tm) + +MachOWriter::MachOWriter(raw_ostream &o, TargetMachine &tm) : MachineFunctionPass(&ID), O(o), TM(tm) { is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; isLittleEndian = TM.getTargetData()->isLittleEndian(); + TAI = TM.getTargetAsmInfo(); + // Create the machine code emitter object for this target. + MCE = new MachOCodeEmitter(*this); } @@ -353,6 +76,55 @@ MachOWriter::~MachOWriter() { delete MCE; } +bool MachOWriter::doInitialization(Module &M) { + // Set the magic value, now that we know the pointer size and endianness + Header.setMagic(isLittleEndian, is64Bit); + + // Set the file type + // FIXME: this only works for object files, we do not support the creation + // of dynamic libraries or executables at this time. + Header.filetype = MachOHeader::MH_OBJECT; + + Mang = new Mangler(M); + return false; +} + +bool MachOWriter::runOnMachineFunction(MachineFunction &MF) { + return false; +} + +/// doFinalization - Now that the module has been completely processed, emit +/// the Mach-O file to 'O'. +bool MachOWriter::doFinalization(Module &M) { + // FIXME: we don't handle debug info yet, we should probably do that. 
+ + // Okay, the.text section has been completed, build the .data, .bss, and + // "common" sections next. + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + EmitGlobal(I); + + // Emit the header and load commands. + EmitHeaderAndLoadCommands(); + + // Emit the various sections and their relocation info. + EmitSections(); + EmitRelocations(); + + // Write the symbol table and the string table to the end of the file. + O.write((char*)&SymT[0], SymT.size()); + O.write((char*)&StrT[0], StrT.size()); + + // We are done with the abstract symbols. + SectionList.clear(); + SymbolTable.clear(); + DynamicSymbolTable.clear(); + + // Release the name mangler object. + delete Mang; Mang = 0; + return false; +} + void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) { const Type *Ty = GV->getType()->getElementType(); unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty); @@ -368,7 +140,7 @@ void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) { Align = Log2_32(Align); Sec->align = std::max(unsigned(Sec->align), Align); Sec->size = (Sec->size + Align - 1) & ~(Align-1); - + // Add alignment padding to buffer as well. // FIXME: remove when we have unified size + output buffer unsigned AlignedSize = Sec->size - OrigSize; @@ -377,7 +149,7 @@ void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) { } // Globals without external linkage apparently do not go in the symbol table. if (!GV->hasLocalLinkage()) { - MachOSym Sym(GV, Mang->getValueName(GV), Sec->Index, TM); + MachOSym Sym(GV, Mang->getValueName(GV), Sec->Index, TAI); Sym.n_value = Sec->size; SymbolTable.push_back(Sym); } @@ -385,14 +157,14 @@ void MachOWriter::AddSymbolToSection(MachOSection *Sec, GlobalVariable *GV) { // Record the offset of the symbol, and then allocate space for it. 
// FIXME: remove when we have unified size + output buffer Sec->size += Size; - - // Now that we know what section the GlovalVariable is going to be emitted + + // Now that we know what section the GlovalVariable is going to be emitted // into, update our mappings. // FIXME: We may also need to update this when outputting non-GlobalVariable // GlobalValues such as functions. GVSection[GV] = Sec; GVOffset[GV] = Sec->SectionData.size(); - + // Allocate space in the section for the global. for (unsigned i = 0; i < Size; ++i) SecDataOut.outbyte(0); @@ -402,7 +174,7 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) { const Type *Ty = GV->getType()->getElementType(); unsigned Size = TM.getTargetData()->getTypeAllocSize(Ty); bool NoInit = !GV->hasInitializer(); - + // If this global has a zero initializer, it is part of the .bss or common // section. if (NoInit || GV->getInitializer()->isNullValue()) { @@ -411,7 +183,8 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) { // merged with other symbols. if (NoInit || GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() || GV->hasCommonLinkage()) { - MachOSym ExtOrCommonSym(GV, Mang->getValueName(GV), MachOSym::NO_SECT,TM); + MachOSym ExtOrCommonSym(GV, Mang->getValueName(GV), + MachOSym::NO_SECT, TAI); // For undefined (N_UNDF) external (N_EXT) types, n_value is the size in // bytes of the symbol. ExtOrCommonSym.n_value = Size; @@ -425,11 +198,11 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) { AddSymbolToSection(BSS, GV); return; } - + // Scalar read-only data goes in a literal section if the scalar is 4, 8, or // 16 bytes, or a cstring. Other read only data goes into a regular const // section. Read-write data goes in the data section. - MachOSection *Sec = GV->isConstant() ? getConstSection(GV->getInitializer()) : + MachOSection *Sec = GV->isConstant() ? 
getConstSection(GV->getInitializer()) : getDataSection(); AddSymbolToSection(Sec, GV); InitMem(GV->getInitializer(), &Sec->SectionData[0], GVOffset[GV], @@ -437,73 +210,25 @@ void MachOWriter::EmitGlobal(GlobalVariable *GV) { } -bool MachOWriter::runOnMachineFunction(MachineFunction &MF) { - // Nothing to do here, this is all done through the MCE object. - return false; -} - -bool MachOWriter::doInitialization(Module &M) { - // Set the magic value, now that we know the pointer size and endianness - Header.setMagic(isLittleEndian, is64Bit); - - // Set the file type - // FIXME: this only works for object files, we do not support the creation - // of dynamic libraries or executables at this time. - Header.filetype = MachOHeader::MH_OBJECT; - - Mang = new Mangler(M); - return false; -} - -/// doFinalization - Now that the module has been completely processed, emit -/// the Mach-O file to 'O'. -bool MachOWriter::doFinalization(Module &M) { - // FIXME: we don't handle debug info yet, we should probably do that. - - // Okay, the.text section has been completed, build the .data, .bss, and - // "common" sections next. - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - EmitGlobal(I); - - // Emit the header and load commands. - EmitHeaderAndLoadCommands(); - - // Emit the various sections and their relocation info. - EmitSections(); - - // Write the symbol table and the string table to the end of the file. - O.write((char*)&SymT[0], SymT.size()); - O.write((char*)&StrT[0], StrT.size()); - - // We are done with the abstract symbols. - SectionList.clear(); - SymbolTable.clear(); - DynamicSymbolTable.clear(); - - // Release the name mangler object. 
- delete Mang; Mang = 0; - return false; -} void MachOWriter::EmitHeaderAndLoadCommands() { // Step #0: Fill in the segment load command size, since we need it to figure // out the rest of the header fields MachOSegment SEG("", is64Bit); SEG.nsects = SectionList.size(); - SEG.cmdsize = SEG.cmdSize(is64Bit) + + SEG.cmdsize = SEG.cmdSize(is64Bit) + SEG.nsects * SectionList[0]->cmdSize(is64Bit); - + // Step #1: calculate the number of load commands. We always have at least // one, for the LC_SEGMENT load command, plus two for the normal // and dynamic symbol tables, if there are any symbols. Header.ncmds = SymbolTable.empty() ? 1 : 3; - + // Step #2: calculate the size of the load commands Header.sizeofcmds = SEG.cmdsize; if (!SymbolTable.empty()) Header.sizeofcmds += SymTab.cmdsize + DySymTab.cmdsize; - + // Step #3: write the header to the file // Local alias to shortenify coming code. DataBuffer &FH = Header.HeaderData; @@ -518,7 +243,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() { FHOut.outword(Header.flags); if (is64Bit) FHOut.outword(Header.reserved); - + // Step #4: Finish filling in the segment load command and write it out for (std::vector::iterator I = SectionList.begin(), E = SectionList.end(); I != E; ++I) @@ -526,7 +251,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() { SEG.vmsize = SEG.filesize; SEG.fileoff = Header.cmdSize(is64Bit) + Header.sizeofcmds; - + FHOut.outword(SEG.cmd); FHOut.outword(SEG.cmdsize); FHOut.outstring(SEG.segname, 16); @@ -538,8 +263,8 @@ void MachOWriter::EmitHeaderAndLoadCommands() { FHOut.outword(SEG.initprot); FHOut.outword(SEG.nsects); FHOut.outword(SEG.flags); - - // Step #5: Finish filling in the fields of the MachOSections + + // Step #5: Finish filling in the fields of the MachOSections uint64_t currentAddr = 0; for (std::vector::iterator I = SectionList.begin(), E = SectionList.end(); I != E; ++I) { @@ -550,13 +275,13 @@ void MachOWriter::EmitHeaderAndLoadCommands() { // FIXME: do we need to do something with 
alignment here? currentAddr += MOS->size; } - + // Step #6: Emit the symbol table to temporary buffers, so that we know the // size of the string table when we write the next load command. This also // sorts and assigns indices to each of the symbols, which is necessary for // emitting relocations to externally-defined objects. BufferSymbolAndStringTable(); - + // Step #7: Calculate the number of relocations for each section and write out // the section commands for each section currentAddr += SEG.fileoff; @@ -568,7 +293,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() { CalculateRelocations(*MOS); MOS->reloff = MOS->nreloc ? currentAddr : 0; currentAddr += MOS->nreloc * 8; - + // write the finalized section command to the output buffer FHOut.outstring(MOS->sectname, 16); FHOut.outstring(MOS->segname, 16); @@ -584,7 +309,7 @@ void MachOWriter::EmitHeaderAndLoadCommands() { if (is64Bit) FHOut.outword(MOS->reserved3); } - + // Step #8: Emit LC_SYMTAB/LC_DYSYMTAB load commands SymTab.symoff = currentAddr; SymTab.nsyms = SymbolTable.size(); @@ -620,94 +345,92 @@ void MachOWriter::EmitHeaderAndLoadCommands() { FHOut.outword(DySymTab.nextrel); FHOut.outword(DySymTab.locreloff); FHOut.outword(DySymTab.nlocrel); - + O.write((char*)&FH[0], FH.size()); } /// EmitSections - Now that we have constructed the file header and load /// commands, emit the data for each section to the file. + void MachOWriter::EmitSections() { for (std::vector::iterator I = SectionList.begin(), E = SectionList.end(); I != E; ++I) // Emit the contents of each section O.write((char*)&(*I)->SectionData[0], (*I)->size); +} +void MachOWriter::EmitRelocations() { for (std::vector::iterator I = SectionList.begin(), E = SectionList.end(); I != E; ++I) // Emit the relocation entry data for each section. O.write((char*)&(*I)->RelocBuffer[0], (*I)->RelocBuffer.size()); } -/// PartitionByLocal - Simple boolean predicate that returns true if Sym is -/// a local symbol rather than an external symbol. 
-bool MachOWriter::PartitionByLocal(const MachOSym &Sym) { - return (Sym.n_type & (MachOSym::N_EXT | MachOSym::N_PEXT)) == 0; -} - -/// PartitionByDefined - Simple boolean predicate that returns true if Sym is -/// defined in this module. -bool MachOWriter::PartitionByDefined(const MachOSym &Sym) { - // FIXME: Do N_ABS or N_INDR count as defined? - return (Sym.n_type & MachOSym::N_SECT) == MachOSym::N_SECT; -} - /// BufferSymbolAndStringTable - Sort the symbols we encountered and assign them /// each a string table index so that they appear in the correct order in the /// output file. + void MachOWriter::BufferSymbolAndStringTable() { // The order of the symbol table is: // 1. local symbols // 2. defined external symbols (sorted by name) // 3. undefined external symbols (sorted by name) - + // Before sorting the symbols, check the PendingGlobals for any undefined // globals that need to be put in the symbol table. + for (std::vector::iterator I = PendingGlobals.begin(), E = PendingGlobals.end(); I != E; ++I) { if (GVOffset[*I] == 0 && GVSection[*I] == 0) { - MachOSym UndfSym(*I, Mang->getValueName(*I), MachOSym::NO_SECT, TM); + MachOSym UndfSym(*I, Mang->getValueName(*I), MachOSym::NO_SECT, TAI); SymbolTable.push_back(UndfSym); GVOffset[*I] = -1; } } - + // Sort the symbols by name, so that when we partition the symbols by scope // of definition, we won't have to sort by name within each partition. - std::sort(SymbolTable.begin(), SymbolTable.end(), MachOSymCmp()); - // Parition the symbol table entries so that all local symbols come before + std::sort(SymbolTable.begin(), SymbolTable.end(), MachOSym::SymCmp()); + + // Parition the symbol table entries so that all local symbols come before // all symbols with external linkage. 
{ 1 | 2 3 } - std::partition(SymbolTable.begin(), SymbolTable.end(), PartitionByLocal); - + + std::partition(SymbolTable.begin(), SymbolTable.end(), + MachOSym::PartitionByLocal); + // Advance iterator to beginning of external symbols and partition so that // all external symbols defined in this module come before all external // symbols defined elsewhere. { 1 | 2 | 3 } + for (std::vector::iterator I = SymbolTable.begin(), E = SymbolTable.end(); I != E; ++I) { - if (!PartitionByLocal(*I)) { - std::partition(I, E, PartitionByDefined); + if (!MachOSym::PartitionByLocal(*I)) { + std::partition(I, E, MachOSym::PartitionByDefined); break; } } - // Calculate the starting index for each of the local, extern defined, and + // Calculate the starting index for each of the local, extern defined, and // undefined symbols, as well as the number of each to put in the LC_DYSYMTAB // load command. + for (std::vector::iterator I = SymbolTable.begin(), E = SymbolTable.end(); I != E; ++I) { - if (PartitionByLocal(*I)) { + if (MachOSym::PartitionByLocal(*I)) { ++DySymTab.nlocalsym; ++DySymTab.iextdefsym; ++DySymTab.iundefsym; - } else if (PartitionByDefined(*I)) { + } else if (MachOSym::PartitionByDefined(*I)) { ++DySymTab.nextdefsym; ++DySymTab.iundefsym; } else { ++DySymTab.nundefsym; } } - + // Write out a leading zero byte when emitting string table, for n_strx == 0 // which means an empty string. + OutputBuffer StrTOut(StrT, is64Bit, isLittleEndian); StrTOut.outbyte(0); @@ -716,6 +439,7 @@ void MachOWriter::BufferSymbolAndStringTable() { // 2. strings for local symbols // Since this is the opposite order from the symbol table, which we have just // sorted, we can walk the symbol table backwards to output the string table. 
+ for (std::vector::reverse_iterator I = SymbolTable.rbegin(), E = SymbolTable.rend(); I != E; ++I) { if (I->GVName == "") { @@ -739,7 +463,7 @@ void MachOWriter::BufferSymbolAndStringTable() { I->n_value += GVSection[GV]->addr; if (GV && (GVOffset[GV] == -1)) GVOffset[GV] = index; - + // Emit nlist to buffer SymTOut.outword(I->n_strx); SymTOut.outbyte(I->n_type); @@ -754,6 +478,7 @@ void MachOWriter::BufferSymbolAndStringTable() { /// and the offset into that section. From this information, create the /// appropriate target-specific MachORelocation type and add buffer it to be /// written out after we are finished writing out sections. + void MachOWriter::CalculateRelocations(MachOSection &MOS) { for (unsigned i = 0, e = MOS.Relocations.size(); i != e; ++i) { MachineRelocation &MR = MOS.Relocations[i]; @@ -763,19 +488,22 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) { // This is a scattered relocation entry if it points to a global value with // a non-zero offset. + bool Scattered = false; bool Extern = false; // Since we may not have seen the GlobalValue we were interested in yet at // the time we emitted the relocation for it, fix it up now so that it // points to the offset into the correct section. + if (MR.isGlobalValue()) { GlobalValue *GV = MR.getGlobalValue(); MachOSection *MOSPtr = GVSection[GV]; intptr_t Offset = GVOffset[GV]; - + // If we have never seen the global before, it must be to a symbol // defined in another module (N_UNDF). + if (!MOSPtr) { // FIXME: need to append stub suffix Extern = true; @@ -787,9 +515,10 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) { } MR.setResultPointer((void*)Offset); } - + // If the symbol is locally defined, pass in the address of the section and // the section index to the code which will generate the target relocation. 
+ if (!Extern) { MachOSection &To = *SectionList[TargetSection - 1]; TargetAddr = To.addr; @@ -798,7 +527,7 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) { OutputBuffer RelocOut(MOS.RelocBuffer, is64Bit, isLittleEndian); OutputBuffer SecOut(MOS.SectionData, is64Bit, isLittleEndian); - + MOS.nreloc += GetTargetRelocation(MR, MOS.Index, TargetAddr, TargetIndex, RelocOut, SecOut, Scattered, Extern); } @@ -806,21 +535,22 @@ void MachOWriter::CalculateRelocations(MachOSection &MOS) { // InitMem - Write the value of a Constant to the specified memory location, // converting it into bytes and relocations. + void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, - const TargetData *TD, + const TargetData *TD, std::vector &MRs) { typedef std::pair CPair; std::vector WorkList; - + WorkList.push_back(CPair(C,(intptr_t)Addr + Offset)); - + intptr_t ScatteredOffset = 0; - + while (!WorkList.empty()) { const Constant *PC = WorkList.back().first; intptr_t PA = WorkList.back().second; WorkList.pop_back(); - + if (isa(PC)) { continue; } else if (const ConstantVector *CP = dyn_cast(PC)) { @@ -847,7 +577,7 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, break; } } else if (PC->getType()->isSingleValueType()) { - uint8_t *ptr = (uint8_t *)PA; + unsigned char *ptr = (unsigned char *)PA; switch (PC->getType()->getTypeID()) { case Type::IntegerTyID: { unsigned NumBits = cast(PC->getType())->getBitWidth(); @@ -945,13 +675,15 @@ void MachOWriter::InitMem(const Constant *C, void *Addr, intptr_t Offset, } } +//===----------------------------------------------------------------------===// +// MachOSym Implementation +//===----------------------------------------------------------------------===// + MachOSym::MachOSym(const GlobalValue *gv, std::string name, uint8_t sect, - TargetMachine &TM) : + const TargetAsmInfo *TAI) : GV(gv), n_strx(0), n_type(sect == NO_SECT ? 
N_UNDF : N_SECT), n_sect(sect), n_desc(0), n_value(0) { - const TargetAsmInfo *TAI = TM.getTargetAsmInfo(); - switch (GV->getLinkage()) { default: assert(0 && "Unexpected linkage type!"); @@ -974,3 +706,6 @@ MachOSym::MachOSym(const GlobalValue *gv, std::string name, uint8_t sect, break; } } + +} // end namespace llvm + diff --git a/lib/CodeGen/MachOWriter.h b/lib/CodeGen/MachOWriter.h index 6ab66eee926..3af2b0af4b7 100644 --- a/lib/CodeGen/MachOWriter.h +++ b/lib/CodeGen/MachOWriter.h @@ -14,10 +14,8 @@ #ifndef MACHOWRITER_H #define MACHOWRITER_H -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" +#include "MachO.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineRelocation.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetMachOWriterInfo.h" @@ -31,53 +29,6 @@ namespace llvm { class OutputBuffer; class raw_ostream; - /// MachOSym - This struct contains information about each symbol that is - /// added to logical symbol table for the module. This is eventually - /// turned into a real symbol table in the file. - struct MachOSym { - const GlobalValue *GV; // The global value this corresponds to. - std::string GVName; // The mangled name of the global value. - uint32_t n_strx; // index into the string table - uint8_t n_type; // type flag - uint8_t n_sect; // section number or NO_SECT - int16_t n_desc; // see - uint64_t n_value; // value for this symbol (or stab offset) - - // Constants for the n_sect field - // see - enum { NO_SECT = 0 }; // symbol is not in any section - - // Constants for the n_type field - // see - enum { N_UNDF = 0x0, // undefined, n_sect == NO_SECT - N_ABS = 0x2, // absolute, n_sect == NO_SECT - N_SECT = 0xe, // defined in section number n_sect - N_PBUD = 0xc, // prebound undefined (defined in a dylib) - N_INDR = 0xa // indirect - }; - // The following bits are OR'd into the types above. 
For example, a type - // of 0x0f would be an external N_SECT symbol (0x0e | 0x01). - enum { N_EXT = 0x01, // external symbol bit - N_PEXT = 0x10 // private external symbol bit - }; - - // Constants for the n_desc field - // see - enum { REFERENCE_FLAG_UNDEFINED_NON_LAZY = 0, - REFERENCE_FLAG_UNDEFINED_LAZY = 1, - REFERENCE_FLAG_DEFINED = 2, - REFERENCE_FLAG_PRIVATE_DEFINED = 3, - REFERENCE_FLAG_PRIVATE_UNDEFINED_NON_LAZY = 4, - REFERENCE_FLAG_PRIVATE_UNDEFINED_LAZY = 5 - }; - enum { N_NO_DEAD_STRIP = 0x0020, // symbol is not to be dead stripped - N_WEAK_REF = 0x0040, // symbol is weak referenced - N_WEAK_DEF = 0x0080 // coalesced symbol is a weak definition - }; - - MachOSym(const GlobalValue *gv, std::string name, uint8_t sect, - TargetMachine &TM); - }; /// MachOWriter - This class implements the common target-independent code for /// writing Mach-O files. Targets should derive a class from this to @@ -98,7 +49,6 @@ namespace llvm { return "Mach-O Writer"; } - typedef std::vector DataBuffer; protected: /// Output stream to send the resultant object file to. /// @@ -114,326 +64,61 @@ namespace llvm { /// MCE - The MachineCodeEmitter object that we are exposing to emit machine /// code for functions to the .o file. + MachOCodeEmitter *MCE; /// is64Bit/isLittleEndian - This information is inferred from the target /// machine directly, indicating what header values and flags to set. + bool is64Bit, isLittleEndian; + // Target Asm Info + + const TargetAsmInfo *TAI; + + /// Header - An instance of MachOHeader that we will update while we build + /// the file, and then emit during finalization. + + MachOHeader Header; + /// doInitialization - Emit the file header and all of the global variables /// for the module to the Mach-O file. + bool doInitialization(Module &M); bool runOnMachineFunction(MachineFunction &MF); /// doFinalization - Now that the module has been completely processed, emit /// the Mach-O file to 'O'. 
+ bool doFinalization(Module &M); - /// MachOHeader - This struct contains the header information about a - /// specific architecture type/subtype pair that is emitted to the file. - struct MachOHeader { - uint32_t magic; // mach magic number identifier - uint32_t filetype; // type of file - uint32_t ncmds; // number of load commands - uint32_t sizeofcmds; // the size of all the load commands - uint32_t flags; // flags - uint32_t reserved; // 64-bit only - - /// HeaderData - The actual data for the header which we are building - /// up for emission to the file. - DataBuffer HeaderData; - - // Constants for the filetype field - // see for additional info on the various types - enum { MH_OBJECT = 1, // relocatable object file - MH_EXECUTE = 2, // demand paged executable file - MH_FVMLIB = 3, // fixed VM shared library file - MH_CORE = 4, // core file - MH_PRELOAD = 5, // preloaded executable file - MH_DYLIB = 6, // dynamically bound shared library - MH_DYLINKER = 7, // dynamic link editor - MH_BUNDLE = 8, // dynamically bound bundle file - MH_DYLIB_STUB = 9, // shared library stub for static linking only - MH_DSYM = 10 // companion file wiht only debug sections - }; - - // Constants for the flags field - enum { MH_NOUNDEFS = 1 << 0, - // the object file has no undefined references - MH_INCRLINK = 1 << 1, - // the object file is the output of an incremental link against - // a base file and cannot be link edited again - MH_DYLDLINK = 1 << 2, - // the object file is input for the dynamic linker and cannot be - // statically link edited again. - MH_BINDATLOAD = 1 << 3, - // the object file's undefined references are bound by the - // dynamic linker when loaded. 
- MH_PREBOUND = 1 << 4, - // the file has its dynamic undefined references prebound - MH_SPLIT_SEGS = 1 << 5, - // the file has its read-only and read-write segments split - // see - MH_LAZY_INIT = 1 << 6, - // the shared library init routine is to be run lazily via - // catching memory faults to its writable segments (obsolete) - MH_TWOLEVEL = 1 << 7, - // the image is using two-level namespace bindings - MH_FORCE_FLAT = 1 << 8, - // the executable is forcing all images to use flat namespace - // bindings. - MH_NOMULTIDEFS = 1 << 8, - // this umbrella guarantees no multiple definitions of symbols - // in its sub-images so the two-level namespace hints can - // always be used. - MH_NOFIXPREBINDING = 1 << 10, - // do not have dyld notify the prebidning agent about this - // executable. - MH_PREBINDABLE = 1 << 11, - // the binary is not prebound but can have its prebinding - // redone. only used when MH_PREBOUND is not set. - MH_ALLMODSBOUND = 1 << 12, - // indicates that this binary binds to all two-level namespace - // modules of its dependent libraries. Only used when - // MH_PREBINDABLE and MH_TWOLEVEL are both set. - MH_SUBSECTIONS_VIA_SYMBOLS = 1 << 13, - // safe to divide up the sections into sub-sections via symbols - // for dead code stripping. - MH_CANONICAL = 1 << 14, - // the binary has been canonicalized via the unprebind operation - MH_WEAK_DEFINES = 1 << 15, - // the final linked image contains external weak symbols - MH_BINDS_TO_WEAK = 1 << 16, - // the final linked image uses weak symbols - MH_ALLOW_STACK_EXECUTION = 1 << 17 - // When this bit is set, all stacks in the task will be given - // stack execution privilege. Only used in MH_EXECUTE filetype - }; - - MachOHeader() : magic(0), filetype(0), ncmds(0), sizeofcmds(0), flags(0), - reserved(0) { } - - /// cmdSize - This routine returns the size of the MachOSection as written - /// to disk, depending on whether the destination is a 64 bit Mach-O file. 
- unsigned cmdSize(bool is64Bit) const { - if (is64Bit) - return 8 * sizeof(uint32_t); - else - return 7 * sizeof(uint32_t); - } - - /// setMagic - This routine sets the appropriate value for the 'magic' - /// field based on pointer size and endianness. - void setMagic(bool isLittleEndian, bool is64Bit) { - if (isLittleEndian) - if (is64Bit) magic = 0xcffaedfe; - else magic = 0xcefaedfe; - else - if (is64Bit) magic = 0xfeedfacf; - else magic = 0xfeedface; - } - }; - - /// Header - An instance of MachOHeader that we will update while we build - /// the file, and then emit during finalization. - MachOHeader Header; - - /// MachOSegment - This struct contains the necessary information to - /// emit the load commands for each section in the file. - struct MachOSegment { - uint32_t cmd; // LC_SEGMENT or LC_SEGMENT_64 - uint32_t cmdsize; // Total size of this struct and section commands - std::string segname; // segment name - uint64_t vmaddr; // address of this segment - uint64_t vmsize; // size of this segment, may be larger than filesize - uint64_t fileoff; // offset in file - uint64_t filesize; // amount to read from file - uint32_t maxprot; // maximum VM protection - uint32_t initprot; // initial VM protection - uint32_t nsects; // number of sections in this segment - uint32_t flags; // flags - - // The following constants are getting pulled in by one of the - // system headers, which creates a neat clash with the enum. 
-#if !defined(VM_PROT_NONE) -#define VM_PROT_NONE 0x00 -#endif -#if !defined(VM_PROT_READ) -#define VM_PROT_READ 0x01 -#endif -#if !defined(VM_PROT_WRITE) -#define VM_PROT_WRITE 0x02 -#endif -#if !defined(VM_PROT_EXECUTE) -#define VM_PROT_EXECUTE 0x04 -#endif -#if !defined(VM_PROT_ALL) -#define VM_PROT_ALL 0x07 -#endif - - // Constants for the vm protection fields - // see - enum { SEG_VM_PROT_NONE = VM_PROT_NONE, - SEG_VM_PROT_READ = VM_PROT_READ, // read permission - SEG_VM_PROT_WRITE = VM_PROT_WRITE, // write permission - SEG_VM_PROT_EXECUTE = VM_PROT_EXECUTE, - SEG_VM_PROT_ALL = VM_PROT_ALL - }; - - // Constants for the cmd field - // see - enum { LC_SEGMENT = 0x01, // segment of this file to be mapped - LC_SEGMENT_64 = 0x19 // 64-bit segment of this file to be mapped - }; - - /// cmdSize - This routine returns the size of the MachOSection as written - /// to disk, depending on whether the destination is a 64 bit Mach-O file. - unsigned cmdSize(bool is64Bit) const { - if (is64Bit) - return 6 * sizeof(uint32_t) + 4 * sizeof(uint64_t) + 16; - else - return 10 * sizeof(uint32_t) + 16; // addresses only 32 bits - } - - MachOSegment(const std::string &seg, bool is64Bit) - : cmd(is64Bit ? LC_SEGMENT_64 : LC_SEGMENT), cmdsize(0), segname(seg), - vmaddr(0), vmsize(0), fileoff(0), filesize(0), maxprot(VM_PROT_ALL), - initprot(VM_PROT_ALL), nsects(0), flags(0) { } - }; - - /// MachOSection - This struct contains information about each section in a - /// particular segment that is emitted to the file. This is eventually - /// turned into the SectionCommand in the load command for a particlar - /// segment. 
- struct MachOSection { - std::string sectname; // name of this section, - std::string segname; // segment this section goes in - uint64_t addr; // memory address of this section - uint64_t size; // size in bytes of this section - uint32_t offset; // file offset of this section - uint32_t align; // section alignment (power of 2) - uint32_t reloff; // file offset of relocation entries - uint32_t nreloc; // number of relocation entries - uint32_t flags; // flags (section type and attributes) - uint32_t reserved1; // reserved (for offset or index) - uint32_t reserved2; // reserved (for count or sizeof) - uint32_t reserved3; // reserved (64 bit only) - - /// A unique number for this section, which will be used to match symbols - /// to the correct section. - uint32_t Index; - - /// SectionData - The actual data for this section which we are building - /// up for emission to the file. - DataBuffer SectionData; - - /// RelocBuffer - A buffer to hold the mach-o relocations before we write - /// them out at the appropriate location in the file. - DataBuffer RelocBuffer; - - /// Relocations - The relocations that we have encountered so far in this - /// section that we will need to convert to MachORelocation entries when - /// the file is written. 
- std::vector Relocations; - - // Constants for the section types (low 8 bits of flags field) - // see - enum { S_REGULAR = 0, - // regular section - S_ZEROFILL = 1, - // zero fill on demand section - S_CSTRING_LITERALS = 2, - // section with only literal C strings - S_4BYTE_LITERALS = 3, - // section with only 4 byte literals - S_8BYTE_LITERALS = 4, - // section with only 8 byte literals - S_LITERAL_POINTERS = 5, - // section with only pointers to literals - S_NON_LAZY_SYMBOL_POINTERS = 6, - // section with only non-lazy symbol pointers - S_LAZY_SYMBOL_POINTERS = 7, - // section with only lazy symbol pointers - S_SYMBOL_STUBS = 8, - // section with only symbol stubs - // byte size of stub in the reserved2 field - S_MOD_INIT_FUNC_POINTERS = 9, - // section with only function pointers for initialization - S_MOD_TERM_FUNC_POINTERS = 10, - // section with only function pointers for termination - S_COALESCED = 11, - // section contains symbols that are coalesced - S_GB_ZEROFILL = 12, - // zero fill on demand section (that can be larger than 4GB) - S_INTERPOSING = 13, - // section with only pairs of function pointers for interposing - S_16BYTE_LITERALS = 14 - // section with only 16 byte literals - }; - - // Constants for the section flags (high 24 bits of flags field) - // see - enum { S_ATTR_PURE_INSTRUCTIONS = 1 << 31, - // section contains only true machine instructions - S_ATTR_NO_TOC = 1 << 30, - // section contains coalesced symbols that are not to be in a - // ranlib table of contents - S_ATTR_STRIP_STATIC_SYMS = 1 << 29, - // ok to strip static symbols in this section in files with the - // MY_DYLDLINK flag - S_ATTR_NO_DEAD_STRIP = 1 << 28, - // no dead stripping - S_ATTR_LIVE_SUPPORT = 1 << 27, - // blocks are live if they reference live blocks - S_ATTR_SELF_MODIFYING_CODE = 1 << 26, - // used with i386 code stubs written on by dyld - S_ATTR_DEBUG = 1 << 25, - // a debug section - S_ATTR_SOME_INSTRUCTIONS = 1 << 10, - // section contains some machine 
instructions - S_ATTR_EXT_RELOC = 1 << 9, - // section has external relocation entries - S_ATTR_LOC_RELOC = 1 << 8 - // section has local relocation entries - }; - - /// cmdSize - This routine returns the size of the MachOSection as written - /// to disk, depending on whether the destination is a 64 bit Mach-O file. - unsigned cmdSize(bool is64Bit) const { - if (is64Bit) - return 7 * sizeof(uint32_t) + 2 * sizeof(uint64_t) + 32; - else - return 9 * sizeof(uint32_t) + 32; // addresses only 32 bits - } - - MachOSection(const std::string &seg, const std::string &sect) - : sectname(sect), segname(seg), addr(0), size(0), offset(0), align(2), - reloff(0), nreloc(0), flags(0), reserved1(0), reserved2(0), - reserved3(0) { } - }; - private: /// SectionList - This is the list of sections that we have emitted to the /// file. Once the file has been completely built, the segment load command /// SectionCommands are constructed from this info. + std::vector SectionList; /// SectionLookup - This is a mapping from section name to SectionList entry + std::map<std::string, MachOSection*> SectionLookup; /// GVSection - This is a mapping from a GlobalValue to a MachOSection, /// to aid in emitting relocations. + std::map GVSection; /// GVOffset - This is a mapping from a GlobalValue to an offset from the /// start of the section in which the GV resides, to aid in emitting /// relocations. + std::map GVOffset; /// getSection - Return the section with the specified name, creating a new /// section if one does not already exist. + MachOSection *getSection(const std::string &seg, const std::string &sect, unsigned Flags = 0) { MachOSection *MOS = SectionLookup[seg+sect]; @@ -511,63 +196,11 @@ namespace llvm { nsyms(0), stroff(0), strsize(0) { } }; - /// MachOSymTab - This struct contains information about the offsets and - /// size of symbol table information. - /// segment.
- struct MachODySymTab { - uint32_t cmd; // LC_DYSYMTAB - uint32_t cmdsize; // sizeof( MachODySymTab ) - uint32_t ilocalsym; // index to local symbols - uint32_t nlocalsym; // number of local symbols - uint32_t iextdefsym; // index to externally defined symbols - uint32_t nextdefsym; // number of externally defined symbols - uint32_t iundefsym; // index to undefined symbols - uint32_t nundefsym; // number of undefined symbols - uint32_t tocoff; // file offset to table of contents - uint32_t ntoc; // number of entries in table of contents - uint32_t modtaboff; // file offset to module table - uint32_t nmodtab; // number of module table entries - uint32_t extrefsymoff; // offset to referenced symbol table - uint32_t nextrefsyms; // number of referenced symbol table entries - uint32_t indirectsymoff; // file offset to the indirect symbol table - uint32_t nindirectsyms; // number of indirect symbol table entries - uint32_t extreloff; // offset to external relocation entries - uint32_t nextrel; // number of external relocation entries - uint32_t locreloff; // offset to local relocation entries - uint32_t nlocrel; // number of local relocation entries - - // Constants for the cmd field - // see - enum { LC_DYSYMTAB = 0x0B // dynamic link-edit symbol table info - }; - - MachODySymTab() : cmd(LC_DYSYMTAB), cmdsize(20 * sizeof(uint32_t)), - ilocalsym(0), nlocalsym(0), iextdefsym(0), nextdefsym(0), - iundefsym(0), nundefsym(0), tocoff(0), ntoc(0), modtaboff(0), - nmodtab(0), extrefsymoff(0), nextrefsyms(0), indirectsymoff(0), - nindirectsyms(0), extreloff(0), nextrel(0), locreloff(0), nlocrel(0) { } - }; - /// SymTab - The "stab" style symbol table information MachOSymTab SymTab; /// DySymTab - symbol table info for the dynamic link editor MachODySymTab DySymTab; - struct MachOSymCmp { - // FIXME: this does not appear to be sorting 'f' after 'F' - bool operator()(const MachOSym &LHS, const MachOSym &RHS) { - return LHS.GVName < RHS.GVName; - } - }; - - /// PartitionByLocal - 
Simple boolean predicate that returns true if Sym is - /// a local symbol rather than an external symbol. - static bool PartitionByLocal(const MachOSym &Sym); - - /// PartitionByDefined - Simple boolean predicate that returns true if Sym - /// is defined in this module. - static bool PartitionByDefined(const MachOSym &Sym); - protected: /// SymbolTable - This is the list of symbols we have emitted to the file. @@ -601,6 +234,7 @@ namespace llvm { void EmitGlobal(GlobalVariable *GV); void EmitHeaderAndLoadCommands(); void EmitSections(); + void EmitRelocations(); void BufferSymbolAndStringTable(); void CalculateRelocations(MachOSection &MOS);