//===-- MachOWriter.cpp - Target-independent Mach-O Writer code -----------===// // // The LLVM Compiler Infrastructure // // This file was developed by Nate Begeman and is distributed under the // University of Illinois Open Source License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the target-independent Mach-O writer. This file writes // out the Mach-O file in the following order: // // #1 FatHeader (universal-only) // #2 FatArch (universal-only, 1 per universal arch) // Per arch: // #3 Header // #4 Load Commands // #5 Sections // #6 Relocations // #7 Symbols // #8 Strings // //===----------------------------------------------------------------------===// #include "llvm/Module.h" #include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineRelocation.h" #include "llvm/CodeGen/MachOWriter.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetJITInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Mangler.h" #include using namespace llvm; //===----------------------------------------------------------------------===// // MachOCodeEmitter Implementation //===----------------------------------------------------------------------===// namespace llvm { /// MachOCodeEmitter - This class is used by the MachOWriter to emit the code /// for functions to the Mach-O file. class MachOCodeEmitter : public MachineCodeEmitter { MachOWriter &MOW; /// MOS - The current section we're writing to MachOWriter::MachOSection *MOS; /// Relocations - These are the relocations that the function needs, as /// emitted. std::vector Relocations; /// MBBLocations - This vector is a mapping from MBB ID's to their address. /// It is filled in by the StartMachineBasicBlock callback and queried by /// the getMachineBasicBlockAddress callback. std::vector MBBLocations; public: MachOCodeEmitter(MachOWriter &mow) : MOW(mow) {} void startFunction(MachineFunction &F); bool finishFunction(MachineFunction &F); void addRelocation(const MachineRelocation &MR) { Relocations.push_back(MR); } virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) { if (MBBLocations.size() <= (unsigned)MBB->getNumber()) MBBLocations.resize((MBB->getNumber()+1)*2); MBBLocations[MBB->getNumber()] = getCurrentPCValue(); } virtual intptr_t getConstantPoolEntryAddress(unsigned Index) const { assert(0 && "CP not implementated yet!"); return 0; } virtual intptr_t getJumpTableEntryAddress(unsigned Index) const { assert(0 && "JT not implementated yet!"); return 0; } virtual intptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { assert(MBBLocations.size() > (unsigned)MBB->getNumber() && MBBLocations[MBB->getNumber()] && "MBB not emitted!"); return MBBLocations[MBB->getNumber()]; } /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE! void startFunctionStub(unsigned StubSize) { assert(0 && "JIT specific function called!"); abort(); } void *finishFunctionStub(const Function *F) { assert(0 && "JIT specific function called!"); abort(); return 0; } }; } /// startFunction - This callback is invoked when a new machine function is /// about to be emitted. void MachOCodeEmitter::startFunction(MachineFunction &F) { // Align the output buffer to the appropriate alignment, power of 2. // FIXME: GENERICIZE!! unsigned Align = 4; // Get the Mach-O Section that this function belongs in. MOS = &MOW.getTextSection(); // FIXME: better memory management MOS->SectionData.reserve(4096); BufferBegin = &(MOS->SectionData[0]); BufferEnd = BufferBegin + MOS->SectionData.capacity(); CurBufferPtr = BufferBegin + MOS->size; // Upgrade the section alignment if required. if (MOS->align < Align) MOS->align = Align; // Make sure we only relocate to this function's MBBs. MBBLocations.clear(); } /// finishFunction - This callback is invoked after the function is completely /// finished. bool MachOCodeEmitter::finishFunction(MachineFunction &F) { MOS->size += CurBufferPtr - BufferBegin; // Get a symbol for the function to add to the symbol table MachOWriter::MachOSym FnSym(F.getFunction(), MOS->Index); // Figure out the binding (linkage) of the symbol. switch (F.getFunction()->getLinkage()) { default: // appending linkage is illegal for functions. assert(0 && "Unknown linkage type!"); case GlobalValue::ExternalLinkage: FnSym.n_type = MachOWriter::MachOSym::N_SECT | MachOWriter::MachOSym::N_EXT; break; case GlobalValue::InternalLinkage: FnSym.n_type = MachOWriter::MachOSym::N_SECT; break; } // Resolve the function's relocations either to concrete pointers in the case // of branches from one block to another, or to target relocation entries. for (unsigned i = 0, e = Relocations.size(); i != e; ++i) { MachineRelocation &MR = Relocations[i]; if (MR.isBasicBlock()) { void *MBBAddr = (void *)getMachineBasicBlockAddress(MR.getBasicBlock()); MR.setResultPointer(MBBAddr); MOW.TM.getJITInfo()->relocate(BufferBegin, &MR, 1, 0); // FIXME: we basically want the JITInfo relocate() function to rewrite // this guy right now, so we just write the correct displacement // to the file. } else { // isString | isGV | isCPI | isJTI // FIXME: do something smart here. We won't be able to relocate these // until the sections are all layed out, but we still need to // record them. Maybe emit TargetRelocations and then resolve // those at file writing time? std::cerr << "whee!\n"; } } Relocations.clear(); // Finally, add it to the symtab. MOW.SymbolTable.push_back(FnSym); return false; } //===----------------------------------------------------------------------===// // MachOWriter Implementation //===----------------------------------------------------------------------===// MachOWriter::MachOWriter(std::ostream &o, TargetMachine &tm) : O(o), TM(tm) { // FIXME: set cpu type and cpu subtype somehow from TM is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; isLittleEndian = TM.getTargetData()->isLittleEndian(); // Create the machine code emitter object for this target. MCE = new MachOCodeEmitter(*this); } MachOWriter::~MachOWriter() { delete MCE; } void MachOWriter::EmitGlobal(GlobalVariable *GV) { // FIXME: do something smart here. } bool MachOWriter::runOnMachineFunction(MachineFunction &MF) { // Nothing to do here, this is all done through the MCE object. return false; } bool MachOWriter::doInitialization(Module &M) { // Set the magic value, now that we know the pointer size and endianness Header.setMagic(isLittleEndian, is64Bit); // Set the file type // FIXME: this only works for object files, we do not support the creation // of dynamic libraries or executables at this time. Header.filetype = MachOHeader::MH_OBJECT; Mang = new Mangler(M); return false; } /// doFinalization - Now that the module has been completely processed, emit /// the Mach-O file to 'O'. bool MachOWriter::doFinalization(Module &M) { // Okay, the.text section has been completed, build the .data, .bss, and // "common" sections next. for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) EmitGlobal(I); // Emit the header and load commands. EmitHeaderAndLoadCommands(); // Emit the text and data sections. EmitSections(); // Emit the relocation entry data for each section. // FIXME: presumably this should be a virtual method, since different targets // have different relocation types. EmitRelocations(); // Emit the symbol table. // FIXME: we don't handle debug info yet, we should probably do that. EmitSymbolTable(); // Emit the string table for the sections we have. EmitStringTable(); // We are done with the abstract symbols. SectionList.clear(); SymbolTable.clear(); DynamicSymbolTable.clear(); // Release the name mangler object. delete Mang; Mang = 0; return false; } void MachOWriter::EmitHeaderAndLoadCommands() { // Step #0: Fill in the segment load command size, since we need it to figure // out the rest of the header fields MachOSegment SEG("", is64Bit); SEG.nsects = SectionList.size(); SEG.cmdsize = SEG.cmdSize(is64Bit) + SEG.nsects * SectionList.begin()->cmdSize(is64Bit); // Step #1: calculate the number of load commands. We always have at least // one, for the LC_SEGMENT load command, plus two for the normal // and dynamic symbol tables, if there are any symbols. Header.ncmds = SymbolTable.empty() ? 1 : 3; // Step #2: calculate the size of the load commands Header.sizeofcmds = SEG.cmdsize; if (!SymbolTable.empty()) Header.sizeofcmds += SymTab.cmdsize + DySymTab.cmdsize; // Step #3: write the header to the file // Local alias to shortenify coming code. DataBuffer &FH = Header.HeaderData; outword(FH, Header.magic); outword(FH, Header.cputype); outword(FH, Header.cpusubtype); outword(FH, Header.filetype); outword(FH, Header.ncmds); outword(FH, Header.sizeofcmds); outword(FH, Header.flags); if (is64Bit) outword(FH, Header.reserved); // Step #4: Finish filling in the segment load command and write it out for (std::list::iterator I = SectionList.begin(), E = SectionList.end(); I != E; ++I) SEG.filesize += I->size; SEG.vmsize = SEG.filesize; SEG.fileoff = Header.cmdSize(is64Bit) + Header.sizeofcmds; outword(FH, SEG.cmd); outword(FH, SEG.cmdsize); outstring(FH, SEG.segname, 16); outaddr(FH, SEG.vmaddr); outaddr(FH, SEG.vmsize); outaddr(FH, SEG.fileoff); outaddr(FH, SEG.filesize); outword(FH, SEG.maxprot); outword(FH, SEG.initprot); outword(FH, SEG.nsects); outword(FH, SEG.flags); // Step #5: Write out the section commands for each section for (std::list::iterator I = SectionList.begin(), E = SectionList.end(); I != E; ++I) { I->offset = SEG.fileoff; // FIXME: separate offset outstring(FH, I->sectname, 16); outstring(FH, I->segname, 16); outaddr(FH, I->addr); outaddr(FH, I->size); outword(FH, I->offset); outword(FH, I->align); outword(FH, I->reloff); outword(FH, I->nreloc); outword(FH, I->flags); outword(FH, I->reserved1); outword(FH, I->reserved2); if (is64Bit) outword(FH, I->reserved3); } // Step #6: Emit LC_SYMTAB/LC_DYSYMTAB load commands // FIXME: We'll need to scan over the symbol table and possibly do the sort // here so that we can set the proper indices in the dysymtab load command for // the index and number of external symbols defined in this module. // FIXME: We'll also need to scan over all the symbols so that we can // calculate the size of the string table. // FIXME: add size of relocs SymTab.symoff = SEG.fileoff + SEG.filesize; SymTab.nsyms = SymbolTable.size(); SymTab.stroff = SymTab.symoff + SymTab.nsyms * MachOSym::entrySize(); SymTab.strsize = 10; outword(FH, SymTab.cmd); outword(FH, SymTab.cmdsize); outword(FH, SymTab.symoff); outword(FH, SymTab.nsyms); outword(FH, SymTab.stroff); outword(FH, SymTab.strsize); // FIXME: set DySymTab fields appropriately outword(FH, DySymTab.cmd); outword(FH, DySymTab.cmdsize); outword(FH, DySymTab.ilocalsym); outword(FH, DySymTab.nlocalsym); outword(FH, DySymTab.iextdefsym); outword(FH, DySymTab.nextdefsym); outword(FH, DySymTab.iundefsym); outword(FH, DySymTab.nundefsym); outword(FH, DySymTab.tocoff); outword(FH, DySymTab.ntoc); outword(FH, DySymTab.modtaboff); outword(FH, DySymTab.nmodtab); outword(FH, DySymTab.extrefsymoff); outword(FH, DySymTab.nextrefsyms); outword(FH, DySymTab.indirectsymoff); outword(FH, DySymTab.nindirectsyms); outword(FH, DySymTab.extreloff); outword(FH, DySymTab.nextrel); outword(FH, DySymTab.locreloff); outword(FH, DySymTab.nlocrel); O.write((char*)&FH[0], FH.size()); } /// EmitSections - Now that we have constructed the file header and load /// commands, emit the data for each section to the file. void MachOWriter::EmitSections() { for (std::list::iterator I = SectionList.begin(), E = SectionList.end(); I != E; ++I) { O.write((char*)&I->SectionData[0], I->size); } } void MachOWriter::EmitRelocations() { // FIXME: this should probably be a pure virtual function, since the // relocation types and layout of the relocations themselves are target // specific. } /// EmitSymbolTable - Sort the symbols we encountered and assign them each a /// string table index so that they appear in the correct order in the output /// file. void MachOWriter::EmitSymbolTable() { // The order of the symbol table is: // local symbols // defined external symbols (sorted by name) // undefined external symbols (sorted by name) DataBuffer ST; // FIXME: enforce the above ordering, presumably by sorting by name, // then partitioning twice. unsigned stringIndex; for (std::vector::iterator I = SymbolTable.begin(), E = SymbolTable.end(); I != E; ++I) { // FIXME: remove when we actually calculate these correctly I->n_strx = 1; StringTable.push_back(Mang->getValueName(I->GV)); // Emit nlist to buffer outword(ST, I->n_strx); outbyte(ST, I->n_type); outbyte(ST, I->n_sect); outhalf(ST, I->n_desc); outaddr(ST, I->n_value); } O.write((char*)&ST[0], ST.size()); } /// EmitStringTable - This method adds and emits a section for the Mach-O /// string table. void MachOWriter::EmitStringTable() { // The order of the string table is: // strings for external symbols // strings for local symbols // This is the symbol table, but backwards. This allows us to avoid a sorting // the symbol table again; all we have to do is use a reverse iterator. DataBuffer ST; // Write out a leading zero byte when emitting string table, for n_strx == 0 // which means an empty string. outbyte(ST, 0); for (std::vector::iterator I = StringTable.begin(), E = StringTable.end(); I != E; ++I) { // FIXME: do not arbitrarily cap symbols to 16 characters // FIXME: do something more efficient than outstring outstring(ST, *I, 16); } O.write((char*)&ST[0], ST.size()); }