diff --git a/docs/BitCodeFormat.html b/docs/BitCodeFormat.html index 4adf75e91b5..ed9bd082b56 100644 --- a/docs/BitCodeFormat.html +++ b/docs/BitCodeFormat.html @@ -22,6 +22,8 @@
  • Standard Blocks
  • +
  • Bitcode Wrapper Format +
  • LLVM IR Encoding
    1. Basics
    2. @@ -65,8 +67,12 @@ Unlike XML, the bitstream format is a binary encoding, and unlike XML it provides a mechanism for the file to self-describe "abbreviations", which are effectively size optimizations for the content.

      -

      This document first describes the LLVM bitstream format, then describes the -record structure used by LLVM IR files. +

      LLVM IR files may be optionally embedded into a wrapper structure that makes it easy to embed extra data +along with LLVM IR files.

      + +

      This document first describes the LLVM bitstream format, describes the +wrapper format, then describes the record structure used by LLVM IR files.

      @@ -544,6 +550,36 @@ corresponding blocks. It is not safe to skip them. + + + + +
      + +

      Bitcode files for LLVM IR may optionally be wrapped in a simple wrapper +structure. This structure contains a simple header that indicates the offset +and size of the embedded BC file. This allows additional information to be +stored alongside the BC file. The structure of this file header is: +

      + +

      +

      +[Magic32,
      + Version32,
      + Offset32,
      + Size32,
      + CPUType32]
      +

      + +

      Each of the fields are 32-bit fields stored in little endian form (as with +the rest of the bitcode file fields). The Magic number is always +0x0B17C0DE and the version is currently always 0. The Offset +field is the offset in bytes to the start of the bitcode stream in the file, and +the Size field is a size in bytes of the stream. CPUType is a target-specific +value that can be used to encode the CPU of the target. +

      + + diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h index 3b7e4054147..f76bb88dee8 100644 --- a/include/llvm/Bitcode/BitstreamWriter.h +++ b/include/llvm/Bitcode/BitstreamWriter.h @@ -157,6 +157,15 @@ public: Emit(Val, CurCodeSize); } + // BackpatchWord - Backpatch a 32-bit word in the output with the specified + // value. + void BackpatchWord(unsigned ByteNo, unsigned NewWord) { + Out[ByteNo++] = (unsigned char)(NewWord >> 0); + Out[ByteNo++] = (unsigned char)(NewWord >> 8); + Out[ByteNo++] = (unsigned char)(NewWord >> 16); + Out[ByteNo ] = (unsigned char)(NewWord >> 24); + } + //===--------------------------------------------------------------------===// // Block Manipulation //===--------------------------------------------------------------------===// @@ -227,10 +236,7 @@ public: unsigned ByteNo = B.StartSizeWord*4; // Update the block size field in the header of this sub-block. - Out[ByteNo++] = (unsigned char)(SizeInWords >> 0); - Out[ByteNo++] = (unsigned char)(SizeInWords >> 8); - Out[ByteNo++] = (unsigned char)(SizeInWords >> 16); - Out[ByteNo++] = (unsigned char)(SizeInWords >> 24); + BackpatchWord(ByteNo, SizeInWords); // Restore the inner block's code size and abbrev table. CurCodeSize = B.PrevCodeSize; diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 3fc6b175217..a8c62be88fc 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1184,6 +1184,47 @@ bool BitcodeReader::ParseModule(const std::string &ModuleID) { return Error("Premature end of bitstream"); } +/// SkipWrapperHeader - Some systems wrap bc files with a special header for +/// padding or other reasons. The format of this header is: +/// +/// struct bc_header { +/// uint32_t Magic; // 0x0B17C0DE +/// uint32_t Version; // Version, currently always 0. +/// uint32_t BitcodeOffset; // Offset to traditional bitcode file. +/// uint32_t BitcodeSize; // Size of traditional bitcode file. +/// ... potentially other gunk ... +/// }; +/// +/// This function is called when we find a file with a matching magic number. +/// In this case, skip down to the subsection of the file that is actually a BC +/// file. +static bool SkipWrapperHeader(unsigned char *&BufPtr, unsigned char *&BufEnd) { + enum { + KnownHeaderSize = 4*4, // Size of header we read. + OffsetField = 2*4, // Offset in bytes to Offset field. + SizeField = 3*4 // Offset in bytes to Size field. + }; + + + // Must contain the header! + if (BufEnd-BufPtr < KnownHeaderSize) return true; + + unsigned Offset = ( BufPtr[OffsetField ] | + (BufPtr[OffsetField+1] << 8) | + (BufPtr[OffsetField+2] << 16) | + (BufPtr[OffsetField+3] << 24)); + unsigned Size = ( BufPtr[SizeField ] | + (BufPtr[SizeField +1] << 8) | + (BufPtr[SizeField +2] << 16) | + (BufPtr[SizeField +3] << 24)); + + // Verify that Offset+Size fits in the file. + if (Offset+Size > unsigned(BufEnd-BufPtr)) + return true; + BufPtr += Offset; + BufEnd = BufPtr+Size; + return false; +} bool BitcodeReader::ParseBitcode() { TheModule = 0; @@ -1192,7 +1233,16 @@ bool BitcodeReader::ParseBitcode() { return Error("Bitcode stream should be a multiple of 4 bytes in length"); unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart(); - Stream.init(BufPtr, BufPtr+Buffer->getBufferSize()); + unsigned char *BufEnd = BufPtr+Buffer->getBufferSize(); + + // If we have a wrapper header, parse it and ignore the non-bc file contents. + // The magic number is 0x0B17C0DE stored in little endian. + if (BufPtr != BufEnd && BufPtr[0] == 0xDE && BufPtr[1] == 0xC0 && + BufPtr[2] == 0x17 && BufPtr[3] == 0x0B) + if (SkipWrapperHeader(BufPtr, BufEnd)) + return Error("Invalid bitcode wrapper header"); + + Stream.init(BufPtr, BufEnd); // Sniff for the signature. if (Stream.Read(8) != 'B' || diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 0030aca3bc5..9794fac009c 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1273,6 +1273,70 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) { Stream.ExitBlock(); } +/// EmitDarwinBCHeader - If generating a bc file on darwin, we have to emit a +/// header and trailer to make it compatible with the system archiver. To do +/// this we emit the following header, and then emit a trailer that pads the +/// file out to be a multiple of 16 bytes. +/// +/// struct bc_header { +/// uint32_t Magic; // 0x0B17C0DE +/// uint32_t Version; // Version, currently always 0. +/// uint32_t BitcodeOffset; // Offset to traditional bitcode file. +/// uint32_t BitcodeSize; // Size of traditional bitcode file. +/// uint32_t CPUType; // CPU specifier. +/// ... potentially more later ... +/// }; +enum { + DarwinBCSizeFieldOffset = 3*4, // Offset to bitcode_size. + DarwinBCHeaderSize = 5*4 +}; + +static void EmitDarwinBCHeader(BitstreamWriter &Stream, + const std::string &TT) { + unsigned CPUType = ~0U; + + // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*. The CPUType is a + // magic number from /usr/include/mach/machine.h. It is ok to reproduce the + // specific constants here because they are implicitly part of the Darwin ABI. + enum { + DARWIN_CPU_ARCH_ABI64 = 0x01000000, + DARWIN_CPU_TYPE_X86 = 7, + DARWIN_CPU_TYPE_POWERPC = 18 + }; + + if (TT.find("x86_64-") == 0) + CPUType = DARWIN_CPU_TYPE_X86 | DARWIN_CPU_ARCH_ABI64; + else if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' && + TT[4] == '-' && TT[1] - '3' < 6) + CPUType = DARWIN_CPU_TYPE_X86; + else if (TT.find("powerpc-") == 0) + CPUType = DARWIN_CPU_TYPE_POWERPC; + else if (TT.find("powerpc64-") == 0) + CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64; + + // Traditional Bitcode starts after header. + unsigned BCOffset = DarwinBCHeaderSize; + + Stream.Emit(0x0B17C0DE, 32); + Stream.Emit(0 , 32); // Version. + Stream.Emit(BCOffset , 32); + Stream.Emit(0 , 32); // Filled in later. + Stream.Emit(CPUType , 32); +} + +/// EmitDarwinBCTrailer - Emit the darwin epilog after the bitcode file and +/// finalize the header. +static void EmitDarwinBCTrailer(BitstreamWriter &Stream, unsigned BufferSize) { + // Update the size field in the header. + Stream.BackpatchWord(DarwinBCSizeFieldOffset, BufferSize-DarwinBCHeaderSize); + + // If the file is not a multiple of 16 bytes, insert dummy padding. + while (BufferSize & 15) { + Stream.Emit(0, 8); + ++BufferSize; + } +} + /// WriteBitcodeToFile - Write the specified module to the specified output /// stream. @@ -1282,6 +1346,11 @@ void llvm::WriteBitcodeToFile(const Module *M, std::ostream &Out) { Buffer.reserve(256*1024); + // If this is darwin, emit a file header and trailer if needed. + bool isDarwin = M->getTargetTriple().find("-darwin") != std::string::npos; + if (isDarwin) + EmitDarwinBCHeader(Stream, M->getTargetTriple()); + // Emit the file header. Stream.Emit((unsigned)'B', 8); Stream.Emit((unsigned)'C', 8); @@ -1292,10 +1361,14 @@ void llvm::WriteBitcodeToFile(const Module *M, std::ostream &Out) { // Emit the module. WriteModule(M, Stream); + + if (isDarwin) + EmitDarwinBCTrailer(Stream, Buffer.size()); + // If writing to stdout, set binary mode. if (llvm::cout == Out) - sys::Program::ChangeStdoutToBinary(); + sys::Program::ChangeStdoutToBinary(); // Write the generated bitstream to "Out". Out.write((char*)&Buffer.front(), Buffer.size()); diff --git a/lib/System/Path.cpp b/lib/System/Path.cpp index fbb6b6629ce..88479fe5083 100644 --- a/lib/System/Path.cpp +++ b/lib/System/Path.cpp @@ -52,10 +52,15 @@ Path::GetLLVMConfigDir() { } LLVMFileType -sys::IdentifyFileType(const char*magic, unsigned length) { +sys::IdentifyFileType(const char *magic, unsigned length) { assert(magic && "Invalid magic number string"); assert(length >=4 && "Invalid magic number length"); switch (magic[0]) { + case 0xDE: // 0x0B17C0DE = BC wraper + if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 && + magic[3] == (char)0x0B) + return Bitcode_FileType; + break; case 'B': if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE) return Bitcode_FileType;