Enable streaming of bitcode

This change defers reading function bodies from the initial parse until materialization, so that compilation can overlap with the bitcode download.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@149918 91177308-0d34-0410-b5e6-96231b3b80d8
2025-06-17 04:24:00 +00:00 · 2012-02-06 22:30:29 +00:00
parent 06d7e1b52b
commit 2ea93875b2
26 changed files with 748 additions and 163 deletions
--- a/include/llvm/Bitcode/BitstreamReader.h
+++ b/include/llvm/Bitcode/BitstreamReader.h
@ -15,10 +15,12 @@
 #ifndef BITSTREAM_READER_H
 #define BITSTREAM_READER_H

+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Bitcode/BitCodes.h"
 #include <climits>
 #include <string>
 #include <vector>
+#include "llvm/Support/StreamableMemoryObject.h"

 namespace llvm {

@ -36,9 +38,7 @@ public:
    std::vector<std::pair<unsigned, std::string> > RecordNames;
  };
 private:
-  /// FirstChar/LastChar - This remembers the first and last bytes of the
-  /// stream.
-  const unsigned char *FirstChar, *LastChar;
+  OwningPtr<StreamableMemoryObject> BitcodeBytes;
  
  std::vector<BlockInfo> BlockInfoRecords;

@ -47,10 +47,10 @@ private:
  /// uses this.
  bool IgnoreBlockInfoNames;
  
-  BitstreamReader(const BitstreamReader&);  // NOT IMPLEMENTED
-  void operator=(const BitstreamReader&);  // NOT IMPLEMENTED
+  BitstreamReader(const BitstreamReader&);  // DO NOT IMPLEMENT
+  void operator=(const BitstreamReader&);  // DO NOT IMPLEMENT
 public:
-  BitstreamReader() : FirstChar(0), LastChar(0), IgnoreBlockInfoNames(true) {
+  BitstreamReader() : IgnoreBlockInfoNames(true) {
  }

  BitstreamReader(const unsigned char *Start, const unsigned char *End) {
@ -58,12 +58,17 @@ public:
    init(Start, End);
  }

-  void init(const unsigned char *Start, const unsigned char *End) {
-    FirstChar = Start;
-    LastChar = End;
-    assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
+  /// Construct a reader over an already-created memory object.  The pointer
+  /// is handed to the BitcodeBytes OwningPtr via reset().
+  ///
+  /// IgnoreBlockInfoNames is set to true here, the same value the default
+  /// constructor uses; without this the flag would be left uninitialized and
+  /// any later read of it would see an indeterminate value.
+  BitstreamReader(StreamableMemoryObject *bytes)
+    : IgnoreBlockInfoNames(true) {
+    BitcodeBytes.reset(bytes);
+  }

+  /// init - Point this reader at the in-memory bitcode bytes [Start, End).
+  /// The range is wrapped with getNonStreamedMemoryObject() and the result is
+  /// stored in BitcodeBytes via reset().  The length (End-Start) must be a
+  /// multiple of 4 bytes; this is only checked by the assert.
+  void init(const unsigned char *Start, const unsigned char *End) {
+    assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
+    BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End));
+  }
+
+  /// getBitcodeBytes - Return the memory object that backs this reader.
+  /// BitcodeBytes is dereferenced unconditionally, so the reader must have
+  /// been given bytes (through init() or the StreamableMemoryObject
+  /// constructor) before this is called.
+  StreamableMemoryObject &getBitcodeBytes() { return *BitcodeBytes; }
+
  ~BitstreamReader() {
    // Free the BlockInfoRecords.
    while (!BlockInfoRecords.empty()) {
@ -75,9 +80,6 @@ public:
      BlockInfoRecords.pop_back();
    }
  }
-  
-  const unsigned char *getFirstChar() const { return FirstChar; }
-  const unsigned char *getLastChar() const { return LastChar; }

  /// CollectBlockInfoNames - This is called by clients that want block/record
  /// name information.
@ -122,7 +124,7 @@ public:
 class BitstreamCursor {
  friend class Deserializer;
  BitstreamReader *BitStream;
-  const unsigned char *NextChar;
+  size_t NextChar;
  
  /// CurWord - This is the current data we have pulled from the stream but have
  /// not returned to the client.
@ -156,8 +158,7 @@ public:
  }
  
  explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) {
-    NextChar = R.getFirstChar();
-    assert(NextChar && "Bitstream not initialized yet");
+    NextChar = 0;
    CurWord = 0;
    BitsInCurWord = 0;
    CurCodeSize = 2;
@ -167,8 +168,7 @@ public:
    freeState();
    
    BitStream = &R;
-    NextChar = R.getFirstChar();
-    assert(NextChar && "Bitstream not initialized yet");
+    NextChar = 0;
    CurWord = 0;
    BitsInCurWord = 0;
    CurCodeSize = 2;
@ -225,13 +225,38 @@ public:
  /// GetAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
  unsigned GetAbbrevIDWidth() const { return CurCodeSize; }
  
-  bool AtEndOfStream() const {
-    return NextChar == BitStream->getLastChar() && BitsInCurWord == 0;
+  /// isEndPos - Ask the underlying memory object whether byte offset \p pos
+  /// is the end of the object.
+  bool isEndPos(size_t pos) {
+    const uint64_t Offset = static_cast<uint64_t>(pos);
+    return BitStream->getBitcodeBytes().isObjectEnd(Offset);
+  }
+
+  /// canSkipToPos - Return true if the cursor may be moved to byte offset
+  /// \p pos.  Offset 0 is always accepted; any other offset is accepted when
+  /// the byte just before it is a valid address, which also admits the
+  /// position one past the last byte.
+  bool canSkipToPos(size_t pos) const {
+    if (pos == 0)
+      return true;
+    const uint64_t PrevByte = static_cast<uint64_t>(pos - 1);
+    return BitStream->getBitcodeBytes().isValidAddress(PrevByte);
+  }
+
+  /// getByte - Fetch the byte at offset \p pos from the underlying memory
+  /// object.  The result of readByte() is not checked: the value starts out
+  /// as 0xFF and is returned unchanged if readByte() does not overwrite it.
+  unsigned char getByte(size_t pos) {
+    uint8_t Value = 0xFF;
+    BitStream->getBitcodeBytes().readByte(pos, &Value);
+    return Value;
+  }
+
+  /// getWord - Fetch the 32-bit word that starts at byte offset \p pos.
+  ///
+  /// The four bytes are combined in little-endian order (the byte at \p pos
+  /// is the least significant) regardless of the host's byte order, so the
+  /// same bitcode decodes identically on big- and little-endian hosts.
+  ///
+  /// The result of readBytes() is not checked: the buffer starts out as all
+  /// 0xFF, so a read that stores nothing yields 0xFFFFFFFF.
+  uint32_t getWord(size_t pos) {
+    uint8_t buf[sizeof(uint32_t)] = { 0xFF, 0xFF, 0xFF, 0xFF };
+    BitStream->getBitcodeBytes().readBytes(pos, sizeof(buf), buf, NULL);
+    // Assemble explicitly instead of reinterpreting the buffer as a uint32_t,
+    // which would give a host-endian (and so non-portable) result.
+    return (uint32_t(buf[0]) <<  0) | (uint32_t(buf[1]) <<  8) |
+           (uint32_t(buf[2]) << 16) | (uint32_t(buf[3]) << 24);
+  }
+
+  /// AtEndOfStream - Return true when the next byte position is the end of
+  /// the underlying object and no buffered bits remain in the current word.
+  /// The end-of-object query is made first, before BitsInCurWord is examined.
+  bool AtEndOfStream() {
+    return isEndPos(NextChar) && BitsInCurWord == 0;
+  }
  
  /// GetCurrentBitNo - Return the bit # of the bit we are reading.
  uint64_t GetCurrentBitNo() const {
-    return (NextChar-BitStream->getFirstChar())*CHAR_BIT - BitsInCurWord;
+    // NextChar is the byte offset of the next unread word; the bits still
+    // buffered in CurWord have not been consumed yet, so subtract them.
+    // Widen to 64 bits before multiplying so the bit number does not wrap
+    // where size_t is 32 bits wide.
+    return static_cast<uint64_t>(NextChar)*CHAR_BIT - BitsInCurWord;
  }
  
  BitstreamReader *getBitStreamReader() {
@ -246,12 +271,10 @@ public:
  void JumpToBit(uint64_t BitNo) {
    uintptr_t ByteNo = uintptr_t(BitNo/8) & ~3;
    uintptr_t WordBitNo = uintptr_t(BitNo) & 31;
-    assert(ByteNo <= (uintptr_t)(BitStream->getLastChar()-
-                                 BitStream->getFirstChar()) &&
-           "Invalid location");
+    assert(canSkipToPos(ByteNo) && "Invalid location");
    
    // Move the cursor to the right word.
-    NextChar = BitStream->getFirstChar()+ByteNo;
+    NextChar = ByteNo;
    BitsInCurWord = 0;
    CurWord = 0;
    
@ -272,7 +295,7 @@ public:
    }

    // If we run out of data, stop at the end of the stream.
-    if (NextChar == BitStream->getLastChar()) {
+    if (isEndPos(NextChar)) {
      CurWord = 0;
      BitsInCurWord = 0;
      return 0;
@ -281,8 +304,7 @@ public:
    unsigned R = CurWord;

    // Read the next word from the stream.
-    CurWord = (NextChar[0] <<  0) | (NextChar[1] << 8) |
-              (NextChar[2] << 16) | (NextChar[3] << 24);
+    CurWord = getWord(NextChar);
    NextChar += 4;

    // Extract NumBits-BitsInCurWord from what we just read.
@ -376,9 +398,8 @@ public:

    // Check that the block wasn't partially defined, and that the offset isn't
    // bogus.
-    const unsigned char *const SkipTo = NextChar + NumWords*4;
-    if (AtEndOfStream() || SkipTo > BitStream->getLastChar() ||
-                           SkipTo < BitStream->getFirstChar())
+    size_t SkipTo = NextChar + NumWords*4;
+    if (AtEndOfStream() || !canSkipToPos(SkipTo))
      return true;

    NextChar = SkipTo;
@ -409,8 +430,7 @@ public:
    if (NumWordsP) *NumWordsP = NumWords;

    // Validate that this block is sane.
-    if (CurCodeSize == 0 || AtEndOfStream() ||
-        NextChar+NumWords*4 > BitStream->getLastChar())
+    if (CurCodeSize == 0 || AtEndOfStream())
      return true;

    return false;
@ -512,24 +532,25 @@ public:
        SkipToWord();  // 32-bit alignment

        // Figure out where the end of this blob will be including tail padding.
-        const unsigned char *NewEnd = NextChar+((NumElts+3)&~3);
+        size_t NewEnd = NextChar+((NumElts+3)&~3);
        
        // If this would read off the end of the bitcode file, just set the
        // record to empty and return.
-        if (NewEnd > BitStream->getLastChar()) {
+        if (!canSkipToPos(NewEnd)) {
          Vals.append(NumElts, 0);
-          NextChar = BitStream->getLastChar();
+          NextChar = BitStream->getBitcodeBytes().getExtent();
          break;
        }
        
        // Otherwise, read the number of bytes.  If we can return a reference to
        // the data, do so to avoid copying it.
        if (BlobStart) {
-          *BlobStart = (const char*)NextChar;
+          *BlobStart = (const char*)BitStream->getBitcodeBytes().getPointer(
+              NextChar, NumElts);
          *BlobLen = NumElts;
        } else {
          for (; NumElts; ++NextChar, --NumElts)
-            Vals.push_back(*NextChar);
+            Vals.push_back(getByte(NextChar));
        }
        // Skip over tail padding.
        NextChar = NewEnd;
--- a/include/llvm/Bitcode/ReaderWriter.h
+++ b/include/llvm/Bitcode/ReaderWriter.h
@ -17,35 +17,45 @@
 #include <string>

 namespace llvm {
-  class Module;
-  class MemoryBuffer;
-  class ModulePass;
  class BitstreamWriter;
+  class MemoryBuffer;
+  class DataStreamer;
  class LLVMContext;
+  class Module;
+  class ModulePass;
  class raw_ostream;
-  
+
  /// getLazyBitcodeModule - Read the header of the specified bitcode buffer
  /// and prepare for lazy deserialization of function bodies.  If successful,
  /// this takes ownership of 'buffer' and returns a non-null pointer.  On
  /// error, this returns null, *does not* take ownership of Buffer, and fills
  /// in *ErrMsg with an error description if ErrMsg is non-null.
  Module *getLazyBitcodeModule(MemoryBuffer *Buffer,
-                               LLVMContext& Context,
+                               LLVMContext &Context,
                               std::string *ErrMsg = 0);

+  /// getStreamedBitcodeModule - Read the header of the specified stream
+  /// and prepare for lazy deserialization and streaming of function bodies.
+  /// On error, this returns null, and fills in *ErrMsg with an error
+  /// description if ErrMsg is non-null.
+  Module *getStreamedBitcodeModule(const std::string &name,
+                                   DataStreamer *streamer,
+                                   LLVMContext &Context,
+                                   std::string *ErrMsg = 0);
+
  /// getBitcodeTargetTriple - Read the header of the specified bitcode
  /// buffer and extract just the triple information. If successful,
  /// this returns a string and *does not* take ownership
  /// of 'buffer'. On error, this returns "", and fills in *ErrMsg
  /// if ErrMsg is non-null.
  std::string getBitcodeTargetTriple(MemoryBuffer *Buffer,
-                                     LLVMContext& Context,
+                                     LLVMContext &Context,
                                     std::string *ErrMsg = 0);

  /// ParseBitcodeFile - Read the specified bitcode file, returning the module.
  /// If an error occurs, this returns null and fills in *ErrMsg if it is
  /// non-null.  This method *never* takes ownership of Buffer.
-  Module *ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context,
+  Module *ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext &Context,
                           std::string *ErrMsg = 0);

  /// WriteBitcodeToFile - Write the specified module to the specified
@ -60,8 +70,8 @@ namespace llvm {
  /// createBitcodeWriterPass - Create and return a pass that writes the module
  /// to the specified ostream.
  ModulePass *createBitcodeWriterPass(raw_ostream &Str);
-  
-  
+
+
  /// isBitcodeWrapper - Return true if the given bytes are the magic bytes
  /// for an LLVM IR bitcode wrapper.
  ///
@ -109,21 +119,24 @@ namespace llvm {
  ///   uint32_t BitcodeSize;   // Size of traditional bitcode file.
  ///   ... potentially other gunk ...
  /// };
-  /// 
+  ///
  /// This function is called when we find a file with a matching magic number.
  /// In this case, skip down to the subsection of the file that is actually a
  /// BC file.
-  static inline bool SkipBitcodeWrapperHeader(unsigned char *&BufPtr,
-                                              unsigned char *&BufEnd) {
+  /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
+  /// contain the whole bitcode file.
+  static inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr,
+                                              const unsigned char *&BufEnd,
+                                              bool VerifyBufferSize) {
    enum {
      KnownHeaderSize = 4*4,  // Size of header we read.
      OffsetField = 2*4,      // Offset in bytes to Offset field.
      SizeField = 3*4         // Offset in bytes to Size field.
    };
-    
+
    // Must contain the header!
    if (BufEnd-BufPtr < KnownHeaderSize) return true;
-    
+
    unsigned Offset = ( BufPtr[OffsetField  ]        |
                       (BufPtr[OffsetField+1] << 8)  |
                       (BufPtr[OffsetField+2] << 16) |
@ -132,9 +145,9 @@ namespace llvm {
                       (BufPtr[SizeField  +1] << 8)  |
                       (BufPtr[SizeField  +2] << 16) |
                       (BufPtr[SizeField  +3] << 24));
-    
+
    // Verify that Offset+Size fits in the file.
-    if (Offset+Size > unsigned(BufEnd-BufPtr))
+    if (VerifyBufferSize && Offset+Size > unsigned(BufEnd-BufPtr))
      return true;
    BufPtr += Offset;
    BufEnd = BufPtr+Size;