From e3c6ad7e76240235ed519a03ce296c7b7cea687e Mon Sep 17 00:00:00 2001
From: Reid Spencer
Date: Sun, 14 Nov 2004 22:04:46 +0000
Subject: [PATCH] Implement the high level interface to make (de)compression easier.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@17775 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Support/Compressor.cpp | 190 +++++++++++++++++++++++++++++++++++--
 1 file changed, 181 insertions(+), 9 deletions(-)

diff --git a/lib/Support/Compressor.cpp b/lib/Support/Compressor.cpp
index 6a1fdb9b97d..ecadf283fe3 100644
--- a/lib/Support/Compressor.cpp
+++ b/lib/Support/Compressor.cpp
@@ -111,13 +111,137 @@ bool NULLCOMP_decompress(NULLCOMP_stream* s) {
 void NULLCOMP_end(NULLCOMP_stream* strm) {
 }
 
+/// This structure backs the *ToNewBuffer functions below. As data is being
+/// (de)compressed, the memory buffer might need to be reallocated. The
+/// buffer allocation is handled in a callback and this structure is needed
+/// to retain information across calls to the callback. The buffer comes
+/// from realloc and is handed to the caller, who owns it afterwards.
+/// @brief An internal buffer object used for handling decompression
+struct BufferContext {
+  char* buff;
+  unsigned size;
+  BufferContext(unsigned compressedSize ) {
+    // Null to indicate malloc of a new block
+    buff = 0;
+
+    // Compute the initial length of the uncompression buffer. Note that this
+    // is twice the length of the compressed buffer and will be doubled again
+    // in the callback for an initial allocation of 4x compressedSize. This
+    // calculation is based on the typical compression ratio of bzip2 on LLVM
+    // bytecode files which typically ranges in the 50%-75% range. Since we
+    // typically get at least 50%, doubling is insufficient. By using a 4x
+    // multiplier on the first allocation, we minimize the impact of having to
+    // copy the buffer on reallocation.
+    size = compressedSize*2;
+  }
+
+  /// This function handles allocation of the output buffer. It is the
+  /// callback that compressToNewBuffer and decompressToNewBuffer pass to
+  /// Compressor::compress and Compressor::decompress. Returns 0 on success.
+  static unsigned callback(char*&buff, unsigned& sz, void* ctxt){
+    // Cast the context variable to our BufferContext
+    BufferContext* bc = reinterpret_cast<BufferContext*>(ctxt);
+
+    // Compute the new, doubled, size of the block
+    unsigned new_size = bc->size * 2;
+
+    // Extend or allocate the block (realloc(0,n) == malloc(n)). If that fails
+    // the old block is still valid, so keep bc untouched and return 1.
+    char* new_buff = (char*) ::realloc(bc->buff, new_size);
+    if (new_buff == 0)
+      return 1;
+
+    // Figure out what to return to the Compressor. On the first call bc->buff
+    // is null and the entire new buffer is returned. On later calls only the
+    // extension, mid-way through the doubled buffer, is returned, so sz is
+    // 1/2 the total size of the buffer.
+    if (bc->buff == 0 ) {
+      buff = new_buff;
+      sz = new_size;
+    } else {
+      buff = new_buff + bc->size;
+      sz = bc->size;
+    }
+
+    // Retain the base pointer and size of the allocated block
+    bc->buff = new_buff;
+    bc->size = new_size;
+
+    // The allocation succeeded.
+    return 0;
+  }
+};
+
+// This structure retains the context when (de)compressing to a stream. The
+// callback below uses it to keep track of the previously filled chunk of
+// memory (which it writes) and how many bytes have been written so far.
+// It owns the current chunk and releases it on destruction.
+struct WriterContext {
+  // Initialize the context
+  WriterContext(std::ostream*OS, unsigned CS)
+    : chunk(0), sz(0), written(0), compSize(CS), Out(OS) {}
+
+  // Make sure we clean up memory
+  ~WriterContext() {
+    if (chunk)
+      delete [] chunk;
+  }
+
+  // Write the chunk; a size of 0 means "write the whole chunk (sz bytes)"
+  void write(unsigned size = 0) {
+    unsigned write_size = (size == 0 ? sz : size);
+    Out->write(chunk,write_size);
+    written += write_size;
+    delete [] chunk;
+    chunk = 0;
+    sz = 0;
+  }
+
+  // This function is a callback used by the Compressor::compress function to
+  // allocate memory for the compression buffer. This function fulfills that
+  // responsibility but also writes the previous (now filled) buffer out to the
+  // stream.
+  static unsigned callback(char*& buffer, unsigned& size, void* context) {
+    // Cast the context to the structure it must point to.
+    WriterContext* ctxt =
+      reinterpret_cast<WriterContext*>(context);
+
+    // If there's a previously allocated chunk, it must now be filled with
+    // compressed data, so we write it out and deallocate it.
+    if (ctxt->chunk != 0 && ctxt->sz > 0 ) {
+      ctxt->write();
+    }
+
+    // Compute the size of the next chunk to allocate. We attempt to allocate
+    // enough memory to handle the compression in a single memory allocation. In
+    // general, the worst we do on compression of bytecode is about 50% so we
+    // conservatively estimate compSize / 2 as the size needed. compSize is the
+    // estimate passed to the constructor. An empty chunk is never handed out:
+    // the write above would skip it and the next call would leak it.
+    size = ctxt->sz = (ctxt->compSize < 2 ? 1 : ctxt->compSize / 2);
+
+    // Allocate the chunks
+    buffer = ctxt->chunk = new char [size];
+
+    // Return 1 if the allocation failed so that the Compressor knows not to
+    // use the buffer. (A standard operator new[] throws rather than return 0.)
+    return (ctxt->chunk == 0 ? 1 : 0);
+  }
+
+  char* chunk;       // pointer to the chunk of memory filled by compression
+  unsigned sz;       // size of chunk
+  unsigned written;  // aggregate total of bytes written in all chunks
+  unsigned compSize; // caller's size estimate; each chunk is half of this
+  std::ostream* Out; // The stream we write the data to.
+};
+
 }
 
 namespace llvm {
 
 // Compress in one of three ways
-uint64_t Compressor::compress(char* in, unsigned size, OutputDataCallback* cb,
-    Algorithm hint, void* context ) {
+uint64_t Compressor::compress(const char* in, unsigned size,
+    OutputDataCallback* cb, Algorithm hint, void* context ) {
   assert(in && "Can't compress null buffer");
   assert(size && "Can't compress empty buffer");
   assert(cb && "Can't compress without a callback function");
@@ -132,7 +256,7 @@ uint64_t Compressor::compress(char* in, unsigned size, OutputDataCallback* cb,
       bzdata.bzalloc = 0;
       bzdata.bzfree = 0;
       bzdata.opaque = 0;
-      bzdata.next_in = in;
+      bzdata.next_in = (char*)in;
       bzdata.avail_in = size;
       bzdata.next_out = 0;
       bzdata.avail_out = 0;
@@ -188,7 +312,7 @@ uint64_t Compressor::compress(char* in, unsigned size, OutputDataCallback* cb,
       zdata.zalloc = Z_NULL;
       zdata.zfree = Z_NULL;
       zdata.opaque = Z_NULL;
-      zdata.next_in = reinterpret_cast<Bytef*>(in);
+      zdata.next_in = (Bytef*)in;
       zdata.avail_in = size;
       if (Z_OK != deflateInit(&zdata,6))
         throw std::string(zdata.msg ? zdata.msg : "zlib error");
@@ -227,7 +351,7 @@ uint64_t Compressor::compress(char* in, unsigned size, OutputDataCallback* cb,
 
     case COMP_TYPE_SIMPLE: {
       NULLCOMP_stream sdata;
-      sdata.next_in = in;
+      sdata.next_in = (char*)in;
       sdata.avail_in = size;
       NULLCOMP_init(&sdata);
 
@@ -254,8 +378,33 @@ uint64_t Compressor::compress(char* in, unsigned size, OutputDataCallback* cb,
   return result;
 }
 
+uint64_t
+Compressor::compressToNewBuffer(const char* in, unsigned size, char*&out,
+                                Algorithm hint) {
+  BufferContext bc(size);
+  uint64_t result = compress(in,size,BufferContext::callback,hint,(void*)&bc);
+  out = bc.buff;
+  return result;
+}
+
+uint64_t
+Compressor::compressToStream(const char*in, unsigned size, std::ostream& out,
+                             Algorithm hint) {
+  // Set up the context and writer
+  WriterContext ctxt(&out,size / 2);
+
+  // Compress the input; the callback streams out each chunk as it fills up
+  uint64_t zipSize = Compressor::compress(in,size,
+    WriterContext::callback, hint, (void*)&ctxt);
+  // Flush the partial last chunk; write(0) would emit the whole chunk instead
+  if (ctxt.chunk && zipSize > ctxt.written) {
+    ctxt.write(zipSize - ctxt.written);
+  }
+  return zipSize;
+}
+
 // Decompress in one of three ways
-uint64_t Compressor::decompress(char *in, unsigned size,
+uint64_t Compressor::decompress(const char *in, unsigned size,
                                 OutputDataCallback* cb, void* context) {
   assert(in && "Can't decompress null buffer");
   assert(size > 1 && "Can't decompress empty buffer");
@@ -273,7 +422,7 @@ uint64_t Compressor::decompress(char *in, unsigned size,
       bzdata.bzalloc = 0;
       bzdata.bzfree = 0;
       bzdata.opaque = 0;
-      bzdata.next_in = in;
+      bzdata.next_in = (char*)in;
       bzdata.avail_in = size - 1;
       bzdata.next_out = 0;
       bzdata.avail_out = 0;
@@ -327,7 +476,7 @@ uint64_t Compressor::decompress(char *in, unsigned size,
       zdata.zalloc = Z_NULL;
       zdata.zfree = Z_NULL;
       zdata.opaque = Z_NULL;
-      zdata.next_in = reinterpret_cast<Bytef*>(in);
+      zdata.next_in = (Bytef*)(in);
       zdata.avail_in = size - 1;
       if ( Z_OK != inflateInit(&zdata))
         throw std::string(zdata.msg ? zdata.msg : "zlib error");
@@ -356,7 +505,7 @@ uint64_t Compressor::decompress(char *in, unsigned size,
 
     case COMP_TYPE_SIMPLE: {
       NULLCOMP_stream sdata;
-      sdata.next_in = in;
+      sdata.next_in = (char*)in;
       sdata.avail_in = size - 1;
       NULLCOMP_init(&sdata);
 
@@ -382,6 +531,29 @@ uint64_t Compressor::decompress(char *in, unsigned size,
   return result;
 }
 
+uint64_t
+Compressor::decompressToNewBuffer(const char* in, unsigned size, char*&out) {
+  BufferContext bc(size);
+  uint64_t result = decompress(in,size,BufferContext::callback,(void*)&bc);
+  out = bc.buff;
+  return result;
+}
+
+uint64_t
+Compressor::decompressToStream(const char*in, unsigned size, std::ostream& out){
+  // Set up the context and writer
+  WriterContext ctxt(&out,size / 2);
+
+  // Decompress the input; the callback streams out each chunk as it fills up
+  uint64_t outSize = Compressor::decompress(in,size,
+    WriterContext::callback, (void*)&ctxt);
+  // Flush the partial last chunk; write(0) would emit the whole chunk instead
+  if (ctxt.chunk && outSize > ctxt.written) {
+    ctxt.write(outSize - ctxt.written);
+  }
+  return outSize;
+}
+
 }
 
 // vim: sw=2 ai