From 89e025387eee7f3f03749fd54ab5e6ce36495c0a Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Sun, 18 Jan 2004 21:08:15 +0000 Subject: [PATCH] Add support for reading bytecode files with compactiontables for bytecode files. This shrinks the bytecode file for 176.gcc by about 200K (10%), and 254.gap by about 167K, a 25% reduction. There is still a lot of room for improvement in the encoding of the compaction table. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@10914 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bytecode/Reader/ConstantReader.cpp | 9 +- lib/Bytecode/Reader/Reader.cpp | 159 ++++++++++++++++++++----- lib/Bytecode/Reader/ReaderInternals.h | 59 ++++++++- 3 files changed, 186 insertions(+), 41 deletions(-) diff --git a/lib/Bytecode/Reader/ConstantReader.cpp b/lib/Bytecode/Reader/ConstantReader.cpp index c8e9feb0bef..e8cebcb38fa 100644 --- a/lib/Bytecode/Reader/ConstantReader.cpp +++ b/lib/Bytecode/Reader/ConstantReader.cpp @@ -1,4 +1,4 @@ -//===- ReadConst.cpp - Code to constants and constant pools ---------------===// +//===- ConstantReader.cpp - Code to constants and types ====---------------===// // // The LLVM Compiler Infrastructure // @@ -7,11 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file implements functionality to deserialize constants and entire -// constant pools. -// -// Note that this library should be as fast as possible, reentrant, and -// thread-safe!! +// This file implements functionality to deserialize constants and types from +// bytecode files. // //===----------------------------------------------------------------------===// diff --git a/lib/Bytecode/Reader/Reader.cpp b/lib/Bytecode/Reader/Reader.cpp index 16b4ee06ebe..2067f222110 100644 --- a/lib/Bytecode/Reader/Reader.cpp +++ b/lib/Bytecode/Reader/Reader.cpp @@ -27,32 +27,47 @@ unsigned BytecodeParser::getTypeSlot(const Type *Ty) { if (Ty->isPrimitiveType()) return Ty->getPrimitiveID(); + // Scan the compaction table for the type if needed. + if (CompactionTable.size() > Type::TypeTyID) { + std::vector &Plane = CompactionTable[Type::TypeTyID]; + if (!Plane.empty()) { + std::vector::iterator I = find(Plane.begin(), Plane.end(), + const_cast(Ty)); + if (I == Plane.end()) + throw std::string("Couldn't find type specified in compaction table!"); + return Type::FirstDerivedTyID + (&*I - &Plane[0]); + } + } + // Check the function level types first... TypeValuesListTy::iterator I = find(FunctionTypeValues.begin(), FunctionTypeValues.end(), Ty); if (I != FunctionTypeValues.end()) - return FirstDerivedTyID + ModuleTypeValues.size() + + return Type::FirstDerivedTyID + ModuleTypeValues.size() + (&*I - &FunctionTypeValues[0]); I = find(ModuleTypeValues.begin(), ModuleTypeValues.end(), Ty); if (I == ModuleTypeValues.end()) throw std::string("Didn't find type in ModuleTypeValues."); - return FirstDerivedTyID + (&*I - &ModuleTypeValues[0]); + return Type::FirstDerivedTyID + (&*I - &ModuleTypeValues[0]); } const Type *BytecodeParser::getType(unsigned ID) { - if (ID < Type::NumPrimitiveIDs) - if (const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID)) - return T; - //cerr << "Looking up Type ID: " << ID << "\n"; - if (ID < Type::NumPrimitiveIDs) + if (ID < Type::FirstDerivedTyID) if (const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID)) return T; // Asked for a primitive type... // Otherwise, derived types need offset... - ID -= FirstDerivedTyID; + ID -= Type::FirstDerivedTyID; + + if (CompactionTable.size() > Type::TypeTyID && + !CompactionTable[Type::TypeTyID].empty()) { + if (ID >= CompactionTable[Type::TypeTyID].size()) + throw std::string("Type ID out of range for compaction table!"); + return cast(CompactionTable[Type::TypeTyID][ID]); + } // Is it a module-level type? if (ID < ModuleTypeValues.size()) @@ -80,10 +95,8 @@ unsigned BytecodeParser::insertValue(Value *Val, unsigned type, "Cannot read null values from bytecode!"); assert(type != Type::TypeTyID && "Types should never be insertValue'd!"); - if (ValueTab.size() <= type) { - unsigned OldSize = ValueTab.size(); + if (ValueTab.size() <= type) ValueTab.resize(type+1); - } if (!ValueTab[type]) ValueTab[type] = new ValueList(); @@ -91,7 +104,7 @@ unsigned BytecodeParser::insertValue(Value *Val, unsigned type, // << "] = " << Val << "\n"; ValueTab[type]->push_back(Val); - bool HasOffset = !Val->getType()->isPrimitiveType(); + bool HasOffset = hasImplicitNull(type, hasExplicitPrimitiveZeros); return ValueTab[type]->size()-1 + HasOffset; } @@ -100,16 +113,25 @@ Value *BytecodeParser::getValue(unsigned type, unsigned oNum, bool Create) { assert(type != Type::LabelTyID && "getValue() cannot get blocks!"); unsigned Num = oNum; - if (hasImplicitNull(type, hasExplicitPrimitiveZeros)) { - if (Num == 0) - return Constant::getNullValue(getType(type)); - --Num; - } + // If there is a compaction table active, it defines the low-level numbers. + // If not, the module values define the low-level numbers. + if (CompactionTable.size() > type && !CompactionTable[type].empty()) { + if (Num < CompactionTable[type].size()) + return CompactionTable[type][Num]; + Num -= CompactionTable[type].size(); + } else { - if (type < ModuleValues.size() && ModuleValues[type]) { - if (Num < ModuleValues[type]->size()) - return ModuleValues[type]->getOperand(Num); - Num -= ModuleValues[type]->size(); + if (hasImplicitNull(type, hasExplicitPrimitiveZeros)) { + if (Num == 0) + return Constant::getNullValue(getType(type)); + --Num; + } + + if (type < ModuleValues.size() && ModuleValues[type]) { + if (Num < ModuleValues[type]->size()) + return ModuleValues[type]->getOperand(Num); + Num -= ModuleValues[type]->size(); + } } if (Values.size() > type && Values[type] && Num < Values[type]->size()) @@ -331,14 +353,9 @@ void BytecodeParser::materializeFunction(Function* F) { F->setLinkage(Linkage); - const FunctionType::ParamTypes &Params =F->getFunctionType()->getParamTypes(); - Function::aiterator AI = F->abegin(); - for (FunctionType::ParamTypes::const_iterator It = Params.begin(); - It != Params.end(); ++It, ++AI) - insertValue(AI, getTypeSlot(AI->getType()), Values); - // Keep track of how many basic blocks we have read in... unsigned BlockNum = 0; + bool InsertedArguments = false; while (Buf < EndBuf) { unsigned Type, Size; @@ -347,11 +364,42 @@ void BytecodeParser::materializeFunction(Function* F) { switch (Type) { case BytecodeFormat::ConstantPool: + if (!InsertedArguments) { + // Insert arguments into the value table before we parse the first basic + // block in the function, but after we potentially read in the + // compaction table. + const FunctionType::ParamTypes &Params = + F->getFunctionType()->getParamTypes(); + Function::aiterator AI = F->abegin(); + for (FunctionType::ParamTypes::const_iterator It = Params.begin(); + It != Params.end(); ++It, ++AI) + insertValue(AI, getTypeSlot(AI->getType()), Values); + InsertedArguments = true; + } + BCR_TRACE(2, "BLOCK BytecodeFormat::ConstantPool: {\n"); ParseConstantPool(Buf, Buf+Size, Values, FunctionTypeValues); break; + case BytecodeFormat::CompactionTable: + BCR_TRACE(2, "BLOCK BytecodeFormat::CompactionTable: {\n"); + ParseCompactionTable(Buf, Buf+Size); + break; + case BytecodeFormat::BasicBlock: { + if (!InsertedArguments) { + // Insert arguments into the value table before we parse the first basic + // block in the function, but after we potentially read in the + // compaction table. + const FunctionType::ParamTypes &Params = + F->getFunctionType()->getParamTypes(); + Function::aiterator AI = F->abegin(); + for (FunctionType::ParamTypes::const_iterator It = Params.begin(); + It != Params.end(); ++It, ++AI) + insertValue(AI, getTypeSlot(AI->getType()), Values); + InsertedArguments = true; + } + BCR_TRACE(2, "BLOCK BytecodeFormat::BasicBlock: {\n"); BasicBlock *BB = ParseBasicBlock(Buf, Buf+Size, BlockNum++); F->getBasicBlockList().push_back(BB); @@ -359,6 +407,19 @@ void BytecodeParser::materializeFunction(Function* F) { } case BytecodeFormat::InstructionList: { + // Insert arguments into the value table before we parse the instruction + // list for the function, but after we potentially read in the compaction + // table. + if (!InsertedArguments) { + const FunctionType::ParamTypes &Params = + F->getFunctionType()->getParamTypes(); + Function::aiterator AI = F->abegin(); + for (FunctionType::ParamTypes::const_iterator It = Params.begin(); + It != Params.end(); ++It, ++AI) + insertValue(AI, getTypeSlot(AI->getType()), Values); + InsertedArguments = true; + } + BCR_TRACE(2, "BLOCK BytecodeFormat::InstructionList: {\n"); if (BlockNum) throw std::string("Already parsed basic blocks!"); BlockNum = ParseInstructionList(F, Buf, Buf+Size); @@ -388,13 +449,16 @@ void BytecodeParser::materializeFunction(Function* F) { throw std::string("Illegal basic block operand reference"); ParsedBasicBlocks.clear(); - // Resolve forward references. Replace any uses of a forward reference value // with the real value. // replaceAllUsesWith is very inefficient for instructions which have a LARGE // number of operands. PHI nodes often have forward references, and can also // often have a very large number of operands. + // + // FIXME: REEVALUATE. replaceAllUsesWith is _much_ faster now, and this code + // should be simplified back to using it! + // std::map ForwardRefMapping; for (std::map, Value*>::iterator I = ForwardReferences.begin(), E = ForwardReferences.end(); @@ -424,10 +488,46 @@ void BytecodeParser::materializeFunction(Function* F) { // Clear out function-level types... FunctionTypeValues.clear(); - + CompactionTable.clear(); freeTable(Values); } +void BytecodeParser::ParseCompactionTable(const unsigned char *&Buf, + const unsigned char *End) { + + while (Buf != End) { + unsigned NumEntries = read_vbr_uint(Buf, End); + unsigned Ty = read_vbr_uint(Buf, End); + + if (Ty >= CompactionTable.size()) + CompactionTable.resize(Ty+1); + + if (!CompactionTable[Ty].empty()) + throw std::string("Compaction table plane contains multiple entries!"); + + if (Ty == Type::TypeTyID) { + for (unsigned i = 0; i != NumEntries; ++i) { + const Type *Typ = getGlobalTableType(read_vbr_uint(Buf, End)); + CompactionTable[Type::TypeTyID].push_back(const_cast(Typ)); + } + + CompactionTable.resize(NumEntries+Type::FirstDerivedTyID); + } else { + assert(NumEntries != 0 && "Cannot read zero entries!"); + const Type *Typ = getType(Ty); + // Push the implicit zero + CompactionTable[Ty].push_back(Constant::getNullValue(Typ)); + for (unsigned i = 1; i != NumEntries; ++i) { + Value *V = getGlobalTableValue(Typ, read_vbr_uint(Buf, End)); + CompactionTable[Ty].push_back(V); + } + } + } + +} + + + void BytecodeParser::ParseModuleGlobalInfo(const unsigned char *&Buf, const unsigned char *End) { if (!FunctionSignatureList.empty()) @@ -543,7 +643,6 @@ void BytecodeParser::ParseVersionInfo(const unsigned char *&Buf, hasVarArgCallPadding = false; hasInconsistentModuleGlobalInfo = false; hasExplicitPrimitiveZeros = false; - FirstDerivedTyID = 14; switch (RevisionNum) { case 2: // LLVM pre-1.0 release: will be deleted on the next rev diff --git a/lib/Bytecode/Reader/ReaderInternals.h b/lib/Bytecode/Reader/ReaderInternals.h index 4c0b310fcf4..f5ef3d90b61 100644 --- a/lib/Bytecode/Reader/ReaderInternals.h +++ b/lib/Bytecode/Reader/ReaderInternals.h @@ -45,10 +45,7 @@ class BytecodeParser : public ModuleProvider { BytecodeParser(const BytecodeParser &); // DO NOT IMPLEMENT void operator=(const BytecodeParser &); // DO NOT IMPLEMENT public: - BytecodeParser() { - // Define this in case we don't see a ModuleGlobalInfo block. - FirstDerivedTyID = Type::FirstDerivedTyID; - } + BytecodeParser() {} ~BytecodeParser() { freeState(); @@ -90,7 +87,6 @@ private: // Information about the module, extracted from the bytecode revision number. unsigned char RevisionNum; // The rev # itself - unsigned char FirstDerivedTyID; // First variable index to use for type bool hasExtendedLinkageSpecs; // Supports more than 4 linkage types bool hasOldStyleVarargs; // Has old version of varargs intrinsics? bool hasVarArgCallPadding; // Bytecode has extra padding in vararg call @@ -112,6 +108,10 @@ private: ValueTable ModuleValues; std::map, Value*> ForwardReferences; + /// CompactionTable - If a compaction table is active in the current function, + /// this is the mapping that it contains. + std::vector > CompactionTable; + std::vector ParsedBasicBlocks; // ConstantFwdRefs - This maintains a mapping between 's and @@ -156,6 +156,54 @@ private: } } + /// getGlobalTableType - This is just like getType, but when a compaction + /// table is in use, it is ignored. Also, no forward references or other + /// fancy features are supported. + const Type *getGlobalTableType(unsigned Slot) { + if (Slot < Type::FirstDerivedTyID) { + const Type *Ty = Type::getPrimitiveType((Type::PrimitiveID)Slot); + assert(Ty && "Not a primitive type ID?"); + return Ty; + } + Slot -= Type::FirstDerivedTyID; + if (Slot >= ModuleTypeValues.size()) + throw std::string("Illegal compaction table type reference!"); + return ModuleTypeValues[Slot]; + } + + unsigned getGlobalTableTypeSlot(const Type *Ty) { + if (Ty->isPrimitiveType()) + return Ty->getPrimitiveID(); + TypeValuesListTy::iterator I = find(ModuleTypeValues.begin(), + ModuleTypeValues.end(), Ty); + if (I == ModuleTypeValues.end()) + throw std::string("Didn't find type in ModuleTypeValues."); + return Type::FirstDerivedTyID + (&*I - &ModuleTypeValues[0]); + } + + /// getGlobalTableValue - This is just like getValue, but when a compaction + /// table is in use, it is ignored. Also, no forward references or other + /// fancy features are supported. + Value *getGlobalTableValue(const Type *Ty, unsigned SlotNo) { + // FIXME: getTypeSlot is inefficient! + unsigned TyID = getGlobalTableTypeSlot(Ty); + + if (TyID != Type::LabelTyID) { + if (SlotNo == 0) + return Constant::getNullValue(Ty); + --SlotNo; + } + + if (TyID >= ModuleValues.size() || ModuleValues[TyID] == 0 || + SlotNo >= ModuleValues[TyID]->getNumOperands()) { + std::cerr << TyID << ", " << SlotNo << ": " << ModuleValues.size() << ", " + << (void*)ModuleValues[TyID] << ", " + << ModuleValues[TyID]->getNumOperands() << "\n"; + throw std::string("Corrupt compaction table entry!"); + } + return ModuleValues[TyID]->getOperand(SlotNo); + } + public: void ParseModule(const unsigned char * Buf, const unsigned char *End); void materializeFunction(Function *F); @@ -166,6 +214,7 @@ private: void ParseSymbolTable(const unsigned char *&Buf, const unsigned char *End, SymbolTable *, Function *CurrentFunction); void ParseFunction(const unsigned char *&Buf, const unsigned char *End); + void ParseCompactionTable(const unsigned char *&Buf,const unsigned char *End); void ParseGlobalTypes(const unsigned char *&Buf, const unsigned char *EndBuf); BasicBlock *ParseBasicBlock(const unsigned char *&Buf,