From 00c28a7481324a4804badbdbabb555c0d94e66f3 Mon Sep 17 00:00:00 2001 From: Reid Spencer Date: Thu, 10 Jun 2004 08:09:13 +0000 Subject: [PATCH] Implemented the bulk of the functionality. Cleaned up the code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@14113 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Bytecode/Analyzer.h | 31 +- lib/Bytecode/Analyzer/Analyzer.cpp | 87 +++- lib/Bytecode/Analyzer/AnalyzerWrappers.cpp | 135 +++++- lib/Bytecode/Analyzer/BytecodeHandler.cpp | 220 --------- lib/Bytecode/Analyzer/Dumper.cpp | 3 +- lib/Bytecode/Analyzer/Parser.cpp | 510 ++++++++++++++------- lib/Bytecode/Analyzer/Parser.h | 92 +++- lib/Bytecode/Analyzer/ReaderPrimitives.h | 101 ---- lib/Bytecode/Reader/Analyzer.cpp | 87 +++- lib/Bytecode/Reader/AnalyzerWrappers.cpp | 135 +++++- lib/Bytecode/Reader/Dumper.cpp | 3 +- lib/Bytecode/Reader/Parser.cpp | 510 ++++++++++++++------- lib/Bytecode/Reader/Parser.h | 92 +++- 13 files changed, 1200 insertions(+), 806 deletions(-) delete mode 100644 lib/Bytecode/Analyzer/BytecodeHandler.cpp delete mode 100644 lib/Bytecode/Analyzer/ReaderPrimitives.h diff --git a/include/llvm/Bytecode/Analyzer.h b/include/llvm/Bytecode/Analyzer.h index b1cd009fef4..b60559cd10c 100644 --- a/include/llvm/Bytecode/Analyzer.h +++ b/include/llvm/Bytecode/Analyzer.h @@ -19,6 +19,7 @@ #ifndef LLVM_BYTECODE_ANALYZER_H #define LLVM_BYTECODE_ANALYZER_H +#include "llvm/Bytecode/Format.h" #include #include @@ -33,6 +34,7 @@ struct BytecodeAnalysis { unsigned byteSize; ///< The size of the bytecode file in bytes unsigned numTypes; ///< The number of types unsigned numValues; ///< The number of values + unsigned numBlocks; ///< The number of *bytecode* blocks unsigned numFunctions; ///< The number of functions defined unsigned numConstants; ///< The number of constants unsigned numGlobalVars; ///< The number of global variables @@ -41,29 +43,40 @@ struct BytecodeAnalysis { unsigned numOperands; ///< The number of BBs in all functions unsigned numCmpctnTables; ///< The number of compaction tables unsigned numSymTab; ///< The number of symbol tables + unsigned numAlignment; ///< The number of alignment bytes unsigned maxTypeSlot; ///< The maximum slot number for types unsigned maxValueSlot; ///< The maximum slot number for values - double density; ///< Density of file (bytes/defs) + double fileDensity; ///< Density of file (bytes/definition) + ///< This is the density of the bytecode file. It is the ratio of + ///< the number of bytes to the number of definitions in the file. Smaller + ///< numbers mean the file is more compact (denser). Larger numbers mean + ///< the file is more sparse. + double globalsDensity; ///< density of global defs (bytes/definition) + double functionDensity; ///< Average density of functions (bytes/function) + unsigned vbrCount32; ///< Number of 32-bit vbr values + unsigned vbrCount64; ///< Number of 64-bit vbr values + unsigned vbrCompBytes; ///< Number of vbr bytes (compressed) + unsigned vbrExpdBytes; ///< Number of vbr bytes (expanded) + + typedef std::map BlockSizeMap; + BlockSizeMap BlockSizes; /// A structure that contains various pieces of information related to /// an analysis of a single function. struct BytecodeFunctionInfo { + std::string description; ///< Function type description + std::string name; ///< Name of function if it has one unsigned byteSize; ///< The size of the function in bytecode bytes unsigned numInstructions; ///< The number of instructions in the function unsigned numBasicBlocks; ///< The number of basic blocks in the function unsigned numOperands; ///< The number of operands in the function double density; ///< Density of function double vbrEffectiveness; ///< Effectiveness of variable bit rate encoding. - ///< This is the average number of bytes per unsigned value written in the - ///< vbr encoding. A "perfect" score of 1.0 means all vbr values were - ///< encoded in one byte. A score between 1.0 and 4.0 means that some - ///< savings were achieved. A score of 4.0 means vbr didn't help. A score - ///< greater than 4.0 means vbr negatively impacted size of the file. }; - /// A mapping of function names to the collected information about the - /// function. - std::map FunctionInfo; + /// A mapping of function slot numbers to the collected information about + /// the function. + std::map FunctionInfo; /// The content of the bytecode dump std::string BytecodeDump; diff --git a/lib/Bytecode/Analyzer/Analyzer.cpp b/lib/Bytecode/Analyzer/Analyzer.cpp index eb710711d98..133c1fbaa6f 100644 --- a/lib/Bytecode/Analyzer/Analyzer.cpp +++ b/lib/Bytecode/Analyzer/Analyzer.cpp @@ -30,13 +30,13 @@ public: bool handleError(const std::string& str ) { - std::cerr << "Analysis Error: " << str; return false; } void handleStart() { bca.ModuleId.clear(); + bca.numBlocks = 0; bca.numTypes = 0; bca.numValues = 0; bca.numFunctions = 0; @@ -49,16 +49,38 @@ public: bca.numSymTab = 0; bca.maxTypeSlot = 0; bca.maxValueSlot = 0; - bca.density = 0.0; + bca.numAlignment = 0; + bca.fileDensity = 0.0; + bca.globalsDensity = 0.0; + bca.functionDensity = 0.0; + bca.vbrCount32 = 0; + bca.vbrCount64 = 0; + bca.vbrCompBytes = 0; + bca.vbrExpdBytes = 0; bca.FunctionInfo.clear(); bca.BytecodeDump.clear(); + bca.BlockSizes[BytecodeFormat::Module] = 0; + bca.BlockSizes[BytecodeFormat::Function] = 0; + bca.BlockSizes[BytecodeFormat::ConstantPool] = 0; + bca.BlockSizes[BytecodeFormat::SymbolTable] = 0; + bca.BlockSizes[BytecodeFormat::ModuleGlobalInfo] = 0; + bca.BlockSizes[BytecodeFormat::GlobalTypePlane] = 0; + bca.BlockSizes[BytecodeFormat::BasicBlock] = 0; + bca.BlockSizes[BytecodeFormat::InstructionList] = 0; + bca.BlockSizes[BytecodeFormat::CompactionTable] = 0; } void handleFinish() { - bca.density = bca.numTypes + bca.numFunctions + bca.numConstants + - bca.numGlobalVars + bca.numInstructions; - bca.density /= bca.byteSize; + bca.fileDensity = double(bca.byteSize) / double( bca.numTypes + bca.numValues ); + double globalSize = 0.0; + globalSize += double(bca.BlockSizes[BytecodeFormat::ConstantPool]); + globalSize += double(bca.BlockSizes[BytecodeFormat::ModuleGlobalInfo]); + globalSize += double(bca.BlockSizes[BytecodeFormat::GlobalTypePlane]); + bca.globalsDensity = globalSize / double( bca.numTypes + bca.numConstants + + bca.numGlobalVars ); + bca.functionDensity = double(bca.BlockSizes[BytecodeFormat::Function]) / + double(bca.numFunctions); } void handleModuleBegin(const std::string& id) @@ -78,8 +100,9 @@ public: { } - void handleModuleGlobalsBegin() + void handleModuleGlobalsBegin(unsigned size) { + // bca.globalBytesize += size; } void handleGlobalVariable( @@ -89,6 +112,7 @@ public: ) { bca.numGlobalVars++; + bca.numValues++; } void handleInitializedGV( @@ -99,6 +123,7 @@ public: ) { bca.numGlobalVars++; + bca.numValues++; } virtual void handleType( const Type* Ty ) @@ -111,6 +136,7 @@ public: ) { bca.numFunctions++; + bca.numValues++; } void handleModuleGlobalsEnd() @@ -200,15 +226,19 @@ public: ) { bca.numBasicBlocks++; + bca.numValues++; } bool handleInstruction( unsigned Opcode, const Type* iType, - std::vector& Operands + std::vector& Operands, + unsigned Size ) { bca.numInstructions++; + bca.numValues++; + bca.numOperands += Operands.size(); return Instruction::isTerminator(Opcode); } @@ -227,43 +257,67 @@ public: ) { bca.numConstants++; + bca.numValues++; } void handleConstantValue( Constant * c ) { bca.numConstants++; + bca.numValues++; } void handleConstantArray( - const ArrayType* AT, - std::vector& Elements ) + const ArrayType* AT, + std::vector& Elements ) { bca.numConstants++; + bca.numValues++; } void handleConstantStruct( - const StructType* ST, - std::vector& ElementSlots) + const StructType* ST, + std::vector& ElementSlots) { bca.numConstants++; + bca.numValues++; } void handleConstantPointer( - const PointerType* PT, unsigned Slot) + const PointerType* PT, unsigned Slot) { bca.numConstants++; + bca.numValues++; } void handleConstantString( const ConstantArray* CA ) { bca.numConstants++; + bca.numValues++; } - void handleGlobalConstantsEnd() - { + void handleGlobalConstantsEnd() { } + + void handleAlignment(unsigned numBytes) { + bca.numAlignment += numBytes; } + void handleBlock( + unsigned BType, const unsigned char* StartPtr, unsigned Size) { + bca.numBlocks++; + bca.BlockSizes[llvm::BytecodeFormat::FileBlockIDs(BType)] += Size; + } + + virtual void handleVBR32(unsigned Size ) { + bca.vbrCount32++; + bca.vbrCompBytes += Size; + bca.vbrExpdBytes += sizeof(uint32_t); + } + virtual void handleVBR64(unsigned Size ) { + bca.vbrCount64++; + bca.vbrCompBytes += Size; + bca.vbrExpdBytes += sizeof(uint64_t); + } }; } @@ -277,10 +331,9 @@ void llvm::BytecodeAnalyzer::AnalyzeBytecode( { bca.byteSize = Length; AnalyzerHandler TheHandler(bca); - AbstractBytecodeParser TheParser(&TheHandler); + AbstractBytecodeParser TheParser(&TheHandler, true, true, true); TheParser.ParseBytecode( Buf, Length, ModuleID ); - if ( bca.detailedResults ) - TheParser.ParseAllFunctionBodies(); + TheParser.ParseAllFunctionBodies(); } // vim: sw=2 diff --git a/lib/Bytecode/Analyzer/AnalyzerWrappers.cpp b/lib/Bytecode/Analyzer/AnalyzerWrappers.cpp index 2caf069eb49..fd23dc80867 100644 --- a/lib/Bytecode/Analyzer/AnalyzerWrappers.cpp +++ b/lib/Bytecode/Analyzer/AnalyzerWrappers.cpp @@ -18,6 +18,7 @@ #include "Support/StringExtras.h" #include "Config/unistd.h" #include +#include using namespace llvm; @@ -46,7 +47,7 @@ static std::string ErrnoMessage (int savedErrNum, std::string descr) { } BytecodeFileAnalyzer::BytecodeFileAnalyzer(const std::string &Filename, - BytecodeAnalysis& bca) { + BytecodeAnalysis& bca) { Buffer = (unsigned char*)ReadFileIntoAddressSpace(Filename, Length); if (Buffer == 0) throw "Error reading file '" + Filename + "'."; @@ -84,16 +85,16 @@ namespace { public: BytecodeBufferAnalyzer(const unsigned char *Buf, unsigned Length, - BytecodeAnalysis& bca, const std::string &ModuleID); + BytecodeAnalysis& bca, const std::string &ModuleID); ~BytecodeBufferAnalyzer(); }; } BytecodeBufferAnalyzer::BytecodeBufferAnalyzer(const unsigned char *Buf, - unsigned Length, - BytecodeAnalysis& bca, - const std::string &ModuleID) { + unsigned Length, + BytecodeAnalysis& bca, + const std::string &ModuleID) { // If not aligned, allocate a new buffer to hold the bytecode... const unsigned char *ParseBegin = 0; if ((intptr_t)Buf & 3) { @@ -200,28 +201,118 @@ void llvm::AnalyzeBytecodeBuffer( /// This function prints the contents of rhe BytecodeAnalysis structure in /// a human legible form. /// @brief Print BytecodeAnalysis structure to an ostream +namespace { +inline static void print(std::ostream& Out, const char*title, + unsigned val, bool nl = true ) { + Out << std::setw(30) << std::right << title + << std::setw(0) << ": " + << std::setw(9) << val << "\n"; +} + +inline static void print(std::ostream&Out, const char*title, + double val ) { + Out << std::setw(30) << std::right << title + << std::setw(0) << ": " + << std::setw(9) << std::setprecision(6) << val << "\n" ; +} + +inline static void print(std::ostream&Out, const char*title, + double top, double bot ) { + Out << std::setw(30) << std::right << title + << std::setw(0) << ": " + << std::setw(9) << std::setprecision(6) << top + << " (" << std::left << std::setw(0) << std::setprecision(4) + << (top/bot)*100.0 << "%)\n"; +} +inline static void print(std::ostream&Out, const char*title, + std::string val, bool nl = true) { + Out << std::setw(30) << std::right << title + << std::setw(0) << ": " + << std::left << val << (nl ? "\n" : ""); +} + +} + void llvm::PrintBytecodeAnalysis(BytecodeAnalysis& bca, std::ostream& Out ) { - Out << " Bytecode Analysis of: " << bca.ModuleId << "\n"; - Out << " File Size: " << bca.byteSize << "\n"; - Out << " Number Of Types: " << bca.numTypes << "\n"; - Out << " Number Of Constants: " << bca.numConstants << "\n"; - Out << " Number Of Global Variables: " << bca.numGlobalVars << "\n"; - Out << " Number Of Functions: " << bca.numFunctions << "\n"; - Out << " Number Of Basic Blocks: " << bca.numBasicBlocks << "\n"; - Out << " Number Of Instructions: " << bca.numInstructions << "\n"; - Out << " Number Of Operands: " << bca.numOperands << "\n"; - Out << "Number Of Compaction Tables: " << bca.numCmpctnTables << "\n"; - Out << " Number Of Symbol Tables: " << bca.numSymTab << "\n"; - Out << " Maximum Type Slot Number: " << bca.maxTypeSlot << "\n"; - Out << " Maximum Value Slot Number: " << bca.maxValueSlot << "\n"; - Out << " Bytecode Density: " << bca.density << "\n"; + print(Out, "Bytecode Analysis Of Module", bca.ModuleId); + print(Out, "File Size", bca.byteSize); + print(Out, "Bytecode Compression Index",std::string("TBD")); + print(Out, "Number Of Bytecode Blocks", bca.numBlocks); + print(Out, "Number Of Types", bca.numTypes); + print(Out, "Number Of Values", bca.numValues); + print(Out, "Number Of Constants", bca.numConstants); + print(Out, "Number Of Global Variables", bca.numGlobalVars); + print(Out, "Number Of Functions", bca.numFunctions); + print(Out, "Number Of Basic Blocks", bca.numBasicBlocks); + print(Out, "Number Of Instructions", bca.numInstructions); + print(Out, "Number Of Operands", bca.numOperands); + print(Out, "Number Of Compaction Tables", bca.numCmpctnTables); + print(Out, "Number Of Symbol Tables", bca.numSymTab); + print(Out, "Maximum Type Slot Number", bca.maxTypeSlot); + print(Out, "Maximum Value Slot Number", bca.maxValueSlot); + print(Out, "Bytes Thrown To Alignment", double(bca.numAlignment), + double(bca.byteSize)); + print(Out, "File Density (bytes/def)", bca.fileDensity); + print(Out, "Globals Density (bytes/def)", bca.globalsDensity); + print(Out, "Function Density (bytes/func)", bca.functionDensity); + print(Out, "Number of VBR 32-bit Integers", bca.vbrCount32); + print(Out, "Number of VBR 64-bit Integers", bca.vbrCount64); + print(Out, "Number of VBR Compressed Bytes", bca.vbrCompBytes); + print(Out, "Number of VBR Expanded Bytes", bca.vbrExpdBytes); + print(Out, "VBR Savings", + double(bca.vbrExpdBytes)-double(bca.vbrCompBytes), + double(bca.byteSize)); - if ( bca.detailedResults ) - Out << "Detailed Results Not Implemented Yet.\n"; + if ( bca.detailedResults ) { + print(Out, "Module Bytes", + double(bca.BlockSizes[BytecodeFormat::Module]), + double(bca.byteSize)); + print(Out, "Function Bytes", + double(bca.BlockSizes[BytecodeFormat::Function]), + double(bca.byteSize)); + print(Out, "Constant Pool Bytes", + double(bca.BlockSizes[BytecodeFormat::ConstantPool]), + double(bca.byteSize)); + print(Out, "Symbol Table Bytes", + double(bca.BlockSizes[BytecodeFormat::SymbolTable]), + double(bca.byteSize)); + print(Out, "Module Global Info Bytes", + double(bca.BlockSizes[BytecodeFormat::ModuleGlobalInfo]), + double(bca.byteSize)); + print(Out, "Global Type Plane Bytes", + double(bca.BlockSizes[BytecodeFormat::GlobalTypePlane]), + double(bca.byteSize)); + print(Out, "Basic Block Bytes", + double(bca.BlockSizes[BytecodeFormat::BasicBlock]), + double(bca.byteSize)); + print(Out, "Instruction List Bytes", + double(bca.BlockSizes[BytecodeFormat::InstructionList]), + double(bca.byteSize)); + print(Out, "Compaction Table Bytes", + double(bca.BlockSizes[BytecodeFormat::CompactionTable]), + double(bca.byteSize)); + + std::map::iterator I = + bca.FunctionInfo.begin(); + std::map::iterator E = + bca.FunctionInfo.end(); + + while ( I != E ) { + Out << std::left << std::setw(0); + Out << "Function: " << I->second.name << " Slot=" << I->first << "\n"; + print(Out,"Type:", I->second.description); + print(Out,"Byte Size", I->second.byteSize); + print(Out,"Instructions", I->second.numInstructions); + print(Out,"Basic Blocks", I->second.numBasicBlocks); + print(Out,"Operand", I->second.numOperands); + print(Out,"Function Density", I->second.density); + print(Out,"VBR Effectiveness", I->second.vbrEffectiveness); + ++I; + } + } if ( bca.dumpBytecode ) Out << bca.BytecodeDump; } - // vim: sw=2 diff --git a/lib/Bytecode/Analyzer/BytecodeHandler.cpp b/lib/Bytecode/Analyzer/BytecodeHandler.cpp deleted file mode 100644 index 6ceaf381165..00000000000 --- a/lib/Bytecode/Analyzer/BytecodeHandler.cpp +++ /dev/null @@ -1,220 +0,0 @@ -//===-- BytecodeHandler.cpp - Parsing Handler -------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file was developed by Reid Spencer and is distributed under the -// University of Illinois Open Source License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This header file defines the BytecodeHandler class that gets called by the -// AbstractBytecodeParser when parsing events occur. -// -//===----------------------------------------------------------------------===// - -#include "Parser.h" - -using namespace llvm; - -bool BytecodeHandler::handleError(const std::string& str ) -{ - return false; -} - -void BytecodeHandler::handleStart() -{ -} - -void BytecodeHandler::handleFinish() -{ -} - -void BytecodeHandler::handleModuleBegin(const std::string& id) -{ -} - -void BytecodeHandler::handleModuleEnd(const std::string& id) -{ -} - -void BytecodeHandler::handleVersionInfo( - unsigned char RevisionNum, ///< Byte code revision number - Module::Endianness Endianness, ///< Endianness indicator - Module::PointerSize PointerSize ///< PointerSize indicator -) -{ -} - -void BytecodeHandler::handleModuleGlobalsBegin() -{ -} - -void BytecodeHandler::handleGlobalVariable( - const Type* ElemType, ///< The type of the global variable - bool isConstant, ///< Whether the GV is constant or not - GlobalValue::LinkageTypes ///< The linkage type of the GV -) -{ -} - -void BytecodeHandler::handleInitializedGV( - const Type* ElemType, ///< The type of the global variable - bool isConstant, ///< Whether the GV is constant or not - GlobalValue::LinkageTypes,///< The linkage type of the GV - unsigned initSlot ///< Slot number of GV's initializer -) -{ -} - -void BytecodeHandler::handleType( const Type* Ty ) -{ -} - -void BytecodeHandler::handleFunctionDeclaration( - const Type* FuncType ///< The type of the function -) -{ -} - -void BytecodeHandler::handleModuleGlobalsEnd() -{ -} - -void BytecodeHandler::handleCompactionTableBegin() -{ -} - -void BytecodeHandler::handleCompactionTablePlane( - unsigned Ty, - unsigned NumEntries -) -{ -} - -void BytecodeHandler::handleCompactionTableType( - unsigned i, - unsigned TypSlot, - const Type* -) -{ -} - -void BytecodeHandler::handleCompactionTableValue( - unsigned i, - unsigned ValSlot, - const Type* -) -{ -} - -void BytecodeHandler::handleCompactionTableEnd() -{ -} - -void BytecodeHandler::handleSymbolTableBegin() -{ -} - -void BytecodeHandler::handleSymbolTablePlane( - unsigned Ty, - unsigned NumEntries, - const Type* Typ -) -{ -} - -void BytecodeHandler::handleSymbolTableType( - unsigned i, - unsigned slot, - const std::string& name -) -{ -} - -void BytecodeHandler::handleSymbolTableValue( - unsigned i, - unsigned slot, - const std::string& name -) -{ -} - -void BytecodeHandler::handleSymbolTableEnd() -{ -} - -void BytecodeHandler::handleFunctionBegin( - const Type* FType, - GlobalValue::LinkageTypes linkage -) -{ -} - -void BytecodeHandler::handleFunctionEnd( - const Type* FType -) -{ -} - -void BytecodeHandler::handleBasicBlockBegin( - unsigned blocknum -) -{ -} - -bool BytecodeHandler::handleInstruction( - unsigned Opcode, - const Type* iType, - std::vector& Operands -) -{ - return false; -} - -void BytecodeHandler::handleBasicBlockEnd(unsigned blocknum) -{ -} - -void BytecodeHandler::handleGlobalConstantsBegin() -{ -} - -void BytecodeHandler::handleConstantExpression( - unsigned Opcode, - const Type* Typ, - std::vector > ArgVec - ) -{ -} - -void BytecodeHandler::handleConstantValue( Constant * c ) -{ -} - -void BytecodeHandler::handleConstantArray( - const ArrayType* AT, - std::vector& Elements ) -{ -} - -void BytecodeHandler::handleConstantStruct( - const StructType* ST, - std::vector& ElementSlots) -{ -} - -void BytecodeHandler::handleConstantPointer( - const PointerType* PT, unsigned Slot) -{ -} - -void BytecodeHandler::handleConstantString( const ConstantArray* CA ) -{ -} - - -void BytecodeHandler::handleGlobalConstantsEnd() -{ -} - -// vim: sw=2 diff --git a/lib/Bytecode/Analyzer/Dumper.cpp b/lib/Bytecode/Analyzer/Dumper.cpp index 12752ff883d..d61afe42558 100644 --- a/lib/Bytecode/Analyzer/Dumper.cpp +++ b/lib/Bytecode/Analyzer/Dumper.cpp @@ -212,7 +212,8 @@ public: virtual bool handleInstruction( unsigned Opcode, const Type* iType, - std::vector& Operands + std::vector& Operands, + unsigned Size ) { std::cout << " INST: OpCode=" diff --git a/lib/Bytecode/Analyzer/Parser.cpp b/lib/Bytecode/Analyzer/Parser.cpp index 743db6c3153..80800e75a65 100644 --- a/lib/Bytecode/Analyzer/Parser.cpp +++ b/lib/Bytecode/Analyzer/Parser.cpp @@ -1,4 +1,4 @@ -//===- Reader.cpp - Code to read bytecode files ---------------------------===// +//===- Parser.cpp - Code to parse bytecode files --------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This library implements the functionality defined in llvm/Bytecode/Reader.h +// This library implements the functionality defined in llvm/Bytecode/Parser.h // // Note that this library should be as fast as possible, reentrant, and // threadsafe!! @@ -17,7 +17,6 @@ //===----------------------------------------------------------------------===// #include "AnalyzerInternals.h" -#include "ReaderPrimitives.h" #include "llvm/Module.h" #include "llvm/Bytecode/Format.h" #include "Support/StringExtras.h" @@ -37,40 +36,128 @@ using namespace llvm; #define BCR_TRACE(n, X) #endif -#define PARSE_ERROR(inserters) \ - { \ +#define PARSE_ERROR(inserters) { \ std::ostringstream errormsg; \ errormsg << inserters; \ if ( ! handler->handleError( errormsg.str() ) ) \ throw std::string(errormsg.str()); \ } +inline bool AbstractBytecodeParser::moreInBlock() { + return At < BlockEnd; +} -inline void AbstractBytecodeParser::readBlock(const unsigned char *&Buf, - const unsigned char *EndBuf, - unsigned &Type, unsigned &Size) -{ - Type = read(Buf, EndBuf); - Size = read(Buf, EndBuf); +inline void AbstractBytecodeParser::checkPastBlockEnd(const char * block_name) { + if ( At > BlockEnd ) + PARSE_ERROR("Attempt to read past the end of " << block_name << " block."); +} + +inline void AbstractBytecodeParser::align32() { + BufPtr Save = At; + At = (const unsigned char *)((unsigned long)(At+3) & (~3UL)); + if ( reportAlignment && At > Save ) handler->handleAlignment( At - Save ); + if (At > BlockEnd) + throw std::string("Ran out of data while aligning!"); +} + +inline unsigned AbstractBytecodeParser::read_uint() { + if (At+4 > BlockEnd) + throw std::string("Ran out of data reading uint!"); + At += 4; + return At[-4] | (At[-3] << 8) | (At[-2] << 16) | (At[-1] << 24); +} + +inline unsigned AbstractBytecodeParser::read_vbr_uint() { + unsigned Shift = 0; + unsigned Result = 0; + BufPtr Save = At; + + do { + if (At == BlockEnd) + throw std::string("Ran out of data reading vbr_uint!"); + Result |= (unsigned)((*At++) & 0x7F) << Shift; + Shift += 7; + } while (At[-1] & 0x80); + if (reportVBR) + handler->handleVBR32(At-Save); + return Result; +} + +inline uint64_t AbstractBytecodeParser::read_vbr_uint64() { + unsigned Shift = 0; + uint64_t Result = 0; + BufPtr Save = At; + + do { + if (At == BlockEnd) + throw std::string("Ran out of data reading vbr_uint64!"); + Result |= (uint64_t)((*At++) & 0x7F) << Shift; + Shift += 7; + } while (At[-1] & 0x80); + if (reportVBR) + handler->handleVBR64(At-Save); + return Result; +} + +inline int64_t AbstractBytecodeParser::read_vbr_int64() { + uint64_t R = read_vbr_uint64(); + if (R & 1) { + if (R != 1) + return -(int64_t)(R >> 1); + else // There is no such thing as -0 with integers. "-0" really means + // 0x8000000000000000. + return 1LL << 63; + } else + return (int64_t)(R >> 1); +} + +inline std::string AbstractBytecodeParser::read_str() { + unsigned Size = read_vbr_uint(); + const unsigned char *OldAt = At; + At += Size; + if (At > BlockEnd) // Size invalid? + throw std::string("Ran out of data reading a string!"); + return std::string((char*)OldAt, Size); +} + +inline void AbstractBytecodeParser::read_data(void *Ptr, void *End) { + unsigned char *Start = (unsigned char *)Ptr; + unsigned Amount = (unsigned char *)End - Start; + if (At+Amount > BlockEnd) + throw std::string("Ran out of data!"); + std::copy(At, At+Amount, Start); + At += Amount; +} + +inline void AbstractBytecodeParser::readBlock(unsigned &Type, unsigned &Size) { + Type = read_uint(); + Size = read_uint(); + BlockStart = At; + if ( At + Size > BlockEnd ) + throw std::string("Attempt to size a block past end of memory"); + BlockEnd = At + Size; + if ( reportBlocks ) { + handler->handleBlock( Type, BlockStart, Size ); + } } const Type *AbstractBytecodeParser::getType(unsigned ID) { - //cerr << "Looking up Type ID: " << ID << "\n"; +//cerr << "Looking up Type ID: " << ID << "\n"; - if (ID < Type::FirstDerivedTyID) - if (const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID)) - return T; // Asked for a primitive type... +if (ID < Type::FirstDerivedTyID) + if (const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID)) + return T; // Asked for a primitive type... - // Otherwise, derived types need offset... - ID -= Type::FirstDerivedTyID; +// Otherwise, derived types need offset... +ID -= Type::FirstDerivedTyID; - if (!CompactionTypeTable.empty()) { - if (ID >= CompactionTypeTable.size()) - PARSE_ERROR("Type ID out of range for compaction table!"); - return CompactionTypeTable[ID]; - } +if (!CompactionTypeTable.empty()) { + if (ID >= CompactionTypeTable.size()) + PARSE_ERROR("Type ID out of range for compaction table!"); + return CompactionTypeTable[ID]; +} - // Is it a module-level type? +// Is it a module-level type? if (ID < ModuleTypes.size()) return ModuleTypes[ID].get(); @@ -83,12 +170,12 @@ const Type *AbstractBytecodeParser::getType(unsigned ID) { return Type::VoidTy; } -bool AbstractBytecodeParser::ParseInstruction(BufPtr& Buf, BufPtr EndBuf, - std::vector &Operands) { +bool AbstractBytecodeParser::ParseInstruction(std::vector &Operands) { + BufPtr SaveAt = At; Operands.clear(); unsigned iType = 0; unsigned Opcode = 0; - unsigned Op = read(Buf, EndBuf); + unsigned Op = read_uint(); // bits Instruction format: Common to all formats // -------------------------- @@ -134,61 +221,56 @@ bool AbstractBytecodeParser::ParseInstruction(BufPtr& Buf, BufPtr EndBuf, Operands[2] = (Op >> 26) & 63; break; case 0: - Buf -= 4; // Hrm, try this again... - Opcode = read_vbr_uint(Buf, EndBuf); + At -= 4; // Hrm, try this again... + Opcode = read_vbr_uint(); Opcode >>= 2; - iType = read_vbr_uint(Buf, EndBuf); + iType = read_vbr_uint(); - unsigned NumOperands = read_vbr_uint(Buf, EndBuf); + unsigned NumOperands = read_vbr_uint(); Operands.resize(NumOperands); if (NumOperands == 0) PARSE_ERROR("Zero-argument instruction found; this is invalid."); for (unsigned i = 0; i != NumOperands; ++i) - Operands[i] = read_vbr_uint(Buf, EndBuf); - align32(Buf, EndBuf); + Operands[i] = read_vbr_uint(); + align32(); break; } - return handler->handleInstruction(Opcode, getType(iType), Operands); + return handler->handleInstruction(Opcode, getType(iType), Operands, At-SaveAt); } /// ParseBasicBlock - In LLVM 1.0 bytecode files, we used to output one /// basicblock at a time. This method reads in one of the basicblock packets. -void AbstractBytecodeParser::ParseBasicBlock(BufPtr &Buf, - BufPtr EndBuf, - unsigned BlockNo) { +void AbstractBytecodeParser::ParseBasicBlock( unsigned BlockNo) { handler->handleBasicBlockBegin( BlockNo ); std::vector Args; bool is_terminating = false; - while (Buf < EndBuf) - is_terminating = ParseInstruction(Buf, EndBuf, Args); + while ( moreInBlock() ) + is_terminating = ParseInstruction(Args); if ( ! is_terminating ) - PARSE_ERROR( - "Failed to recognize instruction as terminating at end of block"); + PARSE_ERROR("Non-terminated basic block found!"); handler->handleBasicBlockEnd( BlockNo ); } - /// ParseInstructionList - Parse all of the BasicBlock's & Instruction's in the /// body of a function. In post 1.0 bytecode files, we no longer emit basic /// block individually, in order to avoid per-basic-block overhead. -unsigned AbstractBytecodeParser::ParseInstructionList( BufPtr &Buf, - BufPtr EndBuf) { +unsigned AbstractBytecodeParser::ParseInstructionList() { unsigned BlockNo = 0; std::vector Args; - while (Buf < EndBuf) { + while ( moreInBlock() ) { handler->handleBasicBlockBegin( BlockNo ); // Read instructions into this basic block until we get to a terminator bool is_terminating = false; - while (Buf < EndBuf && !is_terminating ) - is_terminating = ParseInstruction(Buf, EndBuf, Args ) ; + while (moreInBlock() && !is_terminating ) + is_terminating = ParseInstruction(Args ) ; if (!is_terminating) PARSE_ERROR( "Non-terminated basic block found!"); @@ -199,36 +281,34 @@ unsigned AbstractBytecodeParser::ParseInstructionList( BufPtr &Buf, return BlockNo; } -void AbstractBytecodeParser::ParseSymbolTable(BufPtr &Buf, BufPtr EndBuf) { +void AbstractBytecodeParser::ParseSymbolTable() { handler->handleSymbolTableBegin(); - while (Buf < EndBuf) { + while ( moreInBlock() ) { // Symtab block header: [num entries][type id number] - unsigned NumEntries = read_vbr_uint(Buf, EndBuf); - unsigned Typ = read_vbr_uint(Buf, EndBuf); + unsigned NumEntries = read_vbr_uint(); + unsigned Typ = read_vbr_uint(); const Type *Ty = getType(Typ); handler->handleSymbolTablePlane( Typ, NumEntries, Ty ); for (unsigned i = 0; i != NumEntries; ++i) { // Symtab entry: [def slot #][name] - unsigned slot = read_vbr_uint(Buf, EndBuf); - std::string Name = read_str(Buf, EndBuf); + unsigned slot = read_vbr_uint(); + std::string Name = read_str(); if (Typ == Type::TypeTyID) handler->handleSymbolTableType( i, slot, Name ); else - handler->handleSymbolTableValue( i, slot, Name ); + handler->handleSymbolTableValue( i, slot, Name ); } } - - if (Buf > EndBuf) - PARSE_ERROR("Tried to read past end of buffer while reading symbol table."); + checkPastBlockEnd("Symbol Table"); handler->handleSymbolTableEnd(); } -void AbstractBytecodeParser::ParseFunctionLazily(BufPtr &Buf, BufPtr EndBuf) { +void AbstractBytecodeParser::ParseFunctionLazily() { if (FunctionSignatureList.empty()) throw std::string("FunctionSignatureList empty!"); @@ -236,9 +316,10 @@ void AbstractBytecodeParser::ParseFunctionLazily(BufPtr &Buf, BufPtr EndBuf) { FunctionSignatureList.pop_back(); // Save the information for future reading of the function - LazyFunctionLoadMap[FType] = LazyFunctionInfo(Buf, EndBuf); + LazyFunctionLoadMap[FType] = LazyFunctionInfo(BlockStart, BlockEnd); + // Pretend we've `parsed' this function - Buf = EndBuf; + At = BlockEnd; } void AbstractBytecodeParser::ParseNextFunction(Type* FType) { @@ -251,21 +332,20 @@ void AbstractBytecodeParser::ParseNextFunction(Type* FType) { return; } - BufPtr Buf = Fi->second.Buf; - BufPtr EndBuf = Fi->second.EndBuf; + BlockStart = At = Fi->second.Buf; + BlockEnd = Fi->second.Buf; assert(Fi->first == FType); LazyFunctionLoadMap.erase(Fi); - this->ParseFunctionBody( FType, Buf, EndBuf ); + this->ParseFunctionBody( FType ); } -void AbstractBytecodeParser::ParseFunctionBody(const Type* FType, - BufPtr &Buf, BufPtr EndBuf ) { +void AbstractBytecodeParser::ParseFunctionBody(const Type* FType ) { GlobalValue::LinkageTypes Linkage = GlobalValue::ExternalLinkage; - unsigned LinkageType = read_vbr_uint(Buf, EndBuf); + unsigned LinkageType = read_vbr_uint(); switch (LinkageType) { case 0: Linkage = GlobalValue::ExternalLinkage; break; case 1: Linkage = GlobalValue::WeakLinkage; break; @@ -284,43 +364,45 @@ void AbstractBytecodeParser::ParseFunctionBody(const Type* FType, unsigned BlockNum = 0; bool InsertedArguments = false; - while (Buf < EndBuf) { + BufPtr MyEnd = BlockEnd; + while ( At < MyEnd ) { unsigned Type, Size; - BufPtr OldBuf = Buf; - readBlock(Buf, EndBuf, Type, Size); + BufPtr OldAt = At; + readBlock(Type, Size); switch (Type) { case BytecodeFormat::ConstantPool: - ParseConstantPool(Buf, Buf+Size, FunctionTypes ); + ParseConstantPool(FunctionTypes ); break; case BytecodeFormat::CompactionTable: - ParseCompactionTable(Buf, Buf+Size); + ParseCompactionTable(); break; case BytecodeFormat::BasicBlock: - ParseBasicBlock(Buf, Buf+Size, BlockNum++); + ParseBasicBlock(BlockNum++); break; case BytecodeFormat::InstructionList: if (BlockNum) - PARSE_ERROR("InstructionList must come before basic blocks!"); - BlockNum = ParseInstructionList(Buf, Buf+Size); + PARSE_ERROR("InstructionList must come before basic blocks!"); + BlockNum = ParseInstructionList(); break; case BytecodeFormat::SymbolTable: - ParseSymbolTable(Buf, Buf+Size ); + ParseSymbolTable(); break; default: - Buf += Size; - if (OldBuf > Buf) - PARSE_ERROR("Wrapped around reading bytecode"); + At += Size; + if (OldAt > At) + PARSE_ERROR("Wrapped around reading bytecode"); break; } + BlockEnd = MyEnd; // Malformed bc file if read past end of block. - align32(Buf, EndBuf); + align32(); } handler->handleFunctionEnd(FType); @@ -336,21 +418,24 @@ void AbstractBytecodeParser::ParseAllFunctionBodies() { while ( Fi != Fe ) { const Type* FType = Fi->first; - this->ParseFunctionBody(FType, Fi->second.Buf, Fi->second.EndBuf); + BlockStart = At = Fi->second.Buf; + BlockEnd = Fi->second.EndBuf; + this->ParseFunctionBody(FType); + ++Fi; } } -void AbstractBytecodeParser::ParseCompactionTable(BufPtr &Buf, BufPtr End) { +void AbstractBytecodeParser::ParseCompactionTable() { handler->handleCompactionTableBegin(); - while (Buf != End) { - unsigned NumEntries = read_vbr_uint(Buf, End); + while ( moreInBlock() ) { + unsigned NumEntries = read_vbr_uint(); unsigned Ty; if ((NumEntries & 3) == 3) { NumEntries >>= 2; - Ty = read_vbr_uint(Buf, End); + Ty = read_vbr_uint(); } else { Ty = NumEntries >> 2; NumEntries &= 3; @@ -360,25 +445,24 @@ void AbstractBytecodeParser::ParseCompactionTable(BufPtr &Buf, BufPtr End) { if (Ty == Type::TypeTyID) { for (unsigned i = 0; i != NumEntries; ++i) { - unsigned TypeSlot = read_vbr_uint(Buf,End); + unsigned TypeSlot = read_vbr_uint(); const Type *Typ = getGlobalTableType(TypeSlot); - handler->handleCompactionTableType( i, TypeSlot, Typ ); + handler->handleCompactionTableType( i, TypeSlot, Typ ); } } else { const Type *Typ = getType(Ty); // Push the implicit zero for (unsigned i = 0; i != NumEntries; ++i) { - unsigned ValSlot = read_vbr_uint(Buf, End); - handler->handleCompactionTableValue( i, ValSlot, Typ ); + unsigned ValSlot = read_vbr_uint(); + handler->handleCompactionTableValue( i, ValSlot, Typ ); } } } handler->handleCompactionTableEnd(); } -const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, - const unsigned char *EndBuf) { - unsigned PrimType = read_vbr_uint(Buf, EndBuf); +const Type *AbstractBytecodeParser::ParseTypeConstant() { + unsigned PrimType = read_vbr_uint(); const Type *Val = 0; if ((Val = Type::getPrimitiveType((Type::PrimitiveID)PrimType))) @@ -386,13 +470,13 @@ const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, switch (PrimType) { case Type::FunctionTyID: { - const Type *RetType = getType(read_vbr_uint(Buf, EndBuf)); + const Type *RetType = getType(read_vbr_uint()); - unsigned NumParams = read_vbr_uint(Buf, EndBuf); + unsigned NumParams = read_vbr_uint(); std::vector Params; while (NumParams--) - Params.push_back(getType(read_vbr_uint(Buf, EndBuf))); + Params.push_back(getType(read_vbr_uint())); bool isVarArg = Params.size() && Params.back() == Type::VoidTy; if (isVarArg) Params.pop_back(); @@ -402,10 +486,10 @@ const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, return result; } case Type::ArrayTyID: { - unsigned ElTyp = read_vbr_uint(Buf, EndBuf); + unsigned ElTyp = read_vbr_uint(); const Type *ElementType = getType(ElTyp); - unsigned NumElements = read_vbr_uint(Buf, EndBuf); + unsigned NumElements = read_vbr_uint(); BCR_TRACE(5, "Array Type Constant #" << ElTyp << " size=" << NumElements << "\n"); @@ -415,10 +499,10 @@ const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, } case Type::StructTyID: { std::vector Elements; - unsigned Typ = read_vbr_uint(Buf, EndBuf); + unsigned Typ = read_vbr_uint(); while (Typ) { // List is terminated by void/0 typeid Elements.push_back(getType(Typ)); - Typ = read_vbr_uint(Buf, EndBuf); + Typ = read_vbr_uint(); } Type* result = StructType::get(Elements); @@ -426,7 +510,7 @@ const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, return result; } case Type::PointerTyID: { - unsigned ElTyp = read_vbr_uint(Buf, EndBuf); + unsigned ElTyp = read_vbr_uint(); BCR_TRACE(5, "Pointer Type Constant #" << ElTyp << "\n"); Type* result = PointerType::get(getType(ElTyp)); handler->handleType( result ); @@ -455,10 +539,9 @@ const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, // something and when we reread the type later, we can replace the opaque type // with a new resolved concrete type. // -void AbstractBytecodeParser::ParseTypeConstants(const unsigned char *&Buf, - const unsigned char *EndBuf, - TypeListTy &Tab, - unsigned NumEntries) { +void AbstractBytecodeParser::ParseTypeConstants( + TypeListTy &Tab, unsigned NumEntries +) { assert(Tab.size() == 0 && "should not have read type constants in before!"); // Insert a bunch of opaque types to be resolved later... @@ -470,7 +553,7 @@ void AbstractBytecodeParser::ParseTypeConstants(const unsigned char *&Buf, // opaque types just inserted. // for (unsigned i = 0; i != NumEntries; ++i) { - const Type *NewTy = ParseTypeConstant(Buf, EndBuf), *OldTy = Tab[i].get(); + const Type *NewTy = ParseTypeConstant(), *OldTy = Tab[i].get(); if (NewTy == 0) throw std::string("Couldn't parse type!"); BCR_TRACE(4, "#" << i << ": Read Type Constant: '" << NewTy << "' Replacing: " << OldTy << "\n"); @@ -497,18 +580,16 @@ void AbstractBytecodeParser::ParseTypeConstants(const unsigned char *&Buf, } -void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, - const unsigned char *EndBuf, - unsigned TypeID) { +void AbstractBytecodeParser::ParseConstantValue(unsigned TypeID) { // We must check for a ConstantExpr before switching by type because // a ConstantExpr can be of any type, and has no explicit value. // // 0 if not expr; numArgs if is expr - unsigned isExprNumArgs = read_vbr_uint(Buf, EndBuf); + unsigned isExprNumArgs = read_vbr_uint(); if (isExprNumArgs) { - unsigned Opcode = read_vbr_uint(Buf, EndBuf); + unsigned Opcode = read_vbr_uint(); const Type* Typ = getType(TypeID); // FIXME: Encoding of constant exprs could be much more compact! @@ -517,8 +598,8 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, // Read the slot number and types of each of the arguments for (unsigned i = 0; i != isExprNumArgs; ++i) { - unsigned ArgValSlot = read_vbr_uint(Buf, EndBuf); - unsigned ArgTypeSlot = read_vbr_uint(Buf, EndBuf); + unsigned ArgValSlot = read_vbr_uint(); + unsigned ArgTypeSlot = read_vbr_uint(); BCR_TRACE(4, "CE Arg " << i << ": Type: '" << *getType(ArgTypeSlot) << "' slot: " << ArgValSlot << "\n"); @@ -534,7 +615,7 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, const Type *Ty = getType(TypeID); switch (Ty->getPrimitiveID()) { case Type::BoolTyID: { - unsigned Val = read_vbr_uint(Buf, EndBuf); + unsigned Val = read_vbr_uint(); if (Val != 0 && Val != 1) PARSE_ERROR("Invalid boolean value read."); @@ -545,7 +626,7 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, case Type::UByteTyID: // Unsigned integer types... case Type::UShortTyID: case Type::UIntTyID: { - unsigned Val = read_vbr_uint(Buf, EndBuf); + unsigned Val = read_vbr_uint(); if (!ConstantUInt::isValueValidForType(Ty, Val)) throw std::string("Invalid unsigned byte/short/int read."); handler->handleConstantValue( ConstantUInt::get(Ty, Val) ); @@ -553,7 +634,7 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, } case Type::ULongTyID: { - handler->handleConstantValue( ConstantUInt::get(Ty, read_vbr_uint64(Buf, EndBuf)) ); + handler->handleConstantValue( ConstantUInt::get(Ty, read_vbr_uint64()) ); break; } @@ -561,7 +642,7 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, case Type::ShortTyID: case Type::IntTyID: { case Type::LongTyID: - int64_t Val = read_vbr_int64(Buf, EndBuf); + int64_t Val = read_vbr_int64(); if (!ConstantSInt::isValueValidForType(Ty, Val)) throw std::string("Invalid signed byte/short/int/long read."); handler->handleConstantValue( ConstantSInt::get(Ty, Val) ); @@ -570,14 +651,14 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, case Type::FloatTyID: { float F; - input_data(Buf, EndBuf, &F, &F+1); + read_data(&F, &F+1); handler->handleConstantValue( ConstantFP::get(Ty, F) ); break; } case Type::DoubleTyID: { double Val; - input_data(Buf, EndBuf, &Val, &Val+1); + read_data(&Val, &Val+1); handler->handleConstantValue( ConstantFP::get(Ty, Val) ); break; } @@ -592,7 +673,7 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, std::vector Elements; Elements.reserve(NumElements); while (NumElements--) // Read all of the elements of the constant. - Elements.push_back(read_vbr_uint(Buf, EndBuf)); + Elements.push_back(read_vbr_uint()); handler->handleConstantArray( AT, Elements ); break; @@ -603,15 +684,16 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, std::vector Elements; Elements.reserve(ST->getNumElements()); for (unsigned i = 0; i != ST->getNumElements(); ++i) - Elements.push_back(read_vbr_uint(Buf, EndBuf)); - + Elements.push_back(read_vbr_uint()); handler->handleConstantStruct( ST, Elements ); + break; } case Type::PointerTyID: { // ConstantPointerRef value... const PointerType *PT = cast(Ty); - unsigned Slot = read_vbr_uint(Buf, EndBuf); + unsigned Slot = read_vbr_uint(); handler->handleConstantPointer( PT, Slot ); + break; } default: @@ -620,16 +702,13 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, } } -void AbstractBytecodeParser::ParseGlobalTypes(const unsigned char *&Buf, - const unsigned char *EndBuf) { - ParseConstantPool(Buf, EndBuf, ModuleTypes); +void AbstractBytecodeParser::ParseGlobalTypes() { + ParseConstantPool(ModuleTypes); } -void AbstractBytecodeParser::ParseStringConstants(const unsigned char *&Buf, - const unsigned char *EndBuf, - unsigned NumEntries ){ +void AbstractBytecodeParser::ParseStringConstants(unsigned NumEntries ){ for (; NumEntries; --NumEntries) { - unsigned Typ = read_vbr_uint(Buf, EndBuf); + unsigned Typ = read_vbr_uint(); const Type *Ty = getType(Typ); if (!isa(Ty)) throw std::string("String constant data invalid!"); @@ -641,7 +720,7 @@ void AbstractBytecodeParser::ParseStringConstants(const unsigned char *&Buf, // Read character data. The type tells us how long the string is. char Data[ATy->getNumElements()]; - input_data(Buf, EndBuf, Data, Data+ATy->getNumElements()); + read_data(Data, Data+ATy->getNumElements()); std::vector Elements(ATy->getNumElements()); if (ATy->getElementType() == Type::SByteTy) @@ -658,35 +737,33 @@ void AbstractBytecodeParser::ParseStringConstants(const unsigned char *&Buf, } -void AbstractBytecodeParser::ParseConstantPool(const unsigned char *&Buf, - const unsigned char *EndBuf, - TypeListTy &TypeTab) { - while (Buf < EndBuf) { - unsigned NumEntries = read_vbr_uint(Buf, EndBuf); - unsigned Typ = read_vbr_uint(Buf, EndBuf); +void AbstractBytecodeParser::ParseConstantPool( TypeListTy &TypeTab) { + while ( moreInBlock() ) { + unsigned NumEntries = read_vbr_uint(); + unsigned Typ = read_vbr_uint(); if (Typ == Type::TypeTyID) { - ParseTypeConstants(Buf, EndBuf, TypeTab, NumEntries); + ParseTypeConstants(TypeTab, NumEntries); } else if (Typ == Type::VoidTyID) { - ParseStringConstants(Buf, EndBuf, NumEntries); + ParseStringConstants(NumEntries); } else { BCR_TRACE(3, "Type: '" << *getType(Typ) << "' NumEntries: " << NumEntries << "\n"); for (unsigned i = 0; i < NumEntries; ++i) { - ParseConstantValue(Buf, EndBuf, Typ); + ParseConstantValue(Typ); } } } - if (Buf > EndBuf) PARSE_ERROR("Read past end of buffer."); + checkPastBlockEnd("Constant Pool"); } -void AbstractBytecodeParser::ParseModuleGlobalInfo(BufPtr &Buf, BufPtr End) { +void AbstractBytecodeParser::ParseModuleGlobalInfo() { handler->handleModuleGlobalsBegin(); // Read global variables... - unsigned VarType = read_vbr_uint(Buf, End); + unsigned VarType = read_vbr_uint(); while (VarType != Type::VoidTyID) { // List is terminated by Void // VarType Fields: bit0 = isConstant, bit1 = hasInitializer, bit2,3,4 = // Linkage, bit4+ = slot# @@ -721,17 +798,17 @@ void AbstractBytecodeParser::ParseModuleGlobalInfo(BufPtr &Buf, BufPtr End) { // Create the global variable... if (hasInitializer) { - unsigned initSlot = read_vbr_uint(Buf,End); + unsigned initSlot = read_vbr_uint(); handler->handleInitializedGV( ElTy, isConstant, Linkage, initSlot ); } else handler->handleGlobalVariable( ElTy, isConstant, Linkage ); // Get next item - VarType = read_vbr_uint(Buf, End); + VarType = read_vbr_uint(); } // Read the function objects for all of the functions that are coming - unsigned FnSignature = read_vbr_uint(Buf, End); + unsigned FnSignature = read_vbr_uint(); while (FnSignature != Type::VoidTyID) { // List is terminated by Void const Type *Ty = getType(FnSignature); if (!isa(Ty) || @@ -750,22 +827,26 @@ void AbstractBytecodeParser::ParseModuleGlobalInfo(BufPtr &Buf, BufPtr End) { handler->handleFunctionDeclaration(Ty); // Get Next function signature - FnSignature = read_vbr_uint(Buf, End); + FnSignature = read_vbr_uint(); } if (hasInconsistentModuleGlobalInfo) - align32(Buf, End); + align32(); + + // Now that the function signature list is set up, reverse it so that we can + // remove elements efficiently from the back of the vector. + std::reverse(FunctionSignatureList.begin(), FunctionSignatureList.end()); // This is for future proofing... in the future extra fields may be added that // we don't understand, so we transparently ignore them. // - Buf = End; + At = BlockEnd; handler->handleModuleGlobalsEnd(); } -void AbstractBytecodeParser::ParseVersionInfo(BufPtr &Buf, BufPtr EndBuf) { - unsigned Version = read_vbr_uint(Buf, EndBuf); +void AbstractBytecodeParser::ParseVersionInfo() { + unsigned Version = read_vbr_uint(); // Unpack version number: low four bits are for flags, top bits = version Module::Endianness Endianness; @@ -814,85 +895,164 @@ void AbstractBytecodeParser::ParseVersionInfo(BufPtr &Buf, BufPtr EndBuf) { handler->handleVersionInfo(RevisionNum, Endianness, PointerSize ); } -void AbstractBytecodeParser::ParseModule(BufPtr &Buf, BufPtr EndBuf ) { +void AbstractBytecodeParser::ParseModule() { unsigned Type, Size; - readBlock(Buf, EndBuf, Type, Size); - if (Type != BytecodeFormat::Module || Buf+Size != EndBuf) - // Hrm, not a class? - PARSE_ERROR("Expected Module block! B: " << unsigned(intptr_t(Buf)) << - ", S: " << Size << " E: " << unsigned(intptr_t(EndBuf))); + + FunctionSignatureList.clear(); // Just in case... // Read into instance variables... - ParseVersionInfo(Buf, EndBuf); - align32(Buf, EndBuf); + ParseVersionInfo(); + align32(); /// FIXME: Is this redundant? VI is first and 4 bytes! bool SeenModuleGlobalInfo = false; bool SeenGlobalTypePlane = false; - while (Buf < EndBuf) { - BufPtr OldBuf = Buf; - readBlock(Buf, EndBuf, Type, Size); + BufPtr MyEnd = BlockEnd; + while (At < MyEnd) { + BufPtr OldAt = At; + readBlock(Type, Size); switch (Type) { case BytecodeFormat::GlobalTypePlane: if ( SeenGlobalTypePlane ) - PARSE_ERROR("Two GlobalTypePlane Blocks Encountered!"); + PARSE_ERROR("Two GlobalTypePlane Blocks Encountered!"); - ParseGlobalTypes(Buf, Buf+Size); + ParseGlobalTypes(); SeenGlobalTypePlane = true; break; case BytecodeFormat::ModuleGlobalInfo: if ( SeenModuleGlobalInfo ) - PARSE_ERROR("Two ModuleGlobalInfo Blocks Encountered!"); - ParseModuleGlobalInfo(Buf, Buf+Size); + PARSE_ERROR("Two ModuleGlobalInfo Blocks Encountered!"); + ParseModuleGlobalInfo(); SeenModuleGlobalInfo = true; break; case BytecodeFormat::ConstantPool: - ParseConstantPool(Buf, Buf+Size, ModuleTypes); + ParseConstantPool(ModuleTypes); break; case BytecodeFormat::Function: - ParseFunctionLazily(Buf, Buf+Size); + ParseFunctionLazily(); break; case BytecodeFormat::SymbolTable: - ParseSymbolTable(Buf, Buf+Size ); + ParseSymbolTable(); break; default: - Buf += Size; - if (OldBuf > Buf) - { - PARSE_ERROR("Unexpected Block of Type" << Type << "encountered!" ); + At += Size; + if (OldAt > At) { + PARSE_ERROR("Unexpected Block of Type" << Type << "encountered!" ); } break; } - align32(Buf, EndBuf); + BlockEnd = MyEnd; + align32(); } + + /// Make sure we pulled them all out. If we didn't then there's a declaration + /// but a missing body. That's not allowed. + if (!FunctionSignatureList.empty()) + throw std::string( + "Function declared, but bytecode stream ended before definition"); } void AbstractBytecodeParser::ParseBytecode( - BufPtr Buf, unsigned Length, + BufPtr b, unsigned Length, const std::string &ModuleID) { + At = MemStart = BlockStart = b; + MemEnd = BlockEnd = b + Length; handler->handleStart(); - unsigned char *EndBuf = (unsigned char*)(Buf + Length); // Read and check signature... - unsigned Sig = read(Buf, EndBuf); + unsigned Sig = read_uint(); if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) { PARSE_ERROR("Invalid bytecode signature: " << Sig); } handler->handleModuleBegin(ModuleID); - this->ParseModule(Buf, EndBuf); + unsigned Type, Size; + readBlock(Type, Size); + if ( Type != BytecodeFormat::Module ) { + PARSE_ERROR("Expected Module Block! At: " << unsigned(intptr_t(At)) + << ", Type:" << Type << ", Size:" << Size); + } + if ( At + Size != MemEnd ) { + PARSE_ERROR("Invalid Top Level Block Length! At: " + << unsigned(intptr_t(At)) << ", Type:" << Type << ", Size:" << Size); + } + this->ParseModule(); handler->handleModuleEnd(ModuleID); handler->handleFinish(); } +//===----------------------------------------------------------------------===// +//=== Default Implementations of Handler Methods +//===----------------------------------------------------------------------===// + +bool BytecodeHandler::handleError(const std::string& str ) { return false; } +void BytecodeHandler::handleStart() { } +void BytecodeHandler::handleFinish() { } +void BytecodeHandler::handleModuleBegin(const std::string& id) { } +void BytecodeHandler::handleModuleEnd(const std::string& id) { } +void BytecodeHandler::handleVersionInfo( unsigned char RevisionNum, + Module::Endianness Endianness, Module::PointerSize PointerSize) { } +void BytecodeHandler::handleModuleGlobalsBegin() { } +void BytecodeHandler::handleGlobalVariable( + const Type* ElemType, bool isConstant, GlobalValue::LinkageTypes ) { } +void BytecodeHandler::handleInitializedGV( + const Type* ElemType, bool isConstant, GlobalValue::LinkageTypes, + unsigned initSlot) {} +void BytecodeHandler::handleType( const Type* Ty ) {} +void BytecodeHandler::handleFunctionDeclaration( + const Type* FuncType) {} +void BytecodeHandler::handleModuleGlobalsEnd() { } +void BytecodeHandler::handleCompactionTableBegin() { } +void BytecodeHandler::handleCompactionTablePlane( unsigned Ty, + unsigned NumEntries) {} +void BytecodeHandler::handleCompactionTableType( unsigned i, unsigned TypSlot, + const Type* ) {} +void BytecodeHandler::handleCompactionTableValue( unsigned i, unsigned ValSlot, + const Type* ) {} +void BytecodeHandler::handleCompactionTableEnd() { } +void BytecodeHandler::handleSymbolTableBegin() { } +void BytecodeHandler::handleSymbolTablePlane( unsigned Ty, unsigned NumEntries, + const Type* Typ) { } +void BytecodeHandler::handleSymbolTableType( unsigned i, unsigned slot, + const std::string& name ) { } +void BytecodeHandler::handleSymbolTableValue( unsigned i, unsigned slot, + const std::string& name ) { } +void BytecodeHandler::handleSymbolTableEnd() { } +void BytecodeHandler::handleFunctionBegin( const Type* FType, + GlobalValue::LinkageTypes linkage ) { } +void BytecodeHandler::handleFunctionEnd( const Type* FType) { } +void BytecodeHandler::handleBasicBlockBegin( unsigned blocknum) { } +bool BytecodeHandler::handleInstruction( unsigned Opcode, const Type* iType, + std::vector& Operands, unsigned Size) { + return Instruction::isTerminator(Opcode); + } +void BytecodeHandler::handleBasicBlockEnd(unsigned blocknum) { } +void BytecodeHandler::handleGlobalConstantsBegin() { } +void BytecodeHandler::handleConstantExpression( unsigned Opcode, + const Type* Typ, std::vector > ArgVec ) { } +void BytecodeHandler::handleConstantValue( Constant * c ) { } +void BytecodeHandler::handleConstantArray( const ArrayType* AT, + std::vector& Elements ) { } +void BytecodeHandler::handleConstantStruct( const StructType* ST, + std::vector& ElementSlots) { } +void BytecodeHandler::handleConstantPointer( + const PointerType* PT, unsigned Slot) { } +void BytecodeHandler::handleConstantString( const ConstantArray* CA ) {} +void BytecodeHandler::handleGlobalConstantsEnd() {} +void BytecodeHandler::handleAlignment(unsigned numBytes) {} +void BytecodeHandler::handleBlock( + unsigned BType, const unsigned char* StartPtr, unsigned Size) {} +void BytecodeHandler::handleVBR32(unsigned Size ) {} +void BytecodeHandler::handleVBR64(unsigned Size ) {} + // vim: sw=2 diff --git a/lib/Bytecode/Analyzer/Parser.h b/lib/Bytecode/Analyzer/Parser.h index 38a14717d5e..08758cb5f29 100644 --- a/lib/Bytecode/Analyzer/Parser.h +++ b/lib/Bytecode/Analyzer/Parser.h @@ -41,7 +41,18 @@ class AbstractBytecodeParser { /// @name Constructors /// @{ public: - AbstractBytecodeParser( BytecodeHandler* h ) { handler = h; } + AbstractBytecodeParser( + BytecodeHandler* h, + bool repAlignment = false, + bool repBlocks = false, + bool repVBR = false + ) { + handler = h; + reportAlignment = repAlignment; + reportBlocks = repBlocks; + reportVBR = repVBR; + } + ~AbstractBytecodeParser() { } /// @} @@ -86,64 +97,72 @@ public: /// @{ protected: /// @brief Parse whole module scope - void ParseModule (BufPtr &Buf, BufPtr End); + void ParseModule (); /// @brief Parse the version information block - void ParseVersionInfo (BufPtr &Buf, BufPtr End); + void ParseVersionInfo (); /// @brief Parse the ModuleGlobalInfo block - void ParseModuleGlobalInfo (BufPtr &Buf, BufPtr End); + void ParseModuleGlobalInfo (); /// @brief Parse a symbol table - void ParseSymbolTable (BufPtr &Buf, BufPtr End); + void ParseSymbolTable (); /// This function parses LLVM functions lazily. It obtains the type of the /// function and records where the body of the function is in the bytecode /// buffer. The caller can then use the ParseNextFunction and /// ParseAllFunctionBodies to get handler events for the functions. /// @brief Parse functions lazily. - void ParseFunctionLazily (BufPtr &Buf, BufPtr End); + void ParseFunctionLazily (); /// @brief Parse a function body - void ParseFunctionBody (const Type* FType, BufPtr &Buf, BufPtr EndBuf); + void ParseFunctionBody (const Type* FType); /// @brief Parse a compaction table - void ParseCompactionTable (BufPtr &Buf, BufPtr End); + void ParseCompactionTable (); /// @brief Parse global types - void ParseGlobalTypes (BufPtr &Buf, BufPtr End); + void ParseGlobalTypes (); /// @brief Parse a basic block (for LLVM 1.0 basic block blocks) - void ParseBasicBlock (BufPtr &Buf, BufPtr End, unsigned BlockNo); + void ParseBasicBlock (unsigned BlockNo); /// @brief parse an instruction list (for post LLVM 1.0 instruction lists /// with blocks differentiated by terminating instructions. - unsigned ParseInstructionList(BufPtr &Buf, BufPtr End); + unsigned ParseInstructionList(); /// @brief Parse an instruction. - bool ParseInstruction (BufPtr &Buf, BufPtr End, - std::vector& Args); + bool ParseInstruction (std::vector& Args); /// @brief Parse a constant pool - void ParseConstantPool (BufPtr &Buf, BufPtr End, TypeListTy& List); + void ParseConstantPool (TypeListTy& List); /// @brief Parse a constant value - void ParseConstantValue (BufPtr &Buf, BufPtr End, unsigned TypeID); + void ParseConstantValue (unsigned TypeID); /// @brief Parse a block of types. - void ParseTypeConstants (BufPtr &Buf, BufPtr End, TypeListTy &Tab, - unsigned NumEntries); + void ParseTypeConstants (TypeListTy &Tab, unsigned NumEntries); /// @brief Parse a single type. - const Type *ParseTypeConstant(BufPtr &Buf, BufPtr End); + const Type *ParseTypeConstant(); /// @brief Parse a string constants block - void ParseStringConstants (BufPtr &Buf, BufPtr End, unsigned NumEntries); + void ParseStringConstants (unsigned NumEntries); /// @} /// @name Data /// @{ private: + BufPtr MemStart; ///< Start of the memory buffer + BufPtr MemEnd; ///< End of the memory buffer + BufPtr BlockStart; ///< Start of current block being parsed + BufPtr BlockEnd; ///< End of current block being parsed + BufPtr At; ///< Where we're currently parsing at + + bool reportAlignment; ///< Parser should report alignment? + bool reportBlocks; ///< Parser should report blocks? + bool reportVBR; ///< Report VBR compression events + // Information about the module, extracted from the bytecode revision number. unsigned char RevisionNum; // The rev # itself @@ -219,9 +238,25 @@ private: private: - static inline void readBlock(const unsigned char *&Buf, - const unsigned char *EndBuf, - unsigned &Type, unsigned &Size) ; + /// Is there more to parse in the current block? + inline bool moreInBlock(); + + /// Have we read past the end of the block + inline void checkPastBlockEnd(const char * block_name); + + /// Align to 32 bits + inline void align32(); + + /// Reader interface + inline unsigned read_uint(); + inline unsigned read_vbr_uint(); + inline uint64_t read_vbr_uint64(); + inline int64_t read_vbr_int64(); + inline std::string read_str(); + inline void read_data(void *Ptr, void *End); + + /// Read a block header + inline void readBlock(unsigned &Type, unsigned &Size); const Type *AbstractBytecodeParser::getType(unsigned ID); /// getGlobalTableType - This is just like getType, but when a compaction @@ -443,7 +478,8 @@ public: virtual bool handleInstruction( unsigned Opcode, const Type* iType, - std::vector& Operands + std::vector& Operands, + unsigned Length ); /// @brief Handle the end of a basic block @@ -488,6 +524,16 @@ public: /// @brief Handle the end of the global constants virtual void handleGlobalConstantsEnd(); + /// @brief Handle an alignment event + virtual void handleAlignment(unsigned numBytes); + + virtual void handleBlock( + unsigned BType, ///< The type of block + const unsigned char* StartPtr, ///< The start of the block + unsigned Size ///< The size of the block + ); + virtual void handleVBR32(unsigned Size ); + virtual void handleVBR64(unsigned Size ); /// @} }; diff --git a/lib/Bytecode/Analyzer/ReaderPrimitives.h b/lib/Bytecode/Analyzer/ReaderPrimitives.h deleted file mode 100644 index 496ab2a5b13..00000000000 --- a/lib/Bytecode/Analyzer/ReaderPrimitives.h +++ /dev/null @@ -1,101 +0,0 @@ -//===-- ReaderPrimitives.h - Bytecode file format reading prims -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file was developed by the LLVM research group and is distributed under -// the University of Illinois Open Source License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This header defines some basic functions for reading basic primitive types -// from a bytecode stream. -// -//===----------------------------------------------------------------------===// - -#ifndef READERPRIMITIVES_H -#define READERPRIMITIVES_H - -#include "Support/DataTypes.h" -#include - -namespace llvm { - - static inline unsigned read(const unsigned char *&Buf, - const unsigned char *EndBuf) { - if (Buf+4 > EndBuf) throw std::string("Ran out of data!"); - Buf += 4; - return Buf[-4] | (Buf[-3] << 8) | (Buf[-2] << 16) | (Buf[-1] << 24); - } - - - // read_vbr - Read an unsigned integer encoded in variable bitrate format. - // - static inline unsigned read_vbr_uint(const unsigned char *&Buf, - const unsigned char *EndBuf) { - unsigned Shift = 0; - unsigned Result = 0; - - do { - if (Buf == EndBuf) throw std::string("Ran out of data!"); - Result |= (unsigned)((*Buf++) & 0x7F) << Shift; - Shift += 7; - } while (Buf[-1] & 0x80); - return Result; - } - - static inline uint64_t read_vbr_uint64(const unsigned char *&Buf, - const unsigned char *EndBuf) { - unsigned Shift = 0; - uint64_t Result = 0; - - do { - if (Buf == EndBuf) throw std::string("Ran out of data!"); - Result |= (uint64_t)((*Buf++) & 0x7F) << Shift; - Shift += 7; - } while (Buf[-1] & 0x80); - return Result; - } - - static inline int64_t read_vbr_int64(const unsigned char *&Buf, - const unsigned char *EndBuf) { - uint64_t R = read_vbr_uint64(Buf, EndBuf); - if (R & 1) { - if (R != 1) - return -(int64_t)(R >> 1); - else // There is no such thing as -0 with integers. "-0" really means - // 0x8000000000000000. - return 1LL << 63; - } else - return (int64_t)(R >> 1); - } - - // align32 - Round up to multiple of 32 bits... - static inline void align32(const unsigned char *&Buf, - const unsigned char *EndBuf) { - Buf = (const unsigned char *)((unsigned long)(Buf+3) & (~3UL)); - if (Buf > EndBuf) throw std::string("Ran out of data!"); - } - - static inline std::string read_str(const unsigned char *&Buf, - const unsigned char *EndBuf) { - unsigned Size = read_vbr_uint(Buf, EndBuf); - const unsigned char *OldBuf = Buf; - Buf += Size; - if (Buf > EndBuf) // Size invalid? - throw std::string("Ran out of data reading a string!"); - return std::string((char*)OldBuf, Size); - } - - static inline void input_data(const unsigned char *&Buf, - const unsigned char *EndBuf, - void *Ptr, void *End) { - unsigned char *Start = (unsigned char *)Ptr; - unsigned Amount = (unsigned char *)End - Start; - if (Buf+Amount > EndBuf) throw std::string("Ran out of data!"); - std::copy(Buf, Buf+Amount, Start); - Buf += Amount; - } - -} // End llvm namespace - -#endif diff --git a/lib/Bytecode/Reader/Analyzer.cpp b/lib/Bytecode/Reader/Analyzer.cpp index eb710711d98..133c1fbaa6f 100644 --- a/lib/Bytecode/Reader/Analyzer.cpp +++ b/lib/Bytecode/Reader/Analyzer.cpp @@ -30,13 +30,13 @@ public: bool handleError(const std::string& str ) { - std::cerr << "Analysis Error: " << str; return false; } void handleStart() { bca.ModuleId.clear(); + bca.numBlocks = 0; bca.numTypes = 0; bca.numValues = 0; bca.numFunctions = 0; @@ -49,16 +49,38 @@ public: bca.numSymTab = 0; bca.maxTypeSlot = 0; bca.maxValueSlot = 0; - bca.density = 0.0; + bca.numAlignment = 0; + bca.fileDensity = 0.0; + bca.globalsDensity = 0.0; + bca.functionDensity = 0.0; + bca.vbrCount32 = 0; + bca.vbrCount64 = 0; + bca.vbrCompBytes = 0; + bca.vbrExpdBytes = 0; bca.FunctionInfo.clear(); bca.BytecodeDump.clear(); + bca.BlockSizes[BytecodeFormat::Module] = 0; + bca.BlockSizes[BytecodeFormat::Function] = 0; + bca.BlockSizes[BytecodeFormat::ConstantPool] = 0; + bca.BlockSizes[BytecodeFormat::SymbolTable] = 0; + bca.BlockSizes[BytecodeFormat::ModuleGlobalInfo] = 0; + bca.BlockSizes[BytecodeFormat::GlobalTypePlane] = 0; + bca.BlockSizes[BytecodeFormat::BasicBlock] = 0; + bca.BlockSizes[BytecodeFormat::InstructionList] = 0; + bca.BlockSizes[BytecodeFormat::CompactionTable] = 0; } void handleFinish() { - bca.density = bca.numTypes + bca.numFunctions + bca.numConstants + - bca.numGlobalVars + bca.numInstructions; - bca.density /= bca.byteSize; + bca.fileDensity = double(bca.byteSize) / double( bca.numTypes + bca.numValues ); + double globalSize = 0.0; + globalSize += double(bca.BlockSizes[BytecodeFormat::ConstantPool]); + globalSize += double(bca.BlockSizes[BytecodeFormat::ModuleGlobalInfo]); + globalSize += double(bca.BlockSizes[BytecodeFormat::GlobalTypePlane]); + bca.globalsDensity = globalSize / double( bca.numTypes + bca.numConstants + + bca.numGlobalVars ); + bca.functionDensity = double(bca.BlockSizes[BytecodeFormat::Function]) / + double(bca.numFunctions); } void handleModuleBegin(const std::string& id) @@ -78,8 +100,9 @@ public: { } - void handleModuleGlobalsBegin() + void handleModuleGlobalsBegin(unsigned size) { + // bca.globalBytesize += size; } void handleGlobalVariable( @@ -89,6 +112,7 @@ public: ) { bca.numGlobalVars++; + bca.numValues++; } void handleInitializedGV( @@ -99,6 +123,7 @@ public: ) { bca.numGlobalVars++; + bca.numValues++; } virtual void handleType( const Type* Ty ) @@ -111,6 +136,7 @@ public: ) { bca.numFunctions++; + bca.numValues++; } void handleModuleGlobalsEnd() @@ -200,15 +226,19 @@ public: ) { bca.numBasicBlocks++; + bca.numValues++; } bool handleInstruction( unsigned Opcode, const Type* iType, - std::vector& Operands + std::vector& Operands, + unsigned Size ) { bca.numInstructions++; + bca.numValues++; + bca.numOperands += Operands.size(); return Instruction::isTerminator(Opcode); } @@ -227,43 +257,67 @@ public: ) { bca.numConstants++; + bca.numValues++; } void handleConstantValue( Constant * c ) { bca.numConstants++; + bca.numValues++; } void handleConstantArray( - const ArrayType* AT, - std::vector& Elements ) + const ArrayType* AT, + std::vector& Elements ) { bca.numConstants++; + bca.numValues++; } void handleConstantStruct( - const StructType* ST, - std::vector& ElementSlots) + const StructType* ST, + std::vector& ElementSlots) { bca.numConstants++; + bca.numValues++; } void handleConstantPointer( - const PointerType* PT, unsigned Slot) + const PointerType* PT, unsigned Slot) { bca.numConstants++; + bca.numValues++; } void handleConstantString( const ConstantArray* CA ) { bca.numConstants++; + bca.numValues++; } - void handleGlobalConstantsEnd() - { + void handleGlobalConstantsEnd() { } + + void handleAlignment(unsigned numBytes) { + bca.numAlignment += numBytes; } + void handleBlock( + unsigned BType, const unsigned char* StartPtr, unsigned Size) { + bca.numBlocks++; + bca.BlockSizes[llvm::BytecodeFormat::FileBlockIDs(BType)] += Size; + } + + virtual void handleVBR32(unsigned Size ) { + bca.vbrCount32++; + bca.vbrCompBytes += Size; + bca.vbrExpdBytes += sizeof(uint32_t); + } + virtual void handleVBR64(unsigned Size ) { + bca.vbrCount64++; + bca.vbrCompBytes += Size; + bca.vbrExpdBytes += sizeof(uint64_t); + } }; } @@ -277,10 +331,9 @@ void llvm::BytecodeAnalyzer::AnalyzeBytecode( { bca.byteSize = Length; AnalyzerHandler TheHandler(bca); - AbstractBytecodeParser TheParser(&TheHandler); + AbstractBytecodeParser TheParser(&TheHandler, true, true, true); TheParser.ParseBytecode( Buf, Length, ModuleID ); - if ( bca.detailedResults ) - TheParser.ParseAllFunctionBodies(); + TheParser.ParseAllFunctionBodies(); } // vim: sw=2 diff --git a/lib/Bytecode/Reader/AnalyzerWrappers.cpp b/lib/Bytecode/Reader/AnalyzerWrappers.cpp index 2caf069eb49..fd23dc80867 100644 --- a/lib/Bytecode/Reader/AnalyzerWrappers.cpp +++ b/lib/Bytecode/Reader/AnalyzerWrappers.cpp @@ -18,6 +18,7 @@ #include "Support/StringExtras.h" #include "Config/unistd.h" #include +#include using namespace llvm; @@ -46,7 +47,7 @@ static std::string ErrnoMessage (int savedErrNum, std::string descr) { } BytecodeFileAnalyzer::BytecodeFileAnalyzer(const std::string &Filename, - BytecodeAnalysis& bca) { + BytecodeAnalysis& bca) { Buffer = (unsigned char*)ReadFileIntoAddressSpace(Filename, Length); if (Buffer == 0) throw "Error reading file '" + Filename + "'."; @@ -84,16 +85,16 @@ namespace { public: BytecodeBufferAnalyzer(const unsigned char *Buf, unsigned Length, - BytecodeAnalysis& bca, const std::string &ModuleID); + BytecodeAnalysis& bca, const std::string &ModuleID); ~BytecodeBufferAnalyzer(); }; } BytecodeBufferAnalyzer::BytecodeBufferAnalyzer(const unsigned char *Buf, - unsigned Length, - BytecodeAnalysis& bca, - const std::string &ModuleID) { + unsigned Length, + BytecodeAnalysis& bca, + const std::string &ModuleID) { // If not aligned, allocate a new buffer to hold the bytecode... const unsigned char *ParseBegin = 0; if ((intptr_t)Buf & 3) { @@ -200,28 +201,118 @@ void llvm::AnalyzeBytecodeBuffer( /// This function prints the contents of rhe BytecodeAnalysis structure in /// a human legible form. /// @brief Print BytecodeAnalysis structure to an ostream +namespace { +inline static void print(std::ostream& Out, const char*title, + unsigned val, bool nl = true ) { + Out << std::setw(30) << std::right << title + << std::setw(0) << ": " + << std::setw(9) << val << "\n"; +} + +inline static void print(std::ostream&Out, const char*title, + double val ) { + Out << std::setw(30) << std::right << title + << std::setw(0) << ": " + << std::setw(9) << std::setprecision(6) << val << "\n" ; +} + +inline static void print(std::ostream&Out, const char*title, + double top, double bot ) { + Out << std::setw(30) << std::right << title + << std::setw(0) << ": " + << std::setw(9) << std::setprecision(6) << top + << " (" << std::left << std::setw(0) << std::setprecision(4) + << (top/bot)*100.0 << "%)\n"; +} +inline static void print(std::ostream&Out, const char*title, + std::string val, bool nl = true) { + Out << std::setw(30) << std::right << title + << std::setw(0) << ": " + << std::left << val << (nl ? "\n" : ""); +} + +} + void llvm::PrintBytecodeAnalysis(BytecodeAnalysis& bca, std::ostream& Out ) { - Out << " Bytecode Analysis of: " << bca.ModuleId << "\n"; - Out << " File Size: " << bca.byteSize << "\n"; - Out << " Number Of Types: " << bca.numTypes << "\n"; - Out << " Number Of Constants: " << bca.numConstants << "\n"; - Out << " Number Of Global Variables: " << bca.numGlobalVars << "\n"; - Out << " Number Of Functions: " << bca.numFunctions << "\n"; - Out << " Number Of Basic Blocks: " << bca.numBasicBlocks << "\n"; - Out << " Number Of Instructions: " << bca.numInstructions << "\n"; - Out << " Number Of Operands: " << bca.numOperands << "\n"; - Out << "Number Of Compaction Tables: " << bca.numCmpctnTables << "\n"; - Out << " Number Of Symbol Tables: " << bca.numSymTab << "\n"; - Out << " Maximum Type Slot Number: " << bca.maxTypeSlot << "\n"; - Out << " Maximum Value Slot Number: " << bca.maxValueSlot << "\n"; - Out << " Bytecode Density: " << bca.density << "\n"; + print(Out, "Bytecode Analysis Of Module", bca.ModuleId); + print(Out, "File Size", bca.byteSize); + print(Out, "Bytecode Compression Index",std::string("TBD")); + print(Out, "Number Of Bytecode Blocks", bca.numBlocks); + print(Out, "Number Of Types", bca.numTypes); + print(Out, "Number Of Values", bca.numValues); + print(Out, "Number Of Constants", bca.numConstants); + print(Out, "Number Of Global Variables", bca.numGlobalVars); + print(Out, "Number Of Functions", bca.numFunctions); + print(Out, "Number Of Basic Blocks", bca.numBasicBlocks); + print(Out, "Number Of Instructions", bca.numInstructions); + print(Out, "Number Of Operands", bca.numOperands); + print(Out, "Number Of Compaction Tables", bca.numCmpctnTables); + print(Out, "Number Of Symbol Tables", bca.numSymTab); + print(Out, "Maximum Type Slot Number", bca.maxTypeSlot); + print(Out, "Maximum Value Slot Number", bca.maxValueSlot); + print(Out, "Bytes Thrown To Alignment", double(bca.numAlignment), + double(bca.byteSize)); + print(Out, "File Density (bytes/def)", bca.fileDensity); + print(Out, "Globals Density (bytes/def)", bca.globalsDensity); + print(Out, "Function Density (bytes/func)", bca.functionDensity); + print(Out, "Number of VBR 32-bit Integers", bca.vbrCount32); + print(Out, "Number of VBR 64-bit Integers", bca.vbrCount64); + print(Out, "Number of VBR Compressed Bytes", bca.vbrCompBytes); + print(Out, "Number of VBR Expanded Bytes", bca.vbrExpdBytes); + print(Out, "VBR Savings", + double(bca.vbrExpdBytes)-double(bca.vbrCompBytes), + double(bca.byteSize)); - if ( bca.detailedResults ) - Out << "Detailed Results Not Implemented Yet.\n"; + if ( bca.detailedResults ) { + print(Out, "Module Bytes", + double(bca.BlockSizes[BytecodeFormat::Module]), + double(bca.byteSize)); + print(Out, "Function Bytes", + double(bca.BlockSizes[BytecodeFormat::Function]), + double(bca.byteSize)); + print(Out, "Constant Pool Bytes", + double(bca.BlockSizes[BytecodeFormat::ConstantPool]), + double(bca.byteSize)); + print(Out, "Symbol Table Bytes", + double(bca.BlockSizes[BytecodeFormat::SymbolTable]), + double(bca.byteSize)); + print(Out, "Module Global Info Bytes", + double(bca.BlockSizes[BytecodeFormat::ModuleGlobalInfo]), + double(bca.byteSize)); + print(Out, "Global Type Plane Bytes", + double(bca.BlockSizes[BytecodeFormat::GlobalTypePlane]), + double(bca.byteSize)); + print(Out, "Basic Block Bytes", + double(bca.BlockSizes[BytecodeFormat::BasicBlock]), + double(bca.byteSize)); + print(Out, "Instruction List Bytes", + double(bca.BlockSizes[BytecodeFormat::InstructionList]), + double(bca.byteSize)); + print(Out, "Compaction Table Bytes", + double(bca.BlockSizes[BytecodeFormat::CompactionTable]), + double(bca.byteSize)); + + std::map::iterator I = + bca.FunctionInfo.begin(); + std::map::iterator E = + bca.FunctionInfo.end(); + + while ( I != E ) { + Out << std::left << std::setw(0); + Out << "Function: " << I->second.name << " Slot=" << I->first << "\n"; + print(Out,"Type:", I->second.description); + print(Out,"Byte Size", I->second.byteSize); + print(Out,"Instructions", I->second.numInstructions); + print(Out,"Basic Blocks", I->second.numBasicBlocks); + print(Out,"Operand", I->second.numOperands); + print(Out,"Function Density", I->second.density); + print(Out,"VBR Effectiveness", I->second.vbrEffectiveness); + ++I; + } + } if ( bca.dumpBytecode ) Out << bca.BytecodeDump; } - // vim: sw=2 diff --git a/lib/Bytecode/Reader/Dumper.cpp b/lib/Bytecode/Reader/Dumper.cpp index 12752ff883d..d61afe42558 100644 --- a/lib/Bytecode/Reader/Dumper.cpp +++ b/lib/Bytecode/Reader/Dumper.cpp @@ -212,7 +212,8 @@ public: virtual bool handleInstruction( unsigned Opcode, const Type* iType, - std::vector& Operands + std::vector& Operands, + unsigned Size ) { std::cout << " INST: OpCode=" diff --git a/lib/Bytecode/Reader/Parser.cpp b/lib/Bytecode/Reader/Parser.cpp index 743db6c3153..80800e75a65 100644 --- a/lib/Bytecode/Reader/Parser.cpp +++ b/lib/Bytecode/Reader/Parser.cpp @@ -1,4 +1,4 @@ -//===- Reader.cpp - Code to read bytecode files ---------------------------===// +//===- Parser.cpp - Code to parse bytecode files --------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This library implements the functionality defined in llvm/Bytecode/Reader.h +// This library implements the functionality defined in llvm/Bytecode/Parser.h // // Note that this library should be as fast as possible, reentrant, and // threadsafe!! @@ -17,7 +17,6 @@ //===----------------------------------------------------------------------===// #include "AnalyzerInternals.h" -#include "ReaderPrimitives.h" #include "llvm/Module.h" #include "llvm/Bytecode/Format.h" #include "Support/StringExtras.h" @@ -37,40 +36,128 @@ using namespace llvm; #define BCR_TRACE(n, X) #endif -#define PARSE_ERROR(inserters) \ - { \ +#define PARSE_ERROR(inserters) { \ std::ostringstream errormsg; \ errormsg << inserters; \ if ( ! handler->handleError( errormsg.str() ) ) \ throw std::string(errormsg.str()); \ } +inline bool AbstractBytecodeParser::moreInBlock() { + return At < BlockEnd; +} -inline void AbstractBytecodeParser::readBlock(const unsigned char *&Buf, - const unsigned char *EndBuf, - unsigned &Type, unsigned &Size) -{ - Type = read(Buf, EndBuf); - Size = read(Buf, EndBuf); +inline void AbstractBytecodeParser::checkPastBlockEnd(const char * block_name) { + if ( At > BlockEnd ) + PARSE_ERROR("Attempt to read past the end of " << block_name << " block."); +} + +inline void AbstractBytecodeParser::align32() { + BufPtr Save = At; + At = (const unsigned char *)((unsigned long)(At+3) & (~3UL)); + if ( reportAlignment && At > Save ) handler->handleAlignment( At - Save ); + if (At > BlockEnd) + throw std::string("Ran out of data while aligning!"); +} + +inline unsigned AbstractBytecodeParser::read_uint() { + if (At+4 > BlockEnd) + throw std::string("Ran out of data reading uint!"); + At += 4; + return At[-4] | (At[-3] << 8) | (At[-2] << 16) | (At[-1] << 24); +} + +inline unsigned AbstractBytecodeParser::read_vbr_uint() { + unsigned Shift = 0; + unsigned Result = 0; + BufPtr Save = At; + + do { + if (At == BlockEnd) + throw std::string("Ran out of data reading vbr_uint!"); + Result |= (unsigned)((*At++) & 0x7F) << Shift; + Shift += 7; + } while (At[-1] & 0x80); + if (reportVBR) + handler->handleVBR32(At-Save); + return Result; +} + +inline uint64_t AbstractBytecodeParser::read_vbr_uint64() { + unsigned Shift = 0; + uint64_t Result = 0; + BufPtr Save = At; + + do { + if (At == BlockEnd) + throw std::string("Ran out of data reading vbr_uint64!"); + Result |= (uint64_t)((*At++) & 0x7F) << Shift; + Shift += 7; + } while (At[-1] & 0x80); + if (reportVBR) + handler->handleVBR64(At-Save); + return Result; +} + +inline int64_t AbstractBytecodeParser::read_vbr_int64() { + uint64_t R = read_vbr_uint64(); + if (R & 1) { + if (R != 1) + return -(int64_t)(R >> 1); + else // There is no such thing as -0 with integers. "-0" really means + // 0x8000000000000000. + return 1LL << 63; + } else + return (int64_t)(R >> 1); +} + +inline std::string AbstractBytecodeParser::read_str() { + unsigned Size = read_vbr_uint(); + const unsigned char *OldAt = At; + At += Size; + if (At > BlockEnd) // Size invalid? + throw std::string("Ran out of data reading a string!"); + return std::string((char*)OldAt, Size); +} + +inline void AbstractBytecodeParser::read_data(void *Ptr, void *End) { + unsigned char *Start = (unsigned char *)Ptr; + unsigned Amount = (unsigned char *)End - Start; + if (At+Amount > BlockEnd) + throw std::string("Ran out of data!"); + std::copy(At, At+Amount, Start); + At += Amount; +} + +inline void AbstractBytecodeParser::readBlock(unsigned &Type, unsigned &Size) { + Type = read_uint(); + Size = read_uint(); + BlockStart = At; + if ( At + Size > BlockEnd ) + throw std::string("Attempt to size a block past end of memory"); + BlockEnd = At + Size; + if ( reportBlocks ) { + handler->handleBlock( Type, BlockStart, Size ); + } } const Type *AbstractBytecodeParser::getType(unsigned ID) { - //cerr << "Looking up Type ID: " << ID << "\n"; +//cerr << "Looking up Type ID: " << ID << "\n"; - if (ID < Type::FirstDerivedTyID) - if (const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID)) - return T; // Asked for a primitive type... +if (ID < Type::FirstDerivedTyID) + if (const Type *T = Type::getPrimitiveType((Type::PrimitiveID)ID)) + return T; // Asked for a primitive type... - // Otherwise, derived types need offset... - ID -= Type::FirstDerivedTyID; +// Otherwise, derived types need offset... +ID -= Type::FirstDerivedTyID; - if (!CompactionTypeTable.empty()) { - if (ID >= CompactionTypeTable.size()) - PARSE_ERROR("Type ID out of range for compaction table!"); - return CompactionTypeTable[ID]; - } +if (!CompactionTypeTable.empty()) { + if (ID >= CompactionTypeTable.size()) + PARSE_ERROR("Type ID out of range for compaction table!"); + return CompactionTypeTable[ID]; +} - // Is it a module-level type? +// Is it a module-level type? if (ID < ModuleTypes.size()) return ModuleTypes[ID].get(); @@ -83,12 +170,12 @@ const Type *AbstractBytecodeParser::getType(unsigned ID) { return Type::VoidTy; } -bool AbstractBytecodeParser::ParseInstruction(BufPtr& Buf, BufPtr EndBuf, - std::vector &Operands) { +bool AbstractBytecodeParser::ParseInstruction(std::vector &Operands) { + BufPtr SaveAt = At; Operands.clear(); unsigned iType = 0; unsigned Opcode = 0; - unsigned Op = read(Buf, EndBuf); + unsigned Op = read_uint(); // bits Instruction format: Common to all formats // -------------------------- @@ -134,61 +221,56 @@ bool AbstractBytecodeParser::ParseInstruction(BufPtr& Buf, BufPtr EndBuf, Operands[2] = (Op >> 26) & 63; break; case 0: - Buf -= 4; // Hrm, try this again... - Opcode = read_vbr_uint(Buf, EndBuf); + At -= 4; // Hrm, try this again... + Opcode = read_vbr_uint(); Opcode >>= 2; - iType = read_vbr_uint(Buf, EndBuf); + iType = read_vbr_uint(); - unsigned NumOperands = read_vbr_uint(Buf, EndBuf); + unsigned NumOperands = read_vbr_uint(); Operands.resize(NumOperands); if (NumOperands == 0) PARSE_ERROR("Zero-argument instruction found; this is invalid."); for (unsigned i = 0; i != NumOperands; ++i) - Operands[i] = read_vbr_uint(Buf, EndBuf); - align32(Buf, EndBuf); + Operands[i] = read_vbr_uint(); + align32(); break; } - return handler->handleInstruction(Opcode, getType(iType), Operands); + return handler->handleInstruction(Opcode, getType(iType), Operands, At-SaveAt); } /// ParseBasicBlock - In LLVM 1.0 bytecode files, we used to output one /// basicblock at a time. This method reads in one of the basicblock packets. -void AbstractBytecodeParser::ParseBasicBlock(BufPtr &Buf, - BufPtr EndBuf, - unsigned BlockNo) { +void AbstractBytecodeParser::ParseBasicBlock( unsigned BlockNo) { handler->handleBasicBlockBegin( BlockNo ); std::vector Args; bool is_terminating = false; - while (Buf < EndBuf) - is_terminating = ParseInstruction(Buf, EndBuf, Args); + while ( moreInBlock() ) + is_terminating = ParseInstruction(Args); if ( ! is_terminating ) - PARSE_ERROR( - "Failed to recognize instruction as terminating at end of block"); + PARSE_ERROR("Non-terminated basic block found!"); handler->handleBasicBlockEnd( BlockNo ); } - /// ParseInstructionList - Parse all of the BasicBlock's & Instruction's in the /// body of a function. In post 1.0 bytecode files, we no longer emit basic /// block individually, in order to avoid per-basic-block overhead. -unsigned AbstractBytecodeParser::ParseInstructionList( BufPtr &Buf, - BufPtr EndBuf) { +unsigned AbstractBytecodeParser::ParseInstructionList() { unsigned BlockNo = 0; std::vector Args; - while (Buf < EndBuf) { + while ( moreInBlock() ) { handler->handleBasicBlockBegin( BlockNo ); // Read instructions into this basic block until we get to a terminator bool is_terminating = false; - while (Buf < EndBuf && !is_terminating ) - is_terminating = ParseInstruction(Buf, EndBuf, Args ) ; + while (moreInBlock() && !is_terminating ) + is_terminating = ParseInstruction(Args ) ; if (!is_terminating) PARSE_ERROR( "Non-terminated basic block found!"); @@ -199,36 +281,34 @@ unsigned AbstractBytecodeParser::ParseInstructionList( BufPtr &Buf, return BlockNo; } -void AbstractBytecodeParser::ParseSymbolTable(BufPtr &Buf, BufPtr EndBuf) { +void AbstractBytecodeParser::ParseSymbolTable() { handler->handleSymbolTableBegin(); - while (Buf < EndBuf) { + while ( moreInBlock() ) { // Symtab block header: [num entries][type id number] - unsigned NumEntries = read_vbr_uint(Buf, EndBuf); - unsigned Typ = read_vbr_uint(Buf, EndBuf); + unsigned NumEntries = read_vbr_uint(); + unsigned Typ = read_vbr_uint(); const Type *Ty = getType(Typ); handler->handleSymbolTablePlane( Typ, NumEntries, Ty ); for (unsigned i = 0; i != NumEntries; ++i) { // Symtab entry: [def slot #][name] - unsigned slot = read_vbr_uint(Buf, EndBuf); - std::string Name = read_str(Buf, EndBuf); + unsigned slot = read_vbr_uint(); + std::string Name = read_str(); if (Typ == Type::TypeTyID) handler->handleSymbolTableType( i, slot, Name ); else - handler->handleSymbolTableValue( i, slot, Name ); + handler->handleSymbolTableValue( i, slot, Name ); } } - - if (Buf > EndBuf) - PARSE_ERROR("Tried to read past end of buffer while reading symbol table."); + checkPastBlockEnd("Symbol Table"); handler->handleSymbolTableEnd(); } -void AbstractBytecodeParser::ParseFunctionLazily(BufPtr &Buf, BufPtr EndBuf) { +void AbstractBytecodeParser::ParseFunctionLazily() { if (FunctionSignatureList.empty()) throw std::string("FunctionSignatureList empty!"); @@ -236,9 +316,10 @@ void AbstractBytecodeParser::ParseFunctionLazily(BufPtr &Buf, BufPtr EndBuf) { FunctionSignatureList.pop_back(); // Save the information for future reading of the function - LazyFunctionLoadMap[FType] = LazyFunctionInfo(Buf, EndBuf); + LazyFunctionLoadMap[FType] = LazyFunctionInfo(BlockStart, BlockEnd); + // Pretend we've `parsed' this function - Buf = EndBuf; + At = BlockEnd; } void AbstractBytecodeParser::ParseNextFunction(Type* FType) { @@ -251,21 +332,20 @@ void AbstractBytecodeParser::ParseNextFunction(Type* FType) { return; } - BufPtr Buf = Fi->second.Buf; - BufPtr EndBuf = Fi->second.EndBuf; + BlockStart = At = Fi->second.Buf; + BlockEnd = Fi->second.Buf; assert(Fi->first == FType); LazyFunctionLoadMap.erase(Fi); - this->ParseFunctionBody( FType, Buf, EndBuf ); + this->ParseFunctionBody( FType ); } -void AbstractBytecodeParser::ParseFunctionBody(const Type* FType, - BufPtr &Buf, BufPtr EndBuf ) { +void AbstractBytecodeParser::ParseFunctionBody(const Type* FType ) { GlobalValue::LinkageTypes Linkage = GlobalValue::ExternalLinkage; - unsigned LinkageType = read_vbr_uint(Buf, EndBuf); + unsigned LinkageType = read_vbr_uint(); switch (LinkageType) { case 0: Linkage = GlobalValue::ExternalLinkage; break; case 1: Linkage = GlobalValue::WeakLinkage; break; @@ -284,43 +364,45 @@ void AbstractBytecodeParser::ParseFunctionBody(const Type* FType, unsigned BlockNum = 0; bool InsertedArguments = false; - while (Buf < EndBuf) { + BufPtr MyEnd = BlockEnd; + while ( At < MyEnd ) { unsigned Type, Size; - BufPtr OldBuf = Buf; - readBlock(Buf, EndBuf, Type, Size); + BufPtr OldAt = At; + readBlock(Type, Size); switch (Type) { case BytecodeFormat::ConstantPool: - ParseConstantPool(Buf, Buf+Size, FunctionTypes ); + ParseConstantPool(FunctionTypes ); break; case BytecodeFormat::CompactionTable: - ParseCompactionTable(Buf, Buf+Size); + ParseCompactionTable(); break; case BytecodeFormat::BasicBlock: - ParseBasicBlock(Buf, Buf+Size, BlockNum++); + ParseBasicBlock(BlockNum++); break; case BytecodeFormat::InstructionList: if (BlockNum) - PARSE_ERROR("InstructionList must come before basic blocks!"); - BlockNum = ParseInstructionList(Buf, Buf+Size); + PARSE_ERROR("InstructionList must come before basic blocks!"); + BlockNum = ParseInstructionList(); break; case BytecodeFormat::SymbolTable: - ParseSymbolTable(Buf, Buf+Size ); + ParseSymbolTable(); break; default: - Buf += Size; - if (OldBuf > Buf) - PARSE_ERROR("Wrapped around reading bytecode"); + At += Size; + if (OldAt > At) + PARSE_ERROR("Wrapped around reading bytecode"); break; } + BlockEnd = MyEnd; // Malformed bc file if read past end of block. - align32(Buf, EndBuf); + align32(); } handler->handleFunctionEnd(FType); @@ -336,21 +418,24 @@ void AbstractBytecodeParser::ParseAllFunctionBodies() { while ( Fi != Fe ) { const Type* FType = Fi->first; - this->ParseFunctionBody(FType, Fi->second.Buf, Fi->second.EndBuf); + BlockStart = At = Fi->second.Buf; + BlockEnd = Fi->second.EndBuf; + this->ParseFunctionBody(FType); + ++Fi; } } -void AbstractBytecodeParser::ParseCompactionTable(BufPtr &Buf, BufPtr End) { +void AbstractBytecodeParser::ParseCompactionTable() { handler->handleCompactionTableBegin(); - while (Buf != End) { - unsigned NumEntries = read_vbr_uint(Buf, End); + while ( moreInBlock() ) { + unsigned NumEntries = read_vbr_uint(); unsigned Ty; if ((NumEntries & 3) == 3) { NumEntries >>= 2; - Ty = read_vbr_uint(Buf, End); + Ty = read_vbr_uint(); } else { Ty = NumEntries >> 2; NumEntries &= 3; @@ -360,25 +445,24 @@ void AbstractBytecodeParser::ParseCompactionTable(BufPtr &Buf, BufPtr End) { if (Ty == Type::TypeTyID) { for (unsigned i = 0; i != NumEntries; ++i) { - unsigned TypeSlot = read_vbr_uint(Buf,End); + unsigned TypeSlot = read_vbr_uint(); const Type *Typ = getGlobalTableType(TypeSlot); - handler->handleCompactionTableType( i, TypeSlot, Typ ); + handler->handleCompactionTableType( i, TypeSlot, Typ ); } } else { const Type *Typ = getType(Ty); // Push the implicit zero for (unsigned i = 0; i != NumEntries; ++i) { - unsigned ValSlot = read_vbr_uint(Buf, End); - handler->handleCompactionTableValue( i, ValSlot, Typ ); + unsigned ValSlot = read_vbr_uint(); + handler->handleCompactionTableValue( i, ValSlot, Typ ); } } } handler->handleCompactionTableEnd(); } -const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, - const unsigned char *EndBuf) { - unsigned PrimType = read_vbr_uint(Buf, EndBuf); +const Type *AbstractBytecodeParser::ParseTypeConstant() { + unsigned PrimType = read_vbr_uint(); const Type *Val = 0; if ((Val = Type::getPrimitiveType((Type::PrimitiveID)PrimType))) @@ -386,13 +470,13 @@ const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, switch (PrimType) { case Type::FunctionTyID: { - const Type *RetType = getType(read_vbr_uint(Buf, EndBuf)); + const Type *RetType = getType(read_vbr_uint()); - unsigned NumParams = read_vbr_uint(Buf, EndBuf); + unsigned NumParams = read_vbr_uint(); std::vector Params; while (NumParams--) - Params.push_back(getType(read_vbr_uint(Buf, EndBuf))); + Params.push_back(getType(read_vbr_uint())); bool isVarArg = Params.size() && Params.back() == Type::VoidTy; if (isVarArg) Params.pop_back(); @@ -402,10 +486,10 @@ const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, return result; } case Type::ArrayTyID: { - unsigned ElTyp = read_vbr_uint(Buf, EndBuf); + unsigned ElTyp = read_vbr_uint(); const Type *ElementType = getType(ElTyp); - unsigned NumElements = read_vbr_uint(Buf, EndBuf); + unsigned NumElements = read_vbr_uint(); BCR_TRACE(5, "Array Type Constant #" << ElTyp << " size=" << NumElements << "\n"); @@ -415,10 +499,10 @@ const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, } case Type::StructTyID: { std::vector Elements; - unsigned Typ = read_vbr_uint(Buf, EndBuf); + unsigned Typ = read_vbr_uint(); while (Typ) { // List is terminated by void/0 typeid Elements.push_back(getType(Typ)); - Typ = read_vbr_uint(Buf, EndBuf); + Typ = read_vbr_uint(); } Type* result = StructType::get(Elements); @@ -426,7 +510,7 @@ const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, return result; } case Type::PointerTyID: { - unsigned ElTyp = read_vbr_uint(Buf, EndBuf); + unsigned ElTyp = read_vbr_uint(); BCR_TRACE(5, "Pointer Type Constant #" << ElTyp << "\n"); Type* result = PointerType::get(getType(ElTyp)); handler->handleType( result ); @@ -455,10 +539,9 @@ const Type *AbstractBytecodeParser::ParseTypeConstant(const unsigned char *&Buf, // something and when we reread the type later, we can replace the opaque type // with a new resolved concrete type. // -void AbstractBytecodeParser::ParseTypeConstants(const unsigned char *&Buf, - const unsigned char *EndBuf, - TypeListTy &Tab, - unsigned NumEntries) { +void AbstractBytecodeParser::ParseTypeConstants( + TypeListTy &Tab, unsigned NumEntries +) { assert(Tab.size() == 0 && "should not have read type constants in before!"); // Insert a bunch of opaque types to be resolved later... @@ -470,7 +553,7 @@ void AbstractBytecodeParser::ParseTypeConstants(const unsigned char *&Buf, // opaque types just inserted. // for (unsigned i = 0; i != NumEntries; ++i) { - const Type *NewTy = ParseTypeConstant(Buf, EndBuf), *OldTy = Tab[i].get(); + const Type *NewTy = ParseTypeConstant(), *OldTy = Tab[i].get(); if (NewTy == 0) throw std::string("Couldn't parse type!"); BCR_TRACE(4, "#" << i << ": Read Type Constant: '" << NewTy << "' Replacing: " << OldTy << "\n"); @@ -497,18 +580,16 @@ void AbstractBytecodeParser::ParseTypeConstants(const unsigned char *&Buf, } -void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, - const unsigned char *EndBuf, - unsigned TypeID) { +void AbstractBytecodeParser::ParseConstantValue(unsigned TypeID) { // We must check for a ConstantExpr before switching by type because // a ConstantExpr can be of any type, and has no explicit value. // // 0 if not expr; numArgs if is expr - unsigned isExprNumArgs = read_vbr_uint(Buf, EndBuf); + unsigned isExprNumArgs = read_vbr_uint(); if (isExprNumArgs) { - unsigned Opcode = read_vbr_uint(Buf, EndBuf); + unsigned Opcode = read_vbr_uint(); const Type* Typ = getType(TypeID); // FIXME: Encoding of constant exprs could be much more compact! @@ -517,8 +598,8 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, // Read the slot number and types of each of the arguments for (unsigned i = 0; i != isExprNumArgs; ++i) { - unsigned ArgValSlot = read_vbr_uint(Buf, EndBuf); - unsigned ArgTypeSlot = read_vbr_uint(Buf, EndBuf); + unsigned ArgValSlot = read_vbr_uint(); + unsigned ArgTypeSlot = read_vbr_uint(); BCR_TRACE(4, "CE Arg " << i << ": Type: '" << *getType(ArgTypeSlot) << "' slot: " << ArgValSlot << "\n"); @@ -534,7 +615,7 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, const Type *Ty = getType(TypeID); switch (Ty->getPrimitiveID()) { case Type::BoolTyID: { - unsigned Val = read_vbr_uint(Buf, EndBuf); + unsigned Val = read_vbr_uint(); if (Val != 0 && Val != 1) PARSE_ERROR("Invalid boolean value read."); @@ -545,7 +626,7 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, case Type::UByteTyID: // Unsigned integer types... case Type::UShortTyID: case Type::UIntTyID: { - unsigned Val = read_vbr_uint(Buf, EndBuf); + unsigned Val = read_vbr_uint(); if (!ConstantUInt::isValueValidForType(Ty, Val)) throw std::string("Invalid unsigned byte/short/int read."); handler->handleConstantValue( ConstantUInt::get(Ty, Val) ); @@ -553,7 +634,7 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, } case Type::ULongTyID: { - handler->handleConstantValue( ConstantUInt::get(Ty, read_vbr_uint64(Buf, EndBuf)) ); + handler->handleConstantValue( ConstantUInt::get(Ty, read_vbr_uint64()) ); break; } @@ -561,7 +642,7 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, case Type::ShortTyID: case Type::IntTyID: { case Type::LongTyID: - int64_t Val = read_vbr_int64(Buf, EndBuf); + int64_t Val = read_vbr_int64(); if (!ConstantSInt::isValueValidForType(Ty, Val)) throw std::string("Invalid signed byte/short/int/long read."); handler->handleConstantValue( ConstantSInt::get(Ty, Val) ); @@ -570,14 +651,14 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, case Type::FloatTyID: { float F; - input_data(Buf, EndBuf, &F, &F+1); + read_data(&F, &F+1); handler->handleConstantValue( ConstantFP::get(Ty, F) ); break; } case Type::DoubleTyID: { double Val; - input_data(Buf, EndBuf, &Val, &Val+1); + read_data(&Val, &Val+1); handler->handleConstantValue( ConstantFP::get(Ty, Val) ); break; } @@ -592,7 +673,7 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, std::vector Elements; Elements.reserve(NumElements); while (NumElements--) // Read all of the elements of the constant. - Elements.push_back(read_vbr_uint(Buf, EndBuf)); + Elements.push_back(read_vbr_uint()); handler->handleConstantArray( AT, Elements ); break; @@ -603,15 +684,16 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, std::vector Elements; Elements.reserve(ST->getNumElements()); for (unsigned i = 0; i != ST->getNumElements(); ++i) - Elements.push_back(read_vbr_uint(Buf, EndBuf)); - + Elements.push_back(read_vbr_uint()); handler->handleConstantStruct( ST, Elements ); + break; } case Type::PointerTyID: { // ConstantPointerRef value... const PointerType *PT = cast(Ty); - unsigned Slot = read_vbr_uint(Buf, EndBuf); + unsigned Slot = read_vbr_uint(); handler->handleConstantPointer( PT, Slot ); + break; } default: @@ -620,16 +702,13 @@ void AbstractBytecodeParser::ParseConstantValue(const unsigned char *&Buf, } } -void AbstractBytecodeParser::ParseGlobalTypes(const unsigned char *&Buf, - const unsigned char *EndBuf) { - ParseConstantPool(Buf, EndBuf, ModuleTypes); +void AbstractBytecodeParser::ParseGlobalTypes() { + ParseConstantPool(ModuleTypes); } -void AbstractBytecodeParser::ParseStringConstants(const unsigned char *&Buf, - const unsigned char *EndBuf, - unsigned NumEntries ){ +void AbstractBytecodeParser::ParseStringConstants(unsigned NumEntries ){ for (; NumEntries; --NumEntries) { - unsigned Typ = read_vbr_uint(Buf, EndBuf); + unsigned Typ = read_vbr_uint(); const Type *Ty = getType(Typ); if (!isa(Ty)) throw std::string("String constant data invalid!"); @@ -641,7 +720,7 @@ void AbstractBytecodeParser::ParseStringConstants(const unsigned char *&Buf, // Read character data. The type tells us how long the string is. char Data[ATy->getNumElements()]; - input_data(Buf, EndBuf, Data, Data+ATy->getNumElements()); + read_data(Data, Data+ATy->getNumElements()); std::vector Elements(ATy->getNumElements()); if (ATy->getElementType() == Type::SByteTy) @@ -658,35 +737,33 @@ void AbstractBytecodeParser::ParseStringConstants(const unsigned char *&Buf, } -void AbstractBytecodeParser::ParseConstantPool(const unsigned char *&Buf, - const unsigned char *EndBuf, - TypeListTy &TypeTab) { - while (Buf < EndBuf) { - unsigned NumEntries = read_vbr_uint(Buf, EndBuf); - unsigned Typ = read_vbr_uint(Buf, EndBuf); +void AbstractBytecodeParser::ParseConstantPool( TypeListTy &TypeTab) { + while ( moreInBlock() ) { + unsigned NumEntries = read_vbr_uint(); + unsigned Typ = read_vbr_uint(); if (Typ == Type::TypeTyID) { - ParseTypeConstants(Buf, EndBuf, TypeTab, NumEntries); + ParseTypeConstants(TypeTab, NumEntries); } else if (Typ == Type::VoidTyID) { - ParseStringConstants(Buf, EndBuf, NumEntries); + ParseStringConstants(NumEntries); } else { BCR_TRACE(3, "Type: '" << *getType(Typ) << "' NumEntries: " << NumEntries << "\n"); for (unsigned i = 0; i < NumEntries; ++i) { - ParseConstantValue(Buf, EndBuf, Typ); + ParseConstantValue(Typ); } } } - if (Buf > EndBuf) PARSE_ERROR("Read past end of buffer."); + checkPastBlockEnd("Constant Pool"); } -void AbstractBytecodeParser::ParseModuleGlobalInfo(BufPtr &Buf, BufPtr End) { +void AbstractBytecodeParser::ParseModuleGlobalInfo() { handler->handleModuleGlobalsBegin(); // Read global variables... - unsigned VarType = read_vbr_uint(Buf, End); + unsigned VarType = read_vbr_uint(); while (VarType != Type::VoidTyID) { // List is terminated by Void // VarType Fields: bit0 = isConstant, bit1 = hasInitializer, bit2,3,4 = // Linkage, bit4+ = slot# @@ -721,17 +798,17 @@ void AbstractBytecodeParser::ParseModuleGlobalInfo(BufPtr &Buf, BufPtr End) { // Create the global variable... if (hasInitializer) { - unsigned initSlot = read_vbr_uint(Buf,End); + unsigned initSlot = read_vbr_uint(); handler->handleInitializedGV( ElTy, isConstant, Linkage, initSlot ); } else handler->handleGlobalVariable( ElTy, isConstant, Linkage ); // Get next item - VarType = read_vbr_uint(Buf, End); + VarType = read_vbr_uint(); } // Read the function objects for all of the functions that are coming - unsigned FnSignature = read_vbr_uint(Buf, End); + unsigned FnSignature = read_vbr_uint(); while (FnSignature != Type::VoidTyID) { // List is terminated by Void const Type *Ty = getType(FnSignature); if (!isa(Ty) || @@ -750,22 +827,26 @@ void AbstractBytecodeParser::ParseModuleGlobalInfo(BufPtr &Buf, BufPtr End) { handler->handleFunctionDeclaration(Ty); // Get Next function signature - FnSignature = read_vbr_uint(Buf, End); + FnSignature = read_vbr_uint(); } if (hasInconsistentModuleGlobalInfo) - align32(Buf, End); + align32(); + + // Now that the function signature list is set up, reverse it so that we can + // remove elements efficiently from the back of the vector. + std::reverse(FunctionSignatureList.begin(), FunctionSignatureList.end()); // This is for future proofing... in the future extra fields may be added that // we don't understand, so we transparently ignore them. // - Buf = End; + At = BlockEnd; handler->handleModuleGlobalsEnd(); } -void AbstractBytecodeParser::ParseVersionInfo(BufPtr &Buf, BufPtr EndBuf) { - unsigned Version = read_vbr_uint(Buf, EndBuf); +void AbstractBytecodeParser::ParseVersionInfo() { + unsigned Version = read_vbr_uint(); // Unpack version number: low four bits are for flags, top bits = version Module::Endianness Endianness; @@ -814,85 +895,164 @@ void AbstractBytecodeParser::ParseVersionInfo(BufPtr &Buf, BufPtr EndBuf) { handler->handleVersionInfo(RevisionNum, Endianness, PointerSize ); } -void AbstractBytecodeParser::ParseModule(BufPtr &Buf, BufPtr EndBuf ) { +void AbstractBytecodeParser::ParseModule() { unsigned Type, Size; - readBlock(Buf, EndBuf, Type, Size); - if (Type != BytecodeFormat::Module || Buf+Size != EndBuf) - // Hrm, not a class? - PARSE_ERROR("Expected Module block! B: " << unsigned(intptr_t(Buf)) << - ", S: " << Size << " E: " << unsigned(intptr_t(EndBuf))); + + FunctionSignatureList.clear(); // Just in case... // Read into instance variables... - ParseVersionInfo(Buf, EndBuf); - align32(Buf, EndBuf); + ParseVersionInfo(); + align32(); /// FIXME: Is this redundant? VI is first and 4 bytes! bool SeenModuleGlobalInfo = false; bool SeenGlobalTypePlane = false; - while (Buf < EndBuf) { - BufPtr OldBuf = Buf; - readBlock(Buf, EndBuf, Type, Size); + BufPtr MyEnd = BlockEnd; + while (At < MyEnd) { + BufPtr OldAt = At; + readBlock(Type, Size); switch (Type) { case BytecodeFormat::GlobalTypePlane: if ( SeenGlobalTypePlane ) - PARSE_ERROR("Two GlobalTypePlane Blocks Encountered!"); + PARSE_ERROR("Two GlobalTypePlane Blocks Encountered!"); - ParseGlobalTypes(Buf, Buf+Size); + ParseGlobalTypes(); SeenGlobalTypePlane = true; break; case BytecodeFormat::ModuleGlobalInfo: if ( SeenModuleGlobalInfo ) - PARSE_ERROR("Two ModuleGlobalInfo Blocks Encountered!"); - ParseModuleGlobalInfo(Buf, Buf+Size); + PARSE_ERROR("Two ModuleGlobalInfo Blocks Encountered!"); + ParseModuleGlobalInfo(); SeenModuleGlobalInfo = true; break; case BytecodeFormat::ConstantPool: - ParseConstantPool(Buf, Buf+Size, ModuleTypes); + ParseConstantPool(ModuleTypes); break; case BytecodeFormat::Function: - ParseFunctionLazily(Buf, Buf+Size); + ParseFunctionLazily(); break; case BytecodeFormat::SymbolTable: - ParseSymbolTable(Buf, Buf+Size ); + ParseSymbolTable(); break; default: - Buf += Size; - if (OldBuf > Buf) - { - PARSE_ERROR("Unexpected Block of Type" << Type << "encountered!" ); + At += Size; + if (OldAt > At) { + PARSE_ERROR("Unexpected Block of Type" << Type << "encountered!" ); } break; } - align32(Buf, EndBuf); + BlockEnd = MyEnd; + align32(); } + + /// Make sure we pulled them all out. If we didn't then there's a declaration + /// but a missing body. That's not allowed. + if (!FunctionSignatureList.empty()) + throw std::string( + "Function declared, but bytecode stream ended before definition"); } void AbstractBytecodeParser::ParseBytecode( - BufPtr Buf, unsigned Length, + BufPtr b, unsigned Length, const std::string &ModuleID) { + At = MemStart = BlockStart = b; + MemEnd = BlockEnd = b + Length; handler->handleStart(); - unsigned char *EndBuf = (unsigned char*)(Buf + Length); // Read and check signature... - unsigned Sig = read(Buf, EndBuf); + unsigned Sig = read_uint(); if (Sig != ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) { PARSE_ERROR("Invalid bytecode signature: " << Sig); } handler->handleModuleBegin(ModuleID); - this->ParseModule(Buf, EndBuf); + unsigned Type, Size; + readBlock(Type, Size); + if ( Type != BytecodeFormat::Module ) { + PARSE_ERROR("Expected Module Block! At: " << unsigned(intptr_t(At)) + << ", Type:" << Type << ", Size:" << Size); + } + if ( At + Size != MemEnd ) { + PARSE_ERROR("Invalid Top Level Block Length! At: " + << unsigned(intptr_t(At)) << ", Type:" << Type << ", Size:" << Size); + } + this->ParseModule(); handler->handleModuleEnd(ModuleID); handler->handleFinish(); } +//===----------------------------------------------------------------------===// +//=== Default Implementations of Handler Methods +//===----------------------------------------------------------------------===// + +bool BytecodeHandler::handleError(const std::string& str ) { return false; } +void BytecodeHandler::handleStart() { } +void BytecodeHandler::handleFinish() { } +void BytecodeHandler::handleModuleBegin(const std::string& id) { } +void BytecodeHandler::handleModuleEnd(const std::string& id) { } +void BytecodeHandler::handleVersionInfo( unsigned char RevisionNum, + Module::Endianness Endianness, Module::PointerSize PointerSize) { } +void BytecodeHandler::handleModuleGlobalsBegin() { } +void BytecodeHandler::handleGlobalVariable( + const Type* ElemType, bool isConstant, GlobalValue::LinkageTypes ) { } +void BytecodeHandler::handleInitializedGV( + const Type* ElemType, bool isConstant, GlobalValue::LinkageTypes, + unsigned initSlot) {} +void BytecodeHandler::handleType( const Type* Ty ) {} +void BytecodeHandler::handleFunctionDeclaration( + const Type* FuncType) {} +void BytecodeHandler::handleModuleGlobalsEnd() { } +void BytecodeHandler::handleCompactionTableBegin() { } +void BytecodeHandler::handleCompactionTablePlane( unsigned Ty, + unsigned NumEntries) {} +void BytecodeHandler::handleCompactionTableType( unsigned i, unsigned TypSlot, + const Type* ) {} +void BytecodeHandler::handleCompactionTableValue( unsigned i, unsigned ValSlot, + const Type* ) {} +void BytecodeHandler::handleCompactionTableEnd() { } +void BytecodeHandler::handleSymbolTableBegin() { } +void BytecodeHandler::handleSymbolTablePlane( unsigned Ty, unsigned NumEntries, + const Type* Typ) { } +void BytecodeHandler::handleSymbolTableType( unsigned i, unsigned slot, + const std::string& name ) { } +void BytecodeHandler::handleSymbolTableValue( unsigned i, unsigned slot, + const std::string& name ) { } +void BytecodeHandler::handleSymbolTableEnd() { } +void BytecodeHandler::handleFunctionBegin( const Type* FType, + GlobalValue::LinkageTypes linkage ) { } +void BytecodeHandler::handleFunctionEnd( const Type* FType) { } +void BytecodeHandler::handleBasicBlockBegin( unsigned blocknum) { } +bool BytecodeHandler::handleInstruction( unsigned Opcode, const Type* iType, + std::vector& Operands, unsigned Size) { + return Instruction::isTerminator(Opcode); + } +void BytecodeHandler::handleBasicBlockEnd(unsigned blocknum) { } +void BytecodeHandler::handleGlobalConstantsBegin() { } +void BytecodeHandler::handleConstantExpression( unsigned Opcode, + const Type* Typ, std::vector > ArgVec ) { } +void BytecodeHandler::handleConstantValue( Constant * c ) { } +void BytecodeHandler::handleConstantArray( const ArrayType* AT, + std::vector& Elements ) { } +void BytecodeHandler::handleConstantStruct( const StructType* ST, + std::vector& ElementSlots) { } +void BytecodeHandler::handleConstantPointer( + const PointerType* PT, unsigned Slot) { } +void BytecodeHandler::handleConstantString( const ConstantArray* CA ) {} +void BytecodeHandler::handleGlobalConstantsEnd() {} +void BytecodeHandler::handleAlignment(unsigned numBytes) {} +void BytecodeHandler::handleBlock( + unsigned BType, const unsigned char* StartPtr, unsigned Size) {} +void BytecodeHandler::handleVBR32(unsigned Size ) {} +void BytecodeHandler::handleVBR64(unsigned Size ) {} + // vim: sw=2 diff --git a/lib/Bytecode/Reader/Parser.h b/lib/Bytecode/Reader/Parser.h index 38a14717d5e..08758cb5f29 100644 --- a/lib/Bytecode/Reader/Parser.h +++ b/lib/Bytecode/Reader/Parser.h @@ -41,7 +41,18 @@ class AbstractBytecodeParser { /// @name Constructors /// @{ public: - AbstractBytecodeParser( BytecodeHandler* h ) { handler = h; } + AbstractBytecodeParser( + BytecodeHandler* h, + bool repAlignment = false, + bool repBlocks = false, + bool repVBR = false + ) { + handler = h; + reportAlignment = repAlignment; + reportBlocks = repBlocks; + reportVBR = repVBR; + } + ~AbstractBytecodeParser() { } /// @} @@ -86,64 +97,72 @@ public: /// @{ protected: /// @brief Parse whole module scope - void ParseModule (BufPtr &Buf, BufPtr End); + void ParseModule (); /// @brief Parse the version information block - void ParseVersionInfo (BufPtr &Buf, BufPtr End); + void ParseVersionInfo (); /// @brief Parse the ModuleGlobalInfo block - void ParseModuleGlobalInfo (BufPtr &Buf, BufPtr End); + void ParseModuleGlobalInfo (); /// @brief Parse a symbol table - void ParseSymbolTable (BufPtr &Buf, BufPtr End); + void ParseSymbolTable (); /// This function parses LLVM functions lazily. It obtains the type of the /// function and records where the body of the function is in the bytecode /// buffer. The caller can then use the ParseNextFunction and /// ParseAllFunctionBodies to get handler events for the functions. /// @brief Parse functions lazily. - void ParseFunctionLazily (BufPtr &Buf, BufPtr End); + void ParseFunctionLazily (); /// @brief Parse a function body - void ParseFunctionBody (const Type* FType, BufPtr &Buf, BufPtr EndBuf); + void ParseFunctionBody (const Type* FType); /// @brief Parse a compaction table - void ParseCompactionTable (BufPtr &Buf, BufPtr End); + void ParseCompactionTable (); /// @brief Parse global types - void ParseGlobalTypes (BufPtr &Buf, BufPtr End); + void ParseGlobalTypes (); /// @brief Parse a basic block (for LLVM 1.0 basic block blocks) - void ParseBasicBlock (BufPtr &Buf, BufPtr End, unsigned BlockNo); + void ParseBasicBlock (unsigned BlockNo); /// @brief parse an instruction list (for post LLVM 1.0 instruction lists /// with blocks differentiated by terminating instructions. - unsigned ParseInstructionList(BufPtr &Buf, BufPtr End); + unsigned ParseInstructionList(); /// @brief Parse an instruction. - bool ParseInstruction (BufPtr &Buf, BufPtr End, - std::vector& Args); + bool ParseInstruction (std::vector& Args); /// @brief Parse a constant pool - void ParseConstantPool (BufPtr &Buf, BufPtr End, TypeListTy& List); + void ParseConstantPool (TypeListTy& List); /// @brief Parse a constant value - void ParseConstantValue (BufPtr &Buf, BufPtr End, unsigned TypeID); + void ParseConstantValue (unsigned TypeID); /// @brief Parse a block of types. - void ParseTypeConstants (BufPtr &Buf, BufPtr End, TypeListTy &Tab, - unsigned NumEntries); + void ParseTypeConstants (TypeListTy &Tab, unsigned NumEntries); /// @brief Parse a single type. - const Type *ParseTypeConstant(BufPtr &Buf, BufPtr End); + const Type *ParseTypeConstant(); /// @brief Parse a string constants block - void ParseStringConstants (BufPtr &Buf, BufPtr End, unsigned NumEntries); + void ParseStringConstants (unsigned NumEntries); /// @} /// @name Data /// @{ private: + BufPtr MemStart; ///< Start of the memory buffer + BufPtr MemEnd; ///< End of the memory buffer + BufPtr BlockStart; ///< Start of current block being parsed + BufPtr BlockEnd; ///< End of current block being parsed + BufPtr At; ///< Where we're currently parsing at + + bool reportAlignment; ///< Parser should report alignment? + bool reportBlocks; ///< Parser should report blocks? + bool reportVBR; ///< Report VBR compression events + // Information about the module, extracted from the bytecode revision number. unsigned char RevisionNum; // The rev # itself @@ -219,9 +238,25 @@ private: private: - static inline void readBlock(const unsigned char *&Buf, - const unsigned char *EndBuf, - unsigned &Type, unsigned &Size) ; + /// Is there more to parse in the current block? + inline bool moreInBlock(); + + /// Have we read past the end of the block + inline void checkPastBlockEnd(const char * block_name); + + /// Align to 32 bits + inline void align32(); + + /// Reader interface + inline unsigned read_uint(); + inline unsigned read_vbr_uint(); + inline uint64_t read_vbr_uint64(); + inline int64_t read_vbr_int64(); + inline std::string read_str(); + inline void read_data(void *Ptr, void *End); + + /// Read a block header + inline void readBlock(unsigned &Type, unsigned &Size); const Type *AbstractBytecodeParser::getType(unsigned ID); /// getGlobalTableType - This is just like getType, but when a compaction @@ -443,7 +478,8 @@ public: virtual bool handleInstruction( unsigned Opcode, const Type* iType, - std::vector& Operands + std::vector& Operands, + unsigned Length ); /// @brief Handle the end of a basic block @@ -488,6 +524,16 @@ public: /// @brief Handle the end of the global constants virtual void handleGlobalConstantsEnd(); + /// @brief Handle an alignment event + virtual void handleAlignment(unsigned numBytes); + + virtual void handleBlock( + unsigned BType, ///< The type of block + const unsigned char* StartPtr, ///< The start of the block + unsigned Size ///< The size of the block + ); + virtual void handleVBR32(unsigned Size ); + virtual void handleVBR64(unsigned Size ); /// @} };