//===-- Parser.h - Abstract Interface To Bytecode Parsing -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file was developed by Reid Spencer and is distributed under the // University of Illinois Open Source License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This header file defines the interface to the Bytecode Parser and the // Bytecode Handler interface that it calls. // //===----------------------------------------------------------------------===// #ifndef BYTECODE_PARSER_H #define BYTECODE_PARSER_H #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalValue.h" #include "llvm/Module.h" #include #include #include namespace llvm { class BytecodeHandler; ///< Forward declare the handler interface /// This class defines the interface for parsing a buffer of bytecode. The /// parser itself takes no action except to call the various functions of /// the handler interface. The parser's sole responsibility is the correct /// interpretation of the bytecode buffer. The handler is responsible for /// instantiating and keeping track of all values. As a convenience, the parser /// is responsible for materializing types and will pass them through the /// handler interface as necessary. /// @see BytecodeHandler /// @brief Abstract Bytecode Parser interface class AbstractBytecodeParser { /// @name Constructors /// @{ public: AbstractBytecodeParser( BytecodeHandler* h, bool repAlignment = false, bool repBlocks = false, bool repVBR = false ) { handler = h; reportAlignment = repAlignment; reportBlocks = repBlocks; reportVBR = repVBR; } ~AbstractBytecodeParser() { } /// @} /// @name Types /// @{ public: /// @brief A convenience type for the buffer pointer typedef const unsigned char* BufPtr; /// @brief The type used for vector of potentially abstract types typedef std::vector TypeListTy; /// @brief /// @} /// @name Methods /// @{ public: /// @brief Main interface to parsing a bytecode buffer. void ParseBytecode(const unsigned char *Buf, unsigned Length, const std::string &ModuleID); /// The ParseBytecode method lazily parses functions. Use this /// method to cause the parser to actually parse all the function bodies /// in the bytecode buffer. /// @see ParseBytecode /// @brief Parse all function bodies void ParseAllFunctionBodies (); /// The Parsebytecode method lazily parses functions. Use this /// method to casue the parser to parse the next function of a given /// types. Note that this will remove the function from what is to be /// included by ParseAllFunctionBodies. /// @see ParseAllFunctionBodies /// @see ParseBytecode /// @brief Parse the next function of specific type void ParseNextFunction (Function* Func) ; /// @} /// @name Parsing Units For Subclasses /// @{ protected: /// @brief Parse whole module scope void ParseModule (); /// @brief Parse the version information block void ParseVersionInfo (); /// @brief Parse the ModuleGlobalInfo block void ParseModuleGlobalInfo (); /// @brief Parse a symbol table void ParseSymbolTable (); /// This function parses LLVM functions lazily. It obtains the type of the /// function and records where the body of the function is in the bytecode /// buffer. The caller can then use the ParseNextFunction and /// ParseAllFunctionBodies to get handler events for the functions. /// @brief Parse functions lazily. void ParseFunctionLazily (); /// @brief Parse a function body void ParseFunctionBody (Function* Func); /// @brief Parse a compaction table void ParseCompactionTable (); /// @brief Parse global types void ParseGlobalTypes (); /// @brief Parse a basic block (for LLVM 1.0 basic block blocks) void ParseBasicBlock (unsigned BlockNo); /// @brief parse an instruction list (for post LLVM 1.0 instruction lists /// with blocks differentiated by terminating instructions. unsigned ParseInstructionList(); /// @brief Parse an instruction. bool ParseInstruction (std::vector& Args); /// @brief Parse a constant pool void ParseConstantPool (TypeListTy& List); /// @brief Parse a constant value void ParseConstantValue (unsigned TypeID); /// @brief Parse a block of types. void ParseTypeConstants (TypeListTy &Tab, unsigned NumEntries); /// @brief Parse a single type. const Type *ParseTypeConstant(); /// @brief Parse a string constants block void ParseStringConstants (unsigned NumEntries); /// @} /// @name Data /// @{ private: BufPtr MemStart; ///< Start of the memory buffer BufPtr MemEnd; ///< End of the memory buffer BufPtr BlockStart; ///< Start of current block being parsed BufPtr BlockEnd; ///< End of current block being parsed BufPtr At; ///< Where we're currently parsing at bool reportAlignment; ///< Parser should report alignment? bool reportBlocks; ///< Parser should report blocks? bool reportVBR; ///< Report VBR compression events // Information about the module, extracted from the bytecode revision number. unsigned char RevisionNum; // The rev # itself // Flags to distinguish LLVM 1.0 & 1.1 bytecode formats (revision #0) // Revision #0 had an explicit alignment of data only for the ModuleGlobalInfo // block. This was fixed to be like all other blocks in 1.2 bool hasInconsistentModuleGlobalInfo; // Revision #0 also explicitly encoded zero values for primitive types like // int/sbyte/etc. bool hasExplicitPrimitiveZeros; // Flags to control features specific the LLVM 1.2 and before (revision #1) // LLVM 1.2 and earlier required that getelementptr structure indices were // ubyte constants and that sequential type indices were longs. bool hasRestrictedGEPTypes; /// CompactionTable - If a compaction table is active in the current function, /// this is the mapping that it contains. std::vector CompactionTypeTable; // ConstantFwdRefs - This maintains a mapping between 's and // forward references to constants. Such values may be referenced before they // are defined, and if so, the temporary object that they represent is held // here. // typedef std::map, Constant*> ConstantRefsType; ConstantRefsType ConstantFwdRefs; // TypesLoaded - This vector mirrors the Values[TypeTyID] plane. It is used // to deal with forward references to types. // TypeListTy ModuleTypes; TypeListTy FunctionTypes; // When the ModuleGlobalInfo section is read, we create a FunctionType object // for each function in the module. When the function is loaded, this type is // used to instantiate the actual function object. std::vector FunctionSignatureList; // Constant values are read in after global variables. Because of this, we // must defer setting the initializers on global variables until after module // level constants have been read. In the mean time, this list keeps track of // what we must do. // std::vector > GlobalInits; /// @} /// @name Implementation Details /// @{ private: /// This stores the parser's handler. It makes virtual function calls through /// the BytecodeHandler to notify the handler of parsing events. What the /// handler does with the events is completely orthogonal to the business of /// parsing the bytecode. /// @brief The handler of bytecode parsing events. BytecodeHandler* handler; /// For lazy reading-in of functions, we need to save away several pieces of /// information about each function: its begin and end pointer in the buffer /// and its FunctionSlot. struct LazyFunctionInfo { const unsigned char *Buf, *EndBuf; LazyFunctionInfo(const unsigned char *B = 0, const unsigned char *EB = 0) : Buf(B), EndBuf(EB) {} }; typedef std::map LazyFunctionMap; LazyFunctionMap LazyFunctionLoadMap; private: /// Is there more to parse in the current block? inline bool moreInBlock(); /// Have we read past the end of the block inline void checkPastBlockEnd(const char * block_name); /// Align to 32 bits inline void align32(); /// Reader interface inline unsigned read_uint(); inline unsigned read_vbr_uint(); inline uint64_t read_vbr_uint64(); inline int64_t read_vbr_int64(); inline std::string read_str(); inline void read_data(void *Ptr, void *End); /// Read a block header inline void readBlock(unsigned &Type, unsigned &Size); const Type *AbstractBytecodeParser::getType(unsigned ID); /// getGlobalTableType - This is just like getType, but when a compaction /// table is in use, it is ignored. Also, no forward references or other /// fancy features are supported. const Type *getGlobalTableType(unsigned Slot) { if (Slot < Type::FirstDerivedTyID) { const Type *Ty = Type::getPrimitiveType((Type::PrimitiveID)Slot); assert(Ty && "Not a primitive type ID?"); return Ty; } Slot -= Type::FirstDerivedTyID; if (Slot >= ModuleTypes.size()) throw std::string("Illegal compaction table type reference!"); return ModuleTypes[Slot]; } unsigned getGlobalTableTypeSlot(const Type *Ty) { if (Ty->isPrimitiveType()) return Ty->getPrimitiveID(); TypeListTy::iterator I = find(ModuleTypes.begin(), ModuleTypes.end(), Ty); if (I == ModuleTypes.end()) throw std::string("Didn't find type in ModuleTypes."); return Type::FirstDerivedTyID + (&*I - &ModuleTypes[0]); } AbstractBytecodeParser(const AbstractBytecodeParser &); // DO NOT IMPLEMENT void operator=(const AbstractBytecodeParser &); // DO NOT IMPLEMENT /// @} }; /// This class provides the interface for the handling bytecode events during /// parsing. The methods on this interface are invoked by the /// AbstractBytecodeParser as it discovers the content of a bytecode stream. /// This class provides a a clear separation of concerns between recognizing /// the semantic units of a bytecode file and deciding what to do with them. /// The AbstractBytecodeParser recognizes the content of the bytecode file and /// calls the BytecodeHandler methods to determine what should be done. This /// arrangement allows Bytecode files to be read and handled for a number of /// purposes simply by creating a subclass of BytecodeHandler. None of the /// parsing details need to be understood, only the meaning of the calls /// made on this interface. /// /// Another paradigm that uses this design pattern is the XML SAX Parser. The /// ContentHandler for SAX plays the same role as the BytecodeHandler here. /// @see AbstractbytecodeParser /// @brief Handle Bytecode Parsing Events class BytecodeHandler { /// @name Constructors And Operators /// @{ public: /// @brief Default constructor (empty) BytecodeHandler() {} /// @brief Virtual destructor (empty) virtual ~BytecodeHandler() {} private: BytecodeHandler(const BytecodeHandler &); // DO NOT IMPLEMENT void operator=(const BytecodeHandler &); // DO NOT IMPLEMENT /// @} /// @name Handler Methods /// @{ public: /// This method is called whenever the parser detects an error in the /// bytecode formatting. Returning true will cause the parser to keep /// going, however this is inadvisable in most cases. Returning false will /// cause the parser to throw the message as a std::string. /// @brief Handle parsing errors. virtual bool handleError(const std::string& str ); /// This method is called at the beginning of a parse before anything is /// read in order to give the handler a chance to initialize. /// @brief Handle the start of a bytecode parse virtual void handleStart(); /// This method is called at the end of a parse after everything has been /// read in order to give the handler a chance to terminate. /// @brief Handle the end of a bytecode parse virtual void handleFinish(); /// This method is called at the start of a module to indicate that a /// module is being parsed. /// @brief Handle the start of a module. virtual void handleModuleBegin(const std::string& id); /// This method is called at the end of a module to indicate that the module /// previously being parsed has concluded. /// @brief Handle the end of a module. virtual void handleModuleEnd(const std::string& id); /// This method is called once the version information has been parsed. It /// provides the information about the version of the bytecode file being /// read. /// @brief Handle the bytecode prolog virtual void handleVersionInfo( unsigned char RevisionNum, ///< Byte code revision number Module::Endianness Endianness, ///< Endianness indicator Module::PointerSize PointerSize ///< PointerSize indicator ); /// This method is called at the start of a module globals block which /// contains the global variables and the function placeholders virtual void handleModuleGlobalsBegin(); /// This method is called when a non-initialized global variable is /// recognized. Its type, constness, and linkage type are provided. /// @brief Handle a non-initialized global variable virtual void handleGlobalVariable( const Type* ElemType, ///< The type of the global variable bool isConstant, ///< Whether the GV is constant or not GlobalValue::LinkageTypes ///< The linkage type of the GV ); /// This method is called when an initialized global variable is recognized. /// Its type constness, linkage type, and the slot number of the initializer /// are provided. /// @brief Handle an intialized global variable. virtual void handleInitializedGV( const Type* ElemType, ///< The type of the global variable bool isConstant, ///< Whether the GV is constant or not GlobalValue::LinkageTypes,///< The linkage type of the GV unsigned initSlot ///< Slot number of GV's initializer ); /// This method is called when a new type is recognized. The type is /// converted from the bytecode and passed to this method. /// @brief Handle a type virtual void handleType( const Type* Ty ); /// This method is called when the function prototype for a function is /// encountered in the module globals block. virtual void handleFunctionDeclaration( Function* Func, const FunctionType* FuncType ///< The type of the function ); /// This method is called at the end of the module globals block. /// @brief Handle end of module globals block. virtual void handleModuleGlobalsEnd(); /// This method is called at the beginning of a compaction table. /// @brief Handle start of compaction table. virtual void handleCompactionTableBegin(); /// @brief Handle start of a compaction table plane virtual void handleCompactionTablePlane( unsigned Ty, unsigned NumEntries ); /// @brief Handle a type entry in the compaction table virtual void handleCompactionTableType( unsigned i, unsigned TypSlot, const Type* ); /// @brief Handle a value entry in the compaction table virtual void handleCompactionTableValue( unsigned i, unsigned ValSlot, const Type* ); /// @brief Handle end of a compaction table virtual void handleCompactionTableEnd(); /// @brief Handle start of a symbol table virtual void handleSymbolTableBegin(); /// @brief Handle start of a symbol table plane virtual void handleSymbolTablePlane( unsigned Ty, unsigned NumEntries, const Type* Ty ); /// @brief Handle a named type in the symbol table virtual void handleSymbolTableType( unsigned i, unsigned slot, const std::string& name ); /// @brief Handle a named value in the symbol table virtual void handleSymbolTableValue( unsigned i, unsigned slot, const std::string& name ); /// @brief Handle the end of a symbol table virtual void handleSymbolTableEnd(); /// @brief Handle the beginning of a function body virtual void handleFunctionBegin( Function* Func, unsigned Size ); /// @brief Handle the end of a function body virtual void handleFunctionEnd( Function* Func ); /// @brief Handle the beginning of a basic block virtual void handleBasicBlockBegin( unsigned blocknum ); /// This method is called for each instruction that is parsed. /// @returns true if the instruction is a block terminating instruction /// @brief Handle an instruction virtual bool handleInstruction( unsigned Opcode, const Type* iType, std::vector& Operands, unsigned Length ); /// @brief Handle the end of a basic block virtual void handleBasicBlockEnd(unsigned blocknum); /// @brief Handle start of global constants block. virtual void handleGlobalConstantsBegin(); /// @brief Handle a constant expression virtual void handleConstantExpression( unsigned Opcode, const Type* Typ, std::vector > ArgVec ); /// @brief Handle a constant array virtual void handleConstantArray( const ArrayType* AT, std::vector& ElementSlots ); /// @brief Handle a constant structure virtual void handleConstantStruct( const StructType* ST, std::vector& ElementSlots ); /// @brief Handle a constant pointer virtual void handleConstantPointer( const PointerType* PT, unsigned Slot ); /// @brief Handle a constant strings (array special case) virtual void handleConstantString( const ConstantArray* CA ); /// @brief Handle a primitive constant value virtual void handleConstantValue( Constant * c ); /// @brief Handle the end of the global constants virtual void handleGlobalConstantsEnd(); /// @brief Handle an alignment event virtual void handleAlignment(unsigned numBytes); virtual void handleBlock( unsigned BType, ///< The type of block const unsigned char* StartPtr, ///< The start of the block unsigned Size ///< The size of the block ); virtual void handleVBR32(unsigned Size ); virtual void handleVBR64(unsigned Size ); /// @} }; } // End llvm namespace // vim: sw=2 #endif