diff --git a/lib/Bytecode/Analyzer/BytecodeHandler.h b/lib/Bytecode/Analyzer/BytecodeHandler.h deleted file mode 100644 index 2b03e2d3320..00000000000 --- a/lib/Bytecode/Analyzer/BytecodeHandler.h +++ /dev/null @@ -1,247 +0,0 @@ -//===-- BytecodeHandler.h - Parsing Handler ---------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file was developed by Reid Spencer and is distributed under the -// University of Illinois Open Source License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This header file defines the BytecodeHandler class that gets called by the -// AbstractBytecodeParser when parsing events occur. -// -//===----------------------------------------------------------------------===// - -#ifndef BYTECODE_HANDLER_H -#define BYTECODE_HANDLER_H - -#include "llvm/Module.h" -#include "llvm/GlobalValue.h" -#include - -namespace llvm { - -class ArrayType; -class StructType; -class PointerType; -class ConstantArray; - -/// This class provides the interface for the handling bytecode events during -/// parsing. The methods on this interface are invoked by the -/// AbstractBytecodeParser as it discovers the content of a bytecode stream. -/// This class provides a a clear separation of concerns between recognizing -/// the semantic units of a bytecode file and deciding what to do with them. -/// The AbstractBytecodeParser recognizes the content of the bytecode file and -/// calls the BytecodeHandler methods to determine what should be done. This -/// arrangement allows Bytecode files to be read and handled for a number of -/// purposes simply by creating a subclass of BytecodeHandler. None of the -/// parsing details need to be understood, only the meaning of the calls -/// made on this interface. -/// -/// Another paradigm that uses this design pattern is the XML SAX Parser. The -/// ContentHandler for SAX plays the same role as the BytecodeHandler here. -/// @brief Handle Bytecode Parsing Events -class BytecodeHandler { - -/// @name Constructors And Operators -/// @{ -public: - /// @brief Default constructor (empty) - BytecodeHandler() {} - /// @brief Virtual destructor (empty) - virtual ~BytecodeHandler() {} - -private: - BytecodeHandler(const BytecodeHandler &); // DO NOT IMPLEMENT - void operator=(const BytecodeHandler &); // DO NOT IMPLEMENT - -/// @} -/// @name Handler Methods -/// @{ -public: - - /// This method is called whenever the parser detects an error in the - /// bytecode formatting. Returning true will cause the parser to keep - /// going, however this is inadvisable in most cases. Returning false will - /// cause the parser to throw the message as a std::string. - /// @brief Handle parsing errors. - virtual bool handleError(const std::string& str ); - - /// This method is called at the beginning of a parse before anything is - /// read in order to give the handler a chance to initialize. - /// @brief Handle the start of a bytecode parse - virtual void handleStart(); - - /// This method is called at the end of a parse after everything has been - /// read in order to give the handler a chance to terminate. - /// @brief Handle the end of a bytecode parse - virtual void handleFinish(); - - /// This method is called at the start of a module to indicate that a - /// module is being parsed. - /// @brief Handle the start of a module. - virtual void handleModuleBegin(const std::string& id); - - /// This method is called at the end of a module to indicate that the module - /// previously being parsed has concluded. - /// @brief Handle the end of a module. - virtual void handleModuleEnd(const std::string& id); - - /// This method is called once the version information has been parsed. It - /// provides the information about the version of the bytecode file being - /// read. - /// @brief Handle the bytecode prolog - virtual void handleVersionInfo( - unsigned char RevisionNum, ///< Byte code revision number - Module::Endianness Endianness, ///< Endianness indicator - Module::PointerSize PointerSize ///< PointerSize indicator - ); - - /// This method is called at the start of a module globals block which - /// contains the global variables and the function placeholders - virtual void handleModuleGlobalsBegin(); - - /// This method is called when a non-initialized global variable is - /// recognized. Its type, constness, and linkage type are provided. - /// @brief Handle a non-initialized global variable - virtual void handleGlobalVariable( - const Type* ElemType, ///< The type of the global variable - bool isConstant, ///< Whether the GV is constant or not - GlobalValue::LinkageTypes ///< The linkage type of the GV - ); - - /// This method is called when an initialized global variable is recognized. - /// Its type constness, linkage type, and the slot number of the initializer - /// are provided. - /// @brief Handle an intialized global variable. - virtual void handleInitializedGV( - const Type* ElemType, ///< The type of the global variable - bool isConstant, ///< Whether the GV is constant or not - GlobalValue::LinkageTypes,///< The linkage type of the GV - unsigned initSlot ///< Slot number of GV's initializer - ); - - /// This method is called when a new type is recognized. The type is - /// converted from the bytecode and passed to this method. - /// @brief Handle a type - virtual void handleType( const Type* Ty ); - - /// This method is called when the function prototype for a function is - /// encountered in the module globals block. - virtual void handleFunctionDeclaration( - const Type* FuncType ///< The type of the function - ); - - /// This method is called at the end of the module globals block. - /// @brief Handle end of module globals block. - virtual void handleModuleGlobalsEnd(); - - /// This method is called at the beginning of a compaction table. - /// @brief Handle start of compaction table. - virtual void handleCompactionTableBegin(); - virtual void handleCompactionTablePlane( - unsigned Ty, - unsigned NumEntries - ); - - virtual void handleCompactionTableType( - unsigned i, - unsigned TypSlot, - const Type* - ); - - virtual void handleCompactionTableValue( - unsigned i, - unsigned ValSlot, - const Type* - ); - - virtual void handleCompactionTableEnd(); - - virtual void handleSymbolTableBegin(); - - virtual void handleSymbolTablePlane( - unsigned Ty, - unsigned NumEntries, - const Type* Ty - ); - - virtual void handleSymbolTableType( - unsigned i, - unsigned slot, - const std::string& name - ); - - virtual void handleSymbolTableValue( - unsigned i, - unsigned slot, - const std::string& name - ); - - virtual void handleSymbolTableEnd(); - - virtual void handleFunctionBegin( - const Type* FType, - GlobalValue::LinkageTypes linkage - ); - - virtual void handleFunctionEnd( - const Type* FType - ); - - virtual void handleBasicBlockBegin( - unsigned blocknum - ); - - /// This method is called for each instruction that is parsed. - /// @returns true if the instruction is a block terminating instruction - /// @brief Handle an instruction - virtual bool handleInstruction( - unsigned Opcode, - const Type* iType, - std::vector& Operands - ); - - /// This method is called for each block that is parsed. - virtual void handleBasicBlockEnd(unsigned blocknum); - /// This method is called at the start of the global constants block. - /// @brief Handle start of global constants block. - virtual void handleGlobalConstantsBegin(); - - virtual void handleConstantExpression( - unsigned Opcode, - const Type* Typ, - std::vector > ArgVec - ); - - virtual void handleConstantArray( - const ArrayType* AT, - std::vector& ElementSlots - ); - - virtual void handleConstantStruct( - const StructType* ST, - std::vector& ElementSlots - ); - - virtual void handleConstantPointer( - const PointerType* PT, - unsigned Slot - ); - - virtual void handleConstantString( - const ConstantArray* CA - ); - - virtual void handleConstantValue( Constant * c ); - virtual void handleGlobalConstantsEnd(); - -/// @} - -}; - -} // End llvm namespace - -#endif - -// vim: sw=2 diff --git a/lib/Bytecode/Analyzer/Parser.h b/lib/Bytecode/Analyzer/Parser.h index 027047b3f8d..38a14717d5e 100644 --- a/lib/Bytecode/Analyzer/Parser.h +++ b/lib/Bytecode/Analyzer/Parser.h @@ -1,4 +1,4 @@ -//===-- Parser.h - Definitions internal to the reader -----------*- C++ -*-===// +//===-- Parser.h - Abstract Interface To Bytecode Parsing -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,45 +7,142 @@ // //===----------------------------------------------------------------------===// // -// This header file defines the interface to the Bytecode Parser +// This header file defines the interface to the Bytecode Parser and the +// Bytecode Handler interface that it calls. // //===----------------------------------------------------------------------===// #ifndef BYTECODE_PARSER_H #define BYTECODE_PARSER_H -#include "ReaderPrimitives.h" -#include "BytecodeHandler.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/GlobalValue.h" +#include "llvm/Module.h" #include #include #include namespace llvm { -struct LazyFunctionInfo { - const unsigned char *Buf, *EndBuf; - LazyFunctionInfo(const unsigned char *B = 0, const unsigned char *EB = 0) - : Buf(B), EndBuf(EB) {} -}; - -typedef std::map LazyFunctionMap; +class BytecodeHandler; ///< Forward declare the handler interface +/// This class defines the interface for parsing a buffer of bytecode. The +/// parser itself takes no action except to call the various functions of +/// the handler interface. The parser's sole responsibility is the correct +/// interpretation of the bytecode buffer. The handler is responsible for +/// instantiating and keeping track of all values. As a convenience, the parser +/// is responsible for materializing types and will pass them through the +/// handler interface as necessary. +/// @see BytecodeHandler +/// @brief Abstract Bytecode Parser interface class AbstractBytecodeParser { - AbstractBytecodeParser(const AbstractBytecodeParser &); // DO NOT IMPLEMENT - void operator=(const AbstractBytecodeParser &); // DO NOT IMPLEMENT + +/// @name Constructors +/// @{ public: AbstractBytecodeParser( BytecodeHandler* h ) { handler = h; } ~AbstractBytecodeParser() { } +/// @} +/// @name Types +/// @{ +public: + /// @brief A convenience type for the buffer pointer + typedef const unsigned char* BufPtr; + + /// @brief The type used for vector of potentially abstract types + typedef std::vector TypeListTy; + + /// @brief + +/// @} +/// @name Methods +/// @{ +public: + + /// @brief Main interface to parsing a bytecode buffer. void ParseBytecode(const unsigned char *Buf, unsigned Length, const std::string &ModuleID); - void dump() const { - std::cerr << "AbstractBytecodeParser instance!\n"; - } + /// The ParseBytecode method lazily parses functions. Use this + /// method to cause the parser to actually parse all the function bodies + /// in the bytecode buffer. + /// @see ParseBytecode + /// @brief Parse all function bodies + void ParseAllFunctionBodies (); + /// The Parsebytecode method lazily parses functions. Use this + /// method to casue the parser to parse the next function of a given + /// types. Note that this will remove the function from what is to be + /// included by ParseAllFunctionBodies. + /// @see ParseAllFunctionBodies + /// @see ParseBytecode + /// @brief Parse the next function of specific type + void ParseNextFunction (Type* FType) ; + +/// @} +/// @name Parsing Units For Subclasses +/// @{ +protected: + /// @brief Parse whole module scope + void ParseModule (BufPtr &Buf, BufPtr End); + + /// @brief Parse the version information block + void ParseVersionInfo (BufPtr &Buf, BufPtr End); + + /// @brief Parse the ModuleGlobalInfo block + void ParseModuleGlobalInfo (BufPtr &Buf, BufPtr End); + + /// @brief Parse a symbol table + void ParseSymbolTable (BufPtr &Buf, BufPtr End); + + /// This function parses LLVM functions lazily. It obtains the type of the + /// function and records where the body of the function is in the bytecode + /// buffer. The caller can then use the ParseNextFunction and + /// ParseAllFunctionBodies to get handler events for the functions. + /// @brief Parse functions lazily. + void ParseFunctionLazily (BufPtr &Buf, BufPtr End); + + /// @brief Parse a function body + void ParseFunctionBody (const Type* FType, BufPtr &Buf, BufPtr EndBuf); + + /// @brief Parse a compaction table + void ParseCompactionTable (BufPtr &Buf, BufPtr End); + + /// @brief Parse global types + void ParseGlobalTypes (BufPtr &Buf, BufPtr End); + + /// @brief Parse a basic block (for LLVM 1.0 basic block blocks) + void ParseBasicBlock (BufPtr &Buf, BufPtr End, unsigned BlockNo); + + /// @brief parse an instruction list (for post LLVM 1.0 instruction lists + /// with blocks differentiated by terminating instructions. + unsigned ParseInstructionList(BufPtr &Buf, BufPtr End); + + /// @brief Parse an instruction. + bool ParseInstruction (BufPtr &Buf, BufPtr End, + std::vector& Args); + + /// @brief Parse a constant pool + void ParseConstantPool (BufPtr &Buf, BufPtr End, TypeListTy& List); + + /// @brief Parse a constant value + void ParseConstantValue (BufPtr &Buf, BufPtr End, unsigned TypeID); + + /// @brief Parse a block of types. + void ParseTypeConstants (BufPtr &Buf, BufPtr End, TypeListTy &Tab, + unsigned NumEntries); + + /// @brief Parse a single type. + const Type *ParseTypeConstant(BufPtr &Buf, BufPtr End); + + /// @brief Parse a string constants block + void ParseStringConstants (BufPtr &Buf, BufPtr End, unsigned NumEntries); + +/// @} +/// @name Data +/// @{ private: // Information about the module, extracted from the bytecode revision number. unsigned char RevisionNum; // The rev # itself @@ -82,13 +179,13 @@ private: // TypesLoaded - This vector mirrors the Values[TypeTyID] plane. It is used // to deal with forward references to types. // - typedef std::vector TypeListTy; TypeListTy ModuleTypes; TypeListTy FunctionTypes; // When the ModuleGlobalInfo section is read, we create a FunctionType object // for each function in the module. When the function is loaded, this type is // used to instantiate the actual function object. + std::vector FunctionSignatureList; // Constant values are read in after global variables. Because of this, we @@ -98,16 +195,34 @@ private: // std::vector > GlobalInits; - // For lazy reading-in of functions, we need to save away several pieces of - // information about each function: its begin and end pointer in the buffer - // and its FunctionSlot. - // - LazyFunctionMap LazyFunctionLoadMap; - - /// The handler for parsing +/// @} +/// @name Implementation Details +/// @{ +private: + /// This stores the parser's handler. It makes virtual function calls through + /// the BytecodeHandler to notify the handler of parsing events. What the + /// handler does with the events is completely orthogonal to the business of + /// parsing the bytecode. + /// @brief The handler of bytecode parsing events. BytecodeHandler* handler; + /// For lazy reading-in of functions, we need to save away several pieces of + /// information about each function: its begin and end pointer in the buffer + /// and its FunctionSlot. + struct LazyFunctionInfo { + const unsigned char *Buf, *EndBuf; + LazyFunctionInfo(const unsigned char *B = 0, const unsigned char *EB = 0) + : Buf(B), EndBuf(EB) {} + }; + typedef std::map LazyFunctionMap; + LazyFunctionMap LazyFunctionLoadMap; + private: + + static inline void readBlock(const unsigned char *&Buf, + const unsigned char *EndBuf, + unsigned &Type, unsigned &Size) ; + const Type *AbstractBytecodeParser::getType(unsigned ID); /// getGlobalTableType - This is just like getType, but when a compaction /// table is in use, it is ignored. Also, no forward references or other @@ -134,45 +249,250 @@ private: return Type::FirstDerivedTyID + (&*I - &ModuleTypes[0]); } + AbstractBytecodeParser(const AbstractBytecodeParser &); // DO NOT IMPLEMENT + void operator=(const AbstractBytecodeParser &); // DO NOT IMPLEMENT + +/// @} +}; + +/// This class provides the interface for the handling bytecode events during +/// parsing. The methods on this interface are invoked by the +/// AbstractBytecodeParser as it discovers the content of a bytecode stream. +/// This class provides a a clear separation of concerns between recognizing +/// the semantic units of a bytecode file and deciding what to do with them. +/// The AbstractBytecodeParser recognizes the content of the bytecode file and +/// calls the BytecodeHandler methods to determine what should be done. This +/// arrangement allows Bytecode files to be read and handled for a number of +/// purposes simply by creating a subclass of BytecodeHandler. None of the +/// parsing details need to be understood, only the meaning of the calls +/// made on this interface. +/// +/// Another paradigm that uses this design pattern is the XML SAX Parser. The +/// ContentHandler for SAX plays the same role as the BytecodeHandler here. +/// @see AbstractbytecodeParser +/// @brief Handle Bytecode Parsing Events +class BytecodeHandler { + +/// @name Constructors And Operators +/// @{ public: - typedef const unsigned char* BufPtr; - void ParseModule (BufPtr &Buf, BufPtr End); - void ParseNextFunction (Type* FType) ; - void ParseAllFunctionBodies (); + /// @brief Default constructor (empty) + BytecodeHandler() {} + /// @brief Virtual destructor (empty) + virtual ~BytecodeHandler() {} private: - void ParseVersionInfo (BufPtr &Buf, BufPtr End); - void ParseModuleGlobalInfo (BufPtr &Buf, BufPtr End); - void ParseSymbolTable (BufPtr &Buf, BufPtr End); - void ParseFunctionLazily (BufPtr &Buf, BufPtr End); - void ParseFunctionBody (const Type* FType, BufPtr &Buf, BufPtr EndBuf); - void ParseCompactionTable (BufPtr &Buf, BufPtr End); - void ParseGlobalTypes (BufPtr &Buf, BufPtr End); + BytecodeHandler(const BytecodeHandler &); // DO NOT IMPLEMENT + void operator=(const BytecodeHandler &); // DO NOT IMPLEMENT - void ParseBasicBlock (BufPtr &Buf, BufPtr End, unsigned BlockNo); - unsigned ParseInstructionList(BufPtr &Buf, BufPtr End); - - bool ParseInstruction (BufPtr &Buf, BufPtr End, - std::vector& Args); +/// @} +/// @name Handler Methods +/// @{ +public: - void ParseConstantPool (BufPtr &Buf, BufPtr End, TypeListTy& List); - void ParseConstantValue (BufPtr &Buf, BufPtr End, unsigned TypeID); - void ParseTypeConstants (BufPtr &Buf, BufPtr End, TypeListTy &Tab, - unsigned NumEntries); - const Type *ParseTypeConstant(BufPtr &Buf, BufPtr End); - void ParseStringConstants (BufPtr &Buf, BufPtr End, unsigned NumEntries); + /// This method is called whenever the parser detects an error in the + /// bytecode formatting. Returning true will cause the parser to keep + /// going, however this is inadvisable in most cases. Returning false will + /// cause the parser to throw the message as a std::string. + /// @brief Handle parsing errors. + virtual bool handleError(const std::string& str ); + + /// This method is called at the beginning of a parse before anything is + /// read in order to give the handler a chance to initialize. + /// @brief Handle the start of a bytecode parse + virtual void handleStart(); + + /// This method is called at the end of a parse after everything has been + /// read in order to give the handler a chance to terminate. + /// @brief Handle the end of a bytecode parse + virtual void handleFinish(); + + /// This method is called at the start of a module to indicate that a + /// module is being parsed. + /// @brief Handle the start of a module. + virtual void handleModuleBegin(const std::string& id); + + /// This method is called at the end of a module to indicate that the module + /// previously being parsed has concluded. + /// @brief Handle the end of a module. + virtual void handleModuleEnd(const std::string& id); + + /// This method is called once the version information has been parsed. It + /// provides the information about the version of the bytecode file being + /// read. + /// @brief Handle the bytecode prolog + virtual void handleVersionInfo( + unsigned char RevisionNum, ///< Byte code revision number + Module::Endianness Endianness, ///< Endianness indicator + Module::PointerSize PointerSize ///< PointerSize indicator + ); + + /// This method is called at the start of a module globals block which + /// contains the global variables and the function placeholders + virtual void handleModuleGlobalsBegin(); + + /// This method is called when a non-initialized global variable is + /// recognized. Its type, constness, and linkage type are provided. + /// @brief Handle a non-initialized global variable + virtual void handleGlobalVariable( + const Type* ElemType, ///< The type of the global variable + bool isConstant, ///< Whether the GV is constant or not + GlobalValue::LinkageTypes ///< The linkage type of the GV + ); + + /// This method is called when an initialized global variable is recognized. + /// Its type constness, linkage type, and the slot number of the initializer + /// are provided. + /// @brief Handle an intialized global variable. + virtual void handleInitializedGV( + const Type* ElemType, ///< The type of the global variable + bool isConstant, ///< Whether the GV is constant or not + GlobalValue::LinkageTypes,///< The linkage type of the GV + unsigned initSlot ///< Slot number of GV's initializer + ); + + /// This method is called when a new type is recognized. The type is + /// converted from the bytecode and passed to this method. + /// @brief Handle a type + virtual void handleType( const Type* Ty ); + + /// This method is called when the function prototype for a function is + /// encountered in the module globals block. + virtual void handleFunctionDeclaration( + const Type* FuncType ///< The type of the function + ); + + /// This method is called at the end of the module globals block. + /// @brief Handle end of module globals block. + virtual void handleModuleGlobalsEnd(); + + /// This method is called at the beginning of a compaction table. + /// @brief Handle start of compaction table. + virtual void handleCompactionTableBegin(); + + /// @brief Handle start of a compaction table plane + virtual void handleCompactionTablePlane( + unsigned Ty, + unsigned NumEntries + ); + + + /// @brief Handle a type entry in the compaction table + virtual void handleCompactionTableType( + unsigned i, + unsigned TypSlot, + const Type* + ); + + /// @brief Handle a value entry in the compaction table + virtual void handleCompactionTableValue( + unsigned i, + unsigned ValSlot, + const Type* + ); + + /// @brief Handle end of a compaction table + virtual void handleCompactionTableEnd(); + + /// @brief Handle start of a symbol table + virtual void handleSymbolTableBegin(); + + /// @brief Handle start of a symbol table plane + virtual void handleSymbolTablePlane( + unsigned Ty, + unsigned NumEntries, + const Type* Ty + ); + + /// @brief Handle a named type in the symbol table + virtual void handleSymbolTableType( + unsigned i, + unsigned slot, + const std::string& name + ); + + /// @brief Handle a named value in the symbol table + virtual void handleSymbolTableValue( + unsigned i, + unsigned slot, + const std::string& name + ); + + /// @brief Handle the end of a symbol table + virtual void handleSymbolTableEnd(); + + /// @brief Handle the beginning of a function body + virtual void handleFunctionBegin( + const Type* FType, + GlobalValue::LinkageTypes linkage + ); + + /// @brief Handle the end of a function body + virtual void handleFunctionEnd( + const Type* FType + ); + + /// @brief Handle the beginning of a basic block + virtual void handleBasicBlockBegin( + unsigned blocknum + ); + + /// This method is called for each instruction that is parsed. + /// @returns true if the instruction is a block terminating instruction + /// @brief Handle an instruction + virtual bool handleInstruction( + unsigned Opcode, + const Type* iType, + std::vector& Operands + ); + + /// @brief Handle the end of a basic block + virtual void handleBasicBlockEnd(unsigned blocknum); + + /// @brief Handle start of global constants block. + virtual void handleGlobalConstantsBegin(); + + /// @brief Handle a constant expression + virtual void handleConstantExpression( + unsigned Opcode, + const Type* Typ, + std::vector > ArgVec + ); + + /// @brief Handle a constant array + virtual void handleConstantArray( + const ArrayType* AT, + std::vector& ElementSlots + ); + + /// @brief Handle a constant structure + virtual void handleConstantStruct( + const StructType* ST, + std::vector& ElementSlots + ); + + /// @brief Handle a constant pointer + virtual void handleConstantPointer( + const PointerType* PT, + unsigned Slot + ); + + /// @brief Handle a constant strings (array special case) + virtual void handleConstantString( + const ConstantArray* CA + ); + + /// @brief Handle a primitive constant value + virtual void handleConstantValue( Constant * c ); + + /// @brief Handle the end of the global constants + virtual void handleGlobalConstantsEnd(); + +/// @} }; - -static inline void readBlock(const unsigned char *&Buf, - const unsigned char *EndBuf, - unsigned &Type, unsigned &Size) { - Type = read(Buf, EndBuf); - Size = read(Buf, EndBuf); -} - } // End llvm namespace -#endif // vim: sw=2 +#endif diff --git a/lib/Bytecode/Reader/Parser.h b/lib/Bytecode/Reader/Parser.h index 027047b3f8d..38a14717d5e 100644 --- a/lib/Bytecode/Reader/Parser.h +++ b/lib/Bytecode/Reader/Parser.h @@ -1,4 +1,4 @@ -//===-- Parser.h - Definitions internal to the reader -----------*- C++ -*-===// +//===-- Parser.h - Abstract Interface To Bytecode Parsing -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,45 +7,142 @@ // //===----------------------------------------------------------------------===// // -// This header file defines the interface to the Bytecode Parser +// This header file defines the interface to the Bytecode Parser and the +// Bytecode Handler interface that it calls. // //===----------------------------------------------------------------------===// #ifndef BYTECODE_PARSER_H #define BYTECODE_PARSER_H -#include "ReaderPrimitives.h" -#include "BytecodeHandler.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/GlobalValue.h" +#include "llvm/Module.h" #include #include #include namespace llvm { -struct LazyFunctionInfo { - const unsigned char *Buf, *EndBuf; - LazyFunctionInfo(const unsigned char *B = 0, const unsigned char *EB = 0) - : Buf(B), EndBuf(EB) {} -}; - -typedef std::map LazyFunctionMap; +class BytecodeHandler; ///< Forward declare the handler interface +/// This class defines the interface for parsing a buffer of bytecode. The +/// parser itself takes no action except to call the various functions of +/// the handler interface. The parser's sole responsibility is the correct +/// interpretation of the bytecode buffer. The handler is responsible for +/// instantiating and keeping track of all values. As a convenience, the parser +/// is responsible for materializing types and will pass them through the +/// handler interface as necessary. +/// @see BytecodeHandler +/// @brief Abstract Bytecode Parser interface class AbstractBytecodeParser { - AbstractBytecodeParser(const AbstractBytecodeParser &); // DO NOT IMPLEMENT - void operator=(const AbstractBytecodeParser &); // DO NOT IMPLEMENT + +/// @name Constructors +/// @{ public: AbstractBytecodeParser( BytecodeHandler* h ) { handler = h; } ~AbstractBytecodeParser() { } +/// @} +/// @name Types +/// @{ +public: + /// @brief A convenience type for the buffer pointer + typedef const unsigned char* BufPtr; + + /// @brief The type used for vector of potentially abstract types + typedef std::vector TypeListTy; + + /// @brief + +/// @} +/// @name Methods +/// @{ +public: + + /// @brief Main interface to parsing a bytecode buffer. void ParseBytecode(const unsigned char *Buf, unsigned Length, const std::string &ModuleID); - void dump() const { - std::cerr << "AbstractBytecodeParser instance!\n"; - } + /// The ParseBytecode method lazily parses functions. Use this + /// method to cause the parser to actually parse all the function bodies + /// in the bytecode buffer. + /// @see ParseBytecode + /// @brief Parse all function bodies + void ParseAllFunctionBodies (); + /// The Parsebytecode method lazily parses functions. Use this + /// method to casue the parser to parse the next function of a given + /// types. Note that this will remove the function from what is to be + /// included by ParseAllFunctionBodies. + /// @see ParseAllFunctionBodies + /// @see ParseBytecode + /// @brief Parse the next function of specific type + void ParseNextFunction (Type* FType) ; + +/// @} +/// @name Parsing Units For Subclasses +/// @{ +protected: + /// @brief Parse whole module scope + void ParseModule (BufPtr &Buf, BufPtr End); + + /// @brief Parse the version information block + void ParseVersionInfo (BufPtr &Buf, BufPtr End); + + /// @brief Parse the ModuleGlobalInfo block + void ParseModuleGlobalInfo (BufPtr &Buf, BufPtr End); + + /// @brief Parse a symbol table + void ParseSymbolTable (BufPtr &Buf, BufPtr End); + + /// This function parses LLVM functions lazily. It obtains the type of the + /// function and records where the body of the function is in the bytecode + /// buffer. The caller can then use the ParseNextFunction and + /// ParseAllFunctionBodies to get handler events for the functions. + /// @brief Parse functions lazily. + void ParseFunctionLazily (BufPtr &Buf, BufPtr End); + + /// @brief Parse a function body + void ParseFunctionBody (const Type* FType, BufPtr &Buf, BufPtr EndBuf); + + /// @brief Parse a compaction table + void ParseCompactionTable (BufPtr &Buf, BufPtr End); + + /// @brief Parse global types + void ParseGlobalTypes (BufPtr &Buf, BufPtr End); + + /// @brief Parse a basic block (for LLVM 1.0 basic block blocks) + void ParseBasicBlock (BufPtr &Buf, BufPtr End, unsigned BlockNo); + + /// @brief parse an instruction list (for post LLVM 1.0 instruction lists + /// with blocks differentiated by terminating instructions. + unsigned ParseInstructionList(BufPtr &Buf, BufPtr End); + + /// @brief Parse an instruction. + bool ParseInstruction (BufPtr &Buf, BufPtr End, + std::vector& Args); + + /// @brief Parse a constant pool + void ParseConstantPool (BufPtr &Buf, BufPtr End, TypeListTy& List); + + /// @brief Parse a constant value + void ParseConstantValue (BufPtr &Buf, BufPtr End, unsigned TypeID); + + /// @brief Parse a block of types. + void ParseTypeConstants (BufPtr &Buf, BufPtr End, TypeListTy &Tab, + unsigned NumEntries); + + /// @brief Parse a single type. + const Type *ParseTypeConstant(BufPtr &Buf, BufPtr End); + + /// @brief Parse a string constants block + void ParseStringConstants (BufPtr &Buf, BufPtr End, unsigned NumEntries); + +/// @} +/// @name Data +/// @{ private: // Information about the module, extracted from the bytecode revision number. unsigned char RevisionNum; // The rev # itself @@ -82,13 +179,13 @@ private: // TypesLoaded - This vector mirrors the Values[TypeTyID] plane. It is used // to deal with forward references to types. // - typedef std::vector TypeListTy; TypeListTy ModuleTypes; TypeListTy FunctionTypes; // When the ModuleGlobalInfo section is read, we create a FunctionType object // for each function in the module. When the function is loaded, this type is // used to instantiate the actual function object. + std::vector FunctionSignatureList; // Constant values are read in after global variables. Because of this, we @@ -98,16 +195,34 @@ private: // std::vector > GlobalInits; - // For lazy reading-in of functions, we need to save away several pieces of - // information about each function: its begin and end pointer in the buffer - // and its FunctionSlot. - // - LazyFunctionMap LazyFunctionLoadMap; - - /// The handler for parsing +/// @} +/// @name Implementation Details +/// @{ +private: + /// This stores the parser's handler. It makes virtual function calls through + /// the BytecodeHandler to notify the handler of parsing events. What the + /// handler does with the events is completely orthogonal to the business of + /// parsing the bytecode. + /// @brief The handler of bytecode parsing events. BytecodeHandler* handler; + /// For lazy reading-in of functions, we need to save away several pieces of + /// information about each function: its begin and end pointer in the buffer + /// and its FunctionSlot. + struct LazyFunctionInfo { + const unsigned char *Buf, *EndBuf; + LazyFunctionInfo(const unsigned char *B = 0, const unsigned char *EB = 0) + : Buf(B), EndBuf(EB) {} + }; + typedef std::map LazyFunctionMap; + LazyFunctionMap LazyFunctionLoadMap; + private: + + static inline void readBlock(const unsigned char *&Buf, + const unsigned char *EndBuf, + unsigned &Type, unsigned &Size) ; + const Type *AbstractBytecodeParser::getType(unsigned ID); /// getGlobalTableType - This is just like getType, but when a compaction /// table is in use, it is ignored. Also, no forward references or other @@ -134,45 +249,250 @@ private: return Type::FirstDerivedTyID + (&*I - &ModuleTypes[0]); } + AbstractBytecodeParser(const AbstractBytecodeParser &); // DO NOT IMPLEMENT + void operator=(const AbstractBytecodeParser &); // DO NOT IMPLEMENT + +/// @} +}; + +/// This class provides the interface for the handling bytecode events during +/// parsing. The methods on this interface are invoked by the +/// AbstractBytecodeParser as it discovers the content of a bytecode stream. +/// This class provides a a clear separation of concerns between recognizing +/// the semantic units of a bytecode file and deciding what to do with them. +/// The AbstractBytecodeParser recognizes the content of the bytecode file and +/// calls the BytecodeHandler methods to determine what should be done. This +/// arrangement allows Bytecode files to be read and handled for a number of +/// purposes simply by creating a subclass of BytecodeHandler. None of the +/// parsing details need to be understood, only the meaning of the calls +/// made on this interface. +/// +/// Another paradigm that uses this design pattern is the XML SAX Parser. The +/// ContentHandler for SAX plays the same role as the BytecodeHandler here. +/// @see AbstractbytecodeParser +/// @brief Handle Bytecode Parsing Events +class BytecodeHandler { + +/// @name Constructors And Operators +/// @{ public: - typedef const unsigned char* BufPtr; - void ParseModule (BufPtr &Buf, BufPtr End); - void ParseNextFunction (Type* FType) ; - void ParseAllFunctionBodies (); + /// @brief Default constructor (empty) + BytecodeHandler() {} + /// @brief Virtual destructor (empty) + virtual ~BytecodeHandler() {} private: - void ParseVersionInfo (BufPtr &Buf, BufPtr End); - void ParseModuleGlobalInfo (BufPtr &Buf, BufPtr End); - void ParseSymbolTable (BufPtr &Buf, BufPtr End); - void ParseFunctionLazily (BufPtr &Buf, BufPtr End); - void ParseFunctionBody (const Type* FType, BufPtr &Buf, BufPtr EndBuf); - void ParseCompactionTable (BufPtr &Buf, BufPtr End); - void ParseGlobalTypes (BufPtr &Buf, BufPtr End); + BytecodeHandler(const BytecodeHandler &); // DO NOT IMPLEMENT + void operator=(const BytecodeHandler &); // DO NOT IMPLEMENT - void ParseBasicBlock (BufPtr &Buf, BufPtr End, unsigned BlockNo); - unsigned ParseInstructionList(BufPtr &Buf, BufPtr End); - - bool ParseInstruction (BufPtr &Buf, BufPtr End, - std::vector& Args); +/// @} +/// @name Handler Methods +/// @{ +public: - void ParseConstantPool (BufPtr &Buf, BufPtr End, TypeListTy& List); - void ParseConstantValue (BufPtr &Buf, BufPtr End, unsigned TypeID); - void ParseTypeConstants (BufPtr &Buf, BufPtr End, TypeListTy &Tab, - unsigned NumEntries); - const Type *ParseTypeConstant(BufPtr &Buf, BufPtr End); - void ParseStringConstants (BufPtr &Buf, BufPtr End, unsigned NumEntries); + /// This method is called whenever the parser detects an error in the + /// bytecode formatting. Returning true will cause the parser to keep + /// going, however this is inadvisable in most cases. Returning false will + /// cause the parser to throw the message as a std::string. + /// @brief Handle parsing errors. + virtual bool handleError(const std::string& str ); + + /// This method is called at the beginning of a parse before anything is + /// read in order to give the handler a chance to initialize. + /// @brief Handle the start of a bytecode parse + virtual void handleStart(); + + /// This method is called at the end of a parse after everything has been + /// read in order to give the handler a chance to terminate. + /// @brief Handle the end of a bytecode parse + virtual void handleFinish(); + + /// This method is called at the start of a module to indicate that a + /// module is being parsed. + /// @brief Handle the start of a module. + virtual void handleModuleBegin(const std::string& id); + + /// This method is called at the end of a module to indicate that the module + /// previously being parsed has concluded. + /// @brief Handle the end of a module. + virtual void handleModuleEnd(const std::string& id); + + /// This method is called once the version information has been parsed. It + /// provides the information about the version of the bytecode file being + /// read. + /// @brief Handle the bytecode prolog + virtual void handleVersionInfo( + unsigned char RevisionNum, ///< Byte code revision number + Module::Endianness Endianness, ///< Endianness indicator + Module::PointerSize PointerSize ///< PointerSize indicator + ); + + /// This method is called at the start of a module globals block which + /// contains the global variables and the function placeholders + virtual void handleModuleGlobalsBegin(); + + /// This method is called when a non-initialized global variable is + /// recognized. Its type, constness, and linkage type are provided. + /// @brief Handle a non-initialized global variable + virtual void handleGlobalVariable( + const Type* ElemType, ///< The type of the global variable + bool isConstant, ///< Whether the GV is constant or not + GlobalValue::LinkageTypes ///< The linkage type of the GV + ); + + /// This method is called when an initialized global variable is recognized. + /// Its type constness, linkage type, and the slot number of the initializer + /// are provided. + /// @brief Handle an intialized global variable. + virtual void handleInitializedGV( + const Type* ElemType, ///< The type of the global variable + bool isConstant, ///< Whether the GV is constant or not + GlobalValue::LinkageTypes,///< The linkage type of the GV + unsigned initSlot ///< Slot number of GV's initializer + ); + + /// This method is called when a new type is recognized. The type is + /// converted from the bytecode and passed to this method. + /// @brief Handle a type + virtual void handleType( const Type* Ty ); + + /// This method is called when the function prototype for a function is + /// encountered in the module globals block. + virtual void handleFunctionDeclaration( + const Type* FuncType ///< The type of the function + ); + + /// This method is called at the end of the module globals block. + /// @brief Handle end of module globals block. + virtual void handleModuleGlobalsEnd(); + + /// This method is called at the beginning of a compaction table. + /// @brief Handle start of compaction table. + virtual void handleCompactionTableBegin(); + + /// @brief Handle start of a compaction table plane + virtual void handleCompactionTablePlane( + unsigned Ty, + unsigned NumEntries + ); + + + /// @brief Handle a type entry in the compaction table + virtual void handleCompactionTableType( + unsigned i, + unsigned TypSlot, + const Type* + ); + + /// @brief Handle a value entry in the compaction table + virtual void handleCompactionTableValue( + unsigned i, + unsigned ValSlot, + const Type* + ); + + /// @brief Handle end of a compaction table + virtual void handleCompactionTableEnd(); + + /// @brief Handle start of a symbol table + virtual void handleSymbolTableBegin(); + + /// @brief Handle start of a symbol table plane + virtual void handleSymbolTablePlane( + unsigned Ty, + unsigned NumEntries, + const Type* Ty + ); + + /// @brief Handle a named type in the symbol table + virtual void handleSymbolTableType( + unsigned i, + unsigned slot, + const std::string& name + ); + + /// @brief Handle a named value in the symbol table + virtual void handleSymbolTableValue( + unsigned i, + unsigned slot, + const std::string& name + ); + + /// @brief Handle the end of a symbol table + virtual void handleSymbolTableEnd(); + + /// @brief Handle the beginning of a function body + virtual void handleFunctionBegin( + const Type* FType, + GlobalValue::LinkageTypes linkage + ); + + /// @brief Handle the end of a function body + virtual void handleFunctionEnd( + const Type* FType + ); + + /// @brief Handle the beginning of a basic block + virtual void handleBasicBlockBegin( + unsigned blocknum + ); + + /// This method is called for each instruction that is parsed. + /// @returns true if the instruction is a block terminating instruction + /// @brief Handle an instruction + virtual bool handleInstruction( + unsigned Opcode, + const Type* iType, + std::vector& Operands + ); + + /// @brief Handle the end of a basic block + virtual void handleBasicBlockEnd(unsigned blocknum); + + /// @brief Handle start of global constants block. + virtual void handleGlobalConstantsBegin(); + + /// @brief Handle a constant expression + virtual void handleConstantExpression( + unsigned Opcode, + const Type* Typ, + std::vector > ArgVec + ); + + /// @brief Handle a constant array + virtual void handleConstantArray( + const ArrayType* AT, + std::vector& ElementSlots + ); + + /// @brief Handle a constant structure + virtual void handleConstantStruct( + const StructType* ST, + std::vector& ElementSlots + ); + + /// @brief Handle a constant pointer + virtual void handleConstantPointer( + const PointerType* PT, + unsigned Slot + ); + + /// @brief Handle a constant strings (array special case) + virtual void handleConstantString( + const ConstantArray* CA + ); + + /// @brief Handle a primitive constant value + virtual void handleConstantValue( Constant * c ); + + /// @brief Handle the end of the global constants + virtual void handleGlobalConstantsEnd(); + +/// @} }; - -static inline void readBlock(const unsigned char *&Buf, - const unsigned char *EndBuf, - unsigned &Type, unsigned &Size) { - Type = read(Buf, EndBuf); - Size = read(Buf, EndBuf); -} - } // End llvm namespace -#endif // vim: sw=2 +#endif