mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-17 06:33:21 +00:00
f70c22b019
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@14201 91177308-0d34-0410-b5e6-96231b3b80d8
545 lines
18 KiB
C++
545 lines
18 KiB
C++
//===-- Parser.h - Abstract Interface To Bytecode Parsing -------*- C++ -*-===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file was developed by Reid Spencer and is distributed under the
|
|
// University of Illinois Open Source License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This header file defines the interface to the Bytecode Parser and the
|
|
// Bytecode Handler interface that it calls.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef BYTECODE_PARSER_H
|
|
#define BYTECODE_PARSER_H
|
|
|
|
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalValue.h"
#include "llvm/Module.h"
#include <algorithm>
#include <cassert>
#include <map>
#include <string>
#include <utility>
#include <vector>
|
|
|
|
namespace llvm {
|
|
|
|
class BytecodeHandler; ///< Forward declare the handler interface
|
|
|
|
/// This class defines the interface for parsing a buffer of bytecode. The
|
|
/// parser itself takes no action except to call the various functions of
|
|
/// the handler interface. The parser's sole responsibility is the correct
|
|
/// interpretation of the bytecode buffer. The handler is responsible for
|
|
/// instantiating and keeping track of all values. As a convenience, the parser
|
|
/// is responsible for materializing types and will pass them through the
|
|
/// handler interface as necessary.
|
|
/// @see BytecodeHandler
|
|
/// @brief Abstract Bytecode Parser interface
|
|
class AbstractBytecodeParser {
|
|
|
|
/// @name Constructors
|
|
/// @{
|
|
public:
|
|
AbstractBytecodeParser(
|
|
BytecodeHandler* h,
|
|
bool repAlignment = false,
|
|
bool repBlocks = false,
|
|
bool repVBR = false
|
|
) {
|
|
handler = h;
|
|
reportAlignment = repAlignment;
|
|
reportBlocks = repBlocks;
|
|
reportVBR = repVBR;
|
|
}
|
|
|
|
~AbstractBytecodeParser() { }
|
|
|
|
/// @}
|
|
/// @name Types
|
|
/// @{
|
|
public:
|
|
/// @brief A convenience type for the buffer pointer
|
|
typedef const unsigned char* BufPtr;
|
|
|
|
/// @brief The type used for vector of potentially abstract types
|
|
typedef std::vector<PATypeHolder> TypeListTy;
|
|
|
|
/// @brief
|
|
|
|
/// @}
|
|
/// @name Methods
|
|
/// @{
|
|
public:
|
|
|
|
/// @brief Main interface to parsing a bytecode buffer.
|
|
void ParseBytecode(const unsigned char *Buf, unsigned Length,
|
|
const std::string &ModuleID);
|
|
|
|
/// The ParseBytecode method lazily parses functions. Use this
|
|
/// method to cause the parser to actually parse all the function bodies
|
|
/// in the bytecode buffer.
|
|
/// @see ParseBytecode
|
|
/// @brief Parse all function bodies
|
|
void ParseAllFunctionBodies ();
|
|
|
|
/// The Parsebytecode method lazily parses functions. Use this
|
|
/// method to casue the parser to parse the next function of a given
|
|
/// types. Note that this will remove the function from what is to be
|
|
/// included by ParseAllFunctionBodies.
|
|
/// @see ParseAllFunctionBodies
|
|
/// @see ParseBytecode
|
|
/// @brief Parse the next function of specific type
|
|
void ParseNextFunction (Function* Func) ;
|
|
|
|
/// @}
|
|
/// @name Parsing Units For Subclasses
|
|
/// @{
|
|
protected:
|
|
/// @brief Parse whole module scope
|
|
void ParseModule ();
|
|
|
|
/// @brief Parse the version information block
|
|
void ParseVersionInfo ();
|
|
|
|
/// @brief Parse the ModuleGlobalInfo block
|
|
void ParseModuleGlobalInfo ();
|
|
|
|
/// @brief Parse a symbol table
|
|
void ParseSymbolTable ();
|
|
|
|
/// This function parses LLVM functions lazily. It obtains the type of the
|
|
/// function and records where the body of the function is in the bytecode
|
|
/// buffer. The caller can then use the ParseNextFunction and
|
|
/// ParseAllFunctionBodies to get handler events for the functions.
|
|
/// @brief Parse functions lazily.
|
|
void ParseFunctionLazily ();
|
|
|
|
/// @brief Parse a function body
|
|
void ParseFunctionBody (Function* Func);
|
|
|
|
/// @brief Parse a compaction table
|
|
void ParseCompactionTable ();
|
|
|
|
/// @brief Parse global types
|
|
void ParseGlobalTypes ();
|
|
|
|
/// @brief Parse a basic block (for LLVM 1.0 basic block blocks)
|
|
void ParseBasicBlock (unsigned BlockNo);
|
|
|
|
/// @brief parse an instruction list (for post LLVM 1.0 instruction lists
|
|
/// with blocks differentiated by terminating instructions.
|
|
unsigned ParseInstructionList();
|
|
|
|
/// @brief Parse an instruction.
|
|
bool ParseInstruction (std::vector<unsigned>& Args);
|
|
|
|
/// @brief Parse a constant pool
|
|
void ParseConstantPool (TypeListTy& List);
|
|
|
|
/// @brief Parse a constant value
|
|
void ParseConstantValue (unsigned TypeID);
|
|
|
|
/// @brief Parse a block of types.
|
|
void ParseTypeConstants (TypeListTy &Tab, unsigned NumEntries);
|
|
|
|
/// @brief Parse a single type.
|
|
const Type *ParseTypeConstant();
|
|
|
|
/// @brief Parse a string constants block
|
|
void ParseStringConstants (unsigned NumEntries);
|
|
|
|
/// @}
|
|
/// @name Data
|
|
/// @{
|
|
private:
|
|
BufPtr MemStart; ///< Start of the memory buffer
|
|
BufPtr MemEnd; ///< End of the memory buffer
|
|
BufPtr BlockStart; ///< Start of current block being parsed
|
|
BufPtr BlockEnd; ///< End of current block being parsed
|
|
BufPtr At; ///< Where we're currently parsing at
|
|
|
|
bool reportAlignment; ///< Parser should report alignment?
|
|
bool reportBlocks; ///< Parser should report blocks?
|
|
bool reportVBR; ///< Report VBR compression events
|
|
|
|
// Information about the module, extracted from the bytecode revision number.
|
|
unsigned char RevisionNum; // The rev # itself
|
|
|
|
// Flags to distinguish LLVM 1.0 & 1.1 bytecode formats (revision #0)
|
|
|
|
// Revision #0 had an explicit alignment of data only for the ModuleGlobalInfo
|
|
// block. This was fixed to be like all other blocks in 1.2
|
|
bool hasInconsistentModuleGlobalInfo;
|
|
|
|
// Revision #0 also explicitly encoded zero values for primitive types like
|
|
// int/sbyte/etc.
|
|
bool hasExplicitPrimitiveZeros;
|
|
|
|
// Flags to control features specific the LLVM 1.2 and before (revision #1)
|
|
|
|
// LLVM 1.2 and earlier required that getelementptr structure indices were
|
|
// ubyte constants and that sequential type indices were longs.
|
|
bool hasRestrictedGEPTypes;
|
|
|
|
|
|
/// CompactionTable - If a compaction table is active in the current function,
|
|
/// this is the mapping that it contains.
|
|
std::vector<Type*> CompactionTypeTable;
|
|
|
|
// ConstantFwdRefs - This maintains a mapping between <Type, Slot #>'s and
|
|
// forward references to constants. Such values may be referenced before they
|
|
// are defined, and if so, the temporary object that they represent is held
|
|
// here.
|
|
//
|
|
typedef std::map<std::pair<const Type*,unsigned>, Constant*> ConstantRefsType;
|
|
ConstantRefsType ConstantFwdRefs;
|
|
|
|
// TypesLoaded - This vector mirrors the Values[TypeTyID] plane. It is used
|
|
// to deal with forward references to types.
|
|
//
|
|
TypeListTy ModuleTypes;
|
|
TypeListTy FunctionTypes;
|
|
|
|
// When the ModuleGlobalInfo section is read, we create a FunctionType object
|
|
// for each function in the module. When the function is loaded, this type is
|
|
// used to instantiate the actual function object.
|
|
|
|
std::vector<Function*> FunctionSignatureList;
|
|
|
|
// Constant values are read in after global variables. Because of this, we
|
|
// must defer setting the initializers on global variables until after module
|
|
// level constants have been read. In the mean time, this list keeps track of
|
|
// what we must do.
|
|
//
|
|
std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
|
|
|
|
/// @}
|
|
/// @name Implementation Details
|
|
/// @{
|
|
private:
|
|
/// This stores the parser's handler. It makes virtual function calls through
|
|
/// the BytecodeHandler to notify the handler of parsing events. What the
|
|
/// handler does with the events is completely orthogonal to the business of
|
|
/// parsing the bytecode.
|
|
/// @brief The handler of bytecode parsing events.
|
|
BytecodeHandler* handler;
|
|
|
|
/// For lazy reading-in of functions, we need to save away several pieces of
|
|
/// information about each function: its begin and end pointer in the buffer
|
|
/// and its FunctionSlot.
|
|
struct LazyFunctionInfo {
|
|
const unsigned char *Buf, *EndBuf;
|
|
LazyFunctionInfo(const unsigned char *B = 0, const unsigned char *EB = 0)
|
|
: Buf(B), EndBuf(EB) {}
|
|
};
|
|
typedef std::map<Function*, LazyFunctionInfo> LazyFunctionMap;
|
|
LazyFunctionMap LazyFunctionLoadMap;
|
|
|
|
private:
|
|
|
|
/// Is there more to parse in the current block?
|
|
inline bool moreInBlock();
|
|
|
|
/// Have we read past the end of the block
|
|
inline void checkPastBlockEnd(const char * block_name);
|
|
|
|
/// Align to 32 bits
|
|
inline void align32();
|
|
|
|
/// Reader interface
|
|
inline unsigned read_uint();
|
|
inline unsigned read_vbr_uint();
|
|
inline uint64_t read_vbr_uint64();
|
|
inline int64_t read_vbr_int64();
|
|
inline std::string read_str();
|
|
inline void read_data(void *Ptr, void *End);
|
|
|
|
/// Read a block header
|
|
inline void readBlock(unsigned &Type, unsigned &Size);
|
|
|
|
const Type *AbstractBytecodeParser::getType(unsigned ID);
|
|
/// getGlobalTableType - This is just like getType, but when a compaction
|
|
/// table is in use, it is ignored. Also, no forward references or other
|
|
/// fancy features are supported.
|
|
const Type *getGlobalTableType(unsigned Slot) {
|
|
if (Slot < Type::FirstDerivedTyID) {
|
|
const Type *Ty = Type::getPrimitiveType((Type::TypeID)Slot);
|
|
assert(Ty && "Not a primitive type ID?");
|
|
return Ty;
|
|
}
|
|
Slot -= Type::FirstDerivedTyID;
|
|
if (Slot >= ModuleTypes.size())
|
|
throw std::string("Illegal compaction table type reference!");
|
|
return ModuleTypes[Slot];
|
|
}
|
|
|
|
unsigned getGlobalTableTypeSlot(const Type *Ty) {
|
|
if (Ty->isPrimitiveType())
|
|
return Ty->getTypeID();
|
|
TypeListTy::iterator I = find(ModuleTypes.begin(),
|
|
ModuleTypes.end(), Ty);
|
|
if (I == ModuleTypes.end())
|
|
throw std::string("Didn't find type in ModuleTypes.");
|
|
return Type::FirstDerivedTyID + (&*I - &ModuleTypes[0]);
|
|
}
|
|
|
|
AbstractBytecodeParser(const AbstractBytecodeParser &); // DO NOT IMPLEMENT
|
|
void operator=(const AbstractBytecodeParser &); // DO NOT IMPLEMENT
|
|
|
|
/// @}
|
|
};
|
|
|
|
/// This class provides the interface for the handling bytecode events during
|
|
/// parsing. The methods on this interface are invoked by the
|
|
/// AbstractBytecodeParser as it discovers the content of a bytecode stream.
|
|
/// This class provides a clear separation of concerns between recognizing
|
|
/// the semantic units of a bytecode file and deciding what to do with them.
|
|
/// The AbstractBytecodeParser recognizes the content of the bytecode file and
|
|
/// calls the BytecodeHandler methods to determine what should be done. This
|
|
/// arrangement allows Bytecode files to be read and handled for a number of
|
|
/// purposes simply by creating a subclass of BytecodeHandler. None of the
|
|
/// parsing details need to be understood, only the meaning of the calls
|
|
/// made on this interface.
|
|
///
|
|
/// Another paradigm that uses this design pattern is the XML SAX Parser. The
|
|
/// ContentHandler for SAX plays the same role as the BytecodeHandler here.
|
|
/// @see AbstractBytecodeParser
|
|
/// @brief Handle Bytecode Parsing Events
|
|
class BytecodeHandler {
|
|
|
|
/// @name Constructors And Operators
|
|
/// @{
|
|
public:
|
|
/// @brief Default constructor (empty)
|
|
BytecodeHandler() {}
|
|
/// @brief Virtual destructor (empty)
|
|
virtual ~BytecodeHandler() {}
|
|
|
|
private:
|
|
BytecodeHandler(const BytecodeHandler &); // DO NOT IMPLEMENT
|
|
void operator=(const BytecodeHandler &); // DO NOT IMPLEMENT
|
|
|
|
/// @}
|
|
/// @name Handler Methods
|
|
/// @{
|
|
public:
|
|
|
|
/// This method is called whenever the parser detects an error in the
|
|
/// bytecode formatting. Returning true will cause the parser to keep
|
|
/// going, however this is inadvisable in most cases. Returning false will
|
|
/// cause the parser to throw the message as a std::string.
|
|
/// @brief Handle parsing errors.
|
|
virtual bool handleError(const std::string& str );
|
|
|
|
/// This method is called at the beginning of a parse before anything is
|
|
/// read in order to give the handler a chance to initialize.
|
|
/// @brief Handle the start of a bytecode parse
|
|
virtual void handleStart();
|
|
|
|
/// This method is called at the end of a parse after everything has been
|
|
/// read in order to give the handler a chance to terminate.
|
|
/// @brief Handle the end of a bytecode parse
|
|
virtual void handleFinish();
|
|
|
|
/// This method is called at the start of a module to indicate that a
|
|
/// module is being parsed.
|
|
/// @brief Handle the start of a module.
|
|
virtual void handleModuleBegin(const std::string& id);
|
|
|
|
/// This method is called at the end of a module to indicate that the module
|
|
/// previously being parsed has concluded.
|
|
/// @brief Handle the end of a module.
|
|
virtual void handleModuleEnd(const std::string& id);
|
|
|
|
/// This method is called once the version information has been parsed. It
|
|
/// provides the information about the version of the bytecode file being
|
|
/// read.
|
|
/// @brief Handle the bytecode prolog
|
|
virtual void handleVersionInfo(
|
|
unsigned char RevisionNum, ///< Byte code revision number
|
|
Module::Endianness Endianness, ///< Endianness indicator
|
|
Module::PointerSize PointerSize ///< PointerSize indicator
|
|
);
|
|
|
|
/// This method is called at the start of a module globals block which
|
|
/// contains the global variables and the function placeholders
|
|
virtual void handleModuleGlobalsBegin();
|
|
|
|
/// This method is called when a non-initialized global variable is
|
|
/// recognized. Its type, constness, and linkage type are provided.
|
|
/// @brief Handle a non-initialized global variable
|
|
virtual void handleGlobalVariable(
|
|
const Type* ElemType, ///< The type of the global variable
|
|
bool isConstant, ///< Whether the GV is constant or not
|
|
GlobalValue::LinkageTypes ///< The linkage type of the GV
|
|
);
|
|
|
|
/// This method is called when an initialized global variable is recognized.
|
|
/// Its type constness, linkage type, and the slot number of the initializer
|
|
/// are provided.
|
|
/// @brief Handle an intialized global variable.
|
|
virtual void handleInitializedGV(
|
|
const Type* ElemType, ///< The type of the global variable
|
|
bool isConstant, ///< Whether the GV is constant or not
|
|
GlobalValue::LinkageTypes,///< The linkage type of the GV
|
|
unsigned initSlot ///< Slot number of GV's initializer
|
|
);
|
|
|
|
/// This method is called when a new type is recognized. The type is
|
|
/// converted from the bytecode and passed to this method.
|
|
/// @brief Handle a type
|
|
virtual void handleType( const Type* Ty );
|
|
|
|
/// This method is called when the function prototype for a function is
|
|
/// encountered in the module globals block.
|
|
virtual void handleFunctionDeclaration(
|
|
Function* Func,
|
|
const FunctionType* FuncType ///< The type of the function
|
|
);
|
|
|
|
/// This method is called at the end of the module globals block.
|
|
/// @brief Handle end of module globals block.
|
|
virtual void handleModuleGlobalsEnd();
|
|
|
|
/// This method is called at the beginning of a compaction table.
|
|
/// @brief Handle start of compaction table.
|
|
virtual void handleCompactionTableBegin();
|
|
|
|
/// @brief Handle start of a compaction table plane
|
|
virtual void handleCompactionTablePlane(
|
|
unsigned Ty,
|
|
unsigned NumEntries
|
|
);
|
|
|
|
|
|
/// @brief Handle a type entry in the compaction table
|
|
virtual void handleCompactionTableType(
|
|
unsigned i,
|
|
unsigned TypSlot,
|
|
const Type*
|
|
);
|
|
|
|
/// @brief Handle a value entry in the compaction table
|
|
virtual void handleCompactionTableValue(
|
|
unsigned i,
|
|
unsigned ValSlot,
|
|
const Type*
|
|
);
|
|
|
|
/// @brief Handle end of a compaction table
|
|
virtual void handleCompactionTableEnd();
|
|
|
|
/// @brief Handle start of a symbol table
|
|
virtual void handleSymbolTableBegin();
|
|
|
|
/// @brief Handle start of a symbol table plane
|
|
virtual void handleSymbolTablePlane(
|
|
unsigned Ty,
|
|
unsigned NumEntries,
|
|
const Type* Ty
|
|
);
|
|
|
|
/// @brief Handle a named type in the symbol table
|
|
virtual void handleSymbolTableType(
|
|
unsigned i,
|
|
unsigned slot,
|
|
const std::string& name
|
|
);
|
|
|
|
/// @brief Handle a named value in the symbol table
|
|
virtual void handleSymbolTableValue(
|
|
unsigned i,
|
|
unsigned slot,
|
|
const std::string& name
|
|
);
|
|
|
|
/// @brief Handle the end of a symbol table
|
|
virtual void handleSymbolTableEnd();
|
|
|
|
/// @brief Handle the beginning of a function body
|
|
virtual void handleFunctionBegin(
|
|
Function* Func, unsigned Size
|
|
);
|
|
|
|
/// @brief Handle the end of a function body
|
|
virtual void handleFunctionEnd(
|
|
Function* Func
|
|
);
|
|
|
|
/// @brief Handle the beginning of a basic block
|
|
virtual void handleBasicBlockBegin(
|
|
unsigned blocknum
|
|
);
|
|
|
|
/// This method is called for each instruction that is parsed.
|
|
/// @returns true if the instruction is a block terminating instruction
|
|
/// @brief Handle an instruction
|
|
virtual bool handleInstruction(
|
|
unsigned Opcode,
|
|
const Type* iType,
|
|
std::vector<unsigned>& Operands,
|
|
unsigned Length
|
|
);
|
|
|
|
/// @brief Handle the end of a basic block
|
|
virtual void handleBasicBlockEnd(unsigned blocknum);
|
|
|
|
/// @brief Handle start of global constants block.
|
|
virtual void handleGlobalConstantsBegin();
|
|
|
|
/// @brief Handle a constant expression
|
|
virtual void handleConstantExpression(
|
|
unsigned Opcode,
|
|
const Type* Typ,
|
|
std::vector<std::pair<const Type*,unsigned> > ArgVec
|
|
);
|
|
|
|
/// @brief Handle a constant array
|
|
virtual void handleConstantArray(
|
|
const ArrayType* AT,
|
|
std::vector<unsigned>& ElementSlots
|
|
);
|
|
|
|
/// @brief Handle a constant structure
|
|
virtual void handleConstantStruct(
|
|
const StructType* ST,
|
|
std::vector<unsigned>& ElementSlots
|
|
);
|
|
|
|
/// @brief Handle a constant pointer
|
|
virtual void handleConstantPointer(
|
|
const PointerType* PT,
|
|
unsigned Slot
|
|
);
|
|
|
|
/// @brief Handle a constant strings (array special case)
|
|
virtual void handleConstantString(
|
|
const ConstantArray* CA
|
|
);
|
|
|
|
/// @brief Handle a primitive constant value
|
|
virtual void handleConstantValue( Constant * c );
|
|
|
|
/// @brief Handle the end of the global constants
|
|
virtual void handleGlobalConstantsEnd();
|
|
|
|
/// @brief Handle an alignment event
|
|
virtual void handleAlignment(unsigned numBytes);
|
|
|
|
virtual void handleBlock(
|
|
unsigned BType, ///< The type of block
|
|
const unsigned char* StartPtr, ///< The start of the block
|
|
unsigned Size ///< The size of the block
|
|
);
|
|
virtual void handleVBR32(unsigned Size );
|
|
virtual void handleVBR64(unsigned Size );
|
|
/// @}
|
|
|
|
};
|
|
|
|
} // End llvm namespace
|
|
|
|
// vim: sw=2
|
|
#endif
|