From fb0c0dc9604ffba751bdaf272a7ba8cbd29f5860 Mon Sep 17 00:00:00 2001 From: Reid Spencer Date: Mon, 29 May 2006 00:57:22 +0000 Subject: [PATCH] Initial Commit of llvm2cpp This is a safekeeping commit. The program is not finished. It currently handles modules, types, global variables and function declarations. Blocks and instructions remain to be done. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@28528 91177308-0d34-0410-b5e6-96231b3b80d8 --- tools/llvm2cpp/CppWriter.cpp | 1995 ++++++++++++++++++++++++++++++++++ tools/llvm2cpp/CppWriter.h | 18 + tools/llvm2cpp/Makefile | 23 + tools/llvm2cpp/llvm2cpp.cpp | 138 +++ 4 files changed, 2174 insertions(+) create mode 100644 tools/llvm2cpp/CppWriter.cpp create mode 100644 tools/llvm2cpp/CppWriter.h create mode 100644 tools/llvm2cpp/Makefile create mode 100644 tools/llvm2cpp/llvm2cpp.cpp diff --git a/tools/llvm2cpp/CppWriter.cpp b/tools/llvm2cpp/CppWriter.cpp new file mode 100644 index 00000000000..54a28e9f833 --- /dev/null +++ b/tools/llvm2cpp/CppWriter.cpp @@ -0,0 +1,1995 @@ +//===-- CppWriter.cpp - Printing LLVM IR as a C++ Source File -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file was developed by the LLVM research group and is distributed under +// the University of Illinois Open Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the writing of the LLVM IR as a set of C++ calls to the +// LLVM IR interface. The input module is assumed to be verified. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/InlineAsm.h" +#include "llvm/Instruction.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/SymbolTable.h" +#include "llvm/Support/CFG.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/MathExtras.h" +#include +#include + +using namespace llvm; + +namespace { +/// This class provides computation of slot numbers for LLVM Assembly writing. +/// @brief LLVM Assembly Writing Slot Computation. +class SlotMachine { + +/// @name Types +/// @{ +public: + + /// @brief A mapping of Values to slot numbers + typedef std::map ValueMap; + typedef std::map TypeMap; + + /// @brief A plane with next slot number and ValueMap + struct ValuePlane { + unsigned next_slot; ///< The next slot number to use + ValueMap map; ///< The map of Value* -> unsigned + ValuePlane() { next_slot = 0; } ///< Make sure we start at 0 + }; + + struct TypePlane { + unsigned next_slot; + TypeMap map; + TypePlane() { next_slot = 0; } + void clear() { map.clear(); next_slot = 0; } + }; + + /// @brief The map of planes by Type + typedef std::map TypedPlanes; + +/// @} +/// @name Constructors +/// @{ +public: + /// @brief Construct from a module + SlotMachine(const Module *M ); + +/// @} +/// @name Accessors +/// @{ +public: + /// Return the slot number of the specified value in it's type + /// plane. Its an error to ask for something not in the SlotMachine. + /// Its an error to ask for a Type* + int getSlot(const Value *V); + int getSlot(const Type*Ty); + + /// Determine if a Value has a slot or not + bool hasSlot(const Value* V); + bool hasSlot(const Type* Ty); + +/// @} +/// @name Mutators +/// @{ +public: + /// If you'd like to deal with a function instead of just a module, use + /// this method to get its data into the SlotMachine. + void incorporateFunction(const Function *F) { + TheFunction = F; + FunctionProcessed = false; + } + + /// After calling incorporateFunction, use this method to remove the + /// most recently incorporated function from the SlotMachine. This + /// will reset the state of the machine back to just the module contents. + void purgeFunction(); + +/// @} +/// @name Implementation Details +/// @{ +private: + /// Values can be crammed into here at will. If they haven't + /// been inserted already, they get inserted, otherwise they are ignored. + /// Either way, the slot number for the Value* is returned. + unsigned createSlot(const Value *V); + unsigned createSlot(const Type* Ty); + + /// Insert a value into the value table. Return the slot number + /// that it now occupies. BadThings(TM) will happen if you insert a + /// Value that's already been inserted. + unsigned insertValue( const Value *V ); + unsigned insertValue( const Type* Ty); + + /// Add all of the module level global variables (and their initializers) + /// and function declarations, but not the contents of those functions. + void processModule(); + + /// Add all of the functions arguments, basic blocks, and instructions + void processFunction(); + + SlotMachine(const SlotMachine &); // DO NOT IMPLEMENT + void operator=(const SlotMachine &); // DO NOT IMPLEMENT + +/// @} +/// @name Data +/// @{ +public: + + /// @brief The module for which we are holding slot numbers + const Module* TheModule; + + /// @brief The function for which we are holding slot numbers + const Function* TheFunction; + bool FunctionProcessed; + + /// @brief The TypePlanes map for the module level data + TypedPlanes mMap; + TypePlane mTypes; + + /// @brief The TypePlanes map for the function level data + TypedPlanes fMap; + TypePlane fTypes; + +/// @} + +}; + +typedef std::vector TypeList; +typedef std::map TypeMap; +typedef std::map ValueMap; + +void WriteAsOperandInternal(std::ostream &Out, const Value *V, + bool PrintName, TypeMap &TypeTable, + SlotMachine *Machine); + +void WriteAsOperandInternal(std::ostream &Out, const Type *T, + bool PrintName, TypeMap& TypeTable, + SlotMachine *Machine); + +const Module *getModuleFromVal(const Value *V) { + if (const Argument *MA = dyn_cast(V)) + return MA->getParent() ? MA->getParent()->getParent() : 0; + else if (const BasicBlock *BB = dyn_cast(V)) + return BB->getParent() ? BB->getParent()->getParent() : 0; + else if (const Instruction *I = dyn_cast(V)) { + const Function *M = I->getParent() ? I->getParent()->getParent() : 0; + return M ? M->getParent() : 0; + } else if (const GlobalValue *GV = dyn_cast(V)) + return GV->getParent(); + return 0; +} + +// getLLVMName - Turn the specified string into an 'LLVM name', which is either +// prefixed with % (if the string only contains simple characters) or is +// surrounded with ""'s (if it has special chars in it). +std::string getLLVMName(const std::string &Name, + bool prefixName = true) { + assert(!Name.empty() && "Cannot get empty name!"); + + // First character cannot start with a number... + if (Name[0] >= '0' && Name[0] <= '9') + return "\"" + Name + "\""; + + // Scan to see if we have any characters that are not on the "white list" + for (unsigned i = 0, e = Name.size(); i != e; ++i) { + char C = Name[i]; + assert(C != '"' && "Illegal character in LLVM value name!"); + if ((C < 'a' || C > 'z') && (C < 'A' || C > 'Z') && (C < '0' || C > '9') && + C != '-' && C != '.' && C != '_') + return "\"" + Name + "\""; + } + + // If we get here, then the identifier is legal to use as a "VarID". + if (prefixName) + return "%"+Name; + else + return Name; +} + + +/// fillTypeNameTable - If the module has a symbol table, take all global types +/// and stuff their names into the TypeNames map. +/// +void fillTypeNameTable(const Module *M, TypeMap& TypeNames) { + if (!M) return; + const SymbolTable &ST = M->getSymbolTable(); + SymbolTable::type_const_iterator TI = ST.type_begin(); + for (; TI != ST.type_end(); ++TI ) { + // As a heuristic, don't insert pointer to primitive types, because + // they are used too often to have a single useful name. + // + const Type *Ty = cast(TI->second); + if (!isa(Ty) || + !cast(Ty)->getElementType()->isPrimitiveType() || + isa(cast(Ty)->getElementType())) + TypeNames.insert(std::make_pair(Ty, getLLVMName(TI->first))); + } +} + +void calcTypeName(const Type *Ty, + std::vector &TypeStack, + TypeMap& TypeNames, + std::string & Result){ + if (Ty->isPrimitiveType() && !isa(Ty)) { + Result += Ty->getDescription(); // Base case + return; + } + + // Check to see if the type is named. + TypeMap::iterator I = TypeNames.find(Ty); + if (I != TypeNames.end()) { + Result += I->second; + return; + } + + if (isa(Ty)) { + Result += "opaque"; + return; + } + + // Check to see if the Type is already on the stack... + unsigned Slot = 0, CurSize = TypeStack.size(); + while (Slot < CurSize && TypeStack[Slot] != Ty) ++Slot; // Scan for type + + // This is another base case for the recursion. In this case, we know + // that we have looped back to a type that we have previously visited. + // Generate the appropriate upreference to handle this. + if (Slot < CurSize) { + Result += "\\" + utostr(CurSize-Slot); // Here's the upreference + return; + } + + TypeStack.push_back(Ty); // Recursive case: Add us to the stack.. + + switch (Ty->getTypeID()) { + case Type::FunctionTyID: { + const FunctionType *FTy = cast(Ty); + calcTypeName(FTy->getReturnType(), TypeStack, TypeNames, Result); + Result += " ("; + for (FunctionType::param_iterator I = FTy->param_begin(), + E = FTy->param_end(); I != E; ++I) { + if (I != FTy->param_begin()) + Result += ", "; + calcTypeName(*I, TypeStack, TypeNames, Result); + } + if (FTy->isVarArg()) { + if (FTy->getNumParams()) Result += ", "; + Result += "..."; + } + Result += ")"; + break; + } + case Type::StructTyID: { + const StructType *STy = cast(Ty); + Result += "{ "; + for (StructType::element_iterator I = STy->element_begin(), + E = STy->element_end(); I != E; ++I) { + if (I != STy->element_begin()) + Result += ", "; + calcTypeName(*I, TypeStack, TypeNames, Result); + } + Result += " }"; + break; + } + case Type::PointerTyID: + calcTypeName(cast(Ty)->getElementType(), + TypeStack, TypeNames, Result); + Result += "*"; + break; + case Type::ArrayTyID: { + const ArrayType *ATy = cast(Ty); + Result += "[" + utostr(ATy->getNumElements()) + " x "; + calcTypeName(ATy->getElementType(), TypeStack, TypeNames, Result); + Result += "]"; + break; + } + case Type::PackedTyID: { + const PackedType *PTy = cast(Ty); + Result += "<" + utostr(PTy->getNumElements()) + " x "; + calcTypeName(PTy->getElementType(), TypeStack, TypeNames, Result); + Result += ">"; + break; + } + case Type::OpaqueTyID: + Result += "opaque"; + break; + default: + Result += ""; + } + + TypeStack.pop_back(); // Remove self from stack... + return; +} + + +/// printTypeInt - The internal guts of printing out a type that has a +/// potentially named portion. +/// +std::ostream &printTypeInt(std::ostream &Out, const Type *Ty,TypeMap&TypeNames){ + // Primitive types always print out their description, regardless of whether + // they have been named or not. + // + if (Ty->isPrimitiveType() && !isa(Ty)) + return Out << Ty->getDescription(); + + // Check to see if the type is named. + TypeMap::iterator I = TypeNames.find(Ty); + if (I != TypeNames.end()) return Out << I->second; + + // Otherwise we have a type that has not been named but is a derived type. + // Carefully recurse the type hierarchy to print out any contained symbolic + // names. + // + std::vector TypeStack; + std::string TypeName; + calcTypeName(Ty, TypeStack, TypeNames, TypeName); + TypeNames.insert(std::make_pair(Ty, TypeName));//Cache type name for later use + return (Out << TypeName); +} + + +/// WriteTypeSymbolic - This attempts to write the specified type as a symbolic +/// type, iff there is an entry in the modules symbol table for the specified +/// type or one of it's component types. This is slower than a simple x << Type +/// +std::ostream &WriteTypeSymbolic(std::ostream &Out, const Type *Ty, + const Module *M) { + Out << ' '; + + // If they want us to print out a type, attempt to make it symbolic if there + // is a symbol table in the module... + if (M) { + TypeMap TypeNames; + fillTypeNameTable(M, TypeNames); + + return printTypeInt(Out, Ty, TypeNames); + } else { + return Out << Ty->getDescription(); + } +} + +// PrintEscapedString - Print each character of the specified string, escaping +// it if it is not printable or if it is an escape char. +void PrintEscapedString(const std::string &Str, std::ostream &Out) { + for (unsigned i = 0, e = Str.size(); i != e; ++i) { + unsigned char C = Str[i]; + if (isprint(C) && C != '"' && C != '\\') { + Out << C; + } else { + Out << '\\' + << (char) ((C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A')) + << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A')); + } + } +} + +/// @brief Internal constant writer. +void WriteConstantInternal(std::ostream &Out, const Constant *CV, + bool PrintName, + TypeMap& TypeTable, + SlotMachine *Machine) { + const int IndentSize = 4; + static std::string Indent = "\n"; + if (const ConstantBool *CB = dyn_cast(CV)) { + Out << (CB == ConstantBool::True ? "true" : "false"); + } else if (const ConstantSInt *CI = dyn_cast(CV)) { + Out << CI->getValue(); + } else if (const ConstantUInt *CI = dyn_cast(CV)) { + Out << CI->getValue(); + } else if (const ConstantFP *CFP = dyn_cast(CV)) { + // We would like to output the FP constant value in exponential notation, + // but we cannot do this if doing so will lose precision. Check here to + // make sure that we only output it in exponential format if we can parse + // the value back and get the same value. + // + std::string StrVal = ftostr(CFP->getValue()); + + // Check to make sure that the stringized number is not some string like + // "Inf" or NaN, that atof will accept, but the lexer will not. Check that + // the string matches the "[-+]?[0-9]" regex. + // + if ((StrVal[0] >= '0' && StrVal[0] <= '9') || + ((StrVal[0] == '-' || StrVal[0] == '+') && + (StrVal[1] >= '0' && StrVal[1] <= '9'))) + // Reparse stringized version! + if (atof(StrVal.c_str()) == CFP->getValue()) { + Out << StrVal; + return; + } + + // Otherwise we could not reparse it to exactly the same value, so we must + // output the string in hexadecimal format! + assert(sizeof(double) == sizeof(uint64_t) && + "assuming that double is 64 bits!"); + Out << "0x" << utohexstr(DoubleToBits(CFP->getValue())); + + } else if (isa(CV)) { + Out << "zeroinitializer"; + } else if (const ConstantArray *CA = dyn_cast(CV)) { + // As a special case, print the array as a string if it is an array of + // ubytes or an array of sbytes with positive values. + // + const Type *ETy = CA->getType()->getElementType(); + if (CA->isString()) { + Out << "c\""; + PrintEscapedString(CA->getAsString(), Out); + Out << "\""; + + } else { // Cannot output in string format... + Out << '['; + if (CA->getNumOperands()) { + Out << ' '; + printTypeInt(Out, ETy, TypeTable); + WriteAsOperandInternal(Out, CA->getOperand(0), + PrintName, TypeTable, Machine); + for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) { + Out << ", "; + printTypeInt(Out, ETy, TypeTable); + WriteAsOperandInternal(Out, CA->getOperand(i), PrintName, + TypeTable, Machine); + } + } + Out << " ]"; + } + } else if (const ConstantStruct *CS = dyn_cast(CV)) { + Out << '{'; + unsigned N = CS->getNumOperands(); + if (N) { + if (N > 2) { + Indent += std::string(IndentSize, ' '); + Out << Indent; + } else { + Out << ' '; + } + printTypeInt(Out, CS->getOperand(0)->getType(), TypeTable); + + WriteAsOperandInternal(Out, CS->getOperand(0), + PrintName, TypeTable, Machine); + + for (unsigned i = 1; i < N; i++) { + Out << ", "; + if (N > 2) Out << Indent; + printTypeInt(Out, CS->getOperand(i)->getType(), TypeTable); + + WriteAsOperandInternal(Out, CS->getOperand(i), + PrintName, TypeTable, Machine); + } + if (N > 2) Indent.resize(Indent.size() - IndentSize); + } + + Out << " }"; + } else if (const ConstantPacked *CP = dyn_cast(CV)) { + const Type *ETy = CP->getType()->getElementType(); + assert(CP->getNumOperands() > 0 && + "Number of operands for a PackedConst must be > 0"); + Out << '<'; + Out << ' '; + printTypeInt(Out, ETy, TypeTable); + WriteAsOperandInternal(Out, CP->getOperand(0), + PrintName, TypeTable, Machine); + for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) { + Out << ", "; + printTypeInt(Out, ETy, TypeTable); + WriteAsOperandInternal(Out, CP->getOperand(i), PrintName, + TypeTable, Machine); + } + Out << " >"; + } else if (isa(CV)) { + Out << "null"; + + } else if (isa(CV)) { + Out << "undef"; + + } else if (const ConstantExpr *CE = dyn_cast(CV)) { + Out << CE->getOpcodeName() << " ("; + + for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) { + printTypeInt(Out, (*OI)->getType(), TypeTable); + WriteAsOperandInternal(Out, *OI, PrintName, TypeTable, Machine); + if (OI+1 != CE->op_end()) + Out << ", "; + } + + if (CE->getOpcode() == Instruction::Cast) { + Out << " to "; + printTypeInt(Out, CE->getType(), TypeTable); + } + Out << ')'; + + } else { + Out << ""; + } +} + + +/// WriteAsOperand - Write the name of the specified value out to the specified +/// ostream. This can be useful when you just want to print int %reg126, not +/// the whole instruction that generated it. +/// +void WriteAsOperandInternal(std::ostream &Out, const Value *V, + bool PrintName, TypeMap& TypeTable, + SlotMachine *Machine) { + Out << ' '; + if ((PrintName || isa(V)) && V->hasName()) + Out << getLLVMName(V->getName()); + else { + const Constant *CV = dyn_cast(V); + if (CV && !isa(CV)) { + WriteConstantInternal(Out, CV, PrintName, TypeTable, Machine); + } else if (const InlineAsm *IA = dyn_cast(V)) { + Out << "asm "; + if (IA->hasSideEffects()) + Out << "sideeffect "; + Out << '"'; + PrintEscapedString(IA->getAsmString(), Out); + Out << "\", \""; + PrintEscapedString(IA->getConstraintString(), Out); + Out << '"'; + } else { + int Slot = Machine->getSlot(V); + if (Slot != -1) + Out << '%' << Slot; + else + Out << ""; + } + } +} + +/// WriteAsOperand - Write the name of the specified value out to the specified +/// ostream. This can be useful when you just want to print int %reg126, not +/// the whole instruction that generated it. +/// +std::ostream &WriteAsOperand(std::ostream &Out, const Value *V, + bool PrintType, bool PrintName, + const Module *Context) { + TypeMap TypeNames; + if (Context == 0) Context = getModuleFromVal(V); + + if (Context) + fillTypeNameTable(Context, TypeNames); + + if (PrintType) + printTypeInt(Out, V->getType(), TypeNames); + + WriteAsOperandInternal(Out, V, PrintName, TypeNames, 0); + return Out; +} + +/// WriteAsOperandInternal - Write the name of the specified value out to +/// the specified ostream. This can be useful when you just want to print +/// int %reg126, not the whole instruction that generated it. +/// +void WriteAsOperandInternal(std::ostream &Out, const Type *T, + bool PrintName, TypeMap& TypeTable, + SlotMachine *Machine) { + Out << ' '; + int Slot = Machine->getSlot(T); + if (Slot != -1) + Out << '%' << Slot; + else + Out << ""; +} + +/// WriteAsOperand - Write the name of the specified value out to the specified +/// ostream. This can be useful when you just want to print int %reg126, not +/// the whole instruction that generated it. +/// +std::ostream &WriteAsOperand(std::ostream &Out, const Type *Ty, + bool PrintType, bool PrintName, + const Module *Context) { + TypeMap TypeNames; + assert(Context != 0 && "Can't write types as operand without module context"); + + fillTypeNameTable(Context, TypeNames); + + // if (PrintType) + // printTypeInt(Out, V->getType(), TypeNames); + + printTypeInt(Out, Ty, TypeNames); + + WriteAsOperandInternal(Out, Ty, PrintName, TypeNames, 0); + return Out; +} + +class CppWriter { + std::ostream &Out; + SlotMachine &Machine; + const Module *TheModule; + unsigned long uniqueNum; + TypeMap TypeNames; + ValueMap ValueNames; + TypeMap UnresolvedTypes; + TypeList TypeStack; + +public: + inline CppWriter(std::ostream &o, SlotMachine &Mac, const Module *M) + : Out(o), Machine(Mac), TheModule(M), uniqueNum(0), TypeNames(), + ValueNames(), UnresolvedTypes(), TypeStack() { } + + inline void write(const Module *M) { printModule(M); } + inline void write(const GlobalVariable *G) { printGlobal(G); } + inline void write(const Function *F) { printFunction(F); } + inline void write(const BasicBlock *BB) { printBasicBlock(BB); } + inline void write(const Instruction *I) { printInstruction(*I); } + inline void write(const Constant *CPV) { printConstant(CPV); } + inline void write(const Type *Ty) { printType(Ty); } + + void writeOperand(const Value *Op, bool PrintType, bool PrintName = true); + + const Module* getModule() { return TheModule; } + +private: + void printModule(const Module *M); + void printTypes(const Module* M); + void printConstants(const Module* M); + void printConstant(const Constant *CPV); + void printGlobal(const GlobalVariable *GV); + void printFunction(const Function *F); + void printArgument(const Argument *FA); + void printBasicBlock(const BasicBlock *BB); + void printInstruction(const Instruction &I); + void printSymbolTable(const SymbolTable &ST); + void printLinkageType(GlobalValue::LinkageTypes LT); + void printCallingConv(unsigned cc); + + + // printType - Go to extreme measures to attempt to print out a short, + // symbolic version of a type name. + // + std::ostream &printType(const Type *Ty) { + return printTypeInt(Out, Ty, TypeNames); + } + + // printTypeAtLeastOneLevel - Print out one level of the possibly complex type + // without considering any symbolic types that we may have equal to it. + // + std::ostream &printTypeAtLeastOneLevel(const Type *Ty); + + // printInfoComment - Print a little comment after the instruction indicating + // which slot it occupies. + void printInfoComment(const Value &V); + + std::string getCppName(const Type* val); + std::string getCppName(const Value* val); + inline void printCppName(const Value* val); + inline void printCppName(const Type* val); + bool isOnStack(const Type*) const; + inline void printTypeDef(const Type* Ty); + bool printTypeDefInternal(const Type* Ty); +}; + +std::string +CppWriter::getCppName(const Value* val) { + std::string name; + ValueMap::iterator I = ValueNames.find(val); + if (I != ValueNames.end()) { + name = I->second; + } else { + const char* prefix; + switch (val->getType()->getTypeID()) { + case Type::VoidTyID: prefix = "void_"; break; + case Type::BoolTyID: prefix = "bool_"; break; + case Type::UByteTyID: prefix = "ubyte_"; break; + case Type::SByteTyID: prefix = "sbyte_"; break; + case Type::UShortTyID: prefix = "ushort_"; break; + case Type::ShortTyID: prefix = "short_"; break; + case Type::UIntTyID: prefix = "uint_"; break; + case Type::IntTyID: prefix = "int_"; break; + case Type::ULongTyID: prefix = "ulong_"; break; + case Type::LongTyID: prefix = "long_"; break; + case Type::FloatTyID: prefix = "float_"; break; + case Type::DoubleTyID: prefix = "double_"; break; + case Type::LabelTyID: prefix = "label_"; break; + case Type::FunctionTyID: prefix = "func_"; break; + case Type::StructTyID: prefix = "struct_"; break; + case Type::ArrayTyID: prefix = "array_"; break; + case Type::PointerTyID: prefix = "ptr_"; break; + case Type::PackedTyID: prefix = "packed_"; break; + default: prefix = "other_"; break; + } + name = ValueNames[val] = std::string(prefix) + + (val->hasName() ? val->getName() : utostr(uniqueNum++)); + } + return name; +} + +void +CppWriter::printCppName(const Value* val) { + PrintEscapedString(getCppName(val),Out); +} + +void +CppWriter::printCppName(const Type* Ty) +{ + PrintEscapedString(getCppName(Ty),Out); +} + +// Gets the C++ name for a type. Returns true if we already saw the type, +// false otherwise. +// +inline const std::string* +findTypeName(const SymbolTable& ST, const Type* Ty) +{ + SymbolTable::type_const_iterator TI = ST.type_begin(); + SymbolTable::type_const_iterator TE = ST.type_end(); + for (;TI != TE; ++TI) + if (TI->second == Ty) + return &(TI->first); + return 0; +} + +std::string +CppWriter::getCppName(const Type* Ty) +{ + // First, handle the primitive types .. easy + if (Ty->isPrimitiveType()) { + switch (Ty->getTypeID()) { + case Type::VoidTyID: return "Type::VoidTy"; + case Type::BoolTyID: return "Type::BoolTy"; + case Type::UByteTyID: return "Type::UByteTy"; + case Type::SByteTyID: return "Type::SByteTy"; + case Type::UShortTyID: return "Type::UShortTy"; + case Type::ShortTyID: return "Type::ShortTy"; + case Type::UIntTyID: return "Type::UIntTy"; + case Type::IntTyID: return "Type::IntTy"; + case Type::ULongTyID: return "Type::ULongTy"; + case Type::LongTyID: return "Type::LongTy"; + case Type::FloatTyID: return "Type::FloatTy"; + case Type::DoubleTyID: return "Type::DoubleTy"; + case Type::LabelTyID: return "Type::LabelTy"; + default: + assert(!"Can't get here"); + break; + } + return "Type::VoidTy"; // shouldn't be returned, but make it sensible + } + + // Now, see if we've seen the type before and return that + TypeMap::iterator I = TypeNames.find(Ty); + if (I != TypeNames.end()) + return I->second; + + // Okay, let's build a new name for this type. Start with a prefix + const char* prefix = 0; + switch (Ty->getTypeID()) { + case Type::FunctionTyID: prefix = "FuncTy_"; break; + case Type::StructTyID: prefix = "StructTy_"; break; + case Type::ArrayTyID: prefix = "ArrayTy_"; break; + case Type::PointerTyID: prefix = "PointerTy_"; break; + case Type::OpaqueTyID: prefix = "OpaqueTy_"; break; + case Type::PackedTyID: prefix = "PackedTy_"; break; + default: prefix = "OtherTy_"; break; // prevent breakage + } + + // See if the type has a name in the symboltable and build accordingly + const std::string* tName = findTypeName(TheModule->getSymbolTable(), Ty); + std::string name; + if (tName) + name = std::string(prefix) + *tName; + else + name = std::string(prefix) + utostr(uniqueNum++); + + // Save the name + return TypeNames[Ty] = name; +} + +/// printTypeAtLeastOneLevel - Print out one level of the possibly complex type +/// without considering any symbolic types that we may have equal to it. +/// +std::ostream &CppWriter::printTypeAtLeastOneLevel(const Type *Ty) { + if (const FunctionType *FTy = dyn_cast(Ty)) { + printType(FTy->getReturnType()) << " ("; + for (FunctionType::param_iterator I = FTy->param_begin(), + E = FTy->param_end(); I != E; ++I) { + if (I != FTy->param_begin()) + Out << ", "; + printType(*I); + } + if (FTy->isVarArg()) { + if (FTy->getNumParams()) Out << ", "; + Out << "..."; + } + Out << ')'; + } else if (const StructType *STy = dyn_cast(Ty)) { + Out << "{ "; + for (StructType::element_iterator I = STy->element_begin(), + E = STy->element_end(); I != E; ++I) { + if (I != STy->element_begin()) + Out << ", "; + printType(*I); + } + Out << " }"; + } else if (const PointerType *PTy = dyn_cast(Ty)) { + printType(PTy->getElementType()) << '*'; + } else if (const ArrayType *ATy = dyn_cast(Ty)) { + Out << '[' << ATy->getNumElements() << " x "; + printType(ATy->getElementType()) << ']'; + } else if (const PackedType *PTy = dyn_cast(Ty)) { + Out << '<' << PTy->getNumElements() << " x "; + printType(PTy->getElementType()) << '>'; + } + else if (const OpaqueType *OTy = dyn_cast(Ty)) { + Out << "opaque"; + } else { + if (!Ty->isPrimitiveType()) + Out << ""; + printType(Ty); + } + return Out; +} + + +void CppWriter::writeOperand(const Value *Operand, bool PrintType, + bool PrintName) { + if (Operand != 0) { + if (PrintType) { Out << ' '; printType(Operand->getType()); } + WriteAsOperandInternal(Out, Operand, PrintName, TypeNames, &Machine); + } else { + Out << ""; + } +} + + +void CppWriter::printModule(const Module *M) { + Out << "\n// Module Construction\n"; + Out << "Module* mod = new Module(\""; + PrintEscapedString(M->getModuleIdentifier(),Out); + Out << "\");\n"; + Out << "mod->setEndianness("; + switch (M->getEndianness()) { + case Module::LittleEndian: Out << "Module::LittleEndian);\n"; break; + case Module::BigEndian: Out << "Module::BigEndian);\n"; break; + case Module::AnyEndianness:Out << "Module::AnyEndianness);\n"; break; + } + Out << "mod->setPointerSize("; + switch (M->getPointerSize()) { + case Module::Pointer32: Out << "Module::Pointer32);\n"; break; + case Module::Pointer64: Out << "Module::Pointer64);\n"; break; + case Module::AnyPointerSize: Out << "Module::AnyPointerSize);\n"; break; + } + if (!M->getTargetTriple().empty()) + Out << "mod->setTargetTriple(\"" << M->getTargetTriple() << "\");\n"; + + if (!M->getModuleInlineAsm().empty()) { + Out << "mod->setModuleInlineAsm(\""; + PrintEscapedString(M->getModuleInlineAsm(),Out); + Out << "\");\n"; + } + + // Loop over the dependent libraries and emit them. + Module::lib_iterator LI = M->lib_begin(); + Module::lib_iterator LE = M->lib_end(); + while (LI != LE) { + Out << "mod->addLibrary(\"" << *LI << "\");\n"; + ++LI; + } + + // Print out all the type definitions + Out << "\n// Type Definitions\n"; + printTypes(M); + + // Print out all the constants declarations + Out << "\n// Constants Construction\n"; + printConstants(M); + + // Process the global variables + Out << "\n// Global Variable Construction\n"; + for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); + I != E; ++I) { + printGlobal(I); + } + + // Output all of the functions. + Out << "\n// Function Construction\n"; + for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) + printFunction(I); +} + +void +CppWriter::printCallingConv(unsigned cc){ + // Print the calling convention. + switch (cc) { + default: + case CallingConv::C: Out << "CallingConv::C"; break; + case CallingConv::CSRet: Out << "CallingConv::CSRet"; break; + case CallingConv::Fast: Out << "CallingConv::Fast"; break; + case CallingConv::Cold: Out << "CallingConv::Cold"; break; + case CallingConv::FirstTargetCC: Out << "CallingConv::FirstTargetCC"; break; + } +} + +void +CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) { + switch (LT) { + case GlobalValue::InternalLinkage: + Out << "GlobalValue::InternalLinkage"; break; + case GlobalValue::LinkOnceLinkage: + Out << "GlobalValue::LinkOnceLinkage "; break; + case GlobalValue::WeakLinkage: + Out << "GlobalValue::WeakLinkage"; break; + case GlobalValue::AppendingLinkage: + Out << "GlobalValue::AppendingLinkage"; break; + case GlobalValue::ExternalLinkage: + Out << "GlobalValue::ExternalLinkage"; break; + case GlobalValue::GhostLinkage: + Out << "GlobalValue::GhostLinkage"; break; + } +} +void CppWriter::printGlobal(const GlobalVariable *GV) { + Out << "\n"; + Out << "GlobalVariable* "; + printCppName(GV); + Out << " = new GlobalVariable(\n"; + Out << " /*Type=*/"; + printCppName(GV->getType()->getElementType()); + Out << ",\n"; + Out << " /*isConstant=*/" << (GV->isConstant()?"true":"false") + << ",\n /*Linkage=*/"; + printLinkageType(GV->getLinkage()); + Out << ",\n /*Initializer=*/"; + if (GV->hasInitializer()) { + printCppName(GV->getInitializer()); + } else { + Out << "0"; + } + Out << ",\n /*Name=*/\""; + PrintEscapedString(GV->getName(),Out); + Out << "\",\n mod);\n"; + + if (GV->hasSection()) { + printCppName(GV); + Out << "->setSection(\""; + PrintEscapedString(GV->getSection(),Out); + Out << "\");\n"; + } + if (GV->getAlignment()) { + printCppName(GV); + Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");\n"; + }; +} + +bool +CppWriter::isOnStack(const Type* Ty) const { + TypeList::const_iterator TI = + std::find(TypeStack.begin(),TypeStack.end(),Ty); + return TI != TypeStack.end(); +} + +// Prints a type definition. Returns true if it could not resolve all the types +// in the definition but had to use a forward reference. +void +CppWriter::printTypeDef(const Type* Ty) { + assert(TypeStack.empty()); + TypeStack.clear(); + printTypeDefInternal(Ty); + assert(TypeStack.empty()); + // early resolve as many unresolved types as possible. Search the unresolved + // types map for the type we just printed. Now that its definition is complete + // we can resolve any preview references to it. This prevents a cascade of + // unresolved types. + TypeMap::iterator I = UnresolvedTypes.find(Ty); + if (I != UnresolvedTypes.end()) { + Out << "cast(" << I->second + << "_fwd.get())->refineAbstractTypeTo(" << I->second << ");\n"; + Out << I->second << " = cast<"; + switch (Ty->getTypeID()) { + case Type::FunctionTyID: Out << "FunctionType"; break; + case Type::ArrayTyID: Out << "ArrayType"; break; + case Type::StructTyID: Out << "StructType"; break; + case Type::PackedTyID: Out << "PackedType"; break; + case Type::PointerTyID: Out << "PointerType"; break; + case Type::OpaqueTyID: Out << "OpaqueType"; break; + default: Out << "NoSuchDerivedType"; break; + } + Out << ">(" << I->second << "_fwd.get());\n"; + UnresolvedTypes.erase(I); + } + Out << "\n"; +} + +bool +CppWriter::printTypeDefInternal(const Type* Ty) { + // We don't print definitions for primitive types + if (Ty->isPrimitiveType()) + return false; + + // Determine if the name is in the name list before we modify that list. + TypeMap::const_iterator TNI = TypeNames.find(Ty); + + // Everything below needs the name for the type so get it now + std::string typeName(getCppName(Ty)); + + // Search the type stack for recursion. If we find it, then generate this + // as an OpaqueType, but make sure not to do this multiple times because + // the type could appear in multiple places on the stack. Once the opaque + // definition is issues, it must not be re-issued. Consequently we have to + // check the UnresolvedTypes list as well. + if (isOnStack(Ty)) { + TypeMap::const_iterator I = UnresolvedTypes.find(Ty); + if (I == UnresolvedTypes.end()) { + Out << "PATypeHolder " << typeName << "_fwd = OpaqueType::get();\n"; + UnresolvedTypes[Ty] = typeName; + return true; + } + } + + // Avoid printing things we have already printed. Since TNI was obtained + // before the name was inserted with getCppName and because we know the name + // is not on the stack (currently being defined), we can surmise here that if + // we got the name we've also already emitted its definition. + if (TNI != TypeNames.end()) + return false; + + // We're going to print a derived type which, by definition, contains other + // types. So, push this one we're printing onto the type stack to assist with + // recursive definitions. + TypeStack.push_back(Ty); // push on type stack + bool didRecurse = false; + + // Print the type definition + switch (Ty->getTypeID()) { + case Type::FunctionTyID: { + const FunctionType* FT = cast(Ty); + Out << "std::vector" << typeName << "_args;\n"; + FunctionType::param_iterator PI = FT->param_begin(); + FunctionType::param_iterator PE = FT->param_end(); + for (; PI != PE; ++PI) { + const Type* argTy = static_cast(*PI); + bool isForward = printTypeDefInternal(argTy); + std::string argName(getCppName(argTy)); + Out << typeName << "_args.push_back(" << argName; + if (isForward) + Out << "_fwd"; + Out << ");\n"; + } + bool isForward = printTypeDefInternal(FT->getReturnType()); + std::string retTypeName(getCppName(FT->getReturnType())); + Out << "FunctionType* " << typeName << " = FunctionType::get(\n" + << " /*Result=*/" << retTypeName; + if (isForward) + Out << "_fwd"; + Out << ",\n /*Params=*/" << typeName << "_args,\n /*isVarArg=*/" + << (FT->isVarArg() ? "true" : "false") << ");\n"; + break; + } + case Type::StructTyID: { + const StructType* ST = cast(Ty); + Out << "std::vector" << typeName << "_fields;\n"; + StructType::element_iterator EI = ST->element_begin(); + StructType::element_iterator EE = ST->element_end(); + for (; EI != EE; ++EI) { + const Type* fieldTy = static_cast(*EI); + bool isForward = printTypeDefInternal(fieldTy); + std::string fieldName(getCppName(fieldTy)); + Out << typeName << "_fields.push_back(" << fieldName; + if (isForward) + Out << "_fwd"; + Out << ");\n"; + } + Out << "StructType* " << typeName << " = StructType::get(" + << typeName << "_fields);\n"; + break; + } + case Type::ArrayTyID: { + const ArrayType* AT = cast(Ty); + const Type* ET = AT->getElementType(); + bool isForward = printTypeDefInternal(ET); + std::string elemName(getCppName(ET)); + Out << "ArrayType* " << typeName << " = ArrayType::get(" + << elemName << (isForward ? "_fwd" : "") + << ", " << utostr(AT->getNumElements()) << ");\n"; + break; + } + case Type::PointerTyID: { + const PointerType* PT = cast(Ty); + const Type* ET = PT->getElementType(); + bool isForward = printTypeDefInternal(ET); + std::string elemName(getCppName(ET)); + Out << "PointerType* " << typeName << " = PointerType::get(" + << elemName << (isForward ? "_fwd" : "") << ");\n"; + break; + } + case Type::PackedTyID: { + const PackedType* PT = cast(Ty); + const Type* ET = PT->getElementType(); + bool isForward = printTypeDefInternal(ET); + std::string elemName(getCppName(ET)); + Out << "PackedType* " << typeName << " = PackedType::get(" + << elemName << (isForward ? "_fwd" : "") + << ", " << utostr(PT->getNumElements()) << ");\n"; + break; + } + case Type::OpaqueTyID: { + const OpaqueType* OT = cast(Ty); + Out << "OpaqueType* " << typeName << " = OpaqueType::get();\n"; + break; + } + default: + assert(!"Invalid TypeID"); + } + + // Pop us off the type stack + TypeStack.pop_back(); + + // We weren't a recursive type + return false; +} + +void +CppWriter::printTypes(const Module* M) { + // Add all of the global variables to the value table... + for (Module::const_global_iterator I = TheModule->global_begin(), + E = TheModule->global_end(); I != E; ++I) { + if (I->hasInitializer()) + printTypeDef(I->getInitializer()->getType()); + printTypeDef(I->getType()); + } + + // Add all the functions to the table + for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end(); + FI != FE; ++FI) { + printTypeDef(FI->getReturnType()); + printTypeDef(FI->getFunctionType()); + // Add all the function arguments + for(Function::const_arg_iterator AI = FI->arg_begin(), + AE = FI->arg_end(); AI != AE; ++AI) { + printTypeDef(AI->getType()); + } + + // Add all of the basic blocks and instructions + for (Function::const_iterator BB = FI->begin(), + E = FI->end(); BB != E; ++BB) { + printTypeDef(BB->getType()); + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; + ++I) { + printTypeDef(I->getType()); + } + } + } +} + +void +CppWriter::printConstants(const Module* M) { + const SymbolTable& ST = M->getSymbolTable(); + + // Print the constants, in type plane order. + for (SymbolTable::plane_const_iterator PI = ST.plane_begin(); + PI != ST.plane_end(); ++PI ) { + SymbolTable::value_const_iterator VI = ST.value_begin(PI->first); + SymbolTable::value_const_iterator VE = ST.value_end(PI->first); + + for (; VI != VE; ++VI) { + const Value* V = VI->second; + const Constant *CPV = dyn_cast(V) ; + if (CPV && !isa(V)) { + printConstant(CPV); + } + } + } + + // Add all of the global variables to the value table... + for (Module::const_global_iterator I = TheModule->global_begin(), + E = TheModule->global_end(); I != E; ++I) + if (I->hasInitializer()) + printConstant(I->getInitializer()); +} + +// printSymbolTable - Run through symbol table looking for constants +// and types. Emit their declarations. +void CppWriter::printSymbolTable(const SymbolTable &ST) { + + // Print the types. + for (SymbolTable::type_const_iterator TI = ST.type_begin(); + TI != ST.type_end(); ++TI ) { + Out << "\t" << getLLVMName(TI->first) << " = type "; + + // Make sure we print out at least one level of the type structure, so + // that we do not get %FILE = type %FILE + // + printTypeAtLeastOneLevel(TI->second) << "\n"; + } + +} + + +/// printConstant - Print out a constant pool entry... +/// +void CppWriter::printConstant(const Constant *CV) { + const int IndentSize = 2; + static std::string Indent = "\n"; + std::string constName(getCppName(CV)); + std::string typeName(getCppName(CV->getType())); + if (CV->isNullValue()) { + Out << "Constant* " << constName << " = Constant::getNullValue(" + << typeName << ");\n"; + return; + } + if (const ConstantBool *CB = dyn_cast(CV)) { + Out << "Constant* " << constName << " = ConstantBool::get(" + << (CB == ConstantBool::True ? "true" : "false") + << ");"; + } else if (const ConstantSInt *CI = dyn_cast(CV)) { + Out << "Constant* " << constName << " = ConstantSInt::get(" + << typeName << ", " << CI->getValue() << ");"; + } else if (const ConstantUInt *CI = dyn_cast(CV)) { + Out << "Constant* " << constName << " = ConstantUInt::get(" + << typeName << ", " << CI->getValue() << ");"; + } else if (isa(CV)) { + Out << "Constant* " << constName << " = ConstantAggregateZero::get(" + << typeName << ");"; + } else if (isa(CV)) { + Out << "Constant* " << constName << " = ConstanPointerNull::get(" + << typeName << ");"; + } else if (const ConstantFP *CFP = dyn_cast(CV)) { + Out << "ConstantFP::get(" << typeName << ", "; + // We would like to output the FP constant value in exponential notation, + // but we cannot do this if doing so will lose precision. Check here to + // make sure that we only output it in exponential format if we can parse + // the value back and get the same value. + // + std::string StrVal = ftostr(CFP->getValue()); + + // Check to make sure that the stringized number is not some string like + // "Inf" or NaN, that atof will accept, but the lexer will not. Check that + // the string matches the "[-+]?[0-9]" regex. + // + if ((StrVal[0] >= '0' && StrVal[0] <= '9') || + ((StrVal[0] == '-' || StrVal[0] == '+') && + (StrVal[1] >= '0' && StrVal[1] <= '9'))) + // Reparse stringized version! + if (atof(StrVal.c_str()) == CFP->getValue()) { + Out << StrVal; + return; + } + + // Otherwise we could not reparse it to exactly the same value, so we must + // output the string in hexadecimal format! + assert(sizeof(double) == sizeof(uint64_t) && + "assuming that double is 64 bits!"); + Out << "0x" << utohexstr(DoubleToBits(CFP->getValue())) << ");"; + } else if (const ConstantArray *CA = dyn_cast(CV)) { + if (CA->isString()) { + Out << "Constant* " << constName << " = ConstantArray::get(\""; + PrintEscapedString(CA->getAsString(),Out); + Out << "\");"; + } else { + Out << "std::vector " << constName << "_elems;\n"; + unsigned N = CA->getNumOperands(); + for (unsigned i = 0; i < N; ++i) { + printConstant(CA->getOperand(i)); + Out << constName << "_elems.push_back(" + << getCppName(CA->getOperand(i)) << ");\n"; + } + Out << "Constant* " << constName << " = ConstantArray::get(" + << typeName << ", " << constName << "_elems);"; + } + } else if (const ConstantStruct *CS = dyn_cast(CV)) { + Out << "std::vector " << constName << "_fields;\n"; + unsigned N = CS->getNumOperands(); + for (unsigned i = 0; i < N; i++) { + printConstant(CS->getOperand(i)); + Out << constName << "_fields.push_back(" + << getCppName(CA->getOperand(i)) << ");\n"; + } + Out << "Constant* " << constName << " = ConstantStruct::get(" + << typeName << ", " << constName << "_fields);"; + } else if (const ConstantPacked *CP = dyn_cast(CV)) { + Out << "std::vector " << constName << "_elems;\n"; + unsigned N = CP->getNumOperands(); + for (unsigned i = 0; i < N; ++i) { + printConstant(CP->getOperand(i)); + Out << constName << "_elems.push_back(" + << getCppName(CP->getOperand(i)) << ");\n"; + } + Out << "Constant* " << constName << " = ConstantPacked::get(" + << typeName << ", " << constName << "_elems);"; + } else if (isa(CV)) { + Out << "Constant* " << constName << " = UndefValue::get(" + << typeName << ");\n"; + } else if (const ConstantExpr *CE = dyn_cast(CV)) { + Out << CE->getOpcodeName() << " ("; + + for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) { + //printTypeInt(Out, (*OI)->getType(), TypeTable); + //WriteAsOperandInternal(Out, *OI, PrintName, TypeTable, Machine); + if (OI+1 != CE->op_end()) + Out << ", "; + } + + if (CE->getOpcode() == Instruction::Cast) { + Out << " to "; + // printTypeInt(Out, CE->getType(), TypeTable); + } + Out << ')'; + + } else { + Out << ""; + } + Out << "\n"; +} + +/// printFunction - Print all aspects of a function. +/// +void CppWriter::printFunction(const Function *F) { + std::string funcTypeName(getCppName(F->getFunctionType())); + + Out << "Function* "; + printCppName(F); + Out << " = new Function(" << funcTypeName << ", " ; + printLinkageType(F->getLinkage()); + Out << ", \"" << F->getName() << "\", mod);\n"; + printCppName(F); + Out << "->setCallingConv("; + printCallingConv(F->getCallingConv()); + Out << ");\n"; + if (F->hasSection()) { + printCppName(F); + Out << "->setSection(" << F->getSection() << ");\n"; + } + if (F->getAlignment()) { + printCppName(F); + Out << "->setAlignment(" << F->getAlignment() << ");\n"; + } + + Machine.incorporateFunction(F); + + if (!F->isExternal()) { + Out << "{"; + // Output all of its basic blocks... for the function + for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I) + printBasicBlock(I); + Out << "}\n"; + } + + Machine.purgeFunction(); +} + +/// printArgument - This member is called for every argument that is passed into +/// the function. Simply print it out +/// +void CppWriter::printArgument(const Argument *Arg) { + // Insert commas as we go... the first arg doesn't get a comma + if (Arg != Arg->getParent()->arg_begin()) Out << ", "; + + // Output type... + printType(Arg->getType()); + + // Output name, if available... + if (Arg->hasName()) + Out << ' ' << getLLVMName(Arg->getName()); +} + +/// printBasicBlock - This member is called for each basic block in a method. +/// +void CppWriter::printBasicBlock(const BasicBlock *BB) { + if (BB->hasName()) { // Print out the label if it exists... + Out << "\n" << getLLVMName(BB->getName(), false) << ':'; + } else if (!BB->use_empty()) { // Don't print block # of no uses... + Out << "\n;