//===- ReaderWrappers.cpp - Parse bytecode from file or buffer -----------===// // // The LLVM Compiler Infrastructure // // This file was developed by the LLVM research group and is distributed under // the University of Illinois Open Source License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements loading and parsing a bytecode file and parsing a // bytecode module from a given buffer. // //===----------------------------------------------------------------------===// #include "llvm/Bytecode/Analyzer.h" #include "llvm/Bytecode/Reader.h" #include "Reader.h" #include "llvm/Module.h" #include "llvm/Instructions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/System/MappedFile.h" #include "llvm/System/Program.h" #include #include #include using namespace llvm; //===----------------------------------------------------------------------===// // BytecodeFileReader - Read from an mmap'able file descriptor. // namespace { /// BytecodeFileReader - parses a bytecode file from a file /// class BytecodeFileReader : public BytecodeReader { private: std::string fileName; sys::MappedFile mapFile; BytecodeFileReader(const BytecodeFileReader&); // Do not implement void operator=(const BytecodeFileReader &BFR); // Do not implement public: BytecodeFileReader(const std::string &Filename, llvm::BytecodeHandler* H=0); bool read(std::string* ErrMsg); void freeState() { BytecodeReader::freeState(); mapFile.close(); } }; } BytecodeFileReader::BytecodeFileReader(const std::string &Filename, llvm::BytecodeHandler* H) : BytecodeReader(H), fileName(Filename) { } bool BytecodeFileReader::read(std::string* ErrMsg) { if (mapFile.open(sys::Path(fileName), sys::MappedFile::READ_ACCESS, ErrMsg)) return true; if (!mapFile.map(ErrMsg)) { mapFile.close(); return true; } unsigned char* buffer = reinterpret_cast(mapFile.base()); return ParseBytecode(buffer, mapFile.size(), fileName, ErrMsg); } //===----------------------------------------------------------------------===// // BytecodeBufferReader - Read from a memory buffer // namespace { /// BytecodeBufferReader - parses a bytecode file from a buffer /// class BytecodeBufferReader : public BytecodeReader { private: const unsigned char *Buffer; const unsigned char *Buf; unsigned Length; std::string ModuleID; bool MustDelete; BytecodeBufferReader(const BytecodeBufferReader&); // Do not implement void operator=(const BytecodeBufferReader &BFR); // Do not implement public: BytecodeBufferReader(const unsigned char *Buf, unsigned Length, const std::string &ModuleID, llvm::BytecodeHandler* Handler = 0); ~BytecodeBufferReader(); bool read(std::string* ErrMsg); }; } BytecodeBufferReader::BytecodeBufferReader(const unsigned char *buf, unsigned len, const std::string &modID, llvm::BytecodeHandler *H) : BytecodeReader(H), Buffer(0), Buf(buf), Length(len), ModuleID(modID) , MustDelete(false) { } BytecodeBufferReader::~BytecodeBufferReader() { if (MustDelete) delete [] Buffer; } bool BytecodeBufferReader::read(std::string* ErrMsg) { // If not aligned, allocate a new buffer to hold the bytecode... const unsigned char *ParseBegin = 0; if (reinterpret_cast(Buf) & 3) { Buffer = new unsigned char[Length+4]; unsigned Offset = 4 - ((intptr_t)Buffer & 3); // Make sure it's aligned ParseBegin = Buffer + Offset; memcpy((unsigned char*)ParseBegin, Buf, Length); // Copy it over MustDelete = true; } else { // If we don't need to copy it over, just use the caller's copy ParseBegin = Buffer = Buf; MustDelete = false; } if (ParseBytecode(ParseBegin, Length, ModuleID, ErrMsg)) { if (MustDelete) delete [] Buffer; return true; } return false; } //===----------------------------------------------------------------------===// // BytecodeStdinReader - Read bytecode from Standard Input // namespace { /// BytecodeStdinReader - parses a bytecode file from stdin /// class BytecodeStdinReader : public BytecodeReader { private: std::vector FileData; unsigned char *FileBuf; BytecodeStdinReader(const BytecodeStdinReader&); // Do not implement void operator=(const BytecodeStdinReader &BFR); // Do not implement public: BytecodeStdinReader( llvm::BytecodeHandler* H = 0 ); bool read(std::string* ErrMsg); }; } BytecodeStdinReader::BytecodeStdinReader( BytecodeHandler* H ) : BytecodeReader(H) { } bool BytecodeStdinReader::read(std::string* ErrMsg) { sys::Program::ChangeStdinToBinary(); char Buffer[4096*4]; // Read in all of the data from stdin, we cannot mmap stdin... while (std::cin.good()) { std::cin.read(Buffer, 4096*4); int BlockSize = std::cin.gcount(); if (0 >= BlockSize) break; FileData.insert(FileData.end(), Buffer, Buffer+BlockSize); } if (FileData.empty()) { if (ErrMsg) *ErrMsg = "Standard Input is empty!"; return true; } FileBuf = &FileData[0]; if (ParseBytecode(FileBuf, FileData.size(), "", ErrMsg)) return true; return false; } //===----------------------------------------------------------------------===// // Varargs transmogrification code... // // CheckVarargs - This is used to automatically translate old-style varargs to // new style varargs for backwards compatibility. static ModuleProvider* CheckVarargs(ModuleProvider* MP) { Module* M = MP->getModule(); // check to see if va_start takes arguements... Function* F = M->getNamedFunction("llvm.va_start"); if(F == 0) return MP; //No varargs use, just return. if (F->getFunctionType()->getNumParams() == 1) return MP; // Modern varargs processing, just return. // If we get to this point, we know that we have an old-style module. // Materialize the whole thing to perform the rewriting. if (MP->materializeModule() == 0) return 0; if(Function* F = M->getNamedFunction("llvm.va_start")) { assert(F->arg_size() == 0 && "Obsolete va_start takes 0 argument!"); //foo = va_start() // -> //bar = alloca typeof(foo) //va_start(bar) //foo = load bar const Type* RetTy = Type::getPrimitiveType(Type::VoidTyID); const Type* ArgTy = F->getFunctionType()->getReturnType(); const Type* ArgTyPtr = PointerType::get(ArgTy); Function* NF = M->getOrInsertFunction("llvm.va_start", RetTy, ArgTyPtr, (Type *)0); for(Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E;) if (CallInst* CI = dyn_cast(*I++)) { AllocaInst* bar = new AllocaInst(ArgTy, 0, "vastart.fix.1", CI); new CallInst(NF, bar, "", CI); Value* foo = new LoadInst(bar, "vastart.fix.2", CI); CI->replaceAllUsesWith(foo); CI->getParent()->getInstList().erase(CI); } F->setName(""); } if(Function* F = M->getNamedFunction("llvm.va_end")) { assert(F->arg_size() == 1 && "Obsolete va_end takes 1 argument!"); //vaend foo // -> //bar = alloca 1 of typeof(foo) //vaend bar const Type* RetTy = Type::getPrimitiveType(Type::VoidTyID); const Type* ArgTy = F->getFunctionType()->getParamType(0); const Type* ArgTyPtr = PointerType::get(ArgTy); Function* NF = M->getOrInsertFunction("llvm.va_end", RetTy, ArgTyPtr, (Type *)0); for(Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E;) if (CallInst* CI = dyn_cast(*I++)) { AllocaInst* bar = new AllocaInst(ArgTy, 0, "vaend.fix.1", CI); new StoreInst(CI->getOperand(1), bar, CI); new CallInst(NF, bar, "", CI); CI->getParent()->getInstList().erase(CI); } F->setName(""); } if(Function* F = M->getNamedFunction("llvm.va_copy")) { assert(F->arg_size() == 1 && "Obsolete va_copy takes 1 argument!"); //foo = vacopy(bar) // -> //a = alloca 1 of typeof(foo) //b = alloca 1 of typeof(foo) //store bar -> b //vacopy(a, b) //foo = load a const Type* RetTy = Type::getPrimitiveType(Type::VoidTyID); const Type* ArgTy = F->getFunctionType()->getReturnType(); const Type* ArgTyPtr = PointerType::get(ArgTy); Function* NF = M->getOrInsertFunction("llvm.va_copy", RetTy, ArgTyPtr, ArgTyPtr, (Type *)0); for(Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E;) if (CallInst* CI = dyn_cast(*I++)) { AllocaInst* a = new AllocaInst(ArgTy, 0, "vacopy.fix.1", CI); AllocaInst* b = new AllocaInst(ArgTy, 0, "vacopy.fix.2", CI); new StoreInst(CI->getOperand(1), b, CI); new CallInst(NF, a, b, "", CI); Value* foo = new LoadInst(a, "vacopy.fix.3", CI); CI->replaceAllUsesWith(foo); CI->getParent()->getInstList().erase(CI); } F->setName(""); } return MP; } //===----------------------------------------------------------------------===// // Wrapper functions //===----------------------------------------------------------------------===// /// getBytecodeBufferModuleProvider - lazy function-at-a-time loading from a /// buffer ModuleProvider* llvm::getBytecodeBufferModuleProvider(const unsigned char *Buffer, unsigned Length, const std::string &ModuleID, std::string *ErrMsg, BytecodeHandler *H) { BytecodeBufferReader* rdr = new BytecodeBufferReader(Buffer, Length, ModuleID, H); if (rdr->read(ErrMsg)) return 0; return CheckVarargs(rdr); } /// ParseBytecodeBuffer - Parse a given bytecode buffer /// Module *llvm::ParseBytecodeBuffer(const unsigned char *Buffer, unsigned Length, const std::string &ModuleID, std::string *ErrMsg) { ModuleProvider *MP = getBytecodeBufferModuleProvider(Buffer, Length, ModuleID, ErrMsg, 0); if (!MP) return 0; Module *M = MP->releaseModule(); delete MP; return M; } /// getBytecodeModuleProvider - lazy function-at-a-time loading from a file /// ModuleProvider * llvm::getBytecodeModuleProvider(const std::string &Filename, std::string* ErrMsg, BytecodeHandler* H) { // Read from a file if (Filename != std::string("-")) { BytecodeFileReader* rdr = new BytecodeFileReader(Filename, H); if (rdr->read(ErrMsg)) return 0; return CheckVarargs(rdr); } // Read from stdin BytecodeStdinReader* rdr = new BytecodeStdinReader(H); if (rdr->read(ErrMsg)) return 0; return CheckVarargs(rdr); } /// ParseBytecodeFile - Parse the given bytecode file /// Module *llvm::ParseBytecodeFile(const std::string &Filename, std::string *ErrMsg) { ModuleProvider* MP = getBytecodeModuleProvider(Filename, ErrMsg); if (!MP) return 0; Module *M = MP->releaseModule(); delete MP; return M; } // AnalyzeBytecodeFile - analyze one file Module* llvm::AnalyzeBytecodeFile( const std::string &Filename, ///< File to analyze BytecodeAnalysis& bca, ///< Statistical output std::string *ErrMsg, ///< Error output std::ostream* output ///< Dump output ) { BytecodeHandler* AH = createBytecodeAnalyzerHandler(bca,output); ModuleProvider* MP = getBytecodeModuleProvider(Filename, ErrMsg, AH); if (!MP) return 0; Module *M = MP->releaseModule(); delete MP; return M; } // AnalyzeBytecodeBuffer - analyze a buffer Module* llvm::AnalyzeBytecodeBuffer( const unsigned char* Buffer, ///< Pointer to start of bytecode buffer unsigned Length, ///< Size of the bytecode buffer const std::string& ModuleID, ///< Identifier for the module BytecodeAnalysis& bca, ///< The results of the analysis std::string* ErrMsg, ///< Errors, if any. std::ostream* output ///< Dump output, if any ) { BytecodeHandler* hdlr = createBytecodeAnalyzerHandler(bca, output); ModuleProvider* MP = getBytecodeBufferModuleProvider(Buffer, Length, ModuleID, ErrMsg, hdlr); if (!MP) return 0; Module *M = MP->releaseModule(); delete MP; return M; } bool llvm::GetBytecodeDependentLibraries(const std::string &fname, Module::LibraryListType& deplibs, std::string* ErrMsg) { ModuleProvider* MP = getBytecodeModuleProvider(fname, ErrMsg); if (!MP) { deplibs.clear(); return true; } Module* M = MP->releaseModule(); deplibs = M->getLibraries(); delete M; delete MP; return false; } static void getSymbols(Module*M, std::vector& symbols) { // Loop over global variables for (Module::global_iterator GI = M->global_begin(), GE=M->global_end(); GI != GE; ++GI) if (!GI->isExternal() && !GI->hasInternalLinkage()) if (!GI->getName().empty()) symbols.push_back(GI->getName()); // Loop over functions. for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) if (!FI->isExternal() && !FI->hasInternalLinkage()) if (!FI->getName().empty()) symbols.push_back(FI->getName()); } // Get just the externally visible defined symbols from the bytecode bool llvm::GetBytecodeSymbols(const sys::Path& fName, std::vector& symbols, std::string* ErrMsg) { ModuleProvider *MP = getBytecodeModuleProvider(fName.toString(), ErrMsg); if (!MP) return true; // Get the module from the provider Module* M = MP->materializeModule(); if (M == 0) { delete MP; return true; } // Get the symbols getSymbols(M, symbols); // Done with the module. delete MP; return true; } ModuleProvider* llvm::GetBytecodeSymbols(const unsigned char*Buffer, unsigned Length, const std::string& ModuleID, std::vector& symbols, std::string* ErrMsg) { // Get the module provider ModuleProvider* MP = getBytecodeBufferModuleProvider(Buffer, Length, ModuleID, ErrMsg, 0); if (!MP) return 0; // Get the module from the provider Module* M = MP->materializeModule(); if (M == 0) { delete MP; return 0; } // Get the symbols getSymbols(M, symbols); // Done with the module. Note that ModuleProvider will delete the // Module when it is deleted. Also note that its the caller's responsibility // to delete the ModuleProvider. return MP; }