mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-18 13:34:04 +00:00
f0edc6c18b
with ParseBytecodeFile used to leak both a ModuleProvider (and related bytecode parser stuff attached to it) AND a file descriptor, which was never closed. This prevented gccld/llvm-ld/llvm-link from linking together apps with more than ~252 .bc files on darwin. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30912 91177308-0d34-0410-b5e6-96231b3b80d8
463 lines
15 KiB
C++
463 lines
15 KiB
C++
//===- ReaderWrappers.cpp - Parse bytecode from file or buffer -----------===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file was developed by the LLVM research group and is distributed under
|
|
// the University of Illinois Open Source License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file implements loading and parsing a bytecode file and parsing a
|
|
// bytecode module from a given buffer.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "llvm/Bytecode/Analyzer.h"
|
|
#include "llvm/Bytecode/Reader.h"
|
|
#include "Reader.h"
|
|
#include "llvm/Module.h"
|
|
#include "llvm/Instructions.h"
|
|
#include "llvm/ADT/StringExtras.h"
|
|
#include "llvm/System/MappedFile.h"
|
|
#include "llvm/System/Program.h"
|
|
#include <cerrno>
|
|
#include <iostream>
|
|
#include <memory>
|
|
|
|
using namespace llvm;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// BytecodeFileReader - Read from an mmap'able file descriptor.
|
|
//
|
|
|
|
namespace {
|
|
/// BytecodeFileReader - parses a bytecode file from a file
|
|
///
|
|
class BytecodeFileReader : public BytecodeReader {
|
|
private:
|
|
std::string fileName;
|
|
sys::MappedFile mapFile;
|
|
|
|
BytecodeFileReader(const BytecodeFileReader&); // Do not implement
|
|
void operator=(const BytecodeFileReader &BFR); // Do not implement
|
|
|
|
public:
|
|
BytecodeFileReader(const std::string &Filename, llvm::BytecodeHandler* H=0);
|
|
bool read(std::string* ErrMsg);
|
|
|
|
void freeState() {
|
|
BytecodeReader::freeState();
|
|
mapFile.close();
|
|
}
|
|
};
|
|
}
|
|
|
|
/// Construct a reader for Filename.  The handler H goes to the BytecodeReader
/// base; the file itself is not touched until read() is called.
BytecodeFileReader::BytecodeFileReader(const std::string &Filename,
                                       llvm::BytecodeHandler* H)
  : BytecodeReader(H),
    fileName(Filename) {
  // Intentionally empty: opening and mapping happen in read().
}
|
|
|
|
bool BytecodeFileReader::read(std::string* ErrMsg) {
|
|
if (mapFile.open(sys::Path(fileName), sys::MappedFile::READ_ACCESS, ErrMsg))
|
|
return true;
|
|
if (!mapFile.map(ErrMsg)) {
|
|
mapFile.close();
|
|
return true;
|
|
}
|
|
unsigned char* buffer = reinterpret_cast<unsigned char*>(mapFile.base());
|
|
return ParseBytecode(buffer, mapFile.size(), fileName, ErrMsg);
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// BytecodeBufferReader - Read from a memory buffer
|
|
//
|
|
|
|
namespace {
|
|
/// BytecodeBufferReader - parses a bytecode file from a buffer
|
|
///
|
|
class BytecodeBufferReader : public BytecodeReader {
|
|
private:
|
|
const unsigned char *Buffer;
|
|
const unsigned char *Buf;
|
|
unsigned Length;
|
|
std::string ModuleID;
|
|
bool MustDelete;
|
|
|
|
BytecodeBufferReader(const BytecodeBufferReader&); // Do not implement
|
|
void operator=(const BytecodeBufferReader &BFR); // Do not implement
|
|
|
|
public:
|
|
BytecodeBufferReader(const unsigned char *Buf, unsigned Length,
|
|
const std::string &ModuleID,
|
|
llvm::BytecodeHandler* Handler = 0);
|
|
~BytecodeBufferReader();
|
|
|
|
bool read(std::string* ErrMsg);
|
|
|
|
};
|
|
}
|
|
|
|
/// Record the caller's buffer, its length and the module identifier.  Nothing
/// is copied or parsed here: Buffer starts out null and MustDelete false, and
/// both are only updated by read().
BytecodeBufferReader::BytecodeBufferReader(const unsigned char *buf,
                                           unsigned len,
                                           const std::string &modID,
                                           llvm::BytecodeHandler *H)
  : BytecodeReader(H),
    Buffer(0),
    Buf(buf),
    Length(len),
    ModuleID(modID),
    MustDelete(false) {
}
|
|
|
|
/// Release the heap buffer, but only when MustDelete says this object
/// allocated it.
BytecodeBufferReader::~BytecodeBufferReader() {
  if (!MustDelete)
    return;
  delete [] Buffer;
}
|
|
|
|
bool
|
|
BytecodeBufferReader::read(std::string* ErrMsg) {
|
|
// If not aligned, allocate a new buffer to hold the bytecode...
|
|
const unsigned char *ParseBegin = 0;
|
|
if (reinterpret_cast<uint64_t>(Buf) & 3) {
|
|
Buffer = new unsigned char[Length+4];
|
|
unsigned Offset = 4 - ((intptr_t)Buffer & 3); // Make sure it's aligned
|
|
ParseBegin = Buffer + Offset;
|
|
memcpy((unsigned char*)ParseBegin, Buf, Length); // Copy it over
|
|
MustDelete = true;
|
|
} else {
|
|
// If we don't need to copy it over, just use the caller's copy
|
|
ParseBegin = Buffer = Buf;
|
|
MustDelete = false;
|
|
}
|
|
if (ParseBytecode(ParseBegin, Length, ModuleID, ErrMsg)) {
|
|
if (MustDelete) delete [] Buffer;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// BytecodeStdinReader - Read bytecode from Standard Input
|
|
//
|
|
|
|
namespace {
|
|
/// BytecodeStdinReader - parses a bytecode file from stdin
|
|
///
|
|
class BytecodeStdinReader : public BytecodeReader {
|
|
private:
|
|
std::vector<unsigned char> FileData;
|
|
unsigned char *FileBuf;
|
|
|
|
BytecodeStdinReader(const BytecodeStdinReader&); // Do not implement
|
|
void operator=(const BytecodeStdinReader &BFR); // Do not implement
|
|
|
|
public:
|
|
BytecodeStdinReader( llvm::BytecodeHandler* H = 0 );
|
|
bool read(std::string* ErrMsg);
|
|
};
|
|
}
|
|
|
|
/// Construct a stdin reader.  H is handed to the BytecodeReader base.
/// FileBuf is given a defined (null) value; it is pointed at the collected
/// data by read().
BytecodeStdinReader::BytecodeStdinReader( BytecodeHandler* H )
  : BytecodeReader(H), FileBuf(0)
{
}
|
|
|
|
bool
|
|
BytecodeStdinReader::read(std::string* ErrMsg)
|
|
{
|
|
sys::Program::ChangeStdinToBinary();
|
|
char Buffer[4096*4];
|
|
|
|
// Read in all of the data from stdin, we cannot mmap stdin...
|
|
while (std::cin.good()) {
|
|
std::cin.read(Buffer, 4096*4);
|
|
int BlockSize = std::cin.gcount();
|
|
if (0 >= BlockSize)
|
|
break;
|
|
FileData.insert(FileData.end(), Buffer, Buffer+BlockSize);
|
|
}
|
|
|
|
if (FileData.empty()) {
|
|
if (ErrMsg)
|
|
*ErrMsg = "Standard Input is empty!";
|
|
return true;
|
|
}
|
|
|
|
FileBuf = &FileData[0];
|
|
if (ParseBytecode(FileBuf, FileData.size(), "<stdin>", ErrMsg))
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Varargs transmogrification code...
|
|
//
|
|
|
|
// CheckVarargs - This is used to automatically translate old-style varargs to
|
|
// new style varargs for backwards compatibility.
|
|
static ModuleProvider* CheckVarargs(ModuleProvider* MP) {
|
|
Module* M = MP->getModule();
|
|
|
|
// check to see if va_start takes arguements...
|
|
Function* F = M->getNamedFunction("llvm.va_start");
|
|
if(F == 0) return MP; //No varargs use, just return.
|
|
|
|
if (F->getFunctionType()->getNumParams() == 1)
|
|
return MP; // Modern varargs processing, just return.
|
|
|
|
// If we get to this point, we know that we have an old-style module.
|
|
// Materialize the whole thing to perform the rewriting.
|
|
if (MP->materializeModule() == 0)
|
|
return 0;
|
|
|
|
if(Function* F = M->getNamedFunction("llvm.va_start")) {
|
|
assert(F->arg_size() == 0 && "Obsolete va_start takes 0 argument!");
|
|
|
|
//foo = va_start()
|
|
// ->
|
|
//bar = alloca typeof(foo)
|
|
//va_start(bar)
|
|
//foo = load bar
|
|
|
|
const Type* RetTy = Type::getPrimitiveType(Type::VoidTyID);
|
|
const Type* ArgTy = F->getFunctionType()->getReturnType();
|
|
const Type* ArgTyPtr = PointerType::get(ArgTy);
|
|
Function* NF = M->getOrInsertFunction("llvm.va_start",
|
|
RetTy, ArgTyPtr, (Type *)0);
|
|
|
|
for(Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E;)
|
|
if (CallInst* CI = dyn_cast<CallInst>(*I++)) {
|
|
AllocaInst* bar = new AllocaInst(ArgTy, 0, "vastart.fix.1", CI);
|
|
new CallInst(NF, bar, "", CI);
|
|
Value* foo = new LoadInst(bar, "vastart.fix.2", CI);
|
|
CI->replaceAllUsesWith(foo);
|
|
CI->getParent()->getInstList().erase(CI);
|
|
}
|
|
F->setName("");
|
|
}
|
|
|
|
if(Function* F = M->getNamedFunction("llvm.va_end")) {
|
|
assert(F->arg_size() == 1 && "Obsolete va_end takes 1 argument!");
|
|
//vaend foo
|
|
// ->
|
|
//bar = alloca 1 of typeof(foo)
|
|
//vaend bar
|
|
const Type* RetTy = Type::getPrimitiveType(Type::VoidTyID);
|
|
const Type* ArgTy = F->getFunctionType()->getParamType(0);
|
|
const Type* ArgTyPtr = PointerType::get(ArgTy);
|
|
Function* NF = M->getOrInsertFunction("llvm.va_end",
|
|
RetTy, ArgTyPtr, (Type *)0);
|
|
|
|
for(Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E;)
|
|
if (CallInst* CI = dyn_cast<CallInst>(*I++)) {
|
|
AllocaInst* bar = new AllocaInst(ArgTy, 0, "vaend.fix.1", CI);
|
|
new StoreInst(CI->getOperand(1), bar, CI);
|
|
new CallInst(NF, bar, "", CI);
|
|
CI->getParent()->getInstList().erase(CI);
|
|
}
|
|
F->setName("");
|
|
}
|
|
|
|
if(Function* F = M->getNamedFunction("llvm.va_copy")) {
|
|
assert(F->arg_size() == 1 && "Obsolete va_copy takes 1 argument!");
|
|
//foo = vacopy(bar)
|
|
// ->
|
|
//a = alloca 1 of typeof(foo)
|
|
//b = alloca 1 of typeof(foo)
|
|
//store bar -> b
|
|
//vacopy(a, b)
|
|
//foo = load a
|
|
|
|
const Type* RetTy = Type::getPrimitiveType(Type::VoidTyID);
|
|
const Type* ArgTy = F->getFunctionType()->getReturnType();
|
|
const Type* ArgTyPtr = PointerType::get(ArgTy);
|
|
Function* NF = M->getOrInsertFunction("llvm.va_copy",
|
|
RetTy, ArgTyPtr, ArgTyPtr, (Type *)0);
|
|
|
|
for(Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E;)
|
|
if (CallInst* CI = dyn_cast<CallInst>(*I++)) {
|
|
AllocaInst* a = new AllocaInst(ArgTy, 0, "vacopy.fix.1", CI);
|
|
AllocaInst* b = new AllocaInst(ArgTy, 0, "vacopy.fix.2", CI);
|
|
new StoreInst(CI->getOperand(1), b, CI);
|
|
new CallInst(NF, a, b, "", CI);
|
|
Value* foo = new LoadInst(a, "vacopy.fix.3", CI);
|
|
CI->replaceAllUsesWith(foo);
|
|
CI->getParent()->getInstList().erase(CI);
|
|
}
|
|
F->setName("");
|
|
}
|
|
return MP;
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Wrapper functions
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// getBytecodeBufferModuleProvider - lazy function-at-a-time loading from a
|
|
/// buffer
|
|
ModuleProvider*
|
|
llvm::getBytecodeBufferModuleProvider(const unsigned char *Buffer,
|
|
unsigned Length,
|
|
const std::string &ModuleID,
|
|
std::string *ErrMsg,
|
|
BytecodeHandler *H) {
|
|
BytecodeBufferReader* rdr =
|
|
new BytecodeBufferReader(Buffer, Length, ModuleID, H);
|
|
if (rdr->read(ErrMsg))
|
|
return 0;
|
|
return CheckVarargs(rdr);
|
|
}
|
|
|
|
/// ParseBytecodeBuffer - Parse a given bytecode buffer
|
|
///
|
|
Module *llvm::ParseBytecodeBuffer(const unsigned char *Buffer, unsigned Length,
|
|
const std::string &ModuleID,
|
|
std::string *ErrMsg) {
|
|
ModuleProvider *MP =
|
|
getBytecodeBufferModuleProvider(Buffer, Length, ModuleID, ErrMsg, 0);
|
|
if (!MP) return 0;
|
|
Module *M = MP->releaseModule();
|
|
delete MP;
|
|
return M;
|
|
}
|
|
|
|
/// getBytecodeModuleProvider - lazy function-at-a-time loading from a file
|
|
///
|
|
ModuleProvider *
|
|
llvm::getBytecodeModuleProvider(const std::string &Filename,
|
|
std::string* ErrMsg,
|
|
BytecodeHandler* H) {
|
|
// Read from a file
|
|
if (Filename != std::string("-")) {
|
|
BytecodeFileReader* rdr = new BytecodeFileReader(Filename, H);
|
|
if (rdr->read(ErrMsg))
|
|
return 0;
|
|
return CheckVarargs(rdr);
|
|
}
|
|
|
|
// Read from stdin
|
|
BytecodeStdinReader* rdr = new BytecodeStdinReader(H);
|
|
if (rdr->read(ErrMsg))
|
|
return 0;
|
|
return CheckVarargs(rdr);
|
|
}
|
|
|
|
/// ParseBytecodeFile - Parse the given bytecode file
|
|
///
|
|
Module *llvm::ParseBytecodeFile(const std::string &Filename,
|
|
std::string *ErrMsg) {
|
|
ModuleProvider* MP = getBytecodeModuleProvider(Filename, ErrMsg);
|
|
if (!MP) return 0;
|
|
Module *M = MP->releaseModule();
|
|
delete MP;
|
|
return M;
|
|
}
|
|
|
|
// AnalyzeBytecodeFile - analyze one file
|
|
Module* llvm::AnalyzeBytecodeFile(
|
|
const std::string &Filename, ///< File to analyze
|
|
BytecodeAnalysis& bca, ///< Statistical output
|
|
std::string *ErrMsg, ///< Error output
|
|
std::ostream* output ///< Dump output
|
|
) {
|
|
BytecodeHandler* AH = createBytecodeAnalyzerHandler(bca,output);
|
|
ModuleProvider* MP = getBytecodeModuleProvider(Filename, ErrMsg, AH);
|
|
if (!MP) return 0;
|
|
Module *M = MP->releaseModule();
|
|
delete MP;
|
|
return M;
|
|
}
|
|
|
|
// AnalyzeBytecodeBuffer - analyze a buffer
|
|
Module* llvm::AnalyzeBytecodeBuffer(
|
|
const unsigned char* Buffer, ///< Pointer to start of bytecode buffer
|
|
unsigned Length, ///< Size of the bytecode buffer
|
|
const std::string& ModuleID, ///< Identifier for the module
|
|
BytecodeAnalysis& bca, ///< The results of the analysis
|
|
std::string* ErrMsg, ///< Errors, if any.
|
|
std::ostream* output ///< Dump output, if any
|
|
)
|
|
{
|
|
BytecodeHandler* hdlr = createBytecodeAnalyzerHandler(bca, output);
|
|
ModuleProvider* MP =
|
|
getBytecodeBufferModuleProvider(Buffer, Length, ModuleID, ErrMsg, hdlr);
|
|
if (!MP) return 0;
|
|
Module *M = MP->releaseModule();
|
|
delete MP;
|
|
return M;
|
|
}
|
|
|
|
bool llvm::GetBytecodeDependentLibraries(const std::string &fname,
|
|
Module::LibraryListType& deplibs,
|
|
std::string* ErrMsg) {
|
|
ModuleProvider* MP = getBytecodeModuleProvider(fname, ErrMsg);
|
|
if (!MP) {
|
|
deplibs.clear();
|
|
return true;
|
|
}
|
|
Module* M = MP->releaseModule();
|
|
deplibs = M->getLibraries();
|
|
delete M;
|
|
delete MP;
|
|
return false;
|
|
}
|
|
|
|
/// getSymbols - Append to symbols the name of every global variable, and then
/// every function, in M for which isExternal() is false, hasInternalLinkage()
/// is false, and the name is not empty.
static void getSymbols(Module*M, std::vector<std::string>& symbols) {
  // Global variables first.
  Module::global_iterator GI = M->global_begin();
  Module::global_iterator GE = M->global_end();
  for (; GI != GE; ++GI) {
    if (GI->isExternal() || GI->hasInternalLinkage())
      continue;
    if (GI->getName().empty())
      continue;
    symbols.push_back(GI->getName());
  }

  // Then functions, filtered the same way.
  Module::iterator FI = M->begin();
  Module::iterator FE = M->end();
  for (; FI != FE; ++FI) {
    if (FI->isExternal() || FI->hasInternalLinkage())
      continue;
    if (FI->getName().empty())
      continue;
    symbols.push_back(FI->getName());
  }
}
|
|
|
|
// Get just the externally visible defined symbols from the bytecode
|
|
bool llvm::GetBytecodeSymbols(const sys::Path& fName,
|
|
std::vector<std::string>& symbols,
|
|
std::string* ErrMsg) {
|
|
ModuleProvider *MP = getBytecodeModuleProvider(fName.toString(), ErrMsg);
|
|
if (!MP)
|
|
return true;
|
|
|
|
// Get the module from the provider
|
|
Module* M = MP->materializeModule();
|
|
if (M == 0) {
|
|
delete MP;
|
|
return true;
|
|
}
|
|
|
|
// Get the symbols
|
|
getSymbols(M, symbols);
|
|
|
|
// Done with the module.
|
|
delete MP;
|
|
return true;
|
|
}
|
|
|
|
/// GetBytecodeSymbols - Buffer flavour: load bytecode from Buffer, append the
/// names selected by getSymbols to symbols, and return the ModuleProvider
/// that was used.  Returns null if the provider could not be created or the
/// module could not be materialized (the provider is deleted in the latter
/// case).
ModuleProvider*
llvm::GetBytecodeSymbols(const unsigned char*Buffer, unsigned Length,
                         const std::string& ModuleID,
                         std::vector<std::string>& symbols,
                         std::string* ErrMsg) {
  // Get the module provider
  ModuleProvider* Provider =
    getBytecodeBufferModuleProvider(Buffer, Length, ModuleID, ErrMsg, 0);
  if (Provider == 0)
    return 0;

  // Get the module from the provider
  Module* Mod = Provider->materializeModule();
  if (!Mod) {
    delete Provider;
    return 0;
  }

  // Get the symbols
  getSymbols(Mod, symbols);

  // Done with the module. Note that ModuleProvider will delete the
  // Module when it is deleted. Also note that it's the caller's
  // responsibility to delete the ModuleProvider.
  return Provider;
}
|