llvm-6502/lib/Transforms/IPO/Internalize.cpp
Duncan P. N. Exon Smith 3e4542b2ca Reapply "LTO: add API to set strategy for -internalize"
Reapply r199191, reverted in r199197 because it carelessly broke
Other/link-opts.ll.  The problem was that calling
createInternalizePass("main") would select
createInternalizePass(bool("main")) instead of
createInternalizePass(ArrayRef<const char *>("main")).  This commit
fixes the bug.

The original commit message follows.

Add API to LTOCodeGenerator to specify a strategy for the -internalize
pass.

This is a new attempt at Bill's change in r185882, which he reverted in
r188029 due to problems with the gold linker.  This puts the onus on the
linker to decide whether (and what) to internalize.

In particular, running internalize before outputting an object file may
change a 'weak' symbol into an internal one, even though that symbol
could be needed by an external object file --- e.g., with arclite.

This patch enables three strategies:

- LTO_INTERNALIZE_FULL: the default (and the old behaviour).
- LTO_INTERNALIZE_NONE: skip -internalize.
- LTO_INTERNALIZE_HIDDEN: only -internalize symbols with hidden
  visibility.

LTO_INTERNALIZE_FULL should be used when linking an executable.

Outputting an object file (e.g., via ld -r) is more complicated, and
depends on whether hidden symbols should be internalized.  E.g., for
ld -r, LTO_INTERNALIZE_NONE can be used when -keep_private_externs, and
LTO_INTERNALIZE_HIDDEN can be used otherwise.  However,
LTO_INTERNALIZE_FULL is inappropriate, since the output object file will
eventually need to link with others.

lto_codegen_set_internalize_strategy() sets the strategy for subsequent
calls to lto_codegen_write_merged_modules() and lto_codegen_compile*().

<rdar://problem/14334895>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199244 91177308-0d34-0410-b5e6-96231b3b80d8
2014-01-14 18:52:17 +00:00

237 lines
8.3 KiB
C++

//===-- Internalize.cpp - Mark functions internal -------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass loops over all of the functions and variables in the input module.
// If the function or variable is not in the list of external names given to
// the pass it is marked as internal.
//
// This transformation would not be legal in a regular compilation, but it gets
// extra information from the linker about what is safe.
//
// For example: a function with external linkage may be internalized only if
// we are told that it is used solely from within this module.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "internalize"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <fstream>
#include <set>
using namespace llvm;
// Statistics counters, one per kind of global value; runOnModule increments
// the matching counter each time it internalizes a symbol.
STATISTIC(NumAliases , "Number of aliases internalized");
STATISTIC(NumFunctions, "Number of functions internalized");
STATISTIC(NumGlobals , "Number of global vars internalized");
// APIFile - A file which contains a list of symbols that should not be marked
// internal. It is read by InternalizePass::LoadFile, which treats the contents
// as whitespace-separated symbol names.
static cl::opt<std::string>
APIFile("internalize-public-api-file", cl::value_desc("filename"),
cl::desc("A file containing list of symbol names to preserve"));
// APIList - A list of symbols that should not be marked internal. Several
// names may be given in a single comma-separated value (cl::CommaSeparated).
// The InternalizePass(bool) constructor adds every entry to ExternalNames.
static cl::list<std::string>
APIList("internalize-public-api-list", cl::value_desc("list"),
cl::desc("A list of symbol names to preserve"),
cl::CommaSeparated);
namespace {
// InternalizePass - Module pass that switches eligible functions, global
// variables and aliases to internal linkage, except for those whose names are
// listed in ExternalNames.
class InternalizePass : public ModulePass {
// Names of the symbols that must keep their current linkage.
std::set<std::string> ExternalNames;
// When true, only symbols with hidden visibility may be internalized.
bool OnlyHidden;
public:
static char ID; // Pass identification, replacement for typeid
// Fill ExternalNames from the internalize-public-api-file and
// internalize-public-api-list command-line options.
explicit InternalizePass(bool OnlyHidden = false);
// Fill ExternalNames from ExportList; the command-line options are not read.
explicit InternalizePass(ArrayRef<const char *> ExportList, bool OnlyHidden);
// Add every whitespace-separated symbol name found in Filename to
// ExternalNames; an unreadable file only produces a warning.
void LoadFile(const char *Filename);
// Internalize the eligible symbols of M; returns true if any linkage changed.
virtual bool runOnModule(Module &M);
// Declares that the CFG and the call graph wrapper analysis are preserved;
// runOnModule removes the affected call graph edges itself.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addPreserved<CallGraphWrapperPass>();
}
};
} // end anonymous namespace
// Definition of the static pass identifier declared in InternalizePass.
char InternalizePass::ID = 0;
// Register the pass under the name "internalize"; the constructors call the
// generated initializeInternalizePassPass() function.
INITIALIZE_PASS(InternalizePass, "internalize",
"Internalize Global Symbols", false, false)
// Construct a pass whose preserved-symbol set is taken from the command line:
// first the names read from the internalize-public-api-file option (if one
// was given), then every entry of the internalize-public-api-list option.
InternalizePass::InternalizePass(bool OnlyHidden)
    : ModulePass(ID), OnlyHidden(OnlyHidden) {
  initializeInternalizePassPass(*PassRegistry::getPassRegistry());

  // A file name on the command line contributes its symbols first.
  const bool HaveAPIFile = !APIFile.empty();
  if (HaveAPIFile)
    LoadFile(APIFile.c_str());

  // Then add each name supplied through the list option.
  for (unsigned Idx = 0, NumNames = APIList.size(); Idx != NumNames; ++Idx)
    ExternalNames.insert(APIList[Idx]);
}
// Construct a pass whose preserved-symbol set is exactly the names in
// ExportList. Each C string is copied into ExternalNames, so the caller's
// array does not have to outlive the constructor call.
InternalizePass::InternalizePass(ArrayRef<const char *> ExportList,
                                 bool OnlyHidden)
    : ModulePass(ID), OnlyHidden(OnlyHidden) {
  initializeInternalizePassPass(*PassRegistry::getPassRegistry());
  // Range insertion converts every const char * into a std::string element.
  ExternalNames.insert(ExportList.begin(), ExportList.end());
}
// LoadFile - Read whitespace-separated symbol names from Filename and add
// each of them to ExternalNames. If the file cannot be opened, a warning is
// printed and the set is left unchanged.
void InternalizePass::LoadFile(const char *Filename) {
  std::ifstream APIStream(Filename);
  if (!APIStream.good()) {
    errs() << "WARNING: Internalize couldn't load file '" << Filename
           << "'! Continuing as if it's empty.\n";
    return; // Treat an unreadable file like an empty one.
  }

  // A successful extraction always yields a non-empty token, so no separate
  // emptiness check is needed.
  std::string Name;
  while (APIStream >> Name)
    ExternalNames.insert(Name);
}
// shouldInternalize - Decide whether GV may be switched to internal linkage.
// GV is rejected when OnlyHidden is set and GV is not hidden, when GV has no
// definition in this module, when it is dllexported, when it already has
// local linkage, or when its name appears in ExternalNames.
static bool shouldInternalize(const GlobalValue &GV,
                              const std::set<std::string> &ExternalNames,
                              bool OnlyHidden) {
  // In hidden-only mode every other visibility is left untouched.
  if (OnlyHidden && !GV.hasHiddenVisibility())
    return false;

  // Skip symbols that are not defined here (available_externally is really
  // just a "declaration with a body"), dllexported symbols, which are assumed
  // to be referenced elsewhere, and symbols that are already local.
  if (GV.isDeclaration() || GV.hasAvailableExternallyLinkage() ||
      GV.hasDLLExportStorageClass() || GV.hasLocalLinkage())
    return false;

  // Whatever is left is internalized unless it was asked to stay external.
  return ExternalNames.count(GV.getName()) == 0;
}
// runOnModule - Give internal linkage to every function, global variable and
// alias in M that shouldInternalize() accepts, and update the statistics
// counters. If a call graph is available, the edge from the external calling
// node to each internalized function is removed. Returns true if the linkage
// of at least one symbol was changed.
bool InternalizePass::runOnModule(Module &M) {
  CallGraphWrapperPass *CGPass = getAnalysisIfAvailable<CallGraphWrapperPass>();
  CallGraph *CG = CGPass ? &CGPass->getCallGraph() : 0;
  CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0;
  bool Changed = false;

  SmallPtrSet<GlobalValue *, 8> Used;
  collectUsedGlobalVariables(M, Used, false);

  // We must assume that globals in llvm.used have a reference that not even
  // the linker can see, so we don't internalize them.
  // For llvm.compiler.used the situation is a bit fuzzy. The assembler and
  // linker can drop those symbols. If this pass is running as part of LTO,
  // one might think that it could just drop llvm.compiler.used. The problem
  // is that even in LTO llvm doesn't see every reference. For example,
  // we don't see references from function local inline assembly. To be
  // conservative, we internalize symbols in llvm.compiler.used, but we
  // keep llvm.compiler.used so that the symbol is not deleted by llvm.
  for (SmallPtrSet<GlobalValue *, 8>::iterator I = Used.begin(), E = Used.end();
       I != E; ++I) {
    GlobalValue *V = *I;
    ExternalNames.insert(V->getName());
  }

  // All reserved names are registered before any symbol is visited. They used
  // to be added only after the function loop, which meant that a function
  // defined in the module under one of these names (e.g. __stack_chk_fail)
  // was internalized anyway.

  // Never internalize the llvm.used symbol.  It is used to implement
  // attribute((used)).
  // FIXME: Shouldn't this just filter on llvm.metadata section??
  ExternalNames.insert("llvm.used");
  ExternalNames.insert("llvm.compiler.used");

  // Never internalize anchors used by the machine module info, else the info
  // won't find them.  (see MachineModuleInfo.)
  ExternalNames.insert("llvm.global_ctors");
  ExternalNames.insert("llvm.global_dtors");
  ExternalNames.insert("llvm.global.annotations");

  // Never internalize symbols code-gen inserts.
  // FIXME: We should probably add this (and the __stack_chk_guard) via some
  // type of call-back in CodeGen.
  ExternalNames.insert("__stack_chk_fail");
  ExternalNames.insert("__stack_chk_guard");

  // Mark all functions not in the api as internal.
  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
    if (!shouldInternalize(*I, ExternalNames, OnlyHidden))
      continue;

    I->setLinkage(GlobalValue::InternalLinkage);

    if (ExternalNode)
      // Remove a callgraph edge from the external node to this function.
      ExternalNode->removeOneAbstractEdgeTo((*CG)[I]);

    Changed = true;
    ++NumFunctions;
    DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n");
  }

  // Mark all global variables with initializers that are not in the api as
  // internal as well.
  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
       I != E; ++I) {
    if (!shouldInternalize(*I, ExternalNames, OnlyHidden))
      continue;

    I->setLinkage(GlobalValue::InternalLinkage);
    Changed = true;
    ++NumGlobals;
    DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n");
  }

  // Mark all aliases that are not in the api as internal as well.
  for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
       I != E; ++I) {
    if (!shouldInternalize(*I, ExternalNames, OnlyHidden))
      continue;

    I->setLinkage(GlobalValue::InternalLinkage);
    Changed = true;
    ++NumAliases;
    DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n");
  }

  return Changed;
}
// createInternalizePass - Create an internalize pass whose preserved symbols
// come from the command-line options. The caller receives the newly
// allocated pass.
ModulePass *llvm::createInternalizePass(bool OnlyHidden) {
  ModulePass *Pass = new InternalizePass(OnlyHidden);
  return Pass;
}
// createInternalizePass - Create an internalize pass that preserves exactly
// the symbols named in ExportList. The caller receives the newly allocated
// pass.
ModulePass *llvm::createInternalizePass(ArrayRef<const char *> ExportList,
                                        bool OnlyHidden) {
  ModulePass *Pass = new InternalizePass(ExportList, OnlyHidden);
  return Pass;
}
// createInternalizePass - Convenience overload that preserves one symbol.
// Without it, a call such as createInternalizePass("main") would pick the
// bool overload, because a pointer converts to bool more readily than to
// ArrayRef. The name is wrapped in a one-element list and forwarded.
ModulePass *llvm::createInternalizePass(const char *SingleExport) {
  ArrayRef<const char *> OneName(SingleExport);
  return createInternalizePass(OneName);
}