mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-27 02:31:09 +00:00
With this patch, the LowerGC transformation becomes the
ShadowStackCollector, which additionally has reduced overhead with no sacrifice in portability. Considering a function @fun with 8 loop-local roots, ShadowStackCollector introduces the following overhead (x86): ; shadowstack prologue movl L_llvm_gc_root_chain$non_lazy_ptr, %eax movl (%eax), %ecx movl $___gc_fun, 20(%esp) movl $0, 24(%esp) movl $0, 28(%esp) movl $0, 32(%esp) movl $0, 36(%esp) movl $0, 40(%esp) movl $0, 44(%esp) movl $0, 48(%esp) movl $0, 52(%esp) movl %ecx, 16(%esp) leal 16(%esp), %ecx movl %ecx, (%eax) ; shadowstack loop overhead (none) ; shadowstack epilogue movl 48(%esp), %edx movl %edx, (%ecx) ; shadowstack metadata .align 3 ___gc_fun: # __gc_fun .long 8 .space 4 In comparison to LowerGC: ; lowergc prologue movl L_llvm_gc_root_chain$non_lazy_ptr, %eax movl (%eax), %ecx movl %ecx, 48(%esp) movl $8, 52(%esp) movl $0, 60(%esp) movl $0, 56(%esp) movl $0, 68(%esp) movl $0, 64(%esp) movl $0, 76(%esp) movl $0, 72(%esp) movl $0, 84(%esp) movl $0, 80(%esp) movl $0, 92(%esp) movl $0, 88(%esp) movl $0, 100(%esp) movl $0, 96(%esp) movl $0, 108(%esp) movl $0, 104(%esp) movl $0, 116(%esp) movl $0, 112(%esp) ; lowergc loop overhead leal 44(%esp), %eax movl %eax, 56(%esp) leal 40(%esp), %eax movl %eax, 64(%esp) leal 36(%esp), %eax movl %eax, 72(%esp) leal 32(%esp), %eax movl %eax, 80(%esp) leal 28(%esp), %eax movl %eax, 88(%esp) leal 24(%esp), %eax movl %eax, 96(%esp) leal 20(%esp), %eax movl %eax, 104(%esp) leal 16(%esp), %eax movl %eax, 112(%esp) ; lowergc epilogue movl 48(%esp), %edx movl %edx, (%ecx) ; lowergc metadata (none) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45670 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ce2247755e
commit
8fa8929177
@ -17,6 +17,7 @@
|
||||
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/CodeGen/ScheduleDAG.h"
|
||||
#include "llvm/CodeGen/Collectors.h"
|
||||
|
||||
namespace {
|
||||
struct ForceCodegenLinking {
|
||||
@ -35,6 +36,8 @@ namespace {
|
||||
|
||||
(void) llvm::createSimpleRegisterCoalescer();
|
||||
|
||||
(void) llvm::createShadowStackCollector();
|
||||
|
||||
(void) llvm::createBURRListDAGScheduler(NULL, NULL, NULL);
|
||||
(void) llvm::createTDRRListDAGScheduler(NULL, NULL, NULL);
|
||||
(void) llvm::createTDListDAGScheduler(NULL, NULL, NULL);
|
||||
|
@ -84,7 +84,6 @@ namespace {
|
||||
(void) llvm::createLoopRotatePass();
|
||||
(void) llvm::createLoopIndexSplitPass();
|
||||
(void) llvm::createLowerAllocationsPass();
|
||||
(void) llvm::createLowerGCPass();
|
||||
(void) llvm::createLowerInvokePass();
|
||||
(void) llvm::createLowerPackedPass();
|
||||
(void) llvm::createLowerSelectPass();
|
||||
|
@ -294,13 +294,6 @@ FunctionPass *createLowerPackedPass();
|
||||
FunctionPass *createLowerInvokePass(const TargetLowering *TLI = NULL);
|
||||
extern const PassInfo *LowerInvokePassID;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// LowerGCPass - This function returns an instance of the "lowergc" pass, which
|
||||
// lowers garbage collection intrinsics to normal LLVM code.
|
||||
//
|
||||
FunctionPass *createLowerGCPass();
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// BlockPlacement - This pass reorders basic blocks in order to increase the
|
||||
|
441
lib/CodeGen/ShadowStackCollector.cpp
Normal file
441
lib/CodeGen/ShadowStackCollector.cpp
Normal file
@ -0,0 +1,441 @@
|
||||
//===-- ShadowStackCollector.cpp - GC support for uncooperative targets ---===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements lowering for the llvm.gc* intrinsics for targets that do
|
||||
// not natively support them (which includes the C backend). Note that the code
|
||||
// generated is not quite as efficient as collectors which generate stack maps
|
||||
// to identify roots.
|
||||
//
|
||||
// This pass implements the code transformation described in this paper:
|
||||
// "Accurate Garbage Collection in an Uncooperative Environment"
|
||||
// Fergus Henderson, ISMM, 2002
|
||||
//
|
||||
// In runtime/GC/SemiSpace.cpp is a prototype runtime which is compatible with
|
||||
// this collector.
|
||||
//
|
||||
// In order to support this particular transformation, all stack roots are
|
||||
// coallocated in the stack. This allows a fully target-independent stack map
|
||||
// while introducing only minor runtime overhead.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "shadowstackgc"
|
||||
#include "llvm/CodeGen/Collectors.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/CodeGen/Collector.h"
|
||||
#include "llvm/Constants.h"
|
||||
#include "llvm/DerivedTypes.h"
|
||||
#include "llvm/Instructions.h"
|
||||
#include "llvm/IntrinsicInst.h"
|
||||
#include "llvm/Module.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include "llvm/Support/LLVMBuilder.h"
|
||||
#include "llvm/Analysis/Verifier.h"
|
||||
#include <cstdlib>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
|
||||
class VISIBILITY_HIDDEN ShadowStackCollector : public Collector {
|
||||
/// RootChain - This is the global linked-list that contains the chain of GC
|
||||
/// roots.
|
||||
GlobalVariable *Head;
|
||||
|
||||
/// StackEntryTy - Abstract type of a link in the shadow stack.
|
||||
///
|
||||
const StructType *StackEntryTy;
|
||||
|
||||
/// Roots - GC roots in the current function. Each is a pair of the
|
||||
/// intrinsic call and its corresponding alloca.
|
||||
std::vector<std::pair<CallInst*,AllocaInst*> > Roots;
|
||||
|
||||
public:
|
||||
ShadowStackCollector();
|
||||
|
||||
bool initializeCustomLowering(Module &M);
|
||||
bool performCustomLowering(Function &F);
|
||||
|
||||
private:
|
||||
bool IsNullValue(Value *V);
|
||||
Constant *GetFrameMap(Function &F);
|
||||
const Type* GetConcreteStackEntryType(Function &F);
|
||||
void CollectRoots(Function &F);
|
||||
static GetElementPtrInst *CreateGEP(LLVMBuilder &B, Value *BasePtr,
|
||||
int Idx1, const char *Name);
|
||||
static GetElementPtrInst *CreateGEP(LLVMBuilder &B, Value *BasePtr,
|
||||
int Idx1, int Idx2, const char *Name);
|
||||
};
|
||||
|
||||
CollectorRegistry::Add<ShadowStackCollector>
|
||||
Y("shadow-stack",
|
||||
"Very portable collector for uncooperative code generators");
|
||||
|
||||
/// EscapeEnumerator - This is a little algorithm to find all escape points
|
||||
/// from a function so that "finally"-style code can be inserted. In addition
|
||||
/// to finding the existing return and unwind instructions, it also (if
|
||||
/// necessary) transforms any call instructions into invokes and sends them to
|
||||
/// a landing pad.
|
||||
///
|
||||
/// It's wrapped up in a state machine using the same transform C# uses for
|
||||
/// 'yield return' enumerators, This transform allows it to be non-allocating.
|
||||
class VISIBILITY_HIDDEN EscapeEnumerator {
|
||||
Function &F;
|
||||
const char *CleanupBBName;
|
||||
|
||||
// State.
|
||||
int State;
|
||||
Function::iterator StateBB, StateE;
|
||||
LLVMBuilder Builder;
|
||||
|
||||
public:
|
||||
EscapeEnumerator(Function &F, const char *N = "cleanup")
|
||||
: F(F), CleanupBBName(N), State(0) {}
|
||||
|
||||
LLVMBuilder *Next() {
|
||||
switch (State) {
|
||||
default:
|
||||
return 0;
|
||||
|
||||
case 0:
|
||||
StateBB = F.begin();
|
||||
StateE = F.end();
|
||||
State = 1;
|
||||
|
||||
case 1:
|
||||
// Find all 'return' and 'unwind' instructions.
|
||||
while (StateBB != StateE) {
|
||||
BasicBlock *CurBB = StateBB++;
|
||||
|
||||
// Branches and invokes do not escape, only unwind and return do.
|
||||
TerminatorInst *TI = CurBB->getTerminator();
|
||||
if (!isa<UnwindInst>(TI) && !isa<ReturnInst>(TI))
|
||||
continue;
|
||||
|
||||
Builder.SetInsertPoint(TI->getParent(), TI);
|
||||
return &Builder;
|
||||
}
|
||||
|
||||
State = 2;
|
||||
|
||||
// Find all 'call' instructions.
|
||||
SmallVector<Instruction*,16> Calls;
|
||||
for (Function::iterator BB = F.begin(),
|
||||
E = F.end(); BB != E; ++BB)
|
||||
for (BasicBlock::iterator II = BB->begin(),
|
||||
EE = BB->end(); II != EE; ++II)
|
||||
if (CallInst *CI = dyn_cast<CallInst>(II))
|
||||
if (!CI->getCalledFunction() ||
|
||||
!CI->getCalledFunction()->getIntrinsicID())
|
||||
Calls.push_back(CI);
|
||||
|
||||
if (Calls.empty())
|
||||
return 0;
|
||||
|
||||
// Create a cleanup block.
|
||||
BasicBlock *CleanupBB = new BasicBlock(CleanupBBName, &F);
|
||||
UnwindInst *UI = new UnwindInst(CleanupBB);
|
||||
|
||||
// Transform the 'call' instructions into 'invoke's branching to the
|
||||
// cleanup block. Go in reverse order to make prettier BB names.
|
||||
SmallVector<Value*,16> Args;
|
||||
for (unsigned I = Calls.size(); I != 0; ) {
|
||||
CallInst *CI = cast<CallInst>(Calls[--I]);
|
||||
|
||||
// Split the basic block containing the function call.
|
||||
BasicBlock *CallBB = CI->getParent();
|
||||
BasicBlock *NewBB =
|
||||
CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont");
|
||||
|
||||
// Remove the unconditional branch inserted at the end of CallBB.
|
||||
CallBB->getInstList().pop_back();
|
||||
NewBB->getInstList().remove(CI);
|
||||
|
||||
// Create a new invoke instruction.
|
||||
Args.clear();
|
||||
Args.append(CI->op_begin() + 1, CI->op_end());
|
||||
|
||||
InvokeInst *II = new InvokeInst(CI->getOperand(0),
|
||||
NewBB, CleanupBB,
|
||||
Args.begin(), Args.end(),
|
||||
CI->getName(), CallBB);
|
||||
II->setCallingConv(CI->getCallingConv());
|
||||
II->setParamAttrs(CI->getParamAttrs());
|
||||
CI->replaceAllUsesWith(II);
|
||||
delete CI;
|
||||
}
|
||||
|
||||
Builder.SetInsertPoint(UI->getParent(), UI);
|
||||
return &Builder;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
Collector *llvm::createShadowStackCollector() {
|
||||
return new ShadowStackCollector();
|
||||
}
|
||||
|
||||
ShadowStackCollector::ShadowStackCollector() : Head(0), StackEntryTy(0) {
|
||||
InitRoots = true;
|
||||
CustomRoots = true;
|
||||
}
|
||||
|
||||
Constant *ShadowStackCollector::GetFrameMap(Function &F) {
|
||||
// doInitialization creates the abstract type of this value.
|
||||
|
||||
Type *VoidPtr = PointerType::getUnqual(Type::Int8Ty);
|
||||
|
||||
// Truncate the ShadowStackDescriptor if some metadata is null.
|
||||
unsigned NumMeta = 0;
|
||||
SmallVector<Constant*,16> Metadata;
|
||||
for (unsigned I = 0; I != Roots.size(); ++I) {
|
||||
Constant *C = cast<Constant>(Roots[I].first->getOperand(2));
|
||||
if (!C->isNullValue())
|
||||
NumMeta = I + 1;
|
||||
Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
|
||||
}
|
||||
|
||||
Constant *BaseElts[] = {
|
||||
ConstantInt::get(Type::Int32Ty, Roots.size(), false),
|
||||
ConstantInt::get(Type::Int32Ty, NumMeta, false),
|
||||
};
|
||||
|
||||
Constant *DescriptorElts[] = {
|
||||
ConstantStruct::get(BaseElts, 2),
|
||||
ConstantArray::get(ArrayType::get(VoidPtr, NumMeta),
|
||||
Metadata.begin(), NumMeta)
|
||||
};
|
||||
|
||||
Constant *FrameMap = ConstantStruct::get(DescriptorElts, 2);
|
||||
|
||||
std::string TypeName("gc_map.");
|
||||
TypeName += utostr(NumMeta);
|
||||
F.getParent()->addTypeName(TypeName, FrameMap->getType());
|
||||
|
||||
// FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
|
||||
// that, short of multithreaded LLVM, it should be safe; all that is
|
||||
// necessary is that a simple Module::iterator loop not be invalidated.
|
||||
// Appending to the GlobalVariable list is safe in that sense.
|
||||
//
|
||||
// All of the output passes emit globals last. The ExecutionEngine
|
||||
// explicitly supports adding globals to the module after
|
||||
// initialization.
|
||||
//
|
||||
// Still, if it isn't deemed acceptable, then this transformation needs
|
||||
// to be a ModulePass (which means it cannot be in the 'llc' pipeline
|
||||
// (which uses a FunctionPassManager (which segfaults (not asserts) if
|
||||
// provided a ModulePass))).
|
||||
Constant *GV = new GlobalVariable(FrameMap->getType(), true,
|
||||
GlobalVariable::InternalLinkage,
|
||||
FrameMap, "__gc_" + F.getName(),
|
||||
F.getParent());
|
||||
|
||||
Constant *GEPIndices[2] = { ConstantInt::get(Type::Int32Ty, 0),
|
||||
ConstantInt::get(Type::Int32Ty, 0) };
|
||||
return ConstantExpr::getGetElementPtr(GV, GEPIndices, 2);
|
||||
}
|
||||
|
||||
const Type* ShadowStackCollector::GetConcreteStackEntryType(Function &F) {
|
||||
// doInitialization creates the generic version of this type.
|
||||
std::vector<const Type*> EltTys;
|
||||
EltTys.push_back(StackEntryTy);
|
||||
for (size_t I = 0; I != Roots.size(); I++)
|
||||
EltTys.push_back(Roots[I].second->getAllocatedType());
|
||||
Type *Ty = StructType::get(EltTys);
|
||||
|
||||
std::string TypeName("gc_stackentry.");
|
||||
TypeName += F.getName();
|
||||
F.getParent()->addTypeName(TypeName, Ty);
|
||||
|
||||
return Ty;
|
||||
}
|
||||
|
||||
/// doInitialization - If this module uses the GC intrinsics, find them now. If
|
||||
/// not, exit fast.
|
||||
bool ShadowStackCollector::initializeCustomLowering(Module &M) {
|
||||
// struct FrameMap {
|
||||
// int32_t NumRoots; // Number of roots in stack frame.
|
||||
// int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
|
||||
// void *Meta[]; // May be absent for roots without metadata.
|
||||
// };
|
||||
std::vector<const Type*> EltTys;
|
||||
EltTys.push_back(Type::Int32Ty); // 32 bits is ok up to a 32GB stack frame. :)
|
||||
EltTys.push_back(Type::Int32Ty); // Specifies length of variable length array.
|
||||
StructType *FrameMapTy = StructType::get(EltTys);
|
||||
M.addTypeName("gc_map", FrameMapTy);
|
||||
PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
|
||||
|
||||
// struct StackEntry {
|
||||
// ShadowStackEntry *Next; // Caller's stack entry.
|
||||
// FrameMap *Map; // Pointer to constant FrameMap.
|
||||
// void *Roots[]; // Stack roots (in-place array, so we pretend).
|
||||
// };
|
||||
OpaqueType *RecursiveTy = OpaqueType::get();
|
||||
|
||||
EltTys.clear();
|
||||
EltTys.push_back(PointerType::getUnqual(RecursiveTy));
|
||||
EltTys.push_back(FrameMapPtrTy);
|
||||
PATypeHolder LinkTyH = StructType::get(EltTys);
|
||||
|
||||
RecursiveTy->refineAbstractTypeTo(LinkTyH.get());
|
||||
StackEntryTy = cast<StructType>(LinkTyH.get());
|
||||
const PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
|
||||
M.addTypeName("gc_stackentry", LinkTyH.get()); // FIXME: Is this safe from
|
||||
// a FunctionPass?
|
||||
|
||||
// Get the root chain if it already exists.
|
||||
Head = M.getGlobalVariable("llvm_gc_root_chain");
|
||||
if (!Head) {
|
||||
// If the root chain does not exist, insert a new one with linkonce
|
||||
// linkage!
|
||||
Head = new GlobalVariable(StackEntryPtrTy, false,
|
||||
GlobalValue::LinkOnceLinkage,
|
||||
Constant::getNullValue(StackEntryPtrTy),
|
||||
"llvm_gc_root_chain", &M);
|
||||
} else if (Head->hasExternalLinkage() && Head->isDeclaration()) {
|
||||
Head->setInitializer(Constant::getNullValue(StackEntryPtrTy));
|
||||
Head->setLinkage(GlobalValue::LinkOnceLinkage);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ShadowStackCollector::IsNullValue(Value *V) {
|
||||
if (Constant *C = dyn_cast<Constant>(V))
|
||||
return C->isNullValue();
|
||||
return false;
|
||||
}
|
||||
|
||||
void ShadowStackCollector::CollectRoots(Function &F) {
|
||||
// FIXME: Account for original alignment. Could fragment the root array.
|
||||
// Approach 1: Null initialize empty slots at runtime. Yuck.
|
||||
// Approach 2: Emit a map of the array instead of just a count.
|
||||
|
||||
assert(Roots.empty() && "Not cleaned up?");
|
||||
|
||||
SmallVector<std::pair<CallInst*,AllocaInst*>,16> MetaRoots;
|
||||
|
||||
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
|
||||
for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
|
||||
if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
|
||||
if (Function *F = CI->getCalledFunction())
|
||||
if (F->getIntrinsicID() == Intrinsic::gcroot) {
|
||||
std::pair<CallInst*,AllocaInst*> Pair = std::make_pair(
|
||||
CI, cast<AllocaInst>(
|
||||
IntrinsicInst::StripPointerCasts(CI->getOperand(1))));
|
||||
if (IsNullValue(CI->getOperand(2)))
|
||||
Roots.push_back(Pair);
|
||||
else
|
||||
MetaRoots.push_back(Pair);
|
||||
}
|
||||
|
||||
// Number roots with metadata (usually empty) at the beginning, so that the
|
||||
// FrameMap::Meta array can be elided.
|
||||
Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end());
|
||||
}
|
||||
|
||||
GetElementPtrInst *
|
||||
ShadowStackCollector::CreateGEP(LLVMBuilder &B, Value *BasePtr,
|
||||
int Idx, int Idx2, const char *Name) {
|
||||
Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0),
|
||||
ConstantInt::get(Type::Int32Ty, Idx),
|
||||
ConstantInt::get(Type::Int32Ty, Idx2) };
|
||||
return B.CreateGEP(BasePtr, Indices, Indices + 3, Name);
|
||||
}
|
||||
|
||||
GetElementPtrInst *
|
||||
ShadowStackCollector::CreateGEP(LLVMBuilder &B, Value *BasePtr,
|
||||
int Idx, const char *Name) {
|
||||
Value *Indices[] = { ConstantInt::get(Type::Int32Ty, 0),
|
||||
ConstantInt::get(Type::Int32Ty, Idx) };
|
||||
return B.CreateGEP(BasePtr, Indices, Indices + 2, Name);
|
||||
}
|
||||
|
||||
/// runOnFunction - Insert code to maintain the shadow stack.
|
||||
bool ShadowStackCollector::performCustomLowering(Function &F) {
|
||||
// Find calls to llvm.gcroot.
|
||||
CollectRoots(F);
|
||||
|
||||
// If there are no roots in this function, then there is no need to add a
|
||||
// stack map entry for it.
|
||||
if (Roots.empty())
|
||||
return false;
|
||||
|
||||
// Build the constant map and figure the type of the shadow stack entry.
|
||||
Value *FrameMap = GetFrameMap(F);
|
||||
const Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F);
|
||||
|
||||
// Build the shadow stack entry at the very start of the function.
|
||||
BasicBlock::iterator IP = F.getEntryBlock().begin();
|
||||
LLVMBuilder AtEntry(IP->getParent(), IP);
|
||||
|
||||
Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0,
|
||||
"gc_frame");
|
||||
|
||||
while (isa<AllocaInst>(IP)) ++IP;
|
||||
AtEntry.SetInsertPoint(IP->getParent(), IP);
|
||||
|
||||
// Initialize the map pointer and load the current head of the shadow stack.
|
||||
Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
|
||||
Instruction *EntryMapPtr = CreateGEP(AtEntry, StackEntry,0,1,"gc_frame.map");
|
||||
AtEntry.CreateStore(FrameMap, EntryMapPtr);
|
||||
|
||||
// After all the allocas...
|
||||
for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
|
||||
// For each root, find the corresponding slot in the aggregate...
|
||||
Value *SlotPtr = CreateGEP(AtEntry, StackEntry, 1 + I, "gc_root");
|
||||
|
||||
// And use it in lieu of the alloca.
|
||||
AllocaInst *OriginalAlloca = Roots[I].second;
|
||||
SlotPtr->takeName(OriginalAlloca);
|
||||
OriginalAlloca->replaceAllUsesWith(SlotPtr);
|
||||
}
|
||||
|
||||
// Move past the original stores inserted by Collector::InitRoots. This isn't
|
||||
// really necessary (the collector would never see the intermediate state),
|
||||
// but it's nicer not to push the half-initialized entry onto the stack.
|
||||
while (isa<StoreInst>(IP)) ++IP;
|
||||
AtEntry.SetInsertPoint(IP->getParent(), IP);
|
||||
|
||||
// Push the entry onto the shadow stack.
|
||||
Instruction *EntryNextPtr = CreateGEP(AtEntry,StackEntry,0,0,"gc_frame.next");
|
||||
Instruction *NewHeadVal = CreateGEP(AtEntry,StackEntry, 0, "gc_newhead");
|
||||
AtEntry.CreateStore(CurrentHead, EntryNextPtr);
|
||||
AtEntry.CreateStore(NewHeadVal, Head);
|
||||
|
||||
// For each instruction that escapes...
|
||||
EscapeEnumerator EE(F, "gc_cleanup");
|
||||
while (LLVMBuilder *AtExit = EE.Next()) {
|
||||
// Pop the entry from the shadow stack. Don't reuse CurrentHead from
|
||||
// AtEntry, since that would make the value live for the entire function.
|
||||
Instruction *EntryNextPtr2 = CreateGEP(*AtExit, StackEntry, 0, 0,
|
||||
"gc_frame.next");
|
||||
Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
|
||||
AtExit->CreateStore(SavedHead, Head);
|
||||
}
|
||||
|
||||
// Delete the original allocas (which are no longer used) and the intrinsic
|
||||
// calls (which are no longer valid). Doing this last avoids invalidating
|
||||
// iterators.
|
||||
for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
|
||||
Roots[I].first->eraseFromParent();
|
||||
Roots[I].second->eraseFromParent();
|
||||
}
|
||||
|
||||
F.dump();
|
||||
|
||||
Roots.clear();
|
||||
return true;
|
||||
}
|
@ -1,350 +0,0 @@
|
||||
//===-- LowerGC.cpp - Provide GC support for targets that don't -----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements lowering for the llvm.gc* intrinsics for targets that do
|
||||
// not natively support them (which includes the C backend). Note that the code
|
||||
// generated is not as efficient as it would be for targets that natively
|
||||
// support the GC intrinsics, but it is useful for getting new targets
|
||||
// up-and-running quickly.
|
||||
//
|
||||
// This pass implements the code transformation described in this paper:
|
||||
// "Accurate Garbage Collection in an Uncooperative Environment"
|
||||
// Fergus Henderson, ISMM, 2002
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "lowergc"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Constants.h"
|
||||
#include "llvm/DerivedTypes.h"
|
||||
#include "llvm/Instructions.h"
|
||||
#include "llvm/Module.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
class VISIBILITY_HIDDEN LowerGC : public FunctionPass {
|
||||
/// GCRootInt, GCReadInt, GCWriteInt - The function prototypes for the
|
||||
/// llvm.gcread/llvm.gcwrite/llvm.gcroot intrinsics.
|
||||
Function *GCRootInt, *GCReadInt, *GCWriteInt;
|
||||
|
||||
/// GCRead/GCWrite - These are the functions provided by the garbage
|
||||
/// collector for read/write barriers.
|
||||
Constant *GCRead, *GCWrite;
|
||||
|
||||
/// RootChain - This is the global linked-list that contains the chain of GC
|
||||
/// roots.
|
||||
GlobalVariable *RootChain;
|
||||
|
||||
/// MainRootRecordType - This is the type for a function root entry if it
|
||||
/// had zero roots.
|
||||
const Type *MainRootRecordType;
|
||||
public:
|
||||
static char ID; // Pass identification, replacement for typeid
|
||||
LowerGC() : FunctionPass((intptr_t)&ID),
|
||||
GCRootInt(0), GCReadInt(0), GCWriteInt(0),
|
||||
GCRead(0), GCWrite(0), RootChain(0), MainRootRecordType(0) {}
|
||||
virtual bool doInitialization(Module &M);
|
||||
virtual bool runOnFunction(Function &F);
|
||||
|
||||
private:
|
||||
const StructType *getRootRecordType(unsigned NumRoots);
|
||||
};
|
||||
|
||||
char LowerGC::ID = 0;
|
||||
RegisterPass<LowerGC>
|
||||
X("lowergc", "Lower GC intrinsics, for GCless code generators");
|
||||
}
|
||||
|
||||
/// createLowerGCPass - This function returns an instance of the "lowergc"
|
||||
/// pass, which lowers garbage collection intrinsics to normal LLVM code.
|
||||
FunctionPass *llvm::createLowerGCPass() {
|
||||
return new LowerGC();
|
||||
}
|
||||
|
||||
/// getRootRecordType - This function creates and returns the type for a root
|
||||
/// record containing 'NumRoots' roots.
|
||||
const StructType *LowerGC::getRootRecordType(unsigned NumRoots) {
|
||||
// Build a struct that is a type used for meta-data/root pairs.
|
||||
std::vector<const Type *> ST;
|
||||
ST.push_back(GCRootInt->getFunctionType()->getParamType(0));
|
||||
ST.push_back(GCRootInt->getFunctionType()->getParamType(1));
|
||||
StructType *PairTy = StructType::get(ST);
|
||||
|
||||
// Build the array of pairs.
|
||||
ArrayType *PairArrTy = ArrayType::get(PairTy, NumRoots);
|
||||
|
||||
// Now build the recursive list type.
|
||||
PATypeHolder RootListH =
|
||||
MainRootRecordType ? (Type*)MainRootRecordType : (Type*)OpaqueType::get();
|
||||
ST.clear();
|
||||
ST.push_back(PointerType::getUnqual(RootListH)); // Prev pointer
|
||||
ST.push_back(Type::Int32Ty); // NumElements in array
|
||||
ST.push_back(PairArrTy); // The pairs
|
||||
StructType *RootList = StructType::get(ST);
|
||||
if (MainRootRecordType)
|
||||
return RootList;
|
||||
|
||||
assert(NumRoots == 0 && "The main struct type should have zero entries!");
|
||||
cast<OpaqueType>((Type*)RootListH.get())->refineAbstractTypeTo(RootList);
|
||||
MainRootRecordType = RootListH;
|
||||
return cast<StructType>(RootListH.get());
|
||||
}
|
||||
|
||||
/// doInitialization - If this module uses the GC intrinsics, find them now. If
|
||||
/// not, this pass does not do anything.
|
||||
bool LowerGC::doInitialization(Module &M) {
|
||||
GCRootInt = M.getFunction("llvm.gcroot");
|
||||
GCReadInt = M.getFunction("llvm.gcread");
|
||||
GCWriteInt = M.getFunction("llvm.gcwrite");
|
||||
if (!GCRootInt && !GCReadInt && !GCWriteInt) return false;
|
||||
|
||||
PointerType *VoidPtr = PointerType::getUnqual(Type::Int8Ty);
|
||||
PointerType *VoidPtrPtr = PointerType::getUnqual(VoidPtr);
|
||||
|
||||
// If the program is using read/write barriers, find the implementations of
|
||||
// them from the GC runtime library.
|
||||
if (GCReadInt) // Make: sbyte* %llvm_gc_read(sbyte**)
|
||||
GCRead = M.getOrInsertFunction("llvm_gc_read", VoidPtr, VoidPtr, VoidPtrPtr,
|
||||
(Type *)0);
|
||||
if (GCWriteInt) // Make: void %llvm_gc_write(sbyte*, sbyte**)
|
||||
GCWrite = M.getOrInsertFunction("llvm_gc_write", Type::VoidTy,
|
||||
VoidPtr, VoidPtr, VoidPtrPtr, (Type *)0);
|
||||
|
||||
// If the program has GC roots, get or create the global root list.
|
||||
if (GCRootInt) {
|
||||
const StructType *RootListTy = getRootRecordType(0);
|
||||
const Type *PRLTy = PointerType::getUnqual(RootListTy);
|
||||
M.addTypeName("llvm_gc_root_ty", RootListTy);
|
||||
|
||||
// Get the root chain if it already exists.
|
||||
RootChain = M.getGlobalVariable("llvm_gc_root_chain", PRLTy);
|
||||
if (RootChain == 0) {
|
||||
// If the root chain does not exist, insert a new one with linkonce
|
||||
// linkage!
|
||||
RootChain = new GlobalVariable(PRLTy, false,
|
||||
GlobalValue::LinkOnceLinkage,
|
||||
Constant::getNullValue(PRLTy),
|
||||
"llvm_gc_root_chain", &M);
|
||||
} else if (RootChain->hasExternalLinkage() && RootChain->isDeclaration()) {
|
||||
RootChain->setInitializer(Constant::getNullValue(PRLTy));
|
||||
RootChain->setLinkage(GlobalValue::LinkOnceLinkage);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Coerce - If the specified operand number of the specified instruction does
|
||||
/// not have the specified type, insert a cast. Note that this only uses BitCast
|
||||
/// because the types involved are all pointers.
|
||||
static void Coerce(Instruction *I, unsigned OpNum, Type *Ty) {
|
||||
if (I->getOperand(OpNum)->getType() != Ty) {
|
||||
if (Constant *C = dyn_cast<Constant>(I->getOperand(OpNum)))
|
||||
I->setOperand(OpNum, ConstantExpr::getBitCast(C, Ty));
|
||||
else {
|
||||
CastInst *CI = new BitCastInst(I->getOperand(OpNum), Ty, "", I);
|
||||
I->setOperand(OpNum, CI);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// runOnFunction - If the program is using GC intrinsics, replace any
|
||||
/// read/write intrinsics with the appropriate read/write barrier calls, then
|
||||
/// inline them. Finally, build the data structures for
|
||||
bool LowerGC::runOnFunction(Function &F) {
|
||||
// Quick exit for programs that are not using GC mechanisms.
|
||||
if (!GCRootInt && !GCReadInt && !GCWriteInt) return false;
|
||||
|
||||
PointerType *VoidPtr = PointerType::getUnqual(Type::Int8Ty);
|
||||
PointerType *VoidPtrPtr = PointerType::getUnqual(VoidPtr);
|
||||
|
||||
// If there are read/write barriers in the program, perform a quick pass over
|
||||
// the function eliminating them. While we are at it, remember where we see
|
||||
// calls to llvm.gcroot.
|
||||
std::vector<CallInst*> GCRoots;
|
||||
std::vector<CallInst*> NormalCalls;
|
||||
|
||||
bool MadeChange = false;
|
||||
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
|
||||
for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
|
||||
if (CallInst *CI = dyn_cast<CallInst>(II++)) {
|
||||
if (!CI->getCalledFunction() ||
|
||||
!CI->getCalledFunction()->isIntrinsic())
|
||||
NormalCalls.push_back(CI); // Remember all normal function calls.
|
||||
|
||||
if (Function *F = CI->getCalledFunction())
|
||||
if (F == GCRootInt)
|
||||
GCRoots.push_back(CI);
|
||||
else if (F == GCReadInt || F == GCWriteInt) {
|
||||
if (F == GCWriteInt) {
|
||||
// Change a llvm.gcwrite call to call llvm_gc_write instead.
|
||||
CI->setOperand(0, GCWrite);
|
||||
// Insert casts of the operands as needed.
|
||||
Coerce(CI, 1, VoidPtr);
|
||||
Coerce(CI, 2, VoidPtr);
|
||||
Coerce(CI, 3, VoidPtrPtr);
|
||||
} else {
|
||||
Coerce(CI, 1, VoidPtr);
|
||||
Coerce(CI, 2, VoidPtrPtr);
|
||||
if (CI->getType() == VoidPtr) {
|
||||
CI->setOperand(0, GCRead);
|
||||
} else {
|
||||
// Create a whole new call to replace the old one.
|
||||
|
||||
// It sure would be nice to pass op_begin()+1,
|
||||
// op_begin()+2 but it runs into trouble with
|
||||
// CallInst::init's &*iterator, which requires a
|
||||
// conversion from Use* to Value*. The conversion
|
||||
// from Use to Value * is not useful because the
|
||||
// memory for Value * won't be contiguous.
|
||||
Value* Args[] = {
|
||||
CI->getOperand(1),
|
||||
CI->getOperand(2)
|
||||
};
|
||||
CallInst *NC = new CallInst(GCRead, Args, Args + 2,
|
||||
CI->getName(), CI);
|
||||
// These functions only deal with ptr type results so BitCast
|
||||
// is the correct kind of cast (no-op cast).
|
||||
Value *NV = new BitCastInst(NC, CI->getType(), "", CI);
|
||||
CI->replaceAllUsesWith(NV);
|
||||
BB->getInstList().erase(CI);
|
||||
CI = NC;
|
||||
}
|
||||
}
|
||||
|
||||
MadeChange = true;
|
||||
}
|
||||
}
|
||||
|
||||
// If there are no GC roots in this function, then there is no need to create
|
||||
// a GC list record for it.
|
||||
if (GCRoots.empty()) return MadeChange;
|
||||
|
||||
// Okay, there are GC roots in this function. On entry to the function, add a
|
||||
// record to the llvm_gc_root_chain, and remove it on exit.
|
||||
|
||||
// Create the alloca, and zero it out.
|
||||
const StructType *RootListTy = getRootRecordType(GCRoots.size());
|
||||
AllocaInst *AI = new AllocaInst(RootListTy, 0, "gcroots", F.begin()->begin());
|
||||
|
||||
// Insert the memset call after all of the allocas in the function.
|
||||
BasicBlock::iterator IP = AI;
|
||||
while (isa<AllocaInst>(IP)) ++IP;
|
||||
|
||||
Constant *Zero = ConstantInt::get(Type::Int32Ty, 0);
|
||||
Constant *One = ConstantInt::get(Type::Int32Ty, 1);
|
||||
|
||||
Value *Idx[2] = { Zero, Zero };
|
||||
|
||||
// Get a pointer to the prev pointer.
|
||||
Value *PrevPtrPtr = new GetElementPtrInst(AI, Idx, Idx + 2,
|
||||
"prevptrptr", IP);
|
||||
|
||||
// Load the previous pointer.
|
||||
Value *PrevPtr = new LoadInst(RootChain, "prevptr", IP);
|
||||
// Store the previous pointer into the prevptrptr
|
||||
new StoreInst(PrevPtr, PrevPtrPtr, IP);
|
||||
|
||||
// Set the number of elements in this record.
|
||||
Idx[1] = One;
|
||||
Value *NumEltsPtr = new GetElementPtrInst(AI, Idx, Idx + 2,
|
||||
"numeltsptr", IP);
|
||||
new StoreInst(ConstantInt::get(Type::Int32Ty, GCRoots.size()), NumEltsPtr,IP);
|
||||
|
||||
Value* Par[4];
|
||||
Par[0] = Zero;
|
||||
Par[1] = ConstantInt::get(Type::Int32Ty, 2);
|
||||
|
||||
const PointerType *PtrLocTy =
|
||||
cast<PointerType>(GCRootInt->getFunctionType()->getParamType(0));
|
||||
Constant *Null = ConstantPointerNull::get(PtrLocTy);
|
||||
|
||||
// Initialize all of the gcroot records now.
|
||||
for (unsigned i = 0, e = GCRoots.size(); i != e; ++i) {
|
||||
// Initialize the meta-data pointer.
|
||||
Par[2] = ConstantInt::get(Type::Int32Ty, i);
|
||||
Par[3] = One;
|
||||
Value *MetaDataPtr = new GetElementPtrInst(AI, Par, Par + 4,
|
||||
"MetaDataPtr", IP);
|
||||
assert(isa<Constant>(GCRoots[i]->getOperand(2)) && "Must be a constant");
|
||||
new StoreInst(GCRoots[i]->getOperand(2), MetaDataPtr, IP);
|
||||
|
||||
// Initialize the root pointer to null on entry to the function.
|
||||
Par[3] = Zero;
|
||||
Value *RootPtrPtr = new GetElementPtrInst(AI, Par, Par + 4,
|
||||
"RootEntPtr", IP);
|
||||
new StoreInst(Null, RootPtrPtr, IP);
|
||||
|
||||
// Each occurrance of the llvm.gcroot intrinsic now turns into an
|
||||
// initialization of the slot with the address.
|
||||
new StoreInst(GCRoots[i]->getOperand(1), RootPtrPtr, GCRoots[i]);
|
||||
}
|
||||
|
||||
// Now that the record is all initialized, store the pointer into the global
|
||||
// pointer.
|
||||
Value *C = new BitCastInst(AI, PointerType::getUnqual(MainRootRecordType), "", IP);
|
||||
new StoreInst(C, RootChain, IP);
|
||||
|
||||
// Eliminate all the gcroot records now.
|
||||
for (unsigned i = 0, e = GCRoots.size(); i != e; ++i)
|
||||
GCRoots[i]->getParent()->getInstList().erase(GCRoots[i]);
|
||||
|
||||
// On exit from the function we have to remove the entry from the GC root
|
||||
// chain. Doing this is straight-forward for return and unwind instructions:
|
||||
// just insert the appropriate copy.
|
||||
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
|
||||
if (isa<UnwindInst>(BB->getTerminator()) ||
|
||||
isa<ReturnInst>(BB->getTerminator())) {
|
||||
// We could reuse the PrevPtr loaded on entry to the function, but this
|
||||
// would make the value live for the whole function, which is probably a
|
||||
// bad idea. Just reload the value out of our stack entry.
|
||||
PrevPtr = new LoadInst(PrevPtrPtr, "prevptr", BB->getTerminator());
|
||||
new StoreInst(PrevPtr, RootChain, BB->getTerminator());
|
||||
}
|
||||
|
||||
// If an exception is thrown from a callee we have to make sure to
|
||||
// unconditionally take the record off the stack. For this reason, we turn
|
||||
// all call instructions into invoke whose cleanup pops the entry off the
|
||||
// stack. We only insert one cleanup block, which is shared by all invokes.
|
||||
if (!NormalCalls.empty()) {
|
||||
// Create the shared cleanup block.
|
||||
BasicBlock *Cleanup = new BasicBlock("gc_cleanup", &F);
|
||||
UnwindInst *UI = new UnwindInst(Cleanup);
|
||||
PrevPtr = new LoadInst(PrevPtrPtr, "prevptr", UI);
|
||||
new StoreInst(PrevPtr, RootChain, UI);
|
||||
|
||||
// Loop over all of the function calls, turning them into invokes.
|
||||
while (!NormalCalls.empty()) {
|
||||
CallInst *CI = NormalCalls.back();
|
||||
BasicBlock *CBB = CI->getParent();
|
||||
NormalCalls.pop_back();
|
||||
|
||||
// Split the basic block containing the function call.
|
||||
BasicBlock *NewBB = CBB->splitBasicBlock(CI, CBB->getName()+".cont");
|
||||
|
||||
// Remove the unconditional branch inserted at the end of the CBB.
|
||||
CBB->getInstList().pop_back();
|
||||
NewBB->getInstList().remove(CI);
|
||||
|
||||
// Create a new invoke instruction.
|
||||
std::vector<Value*> Args(CI->op_begin()+1, CI->op_end());
|
||||
|
||||
Value *II = new InvokeInst(CI->getCalledValue(), NewBB, Cleanup,
|
||||
Args.begin(), Args.end(), CI->getName(), CBB);
|
||||
cast<InvokeInst>(II)->setCallingConv(CI->getCallingConv());
|
||||
cast<InvokeInst>(II)->setParamAttrs(CI->getParamAttrs());
|
||||
CI->replaceAllUsesWith(II);
|
||||
delete CI;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
@ -97,24 +97,26 @@ void llvm_gc_write(void *V, void *ObjPtr, void **FieldPtr) { *FieldPtr = V; }
|
||||
* FIXME: This should be in a code-generator specific library, but for now this
|
||||
* will work for all code generators.
|
||||
*/
|
||||
typedef struct GCRoot {
|
||||
void **RootPtr;
|
||||
void *Meta;
|
||||
} GCRoot;
|
||||
struct FrameMap {
|
||||
int32_t NumRoots; // Number of roots in stack frame.
|
||||
int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
|
||||
void *Meta[]; // May be absent for roots without metadata.
|
||||
};
|
||||
|
||||
typedef struct GCRoots {
|
||||
struct GCRoots *Next;
|
||||
unsigned NumRoots;
|
||||
GCRoot RootRecords[];
|
||||
} GCRoots;
|
||||
GCRoots *llvm_gc_root_chain;
|
||||
struct StackEntry {
|
||||
ShadowStackEntry *Next; // Caller's stack entry.
|
||||
const FrameMap *Map; // Pointer to constant FrameMap.
|
||||
void *Roots[]; // Stack roots (in-place array).
|
||||
};
|
||||
StackEntry *llvm_gc_root_chain;
|
||||
|
||||
void llvm_cg_walk_gcroots(void (*FP)(void **Root, void *Meta)) {
|
||||
GCRoots *R = llvm_gc_root_chain;
|
||||
for (; R; R = R->Next) {
|
||||
for (StackEntry *R; R; R = R->Next) {
|
||||
unsigned i, e;
|
||||
for (i = 0, e = R->NumRoots; i != e; ++i)
|
||||
FP(R->RootRecords[i].RootPtr, R->RootRecords[i].Meta);
|
||||
for (i = 0, e = R->NumMeta; i != e; ++i)
|
||||
FP(&R->Roots[i], R->Map->Meta[i]);
|
||||
for (e = R->NumRoots; i != e; ++i)
|
||||
FP(&R->Roots[i], NULL);
|
||||
}
|
||||
}
|
||||
/* END FIXME! */
|
||||
|
17
test/CodeGen/Generic/GC/redundant_init.ll
Normal file
17
test/CodeGen/Generic/GC/redundant_init.ll
Normal file
@ -0,0 +1,17 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86 | \
|
||||
; RUN: ignore grep {movl..0} | count 0
|
||||
|
||||
%struct.obj = type { i8*, %struct.obj* }
|
||||
|
||||
declare void @g() gc "shadow-stack"
|
||||
|
||||
define void @f(i8* %o) gc "shadow-stack" {
|
||||
entry:
|
||||
%root = alloca i8*
|
||||
call void @llvm.gcroot(i8** %root, i8* null)
|
||||
store i8* %o, i8** %root
|
||||
call void @g()
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.gcroot(i8**, i8*)
|
Loading…
x
Reference in New Issue
Block a user