[RewriteStatepointsForGC] Limited support for vectors of pointers

This patch adds limited support for inserting explicit relocations when there's a vector of pointers live over the statepoint. This doesn't handle the case where the vector contains a mix of base and non-base pointers; that's future work.

The current implementation just scalarizes the vector over the gc.statepoint before doing the explicit rewrite. An alternate approach would be to plumb the vector all the way though the backend lowering, but doing that appears challenging. In particular, the size of the indirect spill slot is currently assumed to be sizeof(pointer) throughout the backend.

In practice, this is enough to allow running the SLP and Loop vectorizers before RewriteStatepointsForGC.

Differential Revision: http://reviews.llvm.org/D8671



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234647 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Philip Reames 2015-04-10 21:48:25 +00:00
parent 8aca90e5b6
commit cb148c8541
2 changed files with 312 additions and 26 deletions

View File

@ -131,12 +131,54 @@ static bool isGCPointerType(const Type *T) {
return false;
}
// Return true if this type is one which a) is a gc pointer or contains a GC
// pointer and b) is of a type this code expects to encounter as a live value.
// (The insertion code will assert that a type which matches (a) and not (b)
// is not encountered.)
static bool isHandledGCPointerType(Type *T) {
// We fully support gc pointers
if (isGCPointerType(T))
return true;
// We partially support vectors of gc pointers. The code will assert if it
// can't handle something.
if (auto VT = dyn_cast<VectorType>(T))
if (isGCPointerType(VT->getElementType()))
return true;
return false;
}
#ifndef NDEBUG
/// Returns true if this type contains a gc pointer whether we know how to
/// handle that type or not.
static bool containsGCPtrType(Type *Ty) {
if(isGCPointerType(Ty))
return true;
if (VectorType *VT = dyn_cast<VectorType>(Ty))
return isGCPointerType(VT->getScalarType());
if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
return containsGCPtrType(AT->getElementType());
if (StructType *ST = dyn_cast<StructType>(Ty))
return std::any_of(ST->subtypes().begin(), ST->subtypes().end(),
[](Type *SubType) {
return containsGCPtrType(SubType);
});
return false;
}
// Returns true if this is a type which a) is a gc pointer or contains a GC
// pointer and b) is of a type which the code doesn't expect (i.e. first class
// aggregates). Used to trip assertions.
static bool isUnhandledGCPointerType(Type *Ty) {
return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty);
}
#endif
/// Return true if the Value is a gc reference type which is potentially used
/// after the instruction 'loc'. This is only used with the edge reachability
/// liveness code. Note: It is assumed the V dominates loc.
static bool isLiveGCReferenceAt(Value &V, Instruction *loc, DominatorTree &DT,
static bool isLiveGCReferenceAt(Value &V, Instruction *Loc, DominatorTree &DT,
LoopInfo *LI) {
if (!isGCPointerType(V.getType()))
if (!isHandledGCPointerType(V.getType()))
return false;
if (V.use_empty())
@ -146,23 +188,6 @@ static bool isLiveGCReferenceAt(Value &V, Instruction *loc, DominatorTree &DT,
return true;
}
#ifndef NDEBUG
static bool isAggWhichContainsGCPtrType(Type *Ty) {
if (VectorType *VT = dyn_cast<VectorType>(Ty))
return isGCPointerType(VT->getScalarType());
if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
return isGCPointerType(AT->getElementType()) ||
isAggWhichContainsGCPtrType(AT->getElementType());
if (StructType *ST = dyn_cast<StructType>(Ty))
return std::any_of(ST->subtypes().begin(), ST->subtypes().end(),
[](Type *SubType) {
return isGCPointerType(SubType) ||
isAggWhichContainsGCPtrType(SubType);
});
return false;
}
#endif
// Conservatively identifies any definitions which might be live at the
// given instruction. The analysis is performed immediately before the
// given instruction. Values defined by that instruction are not considered
@ -189,7 +214,7 @@ static void findLiveGCValuesAtInst(Instruction *term, BasicBlock *pred,
// Are there any gc pointer arguments live over this point? This needs to be
// special cased since arguments aren't defined in basic blocks.
for (Argument &arg : F->args()) {
assert(!isAggWhichContainsGCPtrType(arg.getType()) &&
assert(!isUnhandledGCPointerType(arg.getType()) &&
"support for FCA unimplemented");
if (is_live_gc_reference(arg)) {
@ -233,7 +258,7 @@ static void findLiveGCValuesAtInst(Instruction *term, BasicBlock *pred,
break;
}
assert(!isAggWhichContainsGCPtrType(inst.getType()) &&
assert(!isUnhandledGCPointerType(inst.getType()) &&
"support for FCA unimplemented");
if (is_live_gc_reference(inst)) {
@ -299,6 +324,51 @@ analyzeParsePointLiveness(DominatorTree &DT, const CallSite &CS,
result.liveset = liveset;
}
/// If we can trivially determine that this vector contains only base pointers,
/// return the base instruction.
static Value *findBaseOfVector(Value *I) {
assert(I->getType()->isVectorTy() &&
cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
"Illegal to ask for the base pointer of a non-pointer type");
// Each case parallels findBaseDefiningValue below, see that code for
// detailed motivation.
if (isa<Argument>(I))
// An incoming argument to the function is a base pointer
return I;
// We shouldn't see the address of a global as a vector value?
assert(!isa<GlobalVariable>(I) &&
"unexpected global variable found in base of vector");
// inlining could possibly introduce phi node that contains
// undef if callee has multiple returns
if (isa<UndefValue>(I))
// utterly meaningless, but useful for dealing with partially optimized
// code.
return I;
// Due to inheritance, this must be _after_ the global variable and undef
// checks
if (Constant *Con = dyn_cast<Constant>(I)) {
assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
"order of checks wrong!");
assert(Con->isNullValue() && "null is the only case which makes sense");
return Con;
}
if (isa<LoadInst>(I))
return I;
// Note: This code is currently rather incomplete. We are essentially only
// handling cases where the vector element is trivially a base pointer. We
// need to update the entire base pointer construction algorithm to know how
// to track vector elements and potentially scalarize, but the case which
// would motivate the work hasn't shown up in real workloads yet.
llvm_unreachable("no base found for vector element");
}
/// Helper function for findBasePointer - Will return a value which either a)
/// defines the base pointer for the input or b) blocks the simple search
/// (i.e. a PHI or Select of two derived pointers)
@ -306,10 +376,15 @@ static Value *findBaseDefiningValue(Value *I) {
assert(I->getType()->isPointerTy() &&
"Illegal to ask for the base pointer of a non-pointer type");
// There are instructions which can never return gc pointer values. Sanity
// check that this is actually true.
assert(!isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) &&
!isa<ShuffleVectorInst>(I) && "Vector types are not gc pointers");
// This case is a bit of a hack - it only handles extracts from vectors which
// trivially contain only base pointers. See note inside the function for
// how to improve this.
if (auto *EEI = dyn_cast<ExtractElementInst>(I)) {
Value *VectorOperand = EEI->getVectorOperand();
Value *VectorBase = findBaseOfVector(VectorOperand);
assert(VectorBase && "extract element not known to be a trivial base");
return EEI;
}
if (isa<Argument>(I))
// An incoming argument to the function is a base pointer
@ -1650,6 +1725,117 @@ static void addBasesAsLiveValues(StatepointLiveSetTy &liveset,
assert(liveset.size() == PointerToBase.size());
}
/// Remove any vector of pointers from the liveset by scalarizing them over the
/// statepoint instruction. Adds the scalarized pieces to the liveset. It
/// would be preferrable to include the vector in the statepoint itself, but
/// the lowering code currently does not handle that. Extending it would be
/// slightly non-trivial since it requires a format change. Given how rare
/// such cases are (for the moment?) scalarizing is an acceptable comprimise.
static void splitVectorValues(Instruction *StatepointInst,
StatepointLiveSetTy& LiveSet, DominatorTree &DT) {
SmallVector<Value *, 16> ToSplit;
for (Value *V : LiveSet)
if (isa<VectorType>(V->getType()))
ToSplit.push_back(V);
if (ToSplit.empty())
return;
Function &F = *(StatepointInst->getParent()->getParent());
DenseMap<Value*, AllocaInst*> AllocaMap;
// First is normal return, second is exceptional return (invoke only)
DenseMap<Value*, std::pair<Value*,Value*>> Replacements;
for (Value *V : ToSplit) {
LiveSet.erase(V);
AllocaInst *Alloca = new AllocaInst(V->getType(), "",
F.getEntryBlock().getFirstNonPHI());
AllocaMap[V] = Alloca;
VectorType *VT = cast<VectorType>(V->getType());
IRBuilder<> Builder(StatepointInst);
SmallVector<Value*, 16> Elements;
for (unsigned i = 0; i < VT->getNumElements(); i++)
Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i)));
LiveSet.insert(Elements.begin(), Elements.end());
auto InsertVectorReform = [&](Instruction *IP) {
Builder.SetInsertPoint(IP);
Builder.SetCurrentDebugLocation(IP->getDebugLoc());
Value *ResultVec = UndefValue::get(VT);
for (unsigned i = 0; i < VT->getNumElements(); i++)
ResultVec = Builder.CreateInsertElement(ResultVec, Elements[i],
Builder.getInt32(i));
return ResultVec;
};
if (isa<CallInst>(StatepointInst)) {
BasicBlock::iterator Next(StatepointInst);
Next++;
Instruction *IP = &*(Next);
Replacements[V].first = InsertVectorReform(IP);
Replacements[V].second = nullptr;
} else {
InvokeInst *Invoke = cast<InvokeInst>(StatepointInst);
// We've already normalized - check that we don't have shared destination
// blocks
BasicBlock *NormalDest = Invoke->getNormalDest();
assert(!isa<PHINode>(NormalDest->begin()));
BasicBlock *UnwindDest = Invoke->getUnwindDest();
assert(!isa<PHINode>(UnwindDest->begin()));
// Insert insert element sequences in both successors
Instruction *IP = &*(NormalDest->getFirstInsertionPt());
Replacements[V].first = InsertVectorReform(IP);
IP = &*(UnwindDest->getFirstInsertionPt());
Replacements[V].second = InsertVectorReform(IP);
}
}
for (Value *V : ToSplit) {
AllocaInst *Alloca = AllocaMap[V];
// Capture all users before we start mutating use lists
SmallVector<Instruction*, 16> Users;
for (User *U : V->users())
Users.push_back(cast<Instruction>(U));
for (Instruction *I : Users) {
if (auto Phi = dyn_cast<PHINode>(I)) {
for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++)
if (V == Phi->getIncomingValue(i)) {
LoadInst *Load = new LoadInst(Alloca, "",
Phi->getIncomingBlock(i)->getTerminator());
Phi->setIncomingValue(i, Load);
}
} else {
LoadInst *Load = new LoadInst(Alloca, "", I);
I->replaceUsesOfWith(V, Load);
}
}
// Store the original value and the replacement value into the alloca
StoreInst *Store = new StoreInst(V, Alloca);
if (auto I = dyn_cast<Instruction>(V))
Store->insertAfter(I);
else
Store->insertAfter(Alloca);
// Normal return for invoke, or call return
Instruction *Replacement = cast<Instruction>(Replacements[V].first);
(new StoreInst(Replacement, Alloca))->insertAfter(Replacement);
// Unwind return for invoke only
Replacement = cast_or_null<Instruction>(Replacements[V].second);
if (Replacement)
(new StoreInst(Replacement, Alloca))->insertAfter(Replacement);
}
// apply mem2reg to promote alloca to SSA
SmallVector<AllocaInst*, 16> Allocas;
for (Value *V : ToSplit)
Allocas.push_back(AllocaMap[V]);
PromoteMemToReg(Allocas, DT);
}
static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
SmallVectorImpl<CallSite> &toUpdate) {
#ifndef NDEBUG
@ -1680,7 +1866,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
SmallVector<Value *, 64> DeoptValues;
for (Use &U : StatepointCS.vm_state_args()) {
Value *Arg = cast<Value>(&U);
if (isGCPointerType(Arg->getType()))
assert(!isUnhandledGCPointerType(Arg->getType()) &&
"support for FCA unimplemented");
if (isHandledGCPointerType(Arg->getType()))
DeoptValues.push_back(Arg);
}
insertUseHolderAfter(CS, DeoptValues, holders);
@ -1698,6 +1886,17 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
// site.
findLiveReferences(F, DT, P, toUpdate, records);
// Do a limited scalarization of any live at safepoint vector values which
// contain pointers. This enables this pass to run after vectorization at
// the cost of some possible performance loss. TODO: it would be nice to
// natively support vectors all the way through the backend so we don't need
// to scalarize here.
for (size_t i = 0; i < records.size(); i++) {
struct PartiallyConstructedSafepointRecord &info = records[i];
Instruction *statepoint = toUpdate[i].getInstruction();
splitVectorValues(cast<Instruction>(statepoint), info.liveset, DT);
}
// B) Find the base pointers for each live pointer
/* scope for caching */ {
// Cache the 'defining value' relation used in the computation and

View File

@ -0,0 +1,87 @@
; Test that we can correctly handle vectors of pointers in statepoint
; rewriting. Currently, we scalarize, but that's an implementation detail.
; RUN: opt %s -rewrite-statepoints-for-gc -S | FileCheck %s
; A non-vector relocation for comparison
define i64 addrspace(1)* @test(i64 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-LABEL: test
; CHECK: gc.statepoint
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: ret i64 addrspace(1)* %obj.relocated
entry:
%safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %obj
}
; A base vector from a argument
define <2 x i64 addrspace(1)*> @test2(<2 x i64 addrspace(1)*> %obj) gc "statepoint-example" {
; CHECK-LABEL: test2
; CHECK: extractelement
; CHECK-NEXT: extractelement
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: insertelement
; CHECK-NEXT: insertelement
; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %5
entry:
%safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 0)
ret <2 x i64 addrspace(1)*> %obj
}
; A base vector from a load
define <2 x i64 addrspace(1)*> @test3(<2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
; CHECK-LABEL: test3
; CHECK: load
; CHECK-NEXT: extractelement
; CHECK-NEXT: extractelement
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: insertelement
; CHECK-NEXT: insertelement
; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %5
entry:
%obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
%safepoint_token = call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 0)
ret <2 x i64 addrspace(1)*> %obj
}
declare i32 @fake_personality_function()
; When a statepoint is an invoke rather than a call
define <2 x i64 addrspace(1)*> @test4(<2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
; CHECK-LABEL: test4
; CHECK: load
; CHECK-NEXT: extractelement
; CHECK-NEXT: extractelement
; CHECK-NEXT: gc.statepoint
entry:
%obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
invoke i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @do_safepoint, i32 0, i32 0, i32 0)
to label %normal_return unwind label %exceptional_return
; CHECK-LABEL: normal_return:
; CHECK: gc.relocate
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: insertelement
; CHECK-NEXT: insertelement
; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %6
normal_return: ; preds = %entry
ret <2 x i64 addrspace(1)*> %obj
; CHECK-LABEL: exceptional_return:
; CHECK: gc.relocate
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: insertelement
; CHECK-NEXT: insertelement
; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %10
exceptional_return: ; preds = %entry
%landing_pad4 = landingpad { i8*, i32 } personality i32 ()* @fake_personality_function
cleanup
ret <2 x i64 addrspace(1)*> %obj
}
declare void @do_safepoint()
declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()*, i32, i32, ...)