[GC] CodeGenPrep transform: simplify offsetable relocate

The transform is somewhat involved, but the basic idea is simple: find
derived pointers that have been offset from the base pointer using gep
and replace the relocate of the derived pointer with a gep to the
relocated base pointer (with the same offset).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226060 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Ramkumar Ramachandra 2015-01-14 23:27:07 +00:00
parent df630fb822
commit fba4d82671
3 changed files with 245 additions and 3 deletions

View File

@ -194,12 +194,12 @@ class GCRelocateOperands {
/// The index into the associate statepoint's argument list
/// which contains the base pointer of the pointer whose
/// relocation this gc.relocate describes.
int basePtrIndex() {
unsigned basePtrIndex() {
return cast<ConstantInt>(RelocateCS.getArgument(1))->getZExtValue();
}
/// The index into the associate statepoint's argument list which
/// contains the pointer whose relocation this gc.relocate describes.
int derivedPtrIndex() {
unsigned derivedPtrIndex() {
return cast<ConstantInt>(RelocateCS.getArgument(2))->getZExtValue();
}
Value *basePtr() {

View File

@ -32,6 +32,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
@ -72,6 +73,10 @@ static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
cl::desc("Disable branch optimizations in CodeGenPrepare"));
static cl::opt<bool>
DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
cl::desc("Disable GC optimizations in CodeGenPrepare"));
static cl::opt<bool> DisableSelectToBranch(
"disable-cgp-select2branch", cl::Hidden, cl::init(false),
cl::desc("Disable select to branch conversion."));
@ -183,6 +188,7 @@ class TypePromotionTransaction;
const SmallVectorImpl<Instruction *> &Exts,
unsigned CreatedInst);
bool splitBranchCondition(Function &F);
bool simplifyOffsetableRelocate(Instruction &I);
};
}
@ -248,7 +254,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
BasicBlock *BB = I++;
bool ModifiedDTOnIteration = false;
MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration);
// Restart BB iteration if the dominator tree of the Function was changed
ModifiedDT |= ModifiedDTOnIteration;
if (ModifiedDTOnIteration)
@ -298,6 +304,16 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
EverMadeChange |= MadeChange;
}
if (!DisableGCOpts) {
SmallVector<Instruction *, 2> Statepoints;
for (BasicBlock &BB : F)
for (Instruction &I : BB)
if (isStatepoint(I))
Statepoints.push_back(&I);
for (auto &I : Statepoints)
EverMadeChange |= simplifyOffsetableRelocate(*I);
}
if (ModifiedDT && DT)
DT->recalculate(F);
@ -521,6 +537,144 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}
// Computes a map of base pointer relocation instructions to corresponding
// derived pointer relocation instructions given a vector of all relocate calls
static void computeBaseDerivedRelocateMap(
const SmallVectorImpl<User *> &AllRelocateCalls,
DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> &
RelocateInstMap) {
// Collect information in two maps: one primarily for locating the base object
// while filling the second map; the second map is the final structure holding
// a mapping between Base and corresponding Derived relocate calls
DenseMap<std::pair<unsigned, unsigned>, IntrinsicInst *> RelocateIdxMap;
for (auto &U : AllRelocateCalls) {
GCRelocateOperands ThisRelocate(U);
IntrinsicInst *I = cast<IntrinsicInst>(U);
auto K = std::make_pair(ThisRelocate.basePtrIndex(),
ThisRelocate.derivedPtrIndex());
RelocateIdxMap.insert(std::make_pair(K, I));
}
for (auto &Item : RelocateIdxMap) {
std::pair<unsigned, unsigned> Key = Item.first;
if (Key.first == Key.second)
// Base relocation: nothing to insert
continue;
IntrinsicInst *I = Item.second;
auto BaseKey = std::make_pair(Key.first, Key.first);
IntrinsicInst *Base = RelocateIdxMap[BaseKey];
if (!Base)
// TODO: We might want to insert a new base object relocate and gep off
// that, if there are enough derived object relocates.
continue;
RelocateInstMap[Base].push_back(I);
}
}
// Accepts a GEP and extracts the operands into a vector provided they're all
// small integer constants
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
SmallVectorImpl<Value *> &OffsetV) {
for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
// Only accept small constant integer operands
auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
if (!Op || Op->getZExtValue() > 20)
return false;
}
for (unsigned i = 1; i < GEP->getNumOperands(); i++)
OffsetV.push_back(GEP->getOperand(i));
return true;
}
// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
// replace, computes a replacement, and affects it.
static bool
simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
const SmallVectorImpl<IntrinsicInst *> &Targets) {
bool MadeChange = false;
for (auto &ToReplace : Targets) {
GCRelocateOperands MasterRelocate(RelocatedBase);
GCRelocateOperands ThisRelocate(ToReplace);
assert(ThisRelocate.basePtrIndex() == MasterRelocate.basePtrIndex() &&
"Not relocating a derived object of the original base object");
if (ThisRelocate.basePtrIndex() == ThisRelocate.derivedPtrIndex()) {
// A duplicate relocate call. TODO: coalesce duplicates.
continue;
}
Value *Base = ThisRelocate.basePtr();
auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.derivedPtr());
if (!Derived || Derived->getPointerOperand() != Base)
continue;
SmallVector<Value *, 2> OffsetV;
if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
continue;
// Create a Builder and replace the target callsite with a gep
IRBuilder<> Builder(ToReplace);
Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
Value *Replacement =
Builder.CreateGEP(RelocatedBase, makeArrayRef(OffsetV));
Instruction *ReplacementInst = cast<Instruction>(Replacement);
ReplacementInst->removeFromParent();
ReplacementInst->insertAfter(RelocatedBase);
Replacement->takeName(ToReplace);
ToReplace->replaceAllUsesWith(Replacement);
ToReplace->eraseFromParent();
MadeChange = true;
}
return MadeChange;
}
// Turns this:
//
// %base = ...
// %ptr = gep %base + 15
// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
// %base' = relocate(%tok, i32 4, i32 4)
// %ptr' = relocate(%tok, i32 4, i32 5)
// %val = load %ptr'
//
// into this:
//
// %base = ...
// %ptr = gep %base + 15
// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
// %base' = gc.relocate(%tok, i32 4, i32 4)
// %ptr' = gep %base' + 15
// %val = load %ptr'
bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
bool MadeChange = false;
SmallVector<User *, 2> AllRelocateCalls;
for (auto *U : I.users())
if (isGCRelocate(dyn_cast<Instruction>(U)))
// Collect all the relocate calls associated with a statepoint
AllRelocateCalls.push_back(U);
// We need atleast one base pointer relocation + one derived pointer
// relocation to mangle
if (AllRelocateCalls.size() < 2)
return false;
// RelocateInstMap is a mapping from the base relocate instruction to the
// corresponding derived relocate instructions
DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> RelocateInstMap;
computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
if (RelocateInstMap.empty())
return false;
for (auto &Item : RelocateInstMap)
// Item.first is the RelocatedBase to offset against
// Item.second is the vector of Targets to replace
MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
return MadeChange;
}
/// SinkCast - Sink the specified cast instruction into its user blocks
static bool SinkCast(CastInst *CI) {
BasicBlock *DefBB = CI->getParent();

View File

@ -0,0 +1,88 @@
; RUN: opt -codegenprepare -S < %s | FileCheck %s
target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
declare zeroext i1 @return_i1()
define i32 @test_sor_basic(i32* %base) {
; CHECK: getelementptr i32* %base, i32 15
; CHECK: getelementptr i32* %base-new, i32 15
entry:
%ptr = getelementptr i32* %base, i32 15
%tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
%base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4)
%ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
%ret = load i32* %ptr-new
ret i32 %ret
}
define i32 @test_sor_two_derived(i32* %base) {
; CHECK: getelementptr i32* %base, i32 15
; CHECK: getelementptr i32* %base, i32 12
; CHECK: getelementptr i32* %base-new, i32 15
; CHECK: getelementptr i32* %base-new, i32 12
entry:
%ptr = getelementptr i32* %base, i32 15
%ptr2 = getelementptr i32* %base, i32 12
%tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2)
%base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4)
%ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
%ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 6)
%ret = load i32* %ptr-new
ret i32 %ret
}
define i32 @test_sor_ooo(i32* %base) {
; CHECK: getelementptr i32* %base, i32 15
; CHECK: getelementptr i32* %base-new, i32 15
entry:
%ptr = getelementptr i32* %base, i32 15
%tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
%ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
%base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4)
%ret = load i32* %ptr-new
ret i32 %ret
}
define i32 @test_sor_gep_smallint([3 x i32]* %base) {
; CHECK: getelementptr [3 x i32]* %base, i32 0, i32 2
; CHECK: getelementptr [3 x i32]* %base-new, i32 0, i32 2
entry:
%ptr = getelementptr [3 x i32]* %base, i32 0, i32 2
%tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr)
%base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32 %tok, i32 4, i32 4)
%ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
%ret = load i32* %ptr-new
ret i32 %ret
}
define i32 @test_sor_gep_largeint([3 x i32]* %base) {
; CHECK: getelementptr [3 x i32]* %base, i32 0, i32 21
; CHECK-NOT: getelementptr [3 x i32]* %base-new, i32 0, i32 21
entry:
%ptr = getelementptr [3 x i32]* %base, i32 0, i32 21
%tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr)
%base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32 %tok, i32 4, i32 4)
%ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
%ret = load i32* %ptr-new
ret i32 %ret
}
define i32 @test_sor_noop(i32* %base) {
; CHECK: getelementptr i32* %base, i32 15
; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 6)
entry:
%ptr = getelementptr i32* %base, i32 15
%ptr2 = getelementptr i32* %base, i32 12
%tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2)
%ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
%ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 6)
%ret = load i32* %ptr-new
ret i32 %ret
}
declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()*, i32, i32, ...)
declare i32* @llvm.experimental.gc.relocate.p0i32(i32, i32, i32)
declare [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32, i32, i32)