mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-12 17:32:19 +00:00
Teach loop-idiom about address space pointer sizes
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190491 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
f834dce7c7
commit
11250c1194
@ -953,6 +953,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
|
|||||||
Value *SplatValue = isBytewiseValue(StoredVal);
|
Value *SplatValue = isBytewiseValue(StoredVal);
|
||||||
Constant *PatternValue = 0;
|
Constant *PatternValue = 0;
|
||||||
|
|
||||||
|
unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
|
||||||
|
|
||||||
// If we're allowed to form a memset, and the stored value would be acceptable
|
// If we're allowed to form a memset, and the stored value would be acceptable
|
||||||
// for memset, use it.
|
// for memset, use it.
|
||||||
if (SplatValue && TLI->has(LibFunc::memset) &&
|
if (SplatValue && TLI->has(LibFunc::memset) &&
|
||||||
@ -961,8 +963,10 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
|
|||||||
CurLoop->isLoopInvariant(SplatValue)) {
|
CurLoop->isLoopInvariant(SplatValue)) {
|
||||||
// Keep and use SplatValue.
|
// Keep and use SplatValue.
|
||||||
PatternValue = 0;
|
PatternValue = 0;
|
||||||
} else if (TLI->has(LibFunc::memset_pattern16) &&
|
} else if (DestAS == 0 &&
|
||||||
|
TLI->has(LibFunc::memset_pattern16) &&
|
||||||
(PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
|
(PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
|
||||||
|
// Don't create memset_pattern16 calls for non-default address spaces.
|
||||||
// It looks like we can use PatternValue!
|
// It looks like we can use PatternValue!
|
||||||
SplatValue = 0;
|
SplatValue = 0;
|
||||||
} else {
|
} else {
|
||||||
@ -978,14 +982,15 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
|
|||||||
IRBuilder<> Builder(Preheader->getTerminator());
|
IRBuilder<> Builder(Preheader->getTerminator());
|
||||||
SCEVExpander Expander(*SE, "loop-idiom");
|
SCEVExpander Expander(*SE, "loop-idiom");
|
||||||
|
|
||||||
|
Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS);
|
||||||
|
|
||||||
// Okay, we have a strided store "p[i]" of a splattable value. We can turn
|
// Okay, we have a strided store "p[i]" of a splattable value. We can turn
|
||||||
// this into a memset in the loop preheader now if we want. However, this
|
// this into a memset in the loop preheader now if we want. However, this
|
||||||
// would be unsafe to do if there is anything else in the loop that may read
|
// would be unsafe to do if there is anything else in the loop that may read
|
||||||
// or write to the aliased location. Check for any overlap by generating the
|
// or write to the aliased location. Check for any overlap by generating the
|
||||||
// base pointer and checking the region.
|
// base pointer and checking the region.
|
||||||
unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
|
|
||||||
Value *BasePtr =
|
Value *BasePtr =
|
||||||
Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
|
Expander.expandCodeFor(Ev->getStart(), DestInt8PtrTy,
|
||||||
Preheader->getTerminator());
|
Preheader->getTerminator());
|
||||||
|
|
||||||
if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
|
if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
|
||||||
@ -1001,7 +1006,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
|
|||||||
|
|
||||||
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
|
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
|
||||||
// pointer size if it isn't already.
|
// pointer size if it isn't already.
|
||||||
Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
|
Type *IntPtr = Builder.getIntPtrTy(TD, DestAS);
|
||||||
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
|
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
|
||||||
|
|
||||||
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
|
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
|
||||||
@ -1021,11 +1026,15 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
|
|||||||
NumBytes,
|
NumBytes,
|
||||||
StoreAlignment);
|
StoreAlignment);
|
||||||
} else {
|
} else {
|
||||||
|
// Everything is emitted in the default address space.
|
||||||
|
Type *Int8PtrTy = DestInt8PtrTy;
|
||||||
|
|
||||||
Module *M = TheStore->getParent()->getParent()->getParent();
|
Module *M = TheStore->getParent()->getParent()->getParent();
|
||||||
Value *MSP = M->getOrInsertFunction("memset_pattern16",
|
Value *MSP = M->getOrInsertFunction("memset_pattern16",
|
||||||
Builder.getVoidTy(),
|
Builder.getVoidTy(),
|
||||||
Builder.getInt8PtrTy(),
|
Int8PtrTy,
|
||||||
Builder.getInt8PtrTy(), IntPtr,
|
Int8PtrTy,
|
||||||
|
IntPtr,
|
||||||
(void*)0);
|
(void*)0);
|
||||||
|
|
||||||
// Otherwise we should form a memset_pattern16. PatternValue is known to be
|
// Otherwise we should form a memset_pattern16. PatternValue is known to be
|
||||||
@ -1035,7 +1044,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
|
|||||||
PatternValue, ".memset_pattern");
|
PatternValue, ".memset_pattern");
|
||||||
GV->setUnnamedAddr(true); // Ok to merge these.
|
GV->setUnnamedAddr(true); // Ok to merge these.
|
||||||
GV->setAlignment(16);
|
GV->setAlignment(16);
|
||||||
Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
|
Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
|
||||||
NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
|
NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1111,17 +1120,17 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
|
|||||||
|
|
||||||
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
|
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
|
||||||
// pointer size if it isn't already.
|
// pointer size if it isn't already.
|
||||||
Type *IntPtr = TD->getIntPtrType(SI->getContext());
|
Type *IntPtrTy = Builder.getIntPtrTy(TD, SI->getPointerAddressSpace());
|
||||||
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);
|
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
|
||||||
|
|
||||||
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
|
const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtrTy, 1),
|
||||||
SCEV::FlagNUW);
|
SCEV::FlagNUW);
|
||||||
if (StoreSize != 1)
|
if (StoreSize != 1)
|
||||||
NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
|
NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
|
||||||
SCEV::FlagNUW);
|
SCEV::FlagNUW);
|
||||||
|
|
||||||
Value *NumBytes =
|
Value *NumBytes =
|
||||||
Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());
|
Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator());
|
||||||
|
|
||||||
CallInst *NewCall =
|
CallInst *NewCall =
|
||||||
Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
|
Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
|
||||||
|
@ -0,0 +1,69 @@
|
|||||||
|
; RUN: opt -S -indvars -o - %s | FileCheck %s
|
||||||
|
target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-n8:16:32:64"
|
||||||
|
|
||||||
|
; Derived from ptriv in lftr-reuse.ll
|
||||||
|
define void @ptriv_as2(i8 addrspace(2)* %base, i32 %n) nounwind {
|
||||||
|
; CHECK-LABEL: @ptriv_as2(
|
||||||
|
entry:
|
||||||
|
%idx.trunc = trunc i32 %n to i8
|
||||||
|
%add.ptr = getelementptr inbounds i8 addrspace(2)* %base, i8 %idx.trunc
|
||||||
|
%cmp1 = icmp ult i8 addrspace(2)* %base, %add.ptr
|
||||||
|
br i1 %cmp1, label %for.body, label %for.end
|
||||||
|
|
||||||
|
; Make sure the added GEP has the right index type
|
||||||
|
; CHECK: %lftr.limit = getelementptr i8 addrspace(2)* %base, i8 %0
|
||||||
|
|
||||||
|
; CHECK: for.body:
|
||||||
|
; CHECK: phi i8 addrspace(2)*
|
||||||
|
; CHECK-NOT: phi
|
||||||
|
; CHECK-NOT: add{{^rspace}}
|
||||||
|
; CHECK: icmp ne i8 addrspace(2)*
|
||||||
|
; CHECK: br i1
|
||||||
|
for.body:
|
||||||
|
%p.02 = phi i8 addrspace(2)* [ %base, %entry ], [ %incdec.ptr, %for.body ]
|
||||||
|
; cruft to make the IV useful
|
||||||
|
%sub.ptr.lhs.cast = ptrtoint i8 addrspace(2)* %p.02 to i8
|
||||||
|
%sub.ptr.rhs.cast = ptrtoint i8 addrspace(2)* %base to i8
|
||||||
|
%sub.ptr.sub = sub i8 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
|
||||||
|
store i8 %sub.ptr.sub, i8 addrspace(2)* %p.02
|
||||||
|
%incdec.ptr = getelementptr inbounds i8 addrspace(2)* %p.02, i32 1
|
||||||
|
%cmp = icmp ult i8 addrspace(2)* %incdec.ptr, %add.ptr
|
||||||
|
br i1 %cmp, label %for.body, label %for.end
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @ptriv_as3(i8 addrspace(3)* %base, i32 %n) nounwind {
|
||||||
|
; CHECK-LABEL: @ptriv_as3(
|
||||||
|
entry:
|
||||||
|
%idx.trunc = trunc i32 %n to i16
|
||||||
|
%add.ptr = getelementptr inbounds i8 addrspace(3)* %base, i16 %idx.trunc
|
||||||
|
%cmp1 = icmp ult i8 addrspace(3)* %base, %add.ptr
|
||||||
|
br i1 %cmp1, label %for.body, label %for.end
|
||||||
|
|
||||||
|
; Make sure the added GEP has the right index type
|
||||||
|
; CHECK: %lftr.limit = getelementptr i8 addrspace(3)* %base, i16 %0
|
||||||
|
|
||||||
|
; CHECK: for.body:
|
||||||
|
; CHECK: phi i8 addrspace(3)*
|
||||||
|
; CHECK-NOT: phi
|
||||||
|
; CHECK-NOT: add{{^rspace}}
|
||||||
|
; CHECK: icmp ne i8 addrspace(3)*
|
||||||
|
; CHECK: br i1
|
||||||
|
for.body:
|
||||||
|
%p.02 = phi i8 addrspace(3)* [ %base, %entry ], [ %incdec.ptr, %for.body ]
|
||||||
|
; cruft to make the IV useful
|
||||||
|
%sub.ptr.lhs.cast = ptrtoint i8 addrspace(3)* %p.02 to i16
|
||||||
|
%sub.ptr.rhs.cast = ptrtoint i8 addrspace(3)* %base to i16
|
||||||
|
%sub.ptr.sub = sub i16 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
|
||||||
|
%conv = trunc i16 %sub.ptr.sub to i8
|
||||||
|
store i8 %conv, i8 addrspace(3)* %p.02
|
||||||
|
%incdec.ptr = getelementptr inbounds i8 addrspace(3)* %p.02, i32 1
|
||||||
|
%cmp = icmp ult i8 addrspace(3)* %incdec.ptr, %add.ptr
|
||||||
|
br i1 %cmp, label %for.body, label %for.end
|
||||||
|
|
||||||
|
for.end:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
91
test/Transforms/LoopIdiom/basic-address-space.ll
Normal file
91
test/Transforms/LoopIdiom/basic-address-space.ll
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
|
||||||
|
|
||||||
|
target datalayout = "e-p:32:32:32-p1:64:64:64-p2:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-n8:16:32:64"
|
||||||
|
target triple = "x86_64-apple-darwin10.0.0"
|
||||||
|
|
||||||
|
; Two dimensional nested loop should be promoted to one big memset.
|
||||||
|
define void @test10(i8 addrspace(2)* %X) nounwind ssp {
|
||||||
|
; CHECK-LABEL: @test10(
|
||||||
|
; CHECK: entry:
|
||||||
|
; CHECK-NEXT: call void @llvm.memset.p2i8.i16(i8 addrspace(2)* %X, i8 0, i16 10000, i32 1, i1 false)
|
||||||
|
; CHECK-NOT: store
|
||||||
|
; CHECK: ret void
|
||||||
|
|
||||||
|
entry:
|
||||||
|
br label %bb.nph
|
||||||
|
|
||||||
|
bb.nph: ; preds = %entry, %for.inc10
|
||||||
|
%i.04 = phi i16 [ 0, %entry ], [ %inc12, %for.inc10 ]
|
||||||
|
br label %for.body5
|
||||||
|
|
||||||
|
for.body5: ; preds = %for.body5, %bb.nph
|
||||||
|
%j.02 = phi i16 [ 0, %bb.nph ], [ %inc, %for.body5 ]
|
||||||
|
%mul = mul nsw i16 %i.04, 100
|
||||||
|
%add = add nsw i16 %j.02, %mul
|
||||||
|
%arrayidx = getelementptr inbounds i8 addrspace(2)* %X, i16 %add
|
||||||
|
store i8 0, i8 addrspace(2)* %arrayidx, align 1
|
||||||
|
%inc = add nsw i16 %j.02, 1
|
||||||
|
%cmp4 = icmp eq i16 %inc, 100
|
||||||
|
br i1 %cmp4, label %for.inc10, label %for.body5
|
||||||
|
|
||||||
|
for.inc10: ; preds = %for.body5
|
||||||
|
%inc12 = add nsw i16 %i.04, 1
|
||||||
|
%cmp = icmp eq i16 %inc12, 100
|
||||||
|
br i1 %cmp, label %for.end13, label %bb.nph
|
||||||
|
|
||||||
|
for.end13: ; preds = %for.inc10
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @test11_pattern(i32 addrspace(2)* nocapture %P) nounwind ssp {
|
||||||
|
; CHECK-LABEL: @test11_pattern(
|
||||||
|
; CHECK-NOT: memset_pattern
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body: ; preds = %entry, %for.body
|
||||||
|
%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %for.body ]
|
||||||
|
%arrayidx = getelementptr i32 addrspace(2)* %P, i64 %indvar
|
||||||
|
store i32 1, i32 addrspace(2)* %arrayidx, align 4
|
||||||
|
%indvar.next = add i64 %indvar, 1
|
||||||
|
%exitcond = icmp eq i64 %indvar.next, 10000
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end: ; preds = %for.body
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; PR9815 - This is a partial overlap case that cannot be safely transformed
|
||||||
|
; into a memcpy.
|
||||||
|
@g_50 = addrspace(2) global [7 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0], align 16
|
||||||
|
|
||||||
|
|
||||||
|
define i32 @test14() nounwind {
|
||||||
|
; CHECK-LABEL: @test14(
|
||||||
|
; CHECK: for.body:
|
||||||
|
; CHECK: load i32
|
||||||
|
; CHECK: store i32
|
||||||
|
; CHECK: br i1 %cmp
|
||||||
|
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
for.body: ; preds = %for.body, %entry
|
||||||
|
%tmp5 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
|
||||||
|
%add = add nsw i32 %tmp5, 4
|
||||||
|
%idxprom = sext i32 %add to i64
|
||||||
|
%arrayidx = getelementptr inbounds [7 x i32] addrspace(2)* @g_50, i32 0, i64 %idxprom
|
||||||
|
%tmp2 = load i32 addrspace(2)* %arrayidx, align 4
|
||||||
|
%add4 = add nsw i32 %tmp5, 5
|
||||||
|
%idxprom5 = sext i32 %add4 to i64
|
||||||
|
%arrayidx6 = getelementptr inbounds [7 x i32] addrspace(2)* @g_50, i32 0, i64 %idxprom5
|
||||||
|
store i32 %tmp2, i32 addrspace(2)* %arrayidx6, align 4
|
||||||
|
%inc = add nsw i32 %tmp5, 1
|
||||||
|
%cmp = icmp slt i32 %inc, 2
|
||||||
|
br i1 %cmp, label %for.body, label %for.end
|
||||||
|
|
||||||
|
for.end: ; preds = %for.body
|
||||||
|
%tmp8 = load i32 addrspace(2)* getelementptr inbounds ([7 x i32] addrspace(2)* @g_50, i32 0, i64 6), align 4
|
||||||
|
ret i32 %tmp8
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,88 @@
|
|||||||
|
; RUN: opt < %s -loop-reduce -S | FileCheck %s
|
||||||
|
; CHECK: bb1:
|
||||||
|
; CHECK: load double addrspace(1)* [[IV:%[^,]+]]
|
||||||
|
; CHECK: store double {{.*}}, double addrspace(1)* [[IV]]
|
||||||
|
|
||||||
|
; CHECK-NOT: cast
|
||||||
|
; Make sure the GEP has the right index type
|
||||||
|
; CHECK: getelementptr double addrspace(1)* [[IV]], i16 1
|
||||||
|
; CHECK: br {{.*}} label %bb1
|
||||||
|
|
||||||
|
; Make sure the GEP has the right index type
|
||||||
|
; CHECK: getelementptr double addrspace(1)* {{.*}}, i16
|
||||||
|
|
||||||
|
|
||||||
|
; This test tests several things. The load and store should use the
|
||||||
|
; same address instead of having it computed twice, and SCEVExpander should
|
||||||
|
; be able to reconstruct the full getelementptr, despite it having a few
|
||||||
|
; obstacles set in its way.
|
||||||
|
; We only check that the inner loop (bb1-bb2) is "reduced" because LSR
|
||||||
|
; currently only operates on inner loops.
|
||||||
|
|
||||||
|
target datalayout = "e-p:64:64:64-p1:16:16:16-n16:32:64"
|
||||||
|
|
||||||
|
define void @foo(i64 %n, i64 %m, i64 %o, i64 %q, double addrspace(1)* nocapture %p) nounwind {
|
||||||
|
entry:
|
||||||
|
%tmp = icmp sgt i64 %n, 0 ; <i1> [#uses=1]
|
||||||
|
br i1 %tmp, label %bb.nph3, label %return
|
||||||
|
|
||||||
|
bb.nph: ; preds = %bb2.preheader
|
||||||
|
%tmp1 = mul i64 %tmp16, %i.02 ; <i64> [#uses=1]
|
||||||
|
%tmp2 = mul i64 %tmp19, %i.02 ; <i64> [#uses=1]
|
||||||
|
br label %bb1
|
||||||
|
|
||||||
|
bb1: ; preds = %bb2, %bb.nph
|
||||||
|
%j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ] ; <i64> [#uses=3]
|
||||||
|
%tmp3 = add i64 %j.01, %tmp1 ; <i64> [#uses=1]
|
||||||
|
%tmp4 = add i64 %j.01, %tmp2 ; <i64> [#uses=1]
|
||||||
|
%z0 = add i64 %tmp3, 5203
|
||||||
|
%tmp5 = getelementptr double addrspace(1)* %p, i64 %z0 ; <double addrspace(1)*> [#uses=1]
|
||||||
|
%tmp6 = load double addrspace(1)* %tmp5, align 8 ; <double> [#uses=1]
|
||||||
|
%tmp7 = fdiv double %tmp6, 2.100000e+00 ; <double> [#uses=1]
|
||||||
|
%z1 = add i64 %tmp4, 5203
|
||||||
|
%tmp8 = getelementptr double addrspace(1)* %p, i64 %z1 ; <double addrspace(1)*> [#uses=1]
|
||||||
|
store double %tmp7, double addrspace(1)* %tmp8, align 8
|
||||||
|
%tmp9 = add i64 %j.01, 1 ; <i64> [#uses=2]
|
||||||
|
br label %bb2
|
||||||
|
|
||||||
|
bb2: ; preds = %bb1
|
||||||
|
%tmp10 = icmp slt i64 %tmp9, %m ; <i1> [#uses=1]
|
||||||
|
br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge
|
||||||
|
|
||||||
|
bb2.bb3_crit_edge: ; preds = %bb2
|
||||||
|
br label %bb3
|
||||||
|
|
||||||
|
bb3: ; preds = %bb2.preheader, %bb2.bb3_crit_edge
|
||||||
|
%tmp11 = add i64 %i.02, 1 ; <i64> [#uses=2]
|
||||||
|
br label %bb4
|
||||||
|
|
||||||
|
bb4: ; preds = %bb3
|
||||||
|
%tmp12 = icmp slt i64 %tmp11, %n ; <i1> [#uses=1]
|
||||||
|
br i1 %tmp12, label %bb2.preheader, label %bb4.return_crit_edge
|
||||||
|
|
||||||
|
bb4.return_crit_edge: ; preds = %bb4
|
||||||
|
br label %bb4.return_crit_edge.split
|
||||||
|
|
||||||
|
bb4.return_crit_edge.split: ; preds = %bb.nph3, %bb4.return_crit_edge
|
||||||
|
br label %return
|
||||||
|
|
||||||
|
bb.nph3: ; preds = %entry
|
||||||
|
%tmp13 = icmp sgt i64 %m, 0 ; <i1> [#uses=1]
|
||||||
|
%tmp14 = mul i64 %n, 37 ; <i64> [#uses=1]
|
||||||
|
%tmp15 = mul i64 %tmp14, %o ; <i64> [#uses=1]
|
||||||
|
%tmp16 = mul i64 %tmp15, %q ; <i64> [#uses=1]
|
||||||
|
%tmp17 = mul i64 %n, 37 ; <i64> [#uses=1]
|
||||||
|
%tmp18 = mul i64 %tmp17, %o ; <i64> [#uses=1]
|
||||||
|
%tmp19 = mul i64 %tmp18, %q ; <i64> [#uses=1]
|
||||||
|
br i1 %tmp13, label %bb.nph3.split, label %bb4.return_crit_edge.split
|
||||||
|
|
||||||
|
bb.nph3.split: ; preds = %bb.nph3
|
||||||
|
br label %bb2.preheader
|
||||||
|
|
||||||
|
bb2.preheader: ; preds = %bb.nph3.split, %bb4
|
||||||
|
%i.02 = phi i64 [ %tmp11, %bb4 ], [ 0, %bb.nph3.split ] ; <i64> [#uses=3]
|
||||||
|
br i1 true, label %bb.nph, label %bb3
|
||||||
|
|
||||||
|
return: ; preds = %bb4.return_crit_edge.split, %entry
|
||||||
|
ret void
|
||||||
|
}
|
56
test/Transforms/LoopStrengthReduce/address-space-loop.ll
Normal file
56
test/Transforms/LoopStrengthReduce/address-space-loop.ll
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
; RUN: opt -S -loop-reduce < %s | FileCheck %s
|
||||||
|
|
||||||
|
; LSR shouldn't consider %t8 to be an interesting user of %t6, and it
|
||||||
|
; should be able to form pretty GEPs.
|
||||||
|
|
||||||
|
target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||||
|
|
||||||
|
; Copy of uglygep with a different address space
|
||||||
|
; This tests expandAddToGEP uses the right smaller integer type for
|
||||||
|
; another address space
|
||||||
|
define void @Z4() nounwind {
|
||||||
|
; CHECK-LABEL: @Z4(
|
||||||
|
bb:
|
||||||
|
br label %bb3
|
||||||
|
|
||||||
|
bb1: ; preds = %bb3
|
||||||
|
br i1 undef, label %bb10, label %bb2
|
||||||
|
|
||||||
|
bb2: ; preds = %bb1
|
||||||
|
%t = add i16 %t4, 1 ; <i16> [#uses=1]
|
||||||
|
br label %bb3
|
||||||
|
|
||||||
|
bb3: ; preds = %bb2, %bb
|
||||||
|
%t4 = phi i16 [ %t, %bb2 ], [ 0, %bb ] ; <i16> [#uses=3]
|
||||||
|
br label %bb1
|
||||||
|
|
||||||
|
; CHECK: bb10:
|
||||||
|
; CHECK-NEXT: %t7 = icmp eq i16 %t4, 0
|
||||||
|
; Hoist %t2 computation outside the loop.
|
||||||
|
; CHECK-NEXT: [[SCEVGEP:%[^ ]+]] = getelementptr i8 addrspace(1)* undef, i16 %t4
|
||||||
|
; CHECK-NEXT: br label %bb14
|
||||||
|
bb10: ; preds = %bb1
|
||||||
|
%t7 = icmp eq i16 %t4, 0 ; <i1> [#uses=1]
|
||||||
|
%t3 = add i16 %t4, 16 ; <i16> [#uses=1]
|
||||||
|
br label %bb14
|
||||||
|
|
||||||
|
; CHECK: bb14:
|
||||||
|
; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[SCEVGEP]]
|
||||||
|
; CHECK-NEXT: %t6 = load float addrspace(1)* addrspace(1)* undef
|
||||||
|
; Fold %t3's add within the address.
|
||||||
|
; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float addrspace(1)* %t6, i16 4
|
||||||
|
; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float addrspace(1)* [[SCEVGEP1]] to i8 addrspace(1)*
|
||||||
|
; Use the induction variable (%t4) to access the right element
|
||||||
|
; CHECK-NEXT: [[ADDRESS:%[^ ]+]] = getelementptr i8 addrspace(1)* [[SCEVGEP2]], i16 %t4
|
||||||
|
; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[ADDRESS]]
|
||||||
|
; CHECK-NEXT: br label %bb14
|
||||||
|
bb14: ; preds = %bb14, %bb10
|
||||||
|
%t2 = getelementptr inbounds i8 addrspace(1)* undef, i16 %t4 ; <i8*> [#uses=1]
|
||||||
|
store i8 undef, i8 addrspace(1)* %t2
|
||||||
|
%t6 = load float addrspace(1)* addrspace(1)* undef
|
||||||
|
%t8 = bitcast float addrspace(1)* %t6 to i8 addrspace(1)* ; <i8*> [#uses=1]
|
||||||
|
%t9 = getelementptr inbounds i8 addrspace(1)* %t8, i16 %t3 ; <i8*> [#uses=1]
|
||||||
|
store i8 undef, i8 addrspace(1)* %t9
|
||||||
|
br label %bb14
|
||||||
|
}
|
||||||
|
|
56
test/Transforms/LoopStrengthReduce/uglygep-address-space.ll
Normal file
56
test/Transforms/LoopStrengthReduce/uglygep-address-space.ll
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
; RUN: opt < %s -loop-reduce -S | FileCheck %s
|
||||||
|
|
||||||
|
; LSR shouldn't consider %t8 to be an interesting user of %t6, and it
|
||||||
|
; should be able to form pretty GEPs.
|
||||||
|
|
||||||
|
target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||||
|
|
||||||
|
; Copy of uglygep with a different address space
|
||||||
|
; This tests expandAddToGEP uses the right smaller integer type for
|
||||||
|
; another address space
|
||||||
|
define void @Z4() nounwind {
|
||||||
|
; CHECK: define void @Z4
|
||||||
|
bb:
|
||||||
|
br label %bb3
|
||||||
|
|
||||||
|
bb1: ; preds = %bb3
|
||||||
|
br i1 undef, label %bb10, label %bb2
|
||||||
|
|
||||||
|
bb2: ; preds = %bb1
|
||||||
|
%t = add i16 %t4, 1 ; <i16> [#uses=1]
|
||||||
|
br label %bb3
|
||||||
|
|
||||||
|
bb3: ; preds = %bb2, %bb
|
||||||
|
%t4 = phi i16 [ %t, %bb2 ], [ 0, %bb ] ; <i16> [#uses=3]
|
||||||
|
br label %bb1
|
||||||
|
|
||||||
|
; CHECK: bb10:
|
||||||
|
; CHECK-NEXT: %t7 = icmp eq i16 %t4, 0
|
||||||
|
; Hoist %t2 computation outside the loop.
|
||||||
|
; CHECK-NEXT: [[SCEVGEP:%[^ ]+]] = getelementptr i8 addrspace(1)* undef, i16 %t4
|
||||||
|
; CHECK-NEXT: br label %bb14
|
||||||
|
bb10: ; preds = %bb1
|
||||||
|
%t7 = icmp eq i16 %t4, 0 ; <i1> [#uses=1]
|
||||||
|
%t3 = add i16 %t4, 16 ; <i16> [#uses=1]
|
||||||
|
br label %bb14
|
||||||
|
|
||||||
|
; CHECK: bb14:
|
||||||
|
; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[SCEVGEP]]
|
||||||
|
; CHECK-NEXT: %t6 = load float addrspace(1)* addrspace(1)* undef
|
||||||
|
; Fold %t3's add within the address.
|
||||||
|
; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float addrspace(1)* %t6, i16 4
|
||||||
|
; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float addrspace(1)* [[SCEVGEP1]] to i8 addrspace(1)*
|
||||||
|
; Use the induction variable (%t4) to access the right element
|
||||||
|
; CHECK-NEXT: [[ADDRESS:%[^ ]+]] = getelementptr i8 addrspace(1)* [[SCEVGEP2]], i16 %t4
|
||||||
|
; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[ADDRESS]]
|
||||||
|
; CHECK-NEXT: br label %bb14
|
||||||
|
bb14: ; preds = %bb14, %bb10
|
||||||
|
%t2 = getelementptr inbounds i8 addrspace(1)* undef, i16 %t4 ; <i8*> [#uses=1]
|
||||||
|
store i8 undef, i8 addrspace(1)* %t2
|
||||||
|
%t6 = load float addrspace(1)* addrspace(1)* undef
|
||||||
|
%t8 = bitcast float addrspace(1)* %t6 to i8 addrspace(1)* ; <i8*> [#uses=1]
|
||||||
|
%t9 = getelementptr inbounds i8 addrspace(1)* %t8, i16 %t3 ; <i8*> [#uses=1]
|
||||||
|
store i8 undef, i8 addrspace(1)* %t9
|
||||||
|
br label %bb14
|
||||||
|
}
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user