[INDVARS] Extend widening of induction variables to "sub nsw" and "mul nsw" instructions.

Currently only "add nsw" instructions are widened. This patch eliminates tons of "sext" instructions for 64 bit code (and the corresponding target code) in cases like:

int N = 100;
float **A;

void foo(int x0, int x1)
{
        float * A_cur = &A[0][0];
        float * A_next = &A[1][0];
        for(int x = x0; x < x1; ++x)
        {
          // Currently only the [x+N] case is widened. The other 2 cases lead to sext.
          // This patch fixes it, so all 3 cases do not need sext.
          const float div = A_cur[x + N] + A_cur[x - N] + A_cur[x * N];
          A_next[x] = div;
        }
}
...
> clang++ test.cpp -march=core-avx2 -Ofast  -fno-unroll-loops -fno-tree-vectorize -S -o -

Differential Revision: http://reviews.llvm.org/D4695



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@216160 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Zinovy Nis 2014-08-21 08:25:45 +00:00
parent 6623af7892
commit 164cd0161e
2 changed files with 36 additions and 8 deletions

View File

@ -757,6 +757,9 @@ protected:
const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);
const SCEV *GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
unsigned OpCode) const;
Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
@ -833,13 +836,30 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
}
}
/// Build the SCEV expression for the binary operation \p OpCode applied to
/// \p LHS and \p RHS. Only Instruction::Add, Instruction::Sub and
/// Instruction::Mul are handled; any other opcode hits llvm_unreachable.
const SCEV *WidenIV::GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  switch (OpCode) {
  case Instruction::Add:
    return SE->getAddExpr(LHS, RHS);
  case Instruction::Sub:
    return SE->getMinusSCEV(LHS, RHS);
  case Instruction::Mul:
    return SE->getMulExpr(LHS, RHS);
  default:
    break;
  }

  // Callers are expected to filter the opcode before calling this helper.
  llvm_unreachable("Unsupported opcode.");
  return nullptr;
}
/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return it.
const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
// Handle the common case of add<nsw/nuw>
if (DU.NarrowUse->getOpcode() != Instruction::Add)
const unsigned OpCode = DU.NarrowUse->getOpcode();
// Only Add/Sub/Mul instructions supported yet.
if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
OpCode != Instruction::Mul)
return nullptr;
// One operand (NarrowDef) has already been extended to WideDef. Now determine
@ -859,14 +879,13 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
else
return nullptr;
// When creating this AddExpr, don't apply the current operations NSW or NUW
// When creating this SCEV expr, don't apply the current operations NSW or NUW
// flags. This instruction may be guarded by control flow that the no-wrap
// behavior depends on. Non-control-equivalent instructions can be mapped to
// the same SCEV expression, and it would be incorrect to transfer NSW/NUW
// semantics to those operations.
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));
GetSCEVByOpCode(SE->getSCEV(DU.WideDef), ExtendOperExpr, OpCode));
if (!AddRec || AddRec->getLoop() != L)
return nullptr;
return AddRec;

View File

@ -1,6 +1,6 @@
; RUN: opt < %s -indvars -S | FileCheck %s
; Test WidenIV::GetExtendedOperandRecurrence.
; add219 should be extended to i64 because it is nsw, even though its
; %add, %sub and %mul should be extended to i64 because they are nsw, even though their
; sext cannot be hoisted outside the loop.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@ -18,13 +18,22 @@ for.body153: ; preds = %for.body153, %for.b
br i1 undef, label %for.body170, label %for.body153
; CHECK: add nsw i64 %indvars.iv, 1
; CHECK: sub nsw i64 %indvars.iv, 2
; CHECK: mul nsw i64 %indvars.iv, 4
for.body170: ; preds = %for.body170, %for.body153
%i2.19 = phi i32 [ %add249, %for.body170 ], [ 0, %for.body153 ]
%add219 = add nsw i32 %i2.19, 1
%idxprom220 = sext i32 %add219 to i64
%add = add nsw i32 %i2.19, 1
%add.idxprom = sext i32 %add to i64
%sub = sub nsw i32 %i2.19, 2
%sub.idxprom = sext i32 %sub to i64
%mul = mul nsw i32 %i2.19, 4
%mul.idxprom = sext i32 %mul to i64
%add249 = add nsw i32 %i2.19, %shl132
br label %for.body170
for.end285: ; preds = %entry
ret void
}