mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-14 11:32:34 +00:00
SLPVectorize: Put horizontal reductions feeding a store under separate flag
Put them under a separate flag for experimentation. They are more likely to interfere with loop vectorization, which happens later in the pass pipeline.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191371 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
259a6006e8
commit
9660ebb398
@ -54,6 +54,11 @@ static cl::opt<bool>
|
|||||||
ShouldVectorizeHor("slp-vectorize-hor", cl::init(false), cl::Hidden,
|
ShouldVectorizeHor("slp-vectorize-hor", cl::init(false), cl::Hidden,
|
||||||
cl::desc("Attempt to vectorize horizontal reductions"));
|
cl::desc("Attempt to vectorize horizontal reductions"));
|
||||||
|
|
||||||
|
static cl::opt<bool> ShouldStartVectorizeHorAtStore(
|
||||||
|
"slp-vectorize-hor-store", cl::init(false), cl::Hidden,
|
||||||
|
cl::desc(
|
||||||
|
"Attempt to vectorize horizontal reductions feeding into a store"));
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
static const unsigned MinVecRegSize = 128;
|
static const unsigned MinVecRegSize = 128;
|
||||||
@ -2336,20 +2341,20 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Try to vectorize horizontal reductions feeding into a store.
|
// Try to vectorize horizontal reductions feeding into a store.
|
||||||
if (StoreInst *SI = dyn_cast<StoreInst>(it))
|
if (ShouldStartVectorizeHorAtStore)
|
||||||
if (BinaryOperator *BinOp =
|
if (StoreInst *SI = dyn_cast<StoreInst>(it))
|
||||||
dyn_cast<BinaryOperator>(SI->getValueOperand())) {
|
if (BinaryOperator *BinOp =
|
||||||
HorizontalReduction HorRdx;
|
dyn_cast<BinaryOperator>(SI->getValueOperand())) {
|
||||||
if (ShouldVectorizeHor &&
|
HorizontalReduction HorRdx;
|
||||||
((HorRdx.matchAssociativeReduction(0, BinOp, DL) &&
|
if (((HorRdx.matchAssociativeReduction(0, BinOp, DL) &&
|
||||||
HorRdx.tryToReduce(R, TTI)) ||
|
HorRdx.tryToReduce(R, TTI)) ||
|
||||||
tryToVectorize(BinOp, R))) {
|
tryToVectorize(BinOp, R))) {
|
||||||
Changed = true;
|
Changed = true;
|
||||||
it = BB->begin();
|
it = BB->begin();
|
||||||
e = BB->end();
|
e = BB->end();
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Try to vectorize trees that start at compare instructions.
|
// Try to vectorize trees that start at compare instructions.
|
||||||
if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
|
if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: opt -slp-vectorizer -slp-vectorize-hor -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s
|
; RUN: opt -slp-vectorizer -slp-vectorize-hor -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefix=NOSTORE
|
||||||
|
|
||||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||||
|
|
||||||
@ -15,9 +15,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
|
|||||||
; return sum;
|
; return sum;
|
||||||
; }
|
; }
|
||||||
|
|
||||||
; CHECK-LABEL: add_red
|
; NOSTORE-LABEL: add_red
|
||||||
; CHECK: fmul <4 x float>
|
; NOSTORE: fmul <4 x float>
|
||||||
; CHECK: shufflevector <4 x float>
|
; NOSTORE: shufflevector <4 x float>
|
||||||
|
|
||||||
define i32 @add_red(float* %A, i32 %n) {
|
define i32 @add_red(float* %A, i32 %n) {
|
||||||
entry:
|
entry:
|
||||||
@ -369,6 +369,8 @@ for.end:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-apple-macosx -mcpu=corei7-avx | FileCheck %s --check-prefix=STORE
|
||||||
|
|
||||||
; void foo(double * restrict A, double * restrict B, double * restrict C,
|
; void foo(double * restrict A, double * restrict B, double * restrict C,
|
||||||
; int n) {
|
; int n) {
|
||||||
; for (intptr_t i=0; i < n; ++i) {
|
; for (intptr_t i=0; i < n; ++i) {
|
||||||
@ -376,10 +378,10 @@ for.end:
|
|||||||
; }
|
; }
|
||||||
; }
|
; }
|
||||||
|
|
||||||
; CHECK-LABEL: store_red_double
|
; STORE-LABEL: store_red_double
|
||||||
; CHECK: fmul <2 x double>
|
; STORE: fmul <2 x double>
|
||||||
; CHECK: extractelement <2 x double>
|
; STORE: extractelement <2 x double>
|
||||||
; CHECK: extractelement <2 x double>
|
; STORE: extractelement <2 x double>
|
||||||
|
|
||||||
define void @store_red_double(double* noalias %A, double* noalias %B, double* noalias %C, i32 %n) {
|
define void @store_red_double(double* noalias %A, double* noalias %B, double* noalias %C, i32 %n) {
|
||||||
entry:
|
entry:
|
||||||
|
Loading…
Reference in New Issue
Block a user