mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-17 20:23:59 +00:00
SLP Vectorizer: Canonicalize tree operands of commutative binary operators.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214338 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -427,6 +427,12 @@ public:
|
|||||||
/// \brief Perform LICM and CSE on the newly generated gather sequences.
|
/// \brief Perform LICM and CSE on the newly generated gather sequences.
|
||||||
void optimizeGatherSequence();
|
void optimizeGatherSequence();
|
||||||
|
|
||||||
|
/// \brief Get the instruction numbering for a given Instruction.
|
||||||
|
int getIndex(Instruction *I) {
|
||||||
|
BlockNumbering &BN = getBlockNumbering(I->getParent());
|
||||||
|
return BN.getIndex(I);
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct TreeEntry;
|
struct TreeEntry;
|
||||||
|
|
||||||
@ -2231,7 +2237,8 @@ private:
|
|||||||
unsigned collectStores(BasicBlock *BB, BoUpSLP &R);
|
unsigned collectStores(BasicBlock *BB, BoUpSLP &R);
|
||||||
|
|
||||||
/// \brief Try to vectorize a chain that starts at two arithmetic instrs.
|
/// \brief Try to vectorize a chain that starts at two arithmetic instrs.
|
||||||
bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
|
bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R,
|
||||||
|
BinaryOperator *V = nullptr);
|
||||||
|
|
||||||
/// \brief Try to vectorize a list of operands.
|
/// \brief Try to vectorize a list of operands.
|
||||||
/// \@param BuildVector A list of users to ignore for the purpose of
|
/// \@param BuildVector A list of users to ignore for the purpose of
|
||||||
@ -2404,10 +2411,23 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
|
|||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
|
bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R,
|
||||||
|
BinaryOperator *V) {
|
||||||
if (!A || !B)
|
if (!A || !B)
|
||||||
return false;
|
return false;
|
||||||
Value *VL[] = { A, B };
|
Value *VL[] = { A, B };
|
||||||
|
|
||||||
|
// Canonicalize operands based on source order, so that the ordering in the
|
||||||
|
// expression tree more closely matches the ordering of the source.
|
||||||
|
if (V && V->isCommutative() && isa<Instruction>(A) && isa<Instruction>(B) &&
|
||||||
|
cast<Instruction>(A)->getParent() == cast<Instruction>(B)->getParent()) {
|
||||||
|
assert(V->getOperand(0) == A && V->getOperand(1) == B &&
|
||||||
|
"Expected operands in order.");
|
||||||
|
int IndexA = R.getIndex(cast<Instruction>(A));
|
||||||
|
int IndexB = R.getIndex(cast<Instruction>(B));
|
||||||
|
if (IndexA > IndexB)
|
||||||
|
std::swap(VL[0], VL[1]);
|
||||||
|
}
|
||||||
return tryToVectorizeList(VL, R);
|
return tryToVectorizeList(VL, R);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2508,7 +2528,7 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Try to vectorize V.
|
// Try to vectorize V.
|
||||||
if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
|
if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R, V))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
|
BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
|
||||||
@ -3018,15 +3038,15 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < 2; ++i) {
|
for (int i = 0; i < 2; ++i) {
|
||||||
if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) {
|
if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) {
|
||||||
if (tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R)) {
|
if (tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R, BI)) {
|
||||||
Changed = true;
|
Changed = true;
|
||||||
// We would like to start over since some instructions are deleted
|
// We would like to start over since some instructions are deleted
|
||||||
// and the iterator may become invalid value.
|
// and the iterator may become invalid value.
|
||||||
it = BB->begin();
|
it = BB->begin();
|
||||||
e = BB->end();
|
e = BB->end();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
75
test/Transforms/SLPVectorizer/AArch64/commute.ll
Normal file
75
test/Transforms/SLPVectorizer/AArch64/commute.ll
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
; RUN: opt -S -slp-vectorizer %s | FileCheck %s
|
||||||
|
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
|
||||||
|
target triple = "aarch64--linux-gnu"
|
||||||
|
|
||||||
|
%structA = type { [2 x float] }
|
||||||
|
|
||||||
|
define void @test1(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
|
||||||
|
; CHECK-LABEL: test1
|
||||||
|
; CHECK: %arrayidx4 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 0
|
||||||
|
; CHECK: %arrayidx9 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 1
|
||||||
|
; CHECK: %3 = bitcast float* %arrayidx4 to <2 x float>*
|
||||||
|
; CHECK: %4 = load <2 x float>* %3, align 4
|
||||||
|
; CHECK: %5 = fsub <2 x float> %2, %4
|
||||||
|
; CHECK: %6 = fmul <2 x float> %5, %5
|
||||||
|
; CHECK: %7 = extractelement <2 x float> %6, i32 0
|
||||||
|
; CHECK: %8 = extractelement <2 x float> %6, i32 1
|
||||||
|
; CHECK: %add = fadd fast float %7, %8
|
||||||
|
; CHECK: %cmp = fcmp oeq float %add, 0.000000e+00
|
||||||
|
|
||||||
|
entry:
|
||||||
|
br label %for.body3.lr.ph
|
||||||
|
|
||||||
|
for.body3.lr.ph:
|
||||||
|
%conv5 = sitofp i32 %ymin to float
|
||||||
|
%conv = sitofp i32 %xmin to float
|
||||||
|
%arrayidx4 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 0
|
||||||
|
%0 = load float* %arrayidx4, align 4
|
||||||
|
%sub = fsub fast float %conv, %0
|
||||||
|
%arrayidx9 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 1
|
||||||
|
%1 = load float* %arrayidx9, align 4
|
||||||
|
%sub10 = fsub fast float %conv5, %1
|
||||||
|
%mul11 = fmul fast float %sub, %sub
|
||||||
|
%mul12 = fmul fast float %sub10, %sub10
|
||||||
|
%add = fadd fast float %mul11, %mul12
|
||||||
|
%cmp = fcmp oeq float %add, 0.000000e+00
|
||||||
|
br i1 %cmp, label %for.body3.lr.ph, label %for.end27
|
||||||
|
|
||||||
|
for.end27:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @test2(%structA* nocapture readonly %J, i32 %xmin, i32 %ymin) {
|
||||||
|
; CHECK-LABEL: test2
|
||||||
|
; CHECK: %arrayidx4 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 0
|
||||||
|
; CHECK: %arrayidx9 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 1
|
||||||
|
; CHECK: %3 = bitcast float* %arrayidx4 to <2 x float>*
|
||||||
|
; CHECK: %4 = load <2 x float>* %3, align 4
|
||||||
|
; CHECK: %5 = fsub <2 x float> %2, %4
|
||||||
|
; CHECK: %6 = fmul <2 x float> %5, %5
|
||||||
|
; CHECK: %7 = extractelement <2 x float> %6, i32 0
|
||||||
|
; CHECK: %8 = extractelement <2 x float> %6, i32 1
|
||||||
|
; CHECK: %add = fadd fast float %8, %7
|
||||||
|
; CHECK: %cmp = fcmp oeq float %add, 0.000000e+00
|
||||||
|
|
||||||
|
entry:
|
||||||
|
br label %for.body3.lr.ph
|
||||||
|
|
||||||
|
for.body3.lr.ph:
|
||||||
|
%conv5 = sitofp i32 %ymin to float
|
||||||
|
%conv = sitofp i32 %xmin to float
|
||||||
|
%arrayidx4 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 0
|
||||||
|
%0 = load float* %arrayidx4, align 4
|
||||||
|
%sub = fsub fast float %conv, %0
|
||||||
|
%arrayidx9 = getelementptr inbounds %structA* %J, i64 0, i32 0, i64 1
|
||||||
|
%1 = load float* %arrayidx9, align 4
|
||||||
|
%sub10 = fsub fast float %conv5, %1
|
||||||
|
%mul11 = fmul fast float %sub, %sub
|
||||||
|
%mul12 = fmul fast float %sub10, %sub10
|
||||||
|
%add = fadd fast float %mul12, %mul11 ;;;<---- Operands commuted!!
|
||||||
|
%cmp = fcmp oeq float %add, 0.000000e+00
|
||||||
|
br i1 %cmp, label %for.body3.lr.ph, label %for.end27
|
||||||
|
|
||||||
|
for.end27:
|
||||||
|
ret void
|
||||||
|
}
|
Reference in New Issue
Block a user