Reapply "[Reassociate] Add initial support for vector instructions."

This reapplies the patch previously committed at revision 232190.  It was
reverted at revision 232196 because it caused failures in tests that did not
expect operands to be commuted.  I have made those tests more resilient to
reassociation in revision 232206.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@232209 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Robert Lougher
2015-03-13 20:53:01 +00:00
parent 8b5546f6d5
commit fe795a5d20
2 changed files with 200 additions and 58 deletions

View File

@@ -321,10 +321,8 @@ unsigned Reassociate::getRank(Value *V) {
// If this is a not or neg instruction, do not count it for rank. This // If this is a not or neg instruction, do not count it for rank. This
// assures us that X and ~X will have the same rank. // assures us that X and ~X will have the same rank.
Type *Ty = V->getType(); if (!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) &&
if ((!Ty->isIntegerTy() && !Ty->isFloatingPointTy()) || !BinaryOperator::isFNeg(I))
(!BinaryOperator::isNot(I) && !BinaryOperator::isNeg(I) &&
!BinaryOperator::isFNeg(I)))
++Rank; ++Rank;
DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank << "\n"); DEBUG(dbgs() << "Calculated Rank[" << V->getName() << "] = " << Rank << "\n");
@@ -351,7 +349,7 @@ void Reassociate::canonicalizeOperands(Instruction *I) {
static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name, static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
Instruction *InsertBefore, Value *FlagsOp) { Instruction *InsertBefore, Value *FlagsOp) {
if (S1->getType()->isIntegerTy()) if (S1->getType()->isIntOrIntVectorTy())
return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore); return BinaryOperator::CreateAdd(S1, S2, Name, InsertBefore);
else { else {
BinaryOperator *Res = BinaryOperator *Res =
@@ -363,7 +361,7 @@ static BinaryOperator *CreateAdd(Value *S1, Value *S2, const Twine &Name,
static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name, static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
Instruction *InsertBefore, Value *FlagsOp) { Instruction *InsertBefore, Value *FlagsOp) {
if (S1->getType()->isIntegerTy()) if (S1->getType()->isIntOrIntVectorTy())
return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore); return BinaryOperator::CreateMul(S1, S2, Name, InsertBefore);
else { else {
BinaryOperator *Res = BinaryOperator *Res =
@@ -375,7 +373,7 @@ static BinaryOperator *CreateMul(Value *S1, Value *S2, const Twine &Name,
static BinaryOperator *CreateNeg(Value *S1, const Twine &Name, static BinaryOperator *CreateNeg(Value *S1, const Twine &Name,
Instruction *InsertBefore, Value *FlagsOp) { Instruction *InsertBefore, Value *FlagsOp) {
if (S1->getType()->isIntegerTy()) if (S1->getType()->isIntOrIntVectorTy())
return BinaryOperator::CreateNeg(S1, Name, InsertBefore); return BinaryOperator::CreateNeg(S1, Name, InsertBefore);
else { else {
BinaryOperator *Res = BinaryOperator::CreateFNeg(S1, Name, InsertBefore); BinaryOperator *Res = BinaryOperator::CreateFNeg(S1, Name, InsertBefore);
@@ -388,8 +386,8 @@ static BinaryOperator *CreateNeg(Value *S1, const Twine &Name,
/// ///
static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) { static BinaryOperator *LowerNegateToMultiply(Instruction *Neg) {
Type *Ty = Neg->getType(); Type *Ty = Neg->getType();
Constant *NegOne = Ty->isIntegerTy() ? ConstantInt::getAllOnesValue(Ty) Constant *NegOne = Ty->isIntOrIntVectorTy() ?
: ConstantFP::get(Ty, -1.0); ConstantInt::getAllOnesValue(Ty) : ConstantFP::get(Ty, -1.0);
BinaryOperator *Res = CreateMul(Neg->getOperand(1), NegOne, "", Neg, Neg); BinaryOperator *Res = CreateMul(Neg->getOperand(1), NegOne, "", Neg, Neg);
Neg->setOperand(1, Constant::getNullValue(Ty)); // Drop use of op. Neg->setOperand(1, Constant::getNullValue(Ty)); // Drop use of op.
@@ -872,7 +870,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
Constant *Undef = UndefValue::get(I->getType()); Constant *Undef = UndefValue::get(I->getType());
NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode), NewOp = BinaryOperator::Create(Instruction::BinaryOps(Opcode),
Undef, Undef, "", I); Undef, Undef, "", I);
if (NewOp->getType()->isFloatingPointTy()) if (NewOp->getType()->isFPOrFPVectorTy())
NewOp->setFastMathFlags(I->getFastMathFlags()); NewOp->setFastMathFlags(I->getFastMathFlags());
} else { } else {
NewOp = NodesToRewrite.pop_back_val(); NewOp = NodesToRewrite.pop_back_val();
@@ -1520,8 +1518,8 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// Insert a new multiply. // Insert a new multiply.
Type *Ty = TheOp->getType(); Type *Ty = TheOp->getType();
Constant *C = Ty->isIntegerTy() ? ConstantInt::get(Ty, NumFound) Constant *C = Ty->isIntOrIntVectorTy() ?
: ConstantFP::get(Ty, NumFound); ConstantInt::get(Ty, NumFound) : ConstantFP::get(Ty, NumFound);
Instruction *Mul = CreateMul(TheOp, C, "factor", I, I); Instruction *Mul = CreateMul(TheOp, C, "factor", I, I);
// Now that we have inserted a multiply, optimize it. This allows us to // Now that we have inserted a multiply, optimize it. This allows us to
@@ -1661,7 +1659,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// from an expression will drop a use of maxocc, and this can cause // from an expression will drop a use of maxocc, and this can cause
// RemoveFactorFromExpression on successive values to behave differently. // RemoveFactorFromExpression on successive values to behave differently.
Instruction *DummyInst = Instruction *DummyInst =
I->getType()->isIntegerTy() I->getType()->isIntOrIntVectorTy()
? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal) ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)
: BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal); : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal);
@@ -1792,7 +1790,7 @@ static Value *buildMultiplyTree(IRBuilder<> &Builder,
Value *LHS = Ops.pop_back_val(); Value *LHS = Ops.pop_back_val();
do { do {
if (LHS->getType()->isIntegerTy()) if (LHS->getType()->isIntOrIntVectorTy())
LHS = Builder.CreateMul(LHS, Ops.pop_back_val()); LHS = Builder.CreateMul(LHS, Ops.pop_back_val());
else else
LHS = Builder.CreateFMul(LHS, Ops.pop_back_val()); LHS = Builder.CreateFMul(LHS, Ops.pop_back_val());
@@ -2090,8 +2088,9 @@ void Reassociate::OptimizeInst(Instruction *I) {
if (I->isCommutative()) if (I->isCommutative())
canonicalizeOperands(I); canonicalizeOperands(I);
// Don't optimize vector instructions. // TODO: We should optimize vector Xor instructions, but they are
if (I->getType()->isVectorTy()) // currently unsupported.
if (I->getType()->isVectorTy() && I->getOpcode() == Instruction::Xor)
return; return;
// Don't optimize floating point instructions that don't have unsafe algebra. // Don't optimize floating point instructions that don't have unsafe algebra.
@@ -2170,9 +2169,6 @@ void Reassociate::OptimizeInst(Instruction *I) {
} }
void Reassociate::ReassociateExpression(BinaryOperator *I) { void Reassociate::ReassociateExpression(BinaryOperator *I) {
assert(!I->getType()->isVectorTy() &&
"Reassociation of vector instructions is not supported.");
// First, walk the expression tree, linearizing the tree, collecting the // First, walk the expression tree, linearizing the tree, collecting the
// operand information. // operand information.
SmallVector<RepeatedValue, 8> Tree; SmallVector<RepeatedValue, 8> Tree;

View File

@@ -1,46 +1,192 @@
; RUN: opt < %s -reassociate -S | FileCheck %s ; RUN: opt < %s -reassociate -S | FileCheck %s
; Canonicalize operands, but don't optimize floating point vector operations. ; Check that a*c+b*c is turned into (a+b)*c
define <4 x float> @test1() { define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test1 ; CHECK-LABEL: @test1
; CHECK-NEXT: %tmp1 = fsub fast <4 x float> zeroinitializer, zeroinitializer ; CHECK-NEXT: %tmp = fadd fast <4 x float> %b, %a
; CHECK-NEXT: %tmp2 = fmul fast <4 x float> %tmp1, zeroinitializer ; CHECK-NEXT: %tmp1 = fmul fast <4 x float> %tmp, %c
; CHECK-NEXT: ret <4 x float> %tmp1
%tmp1 = fsub fast <4 x float> zeroinitializer, zeroinitializer %mul = fmul fast <4 x float> %a, %c
%tmp2 = fmul fast <4 x float> zeroinitializer, %tmp1 %mul1 = fmul fast <4 x float> %b, %c
ret <4 x float> %tmp2 %add = fadd fast <4 x float> %mul, %mul1
ret <4 x float> %add
} }
; Commute integer vector operations. ; Check that a*a*b+a*a*c is turned into a*(a*(b+c)).
define <2 x i32> @test2(<2 x i32> %x, <2 x i32> %y) { define <2 x float> @test2(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
; CHECK-LABEL: test2 ; CHECK-LABEL: @test2
; CHECK-NEXT: %tmp1 = add <2 x i32> %x, %y ; CHECK-NEXT: fadd fast <2 x float> %c, %b
; CHECK-NEXT: %tmp2 = add <2 x i32> %x, %y ; CHECK-NEXT: fmul fast <2 x float> %a, %tmp2
; CHECK-NEXT: %tmp3 = add <2 x i32> %tmp1, %tmp2 ; CHECK-NEXT: fmul fast <2 x float> %tmp3, %a
; CHECK-NEXT: ret <2 x float>
%tmp1 = add <2 x i32> %x, %y %t0 = fmul fast <2 x float> %a, %b
%tmp2 = add <2 x i32> %y, %x %t1 = fmul fast <2 x float> %a, %t0
%tmp3 = add <2 x i32> %tmp1, %tmp2 %t2 = fmul fast <2 x float> %a, %c
ret <2 x i32> %tmp3 %t3 = fmul fast <2 x float> %a, %t2
%t4 = fadd fast <2 x float> %t1, %t3
ret <2 x float> %t4
} }
define <2 x i32> @test3(<2 x i32> %x, <2 x i32> %y) { ; Check that a*b+a*c+d is turned into a*(b+c)+d.
; CHECK-LABEL: test3 define <2 x double> @test3(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %d) {
; CHECK-NEXT: %tmp1 = mul <2 x i32> %x, %y ; CHECK-LABEL: @test3
; CHECK-NEXT: %tmp2 = mul <2 x i32> %x, %y ; CHECK-NEXT: fadd fast <2 x double> %c, %b
; CHECK-NEXT: %tmp3 = mul <2 x i32> %tmp1, %tmp2 ; CHECK-NEXT: fmul fast <2 x double> %tmp, %a
; CHECK-NEXT: fadd fast <2 x double> %tmp1, %d
; CHECK-NEXT: ret <2 x double>
%tmp1 = mul <2 x i32> %x, %y %t0 = fmul fast <2 x double> %a, %b
%tmp2 = mul <2 x i32> %y, %x %t1 = fmul fast <2 x double> %a, %c
%tmp3 = mul <2 x i32> %tmp1, %tmp2 %t2 = fadd fast <2 x double> %t1, %d
ret <2 x i32> %tmp3 %t3 = fadd fast <2 x double> %t0, %t2
ret <2 x double> %t3
} }
define <2 x i32> @test4(<2 x i32> %x, <2 x i32> %y) { ; No fast-math.
; CHECK-LABEL: test4 define <2 x float> @test4(<2 x float> %A) {
; CHECK-NEXT: %tmp1 = and <2 x i32> %x, %y ; CHECK-LABEL: @test4
; CHECK-NEXT: %tmp2 = and <2 x i32> %x, %y ; CHECK-NEXT: %X = fadd <2 x float> %A, <float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT: %tmp3 = and <2 x i32> %tmp1, %tmp2 ; CHECK-NEXT: %Y = fadd <2 x float> %A, <float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT: %R = fsub <2 x float> %X, %Y
; CHECK-NEXT: ret <2 x float> %R
%X = fadd <2 x float> %A, < float 1.000000e+00, float 1.000000e+00 >
%Y = fadd <2 x float> %A, < float 1.000000e+00, float 1.000000e+00 >
%R = fsub <2 x float> %X, %Y
ret <2 x float> %R
}
; Check 47*X + 47*X -> 94*X.
; %Y scales %X by a splat of 47.0 and %Z adds %Y to itself. The expected
; output is a single fast multiply of %X by a splat of 94.0 followed directly
; by the return. All floating-point operations here carry the `fast` flag.
define <2 x float> @test5(<2 x float> %X) {
; CHECK-LABEL: @test5
; CHECK-NEXT: fmul fast <2 x float> %X, <float 9.400000e+01, float 9.400000e+01>
; CHECK-NEXT: ret <2 x float>
%Y = fmul fast <2 x float> %X, <float 4.700000e+01, float 4.700000e+01>
%Z = fadd fast <2 x float> %Y, %Y
ret <2 x float> %Z
}
; Check X+X+X -> 3*X.
; %Y is %X added to itself and %Z adds %X once more, so %X occurs three times
; in the sum. The expected output is one fast multiply of %X by a splat of 3.0
; followed directly by the return.
define <2 x float> @test6(<2 x float> %X) {
; CHECK-LABEL: @test6
; CHECK-NEXT: fmul fast <2 x float> %X, <float 3.000000e+00, float 3.000000e+00>
; CHECK-NEXT: ret <2 x float>
%Y = fadd fast <2 x float> %X ,%X
%Z = fadd fast <2 x float> %Y, %X
ret <2 x float> %Z
}
; Check 127*W+50*W -> 177*W.
; %X and %Y multiply the same value %W by two different splat constants
; (127.0 and 50.0) and %Z sums them. The expected output is one fast multiply
; of %W by a splat of 177.0, the sum of the two constants.
define <2 x double> @test7(<2 x double> %W) {
; CHECK-LABEL: @test7
; CHECK-NEXT: fmul fast <2 x double> %W, <double 1.770000e+02, double 1.770000e+02>
; CHECK-NEXT: ret <2 x double>
%X = fmul fast <2 x double> %W, <double 127.0, double 127.0>
%Y = fmul fast <2 x double> %W, <double 50.0, double 50.0>
%Z = fadd fast <2 x double> %Y, %X
ret <2 x double> %Z
}
; Check X*12*12 -> X*144.
; The splat constant 12.0 is the left operand of the first multiply and the
; right operand of the second. The expected output is a single fast multiply
; with %arg on the left and a splat of 144.0 on the right, and the returned
; value is expected to keep the name %tmp2.
define <2 x float> @test8(<2 x float> %arg) {
; CHECK-LABEL: @test8
; CHECK: fmul fast <2 x float> %arg, <float 1.440000e+02, float 1.440000e+02>
; CHECK-NEXT: ret <2 x float> %tmp2
%tmp1 = fmul fast <2 x float> <float 1.200000e+01, float 1.200000e+01>, %arg
%tmp2 = fmul fast <2 x float> %tmp1, <float 1.200000e+01, float 1.200000e+01>
ret <2 x float> %tmp2
}
; Check (b+(a+1234))+-a -> b+1234.
; %3 negates %a by subtracting it from a zero splat, and %4 adds that negation
; to a sum that already contains %a. The expected output no longer mentions
; %a: a single fast add of %b and a splat of 1234.0, then the return.
define <2 x double> @test9(<2 x double> %b, <2 x double> %a) {
; CHECK-LABEL: @test9
; CHECK: fadd fast <2 x double> %b, <double 1.234000e+03, double 1.234000e+03>
; CHECK-NEXT: ret <2 x double>
%1 = fadd fast <2 x double> %a, <double 1.234000e+03, double 1.234000e+03>
%2 = fadd fast <2 x double> %b, %1
%3 = fsub fast <2 x double> <double 0.000000e+00, double 0.000000e+00>, %a
%4 = fadd fast <2 x double> %2, %3
ret <2 x double> %4
}
; Check -(-(z*40)*a) -> a*40*z.
; Both negations (%c and %f) are written as a subtraction from a zero splat.
; The expected output contains no subtraction: %a is multiplied by a splat of
; 40.0, and that product (expected to be named %e) is multiplied by %z.
; NOTE(review): parameter %b is not referenced anywhere in the body.
define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
; CHECK-LABEL: @test10
; CHECK: fmul fast <2 x float> %a, <float 4.000000e+01, float 4.000000e+01>
; CHECK-NEXT: fmul fast <2 x float> %e, %z
; CHECK-NEXT: ret <2 x float>
%d = fmul fast <2 x float> %z, <float 4.000000e+01, float 4.000000e+01>
%c = fsub fast <2 x float> <float 0.000000e+00, float 0.000000e+00>, %d
%e = fmul fast <2 x float> %a, %c
%f = fsub fast <2 x float> <float 0.000000e+00, float 0.000000e+00>, %e
ret <2 x float> %f
}
; Check x*y+y*x -> x*y*2.
; %1 and %2 are the same product with the operands swapped, and %3 adds them.
; The expected output multiplies %y by a splat of 2.0 (named %factor) and then
; multiplies %factor by %x; the result, named %tmp1, is returned.
define <2 x double> @test11(<2 x double> %x, <2 x double> %y) {
; CHECK-LABEL: @test11
; CHECK-NEXT: %factor = fmul fast <2 x double> %y, <double 2.000000e+00, double 2.000000e+00>
; CHECK-NEXT: %tmp1 = fmul fast <2 x double> %factor, %x
; CHECK-NEXT: ret <2 x double> %tmp1
%1 = fmul fast <2 x double> %x, %y
%2 = fmul fast <2 x double> %y, %x
%3 = fadd fast <2 x double> %1, %2
ret <2 x double> %3
}
; FIXME: shifts should be converted to mul to assist further reassociation.
; This test pins the current behaviour rather than the desired one: the
; expected output is textually the same mul followed by the same shl by a
; splat of 5 as the input below, i.e. the vector shift is left as a shift.
define <2 x i64> @test12(<2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: @test12
; CHECK-NEXT: %mul = mul <2 x i64> %c, %b
; CHECK-NEXT: %shl = shl <2 x i64> %mul, <i64 5, i64 5>
; CHECK-NEXT: ret <2 x i64> %shl
%mul = mul <2 x i64> %c, %b
%shl = shl <2 x i64> %mul, <i64 5, i64 5>
ret <2 x i64> %shl
}
; FIXME: expressions with a negative const should be canonicalized to assist
; further reassociation.
; We would expect (-5*b)+a -> a-(5*b) but only the constant operand is commuted.
; This test pins the current behaviour: in the expected output the splat of
; -5.0 has moved from the left operand of the fmul to the right, and the fadd
; is the same as in the input.
define <4 x float> @test13(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test13
; CHECK-NEXT: %mul = fmul fast <4 x float> %b, <float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00>
; CHECK-NEXT: %add = fadd fast <4 x float> %mul, %a
; CHECK-NEXT: ret <4 x float> %add
%mul = fmul fast <4 x float> <float -5.000000e+00, float -5.000000e+00, float -5.000000e+00, float -5.000000e+00>, %b
%add = fadd fast <4 x float> %mul, %a
ret <4 x float> %add
}
; Break up subtract to assist further reassociation.
; Check a+b-c -> a+b+-c.
; Integer-vector case. In the expected output the subtraction of %c is
; replaced by a negation (%c.neg, written as zeroinitializer minus %c) plus an
; add that keeps the original name %sub; the first add is unchanged.
define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
; CHECK-LABEL: @test14
; CHECK-NEXT: %add = add <2 x i64> %b, %a
; CHECK-NEXT: %c.neg = sub <2 x i64> zeroinitializer, %c
; CHECK-NEXT: %sub = add <2 x i64> %add, %c.neg
; CHECK-NEXT: ret <2 x i64> %sub
%add = add <2 x i64> %b, %a
%sub = sub <2 x i64> %add, %c
ret <2 x i64> %sub
}
define <2 x i32> @test15(<2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: test15
; CHECK-NEXT: %tmp3 = and <2 x i32> %y, %x
; CHECK-NEXT: ret <2 x i32> %tmp3
%tmp1 = and <2 x i32> %x, %y %tmp1 = and <2 x i32> %x, %y
%tmp2 = and <2 x i32> %y, %x %tmp2 = and <2 x i32> %y, %x
@@ -48,11 +194,10 @@ define <2 x i32> @test4(<2 x i32> %x, <2 x i32> %y) {
ret <2 x i32> %tmp3 ret <2 x i32> %tmp3
} }
define <2 x i32> @test5(<2 x i32> %x, <2 x i32> %y) { define <2 x i32> @test16(<2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: test5 ; CHECK-LABEL: test16
; CHECK-NEXT: %tmp1 = or <2 x i32> %x, %y ; CHECK-NEXT: %tmp3 = or <2 x i32> %y, %x
; CHECK-NEXT: %tmp2 = or <2 x i32> %x, %y ; CHECK-NEXT: ret <2 x i32> %tmp3
; CHECK-NEXT: %tmp3 = or <2 x i32> %tmp1, %tmp2
%tmp1 = or <2 x i32> %x, %y %tmp1 = or <2 x i32> %x, %y
%tmp2 = or <2 x i32> %y, %x %tmp2 = or <2 x i32> %y, %x
@@ -60,8 +205,9 @@ define <2 x i32> @test5(<2 x i32> %x, <2 x i32> %y) {
ret <2 x i32> %tmp3 ret <2 x i32> %tmp3
} }
define <2 x i32> @test6(<2 x i32> %x, <2 x i32> %y) { ; FIXME: Optimize vector xor. Currently only commute operands.
; CHECK-LABEL: test6 define <2 x i32> @test17(<2 x i32> %x, <2 x i32> %y) {
; CHECK-LABEL: test17
; CHECK-NEXT: %tmp1 = xor <2 x i32> %x, %y ; CHECK-NEXT: %tmp1 = xor <2 x i32> %x, %y
; CHECK-NEXT: %tmp2 = xor <2 x i32> %x, %y ; CHECK-NEXT: %tmp2 = xor <2 x i32> %x, %y
; CHECK-NEXT: %tmp3 = xor <2 x i32> %tmp1, %tmp2 ; CHECK-NEXT: %tmp3 = xor <2 x i32> %tmp1, %tmp2