Enhance bool simplifcation in X86 to handle more cases

This patch is revised based on patch from Victor Umansky
<victor.umansky@intel.com>. More cases are handled in X86's bool
simplification, i.e.
- SETCC_CARRY
- value is truncated to i1 with AND

As a by-product, PR5443 is also fixed.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179265 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Michael Liao 2013-04-11 04:43:09 +00:00
parent 4eb5f18956
commit 959ddbb5e0
4 changed files with 334 additions and 8 deletions

View File

@ -15847,6 +15847,7 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
SDValue SetCC;
const ConstantSDNode* C = 0;
bool needOppositeCond = (CC == X86::COND_E);
bool checkAgainstTrue = false; // Is it a comparison against 1?
if ((C = dyn_cast<ConstantSDNode>(Op1)))
SetCC = Op2;
@ -15855,18 +15856,46 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
else // Quit if all operands are not constants.
return SDValue();
if (C->getZExtValue() == 1)
if (C->getZExtValue() == 1) {
needOppositeCond = !needOppositeCond;
else if (C->getZExtValue() != 0)
checkAgainstTrue = true;
} else if (C->getZExtValue() != 0)
// Quit if the constant is neither 0 or 1.
return SDValue();
// Skip 'zext' or 'trunc' node.
if (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
SetCC.getOpcode() == ISD::TRUNCATE)
SetCC = SetCC.getOperand(0);
bool truncatedToBoolWithAnd = false;
// Skip (zext $x), (trunc $x), or (and $x, 1) node.
while (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
SetCC.getOpcode() == ISD::TRUNCATE ||
SetCC.getOpcode() == ISD::AND) {
if (SetCC.getOpcode() == ISD::AND) {
int OpIdx = -1;
ConstantSDNode *CS;
if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(0))) &&
CS->getZExtValue() == 1)
OpIdx = 1;
if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(1))) &&
CS->getZExtValue() == 1)
OpIdx = 0;
if (OpIdx == -1)
break;
SetCC = SetCC.getOperand(OpIdx);
truncatedToBoolWithAnd = true;
} else
SetCC = SetCC.getOperand(0);
}
switch (SetCC.getOpcode()) {
case X86ISD::SETCC_CARRY:
// Since SETCC_CARRY gives output based on R = CF ? ~0 : 0, it's unsafe to
// simplify it if the result of SETCC_CARRY is not canonicalized to 0 or 1,
// i.e. it's a comparison against true but the result of SETCC_CARRY is not
// truncated to i1 using 'and'.
if (checkAgainstTrue && !truncatedToBoolWithAnd)
break;
assert(X86::CondCode(SetCC.getConstantOperandVal(0)) == X86::COND_B &&
"Invalid use of SETCC_CARRY!");
// FALL THROUGH
case X86ISD::SETCC:
// Set the condition code or opposite one if necessary.
CC = X86::CondCode(SetCC.getConstantOperandVal(0));

View File

@ -119,8 +119,8 @@ entry:
; X64: test8:
; X64: addq
; X64-NEXT: sbbq
; X64-NEXT: testb
; X64-NEXT: setb
; X64: ret
define i32 @test9(i32 %x, i32 %y) nounwind readnone {
%cmp = icmp eq i32 %x, 10

View File

@ -0,0 +1,150 @@
; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
declare i32 @llvm.x86.avx.ptestz.256(<4 x i64> %p1, <4 x i64> %p2) nounwind
declare i32 @llvm.x86.avx.ptestc.256(<4 x i64> %p1, <4 x i64> %p2) nounwind
define <4 x float> @test1(<4 x i64> %a, <4 x float> %b) nounwind {
entry:
; CHECK: test1:
; CHECK: vptest
; CHECK-NEXT: jne
; CHECK: ret
%res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind
%one = icmp ne i32 %res, 0
br i1 %one, label %bb1, label %bb2
bb1:
%c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
bb2:
%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
return:
%e = phi <4 x float> [%c, %bb1], [%d, %bb2]
ret <4 x float> %e
}
define <4 x float> @test3(<4 x i64> %a, <4 x float> %b) nounwind {
entry:
; CHECK: test3:
; CHECK: vptest
; CHECK-NEXT: jne
; CHECK: ret
%res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind
%one = trunc i32 %res to i1
br i1 %one, label %bb1, label %bb2
bb1:
%c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
bb2:
%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
return:
%e = phi <4 x float> [%c, %bb1], [%d, %bb2]
ret <4 x float> %e
}
define <4 x float> @test4(<4 x i64> %a, <4 x float> %b) nounwind {
entry:
; CHECK: test4:
; CHECK: vptest
; CHECK-NEXT: jae
; CHECK: ret
%res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a, <4 x i64> %a) nounwind
%one = icmp ne i32 %res, 0
br i1 %one, label %bb1, label %bb2
bb1:
%c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
bb2:
%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
return:
%e = phi <4 x float> [%c, %bb1], [%d, %bb2]
ret <4 x float> %e
}
define <4 x float> @test6(<4 x i64> %a, <4 x float> %b) nounwind {
entry:
; CHECK: test6:
; CHECK: vptest
; CHECK-NEXT: jae
; CHECK: ret
%res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a, <4 x i64> %a) nounwind
%one = trunc i32 %res to i1
br i1 %one, label %bb1, label %bb2
bb1:
%c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
bb2:
%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
return:
%e = phi <4 x float> [%c, %bb1], [%d, %bb2]
ret <4 x float> %e
}
define <4 x float> @test7(<4 x i64> %a, <4 x float> %b) nounwind {
entry:
; CHECK: test7:
; CHECK: vptest
; CHECK-NEXT: jne
; CHECK: ret
%res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind
%one = icmp eq i32 %res, 1
br i1 %one, label %bb1, label %bb2
bb1:
%c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
bb2:
%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
return:
%e = phi <4 x float> [%c, %bb1], [%d, %bb2]
ret <4 x float> %e
}
define <4 x float> @test8(<4 x i64> %a, <4 x float> %b) nounwind {
entry:
; CHECK: test8:
; CHECK: vptest
; CHECK-NEXT: je
; CHECK: ret
%res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind
%one = icmp ne i32 %res, 1
br i1 %one, label %bb1, label %bb2
bb1:
%c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
bb2:
%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
return:
%e = phi <4 x float> [%c, %bb1], [%d, %bb2]
ret <4 x float> %e
}

View File

@ -108,3 +108,150 @@ bb2: ; preds = %entry, %bb1
ret float %.0
}
declare i32 @llvm.x86.sse41.ptestz(<4 x float> %p1, <4 x float> %p2) nounwind
declare i32 @llvm.x86.sse41.ptestc(<4 x float> %p1, <4 x float> %p2) nounwind
define <4 x float> @test5(<4 x float> %a, <4 x float> %b) nounwind {
entry:
; CHECK: test5:
; CHECK: ptest
; CHECK-NEXT: jne
; CHECK: ret
%res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind
%one = icmp ne i32 %res, 0
br i1 %one, label %bb1, label %bb2
bb1:
%c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
bb2:
%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
return:
%e = phi <4 x float> [%c, %bb1], [%d, %bb2]
ret <4 x float> %e
}
define <4 x float> @test7(<4 x float> %a, <4 x float> %b) nounwind {
entry:
; CHECK: test7:
; CHECK: ptest
; CHECK-NEXT: jne
; CHECK: ret
%res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind
%one = trunc i32 %res to i1
br i1 %one, label %bb1, label %bb2
bb1:
%c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
bb2:
%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
return:
%e = phi <4 x float> [%c, %bb1], [%d, %bb2]
ret <4 x float> %e
}
define <4 x float> @test8(<4 x float> %a, <4 x float> %b) nounwind {
entry:
; CHECK: test8:
; CHECK: ptest
; CHECK-NEXT: jae
; CHECK: ret
%res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind
%one = icmp ne i32 %res, 0
br i1 %one, label %bb1, label %bb2
bb1:
%c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
bb2:
%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
return:
%e = phi <4 x float> [%c, %bb1], [%d, %bb2]
ret <4 x float> %e
}
define <4 x float> @test10(<4 x float> %a, <4 x float> %b) nounwind {
entry:
; CHECK: test10:
; CHECK: ptest
; CHECK-NEXT: jae
; CHECK: ret
%res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind
%one = trunc i32 %res to i1
br i1 %one, label %bb1, label %bb2
bb1:
%c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
bb2:
%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
return:
%e = phi <4 x float> [%c, %bb1], [%d, %bb2]
ret <4 x float> %e
}
define <4 x float> @test11(<4 x float> %a, <4 x float> %b) nounwind {
entry:
; CHECK: test11:
; CHECK: ptest
; CHECK-NEXT: jne
; CHECK: ret
%res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind
%one = icmp eq i32 %res, 1
br i1 %one, label %bb1, label %bb2
bb1:
%c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
bb2:
%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
return:
%e = phi <4 x float> [%c, %bb1], [%d, %bb2]
ret <4 x float> %e
}
define <4 x float> @test12(<4 x float> %a, <4 x float> %b) nounwind {
entry:
; CHECK: test12:
; CHECK: ptest
; CHECK-NEXT: je
; CHECK: ret
%res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind
%one = icmp ne i32 %res, 1
br i1 %one, label %bb1, label %bb2
bb1:
%c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
bb2:
%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
br label %return
return:
%e = phi <4 x float> [%c, %bb1], [%d, %bb2]
ret <4 x float> %e
}