Enhance bool simplification in X86 to handle more cases

This patch is revised based on a patch from Victor Umansky <victor.umansky@intel.com>. More cases are now handled in X86's bool simplification, namely: (1) SETCC_CARRY, and (2) a value that is truncated to i1 with AND. As a by-product, PR5443 is also fixed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179265 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-06 09:44:39 +00:00 · 2013-04-11 04:43:09 +00:00 · 2013-04-11 04:43:09 +00:00 · 959ddbb5e0
commit 959ddbb5e0
parent 4eb5f18956
4 changed files with 334 additions and 8 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -15847,6 +15847,7 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
  SDValue SetCC;
  const ConstantSDNode* C = 0;
  bool needOppositeCond = (CC == X86::COND_E);
+  bool checkAgainstTrue = false; // Is it a comparison against 1?

  if ((C = dyn_cast<ConstantSDNode>(Op1)))
    SetCC = Op2;
@ -15855,18 +15856,46 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
  else // Quit if all operands are not constants.
    return SDValue();

-  if (C->getZExtValue() == 1)
+  if (C->getZExtValue() == 1) {
    needOppositeCond = !needOppositeCond;
-  else if (C->getZExtValue() != 0)
+    checkAgainstTrue = true;
+  } else if (C->getZExtValue() != 0)
    // Quit if the constant is neither 0 or 1.
    return SDValue();

-  // Skip 'zext' or 'trunc' node.
-  if (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
-      SetCC.getOpcode() == ISD::TRUNCATE)
-    SetCC = SetCC.getOperand(0);
+  bool truncatedToBoolWithAnd = false;
+  // Skip (zext $x), (trunc $x), or (and $x, 1) node.
+  while (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
+         SetCC.getOpcode() == ISD::TRUNCATE ||
+         SetCC.getOpcode() == ISD::AND) {
+    if (SetCC.getOpcode() == ISD::AND) {
+      int OpIdx = -1;
+      ConstantSDNode *CS;
+      if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(0))) &&
+          CS->getZExtValue() == 1)
+        OpIdx = 1;
+      if ((CS = dyn_cast<ConstantSDNode>(SetCC.getOperand(1))) &&
+          CS->getZExtValue() == 1)
+        OpIdx = 0;
+      if (OpIdx == -1)
+        break;
+      SetCC = SetCC.getOperand(OpIdx);
+      truncatedToBoolWithAnd = true;
+    } else
+      SetCC = SetCC.getOperand(0);
+  }

  switch (SetCC.getOpcode()) {
+  case X86ISD::SETCC_CARRY:
+    // Since SETCC_CARRY gives output based on R = CF ? ~0 : 0, it's unsafe to
+    // simplify it if the result of SETCC_CARRY is not canonicalized to 0 or 1,
+    // i.e. it's a comparison against true but the result of SETCC_CARRY is not
+    // truncated to i1 using 'and'.
+    if (checkAgainstTrue && !truncatedToBoolWithAnd)
+      break;
+    assert(X86::CondCode(SetCC.getConstantOperandVal(0)) == X86::COND_B &&
+           "Invalid use of SETCC_CARRY!");
+    // FALL THROUGH
  case X86ISD::SETCC:
    // Set the condition code or opposite one if necessary.
    CC = X86::CondCode(SetCC.getConstantOperandVal(0));
--- a/test/CodeGen/X86/add.ll
+++ b/test/CodeGen/X86/add.ll
@ -119,8 +119,8 @@ entry:

 ; X64: test8:
 ; X64: addq
-; X64-NEXT: sbbq
-; X64-NEXT: testb
+; X64-NEXT: setb
+; X64: ret

 define i32 @test9(i32 %x, i32 %y) nounwind readnone {
  %cmp = icmp eq i32 %x, 10
--- a/test/CodeGen/X86/avx-brcond.ll
+++ b/test/CodeGen/X86/avx-brcond.ll
@ -0,0 +1,150 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+declare i32 @llvm.x86.avx.ptestz.256(<4 x i64> %p1, <4 x i64> %p2) nounwind
+declare i32 @llvm.x86.avx.ptestc.256(<4 x i64> %p1, <4 x i64> %p2) nounwind
+
+define <4 x float> @test1(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test1:
+; CHECK: vptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = icmp ne i32 %res, 0 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test3(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test3:
+; CHECK: vptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = trunc i32 %res to i1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test4(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test4:
+; CHECK: vptest
+; CHECK-NEXT:	jae
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = icmp ne i32 %res, 0 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test6(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test6:
+; CHECK: vptest
+; CHECK-NEXT:	jae
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = trunc i32 %res to i1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test7(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test7:
+; CHECK: vptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = icmp eq i32 %res, 1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test8(<4 x i64> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test8:
+; CHECK: vptest
+; CHECK-NEXT:	je
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a, <4 x i64> %a) nounwind 
+  %one = icmp ne i32 %res, 1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+
--- a/test/CodeGen/X86/brcond.ll
+++ b/test/CodeGen/X86/brcond.ll
@ -108,3 +108,150 @@ bb2:                                              ; preds = %entry, %bb1
  ret float %.0
 }

+declare i32 @llvm.x86.sse41.ptestz(<4 x float> %p1, <4 x float> %p2) nounwind
+declare i32 @llvm.x86.sse41.ptestc(<4 x float> %p1, <4 x float> %p2) nounwind
+
+define <4 x float> @test5(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test5:
+; CHECK: ptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = icmp ne i32 %res, 0 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test7(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test7:
+; CHECK: ptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = trunc i32 %res to i1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test8(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test8:
+; CHECK: ptest
+; CHECK-NEXT:	jae
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = icmp ne i32 %res, 0 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test10(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test10:
+; CHECK: ptest
+; CHECK-NEXT:	jae
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = trunc i32 %res to i1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test11(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test11:
+; CHECK: ptest
+; CHECK-NEXT:	jne
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = icmp eq i32 %res, 1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+
+define <4 x float> @test12(<4 x float> %a, <4 x float> %b) nounwind {
+entry:
+; CHECK: test12:
+; CHECK: ptest
+; CHECK-NEXT:	je
+; CHECK: ret
+
+  %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a, <4 x float> %a) nounwind 
+  %one = icmp ne i32 %res, 1 
+  br i1 %one, label %bb1, label %bb2
+
+bb1:
+  %c = fadd <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+  br label %return
+
+bb2:
+	%d = fdiv <4 x float> %b, < float 1.000000e+002, float 2.000000e+002, float 3.000000e+002, float 4.000000e+002 >
+	br label %return
+
+return:
+  %e = phi <4 x float> [%c, %bb1], [%d, %bb2]
+  ret <4 x float> %e
+}
+