From 3112581441cd22ac955b1af2d08effe3bab975da Mon Sep 17 00:00:00 2001
From: Dan Gohman <gohman@apple.com>
Date: Sat, 7 Mar 2009 01:58:32 +0000
Subject: [PATCH] Arithmetic instructions don't set EFLAGS bits OF and CF bits
 the same say the "test" instruction does in overflow cases, so eliminating
 the test is only safe when those bits aren't needed, as is the case for
 COND_E and COND_NE, or if it can be proven that no overflow will occur. For
 now, just restrict the optimization to COND_E and COND_NE and don't do any
 overflow analysis.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@66318 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp | 40 ++++++++++++++++++++++--------
 lib/Target/X86/X86ISelLowering.h   |  9 ++++---
 test/CodeGen/X86/peep-test-2.ll    | 17 +++++++++++++
 3 files changed, 52 insertions(+), 14 deletions(-)
 create mode 100644 test/CodeGen/X86/peep-test-2.ll
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9ce5e484e7d..ad5395ab063 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -5363,12 +5363,31 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
 
 /// Emit nodes that will be selected as "test Op0,Op0", or something
 /// equivalent.
-SDValue X86TargetLowering::EmitTest(SDValue Op, SelectionDAG &DAG) {
+SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
+                                    SelectionDAG &DAG) {
   DebugLoc dl = Op.getDebugLoc();
 
+  // CF and OF aren't always set the way we want. Determine which
+  // of these we need.
+  bool NeedCF = false;
+  bool NeedOF = false;
+  switch (X86CC) {
+  case X86::COND_A: case X86::COND_AE:
+  case X86::COND_B: case X86::COND_BE:
+    NeedCF = true;
+    break;
+  case X86::COND_G: case X86::COND_GE:
+  case X86::COND_L: case X86::COND_LE:
+  case X86::COND_O: case X86::COND_NO:
+    NeedOF = true;
+    break;
+  default: break;
+  }
+
   // See if we can use the EFLAGS value from the operand instead of
-  // doing a separate TEST.
-  if (Op.getResNo() == 0) {
+  // doing a separate TEST. TEST always sets OF and CF to 0, so unless
+  // we prove that the arithmetic won't overflow, we can't use OF or CF.
+  if (Op.getResNo() == 0 && !NeedOF && !NeedCF) {
     unsigned Opcode = 0;
     unsigned NumOperands = 0;
     switch (Op.getNode()->getOpcode()) {
@@ -5425,9 +5444,9 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, SelectionDAG &DAG) {
     if (Opcode != 0) {
       const MVT *VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::i32);
       SmallVector<SDValue, 4> Ops;
-      for (unsigned i = 0, e = NumOperands; i != e; ++i)
+      for (unsigned i = 0; i != NumOperands; ++i)
         Ops.push_back(Op.getOperand(i));
-      SDValue New = DAG.getNode(Opcode, dl, VTs, 2, &Ops[0], Ops.size());
+      SDValue New = DAG.getNode(Opcode, dl, VTs, 2, &Ops[0], NumOperands);
       DAG.ReplaceAllUsesWith(Op, New);
       return SDValue(New.getNode(), 1);
     }
@@ -5440,10 +5459,11 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, SelectionDAG &DAG) {
 
 /// Emit nodes that will be selected as "cmp Op0,Op1", or something
 /// equivalent.
-SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, SelectionDAG &DAG) {
+SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
+                                   SelectionDAG &DAG) {
   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1))
     if (C->getAPIntValue() == 0)
-      return EmitTest(Op0, DAG);
+      return EmitTest(Op0, X86CC, DAG);
 
   DebugLoc dl = Op0.getDebugLoc();
   return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
@@ -5511,7 +5531,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
   bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
   unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
 
-  SDValue Cond = EmitCmp(Op0, Op1, DAG);
+  SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG);
   return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
                      DAG.getConstant(X86CC, MVT::i8), Cond);
 }
@@ -5677,7 +5697,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {
 
   if (addTest) {
     CC = DAG.getConstant(X86::COND_NE, MVT::i8);
-    Cond = EmitTest(Cond, DAG);
+    Cond = EmitTest(Cond, X86::COND_NE, DAG);
   }
 
   const MVT *VTs = DAG.getNodeValueTypes(Op.getValueType(),
@@ -5827,7 +5847,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
 
   if (addTest) {
     CC = DAG.getConstant(X86::COND_NE, MVT::i8);
-    Cond = EmitTest(Cond, DAG);
+    Cond = EmitTest(Cond, X86::COND_NE, DAG);
   }
   return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
                      Chain, Dest, CC, Cond);
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 5d3f287a381..6666b451e24 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -661,12 +661,13 @@ namespace llvm {
                                                         unsigned cmovOpc) const;
 
     /// Emit nodes that will be selected as "test Op0,Op0", or something
-    /// equivalent.
-    SDValue EmitTest(SDValue Op0, SelectionDAG &DAG);
+    /// equivalent, for use with the given x86 condition code.
+    SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG);
 
     /// Emit nodes that will be selected as "cmp Op0,Op1", or something
-    /// equivalent.
-    SDValue EmitCmp(SDValue Op0, SDValue Op1, SelectionDAG &DAG);
+    /// equivalent, for use with the given x86 condition code.
+    SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
+                    SelectionDAG &DAG);
   };
 
   namespace X86 {
diff --git a/test/CodeGen/X86/peep-test-2.ll b/test/CodeGen/X86/peep-test-2.ll
new file mode 100644
index 00000000000..788f610365c
--- /dev/null
+++ b/test/CodeGen/X86/peep-test-2.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep testl
+
+; It's tempting to eliminate the testl instruction here and just use the
+; EFLAGS value from the incl, however it can't be known whether the add
+; will overflow, and if it does the incl would set OF, and the
+; subsequent setg would return true.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.6"
+
+define i32 @f(i32 %j) nounwind readnone {
+entry:
+	%0 = add i32 %j, 1		; <i32> [#uses=1]
+	%1 = icmp sgt i32 %0, 0		; <i1> [#uses=1]
+	%2 = zext i1 %1 to i32		; <i32> [#uses=1]
+	ret i32 %2
+}