[PowerPC] Fold i1 extensions with other ops

Consider this function from our README.txt file: int foo(int a, int b) { return (a < b) << 4; } We now explicitly track CR bits by default, so the comment in the README.txt about not really having a SETCC is no longer accurate, but we did generate this somewhat silly code: cmpw 0, 3, 4 li 3, 0 li 12, 1 isel 3, 12, 3, 0 sldi 3, 3, 4 blr which generates the zext as a select between 0 and 1, and then shifts the result by a constant amount. Here we preprocess the DAG in order to fold the results of operations on an extension of an i1 value into the SELECT_I[48] pseudo instruction when the resulting constant can be materialized using one instruction (just like the 0 and 1). This was not implemented as a DAGCombine because the resulting code would have been anti-canonical and depends on replacing chained user nodes, which does not fit well into the lowering paradigm. Now we generate: cmpw 0, 3, 4 li 3, 0 li 12, 16 isel 3, 12, 3, 0 blr which is less silly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225203 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-12 23:37:33 +00:00 · 2015-01-05 21:10:24 +00:00 · 2015-01-05 21:10:24 +00:00 · ccc83e4a08
commit ccc83e4a08
parent dd0552884b
3 changed files with 125 additions and 17 deletions
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@ -217,6 +217,7 @@ private:
    void PeepholeCROps();

    SDValue combineToCMPB(SDNode *N);
+    void foldBoolExts(SDValue &Res, SDNode *&N);

    bool AllUsersSelectZero(SDNode *N);
    void SwapAllSelectUsers(SDNode *N);
@ -3173,6 +3174,73 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
  return Res;
 }

+// When CR bit registers are enabled, an extension of an i1 variable to a i32
+// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
+// involves constant materialization of a 0 or a 1 or both. If the result of
+// the extension is then operated upon by some operator that can be constant
+// folded with a constant 0 or 1, and that constant can be materialized using
+// only one instruction (like a zero or one), then we should fold in those
+// operations with the select.
+void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
+  if (!PPCSubTarget->useCRBits())
+    return;
+
+  if (N->getOpcode() != ISD::ZERO_EXTEND &&
+      N->getOpcode() != ISD::SIGN_EXTEND &&
+      N->getOpcode() != ISD::ANY_EXTEND)
+    return;
+
+  if (N->getOperand(0).getValueType() != MVT::i1)
+    return;
+
+  if (!N->hasOneUse())
+    return;
+
+  SDLoc dl(N);
+  EVT VT = N->getValueType(0);
+  SDValue Cond = N->getOperand(0);
+  SDValue ConstTrue =
+    CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, VT);
+  SDValue ConstFalse = CurDAG->getConstant(0, VT);
+
+  do {
+    SDNode *User = *N->use_begin();
+    if (User->getNumOperands() != 2)
+      break;
+
+    auto TryFold = [this, N, User](SDValue Val) {
+      SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
+      SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
+      SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
+
+      return CurDAG->FoldConstantArithmetic(User->getOpcode(),
+                                            User->getValueType(0),
+                                            O0.getNode(), O1.getNode());
+    };
+
+    SDValue TrueRes = TryFold(ConstTrue);
+    if (!TrueRes)
+      break;
+    SDValue FalseRes = TryFold(ConstFalse);
+    if (!FalseRes)
+      break;
+
+    // For us to materialize these using one instruction, we must be able to
+    // represent them as signed 16-bit integers.
+    uint64_t True  = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
+             False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
+    if (!isInt<16>(True) || !isInt<16>(False))
+      break;
+
+    // We can replace User with a new SELECT node, and try again to see if we
+    // can fold the select with its user.
+    Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
+    N = User;
+    ConstTrue = TrueRes;
+    ConstFalse = FalseRes;
+  } while (N->hasOneUse());
+}
+
 void PPCDAGToDAGISel::PreprocessISelDAG() {
  SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
  ++Position;
@ -3191,6 +3259,9 @@ void PPCDAGToDAGISel::PreprocessISelDAG() {
      break;
    }

+    if (!Res)
+      foldBoolExts(Res, N);
+
    if (Res) {
      DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld:    ");
      DEBUG(N->dump(CurDAG));
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@ -252,23 +252,6 @@ anything though, because the compares still wouldn't be shared.

 ===-------------------------------------------------------------------------===

-We should custom expand setcc instead of pretending that we have it.  That
-would allow us to expose the access of the crbit after the mfcr, allowing
-that access to be trivially folded into other ops.  A simple example:
-
-int foo(int a, int b) { return (a < b) << 4; }
-
-compiles into:
-
-_foo:
-        cmpw cr7, r3, r4
-        mfcr r2, 1
-        rlwinm r2, r2, 29, 31, 31
-        slwi r3, r2, 4
-        blr
-
-===-------------------------------------------------------------------------===
-
 Fold add and sub with constant into non-extern, non-weak addresses so this:

 static int a;
--- a/test/CodeGen/PowerPC/i1-ext-fold.ll
+++ b/test/CodeGen/PowerPC/i1-ext-fold.ll
@ -0,0 +1,54 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo(i32 signext %a, i32 signext %b) #0 {
+entry:
+  %cmp = icmp slt i32 %a, %b
+  %conv = zext i1 %cmp to i32
+  %shl = shl nuw nsw i32 %conv, 4
+  ret i32 %shl
+
+; CHECK-LABEL: @foo
+; CHECK-DAG: cmpw
+; CHECK-DAG: li [[REG1:[0-9]+]], 0
+; CHECK-DAG: li [[REG2:[0-9]+]], 16
+; CHECK: isel 3, [[REG2]], [[REG1]],
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo2(i32 signext %a, i32 signext %b) #0 {
+entry:
+  %cmp = icmp slt i32 %a, %b
+  %conv = zext i1 %cmp to i32
+  %shl = shl nuw nsw i32 %conv, 4
+  %add1 = or i32 %shl, 5
+  ret i32 %add1
+
+; CHECK-LABEL: @foo2
+; CHECK-DAG: cmpw
+; CHECK-DAG: li [[REG1:[0-9]+]], 5
+; CHECK-DAG: li [[REG2:[0-9]+]], 21
+; CHECK: isel 3, [[REG2]], [[REG1]],
+; CHECK: blr
+}
+
+; Function Attrs: nounwind readnone
+define signext i32 @foo3(i32 signext %a, i32 signext %b) #0 {
+entry:
+  %cmp = icmp sle i32 %a, %b
+  %conv = zext i1 %cmp to i32
+  %shl = shl nuw nsw i32 %conv, 4
+  ret i32 %shl
+
+; CHECK-LABEL: @foo3
+; CHECK-DAG: cmpw
+; CHECK-DAG: li [[REG1:[0-9]+]], 16
+; CHECK: isel 3, 0, [[REG1]],
+; CHECK: blr
+}
+
+attributes #0 = { nounwind readnone }
+