[SystemZ] Use zeroing form of RISBG for shift-and-AND sequences

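Extend r186072 to handle ANDs whose input is a constant-count rotate or
shift (ROTL, SHL, SRL or SRA), by folding the rotate or shift into the
rotate amount of the RISBG.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186073 91177308-0d34-0410-b5e6-96231b3b80d8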
2025-04-04 10:30:01 +00:00 · 2013-07-11 09:10:09 +00:00 · 2013-07-11 09:10:09 +00:00 · 261e2877eb
commit 261e2877eb
parent b3cabb44c3
2 changed files with 68 additions and 7 deletions
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -613,20 +613,81 @@ SDNode *SystemZDAGToDAGISel::tryRISBGForAND(SDNode *N) {
  unsigned Start, End;
  ConstantSDNode *MaskNode =
    dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
-  if (!MaskNode
-      || !isRISBGMask(MaskNode->getZExtValue(), BitSize, Start, End))
+  if (!MaskNode)
    return 0;

+  SDValue Input = N->getOperand(0);
+  uint64_t Mask = MaskNode->getZExtValue();
+  if (!isRISBGMask(Mask, BitSize, Start, End)) {
+    APInt KnownZero, KnownOne;
+    CurDAG->ComputeMaskedBits(Input, KnownZero, KnownOne);
+    Mask |= KnownZero.getZExtValue();
+    if (!isRISBGMask(Mask, BitSize, Start, End))
+      return 0;
+  }
+
+  unsigned Rotate = 0;
+  if (Input->getOpcode() == ISD::ROTL && BitSize == 64) {
+    // Any 64-bit rotate left can be merged into the RISBG.
+    if (ConstantSDNode *CountNode =
+        dyn_cast<ConstantSDNode>(Input.getOperand(1).getNode())) {
+      Rotate = CountNode->getZExtValue() & (BitSize - 1);
+      Input = Input->getOperand(0);
+    }
+  } else if (Input->getOpcode() == ISD::SHL) {
+    // Try to convert (and (shl X, count), mask) into
+    // (and (rotl X, count), mask&(~0<<count)), where the new mask
+    // removes bits from the original mask that are zeroed by the shl
+    // but that are not necessarily zero in X.
+    if (ConstantSDNode *CountNode =
+        dyn_cast<ConstantSDNode>(Input.getOperand(1).getNode())) {
+      uint64_t Count = CountNode->getZExtValue();
+      if (Count > 0 &&
+          Count < BitSize &&
+          isRISBGMask(Mask & (allOnes(BitSize - Count) << Count),
+                      BitSize, Start, End)) {
+        Rotate = Count;
+        Input = Input->getOperand(0);
+      }
+    }
+  } else if (Input->getOpcode() == ISD::SRL) {
+    // Try to convert (and (srl X, count), mask) into
+    // (and (rotl X, size-count), mask&(~0>>count)), which is similar
+    // to SHL above.
+    if (ConstantSDNode *CountNode =
+        dyn_cast<ConstantSDNode>(Input.getOperand(1).getNode())) {
+      uint64_t Count = CountNode->getZExtValue();
+      if (Count > 0 &&
+          Count < BitSize &&
+          isRISBGMask(Mask & allOnes(BitSize - Count), BitSize, Start, End)) {
+        Rotate = 64 - Count;
+        Input = Input->getOperand(0);
+      }
+    }
+  } else if (Start <= End && Input->getOpcode() == ISD::SRA) {
+    // Try to convert (and (sra X, count), mask) into
+    // (and (rotl X, size-count), mask).  The mask must not include
+    // any sign bits.
+    if (ConstantSDNode *CountNode =
+        dyn_cast<ConstantSDNode>(Input.getOperand(1).getNode())) {
+      uint64_t Count = CountNode->getZExtValue();
+      if (Count > 0 && Count < BitSize && Start >= 64 - (BitSize - Count)) {
+        Rotate = 64 - Count;
+        Input = Input->getOperand(0);
+      }
+    }
+  }
+
  // Prefer register extensions like LLC over RSIBG.
-  if ((Start == 32 || Start == 48 || Start == 56) && End == 63)
+  if (Rotate == 0 && (Start == 32 || Start == 48 || Start == 56) && End == 63)
    return 0;

  SDValue Ops[5] = {
    getUNDEF64(SDLoc(N)),
-    convertTo(SDLoc(N), MVT::i64, N->getOperand(0)),
+    convertTo(SDLoc(N), MVT::i64, Input),
    CurDAG->getTargetConstant(Start, MVT::i32),
    CurDAG->getTargetConstant(End | 128, MVT::i32),
-    CurDAG->getTargetConstant(0, MVT::i32)
+    CurDAG->getTargetConstant(Rotate, MVT::i32)
  };
  N = CurDAG->getMachineNode(SystemZ::RISBG, SDLoc(N), MVT::i64, Ops);
  return convertTo(SDLoc(N), VT, SDValue(N, 0)).getNode();
--- a/test/CodeGen/SystemZ/fp-move-02.ll
+++ b/test/CodeGen/SystemZ/fp-move-02.ll
@@ -16,7 +16,7 @@ define float @f1(i32 %a) {
 ; surrounding code.
 define float @f2(i64 %big) {
 ; CHECK: f2:
-; CHECK: sllg [[REGISTER:%r[0-5]]], %r2, 31
+; CHECK: risbg [[REGISTER:%r[0-5]]], %r2, 0, 159, 31
 ; CHECK: ldgr %f0, [[REGISTER]]
  %shift = lshr i64 %big, 1
  %a = trunc i64 %shift to i32
@@ -27,7 +27,7 @@ define float @f2(i64 %big) {
 ; Another example of the same thing.
 define float @f3(i64 %big) {
 ; CHECK: f3:
-; CHECK: sllg [[REGISTER:%r[0-5]]], %r2, 2
+; CHECK: risbg [[REGISTER:%r[0-5]]], %r2, 0, 159, 2
 ; CHECK: ldgr %f0, [[REGISTER]]
  %shift = ashr i64 %big, 30
  %a = trunc i64 %shift to i32