[SystemZ] Optimize (sext (ashr (shl ...), ...))

...into (ashr (shl (anyext X), ...), ...), which requires one fewer instruction. The (anyext X) can sometimes be simplified too. I didn't do this in DAGCombiner because widening shifts isn't a win on all targets. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199114 91177308-0d34-0410-b5e6-96231b3b80d8
2026-04-20 16:17:38 +00:00 · 2014-01-13 15:17:53 +00:00
parent 1023a9492f
commit 80a3301b37
3 changed files with 52 additions and 3 deletions
@@ -209,6 +209,9 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
  setOperationAction(ISD::OR, MVT::i64, Custom);

+  // Give LowerOperation the chance to optimize SIGN_EXTEND sequences.
+  setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
+
  // FIXME: Can we support these natively?
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
@@ -2174,6 +2177,36 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
                                   MVT::i64, HighOp, Low32);
 }

+SDValue SystemZTargetLowering::lowerSIGN_EXTEND(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  // Convert (sext (ashr (shl X, C1), C2)) to
+  // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
+  // cheap as narrower ones.
+  SDValue N0 = Op.getOperand(0);
+  EVT VT = Op.getValueType();
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
+    ConstantSDNode *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    SDValue Inner = N0.getOperand(0);
+    if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
+      ConstantSDNode *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1));
+      if (ShlAmt) {
+        unsigned Extra = (VT.getSizeInBits() -
+                          N0.getValueType().getSizeInBits());
+        unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
+        unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
+        EVT ShiftVT = N0.getOperand(1).getValueType();
+        SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
+                                  Inner.getOperand(0));
+        SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
+                                  DAG.getConstant(NewShlAmt, ShiftVT));
+        return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
+                           DAG.getConstant(NewSraAmt, ShiftVT));
+      }
+    }
+  }
+  return SDValue();
+}
+
 // Op is an atomic load.  Lower it into a normal volatile load.
 SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
                                                SelectionDAG &DAG) const {
@@ -2426,6 +2459,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
    return lowerUDIVREM(Op, DAG);
  case ISD::OR:
    return lowerOR(Op, DAG);
+  case ISD::SIGN_EXTEND:
+    return lowerSIGN_EXTEND(Op, DAG);
  case ISD::ATOMIC_SWAP:
    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
  case ISD::ATOMIC_STORE:
@@ -279,6 +279,7 @@ private:
  SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
@@ -14,13 +14,14 @@ define i64 @f1(i32 %a) {
  ret i64 %ext
 }

-; ...and again with the highest shift count.
+; ...and again with the highest shift count that doesn't reduce to an
+; ashr/sext pair.
 define i64 @f2(i32 %a) {
 ; CHECK-LABEL: f2:
-; CHECK: sllg [[REG:%r[0-5]]], %r2, 32
+; CHECK: sllg [[REG:%r[0-5]]], %r2, 33
 ; CHECK: srag %r2, [[REG]], 63
 ; CHECK: br %r14
-  %shr = lshr i32 %a, 31
+  %shr = lshr i32 %a, 30
  %trunc = trunc i32 %shr to i1
  %ext = sext i1 %trunc to i64
  ret i64 %ext
@@ -76,3 +77,15 @@ define i64 @f6(i64 %a) {
  %and = and i64 %shr, 256
  ret i64 %and
 }
+
+; Test another form of f1.
+define i64 @f7(i32 %a) {
+; CHECK-LABEL: f7:
+; CHECK: sllg [[REG:%r[0-5]]], %r2, 62
+; CHECK: srag %r2, [[REG]], 63
+; CHECK: br %r14
+  %1 = shl i32 %a, 30
+  %sext = ashr i32 %1, 31
+  %ext = sext i32 %sext to i64
+  ret i64 %ext
+}