From 80a3301b377a6bdccded883e67ddbfc65cf82d23 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 13 Jan 2014 15:17:53 +0000 Subject: [PATCH] [SystemZ] Optimize (sext (ashr (shl ...), ...)) ...into (ashr (shl (anyext X), ...), ...), which requires one fewer instruction. The (anyext X) can sometimes be simplified too. I didn't do this in DAGCombiner because widening shifts isn't a win on all targets. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199114 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/SystemZ/SystemZISelLowering.cpp | 35 ++++++++++++++++++++++ lib/Target/SystemZ/SystemZISelLowering.h | 1 + test/CodeGen/SystemZ/shift-10.ll | 19 ++++++++++-- 3 files changed, 52 insertions(+), 3 deletions(-) diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 32db1129862..0ca145e3a61 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -209,6 +209,9 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) // Give LowerOperation the chance to replace 64-bit ORs with subregs. setOperationAction(ISD::OR, MVT::i64, Custom); + // Give LowerOperation the chance to optimize SIGN_EXTEND sequences. + setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom); + // FIXME: Can we support these natively? setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); @@ -2174,6 +2177,36 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { MVT::i64, HighOp, Low32); } +SDValue SystemZTargetLowering::lowerSIGN_EXTEND(SDValue Op, + SelectionDAG &DAG) const { + // Convert (sext (ashr (shl X, C1), C2)) to + // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as + // cheap as narrower ones. 
+ SDValue N0 = Op.getOperand(0); + EVT VT = Op.getValueType(); + if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) { + ConstantSDNode *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + SDValue Inner = N0.getOperand(0); + if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) { + ConstantSDNode *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1)); + if (ShlAmt) { + unsigned Extra = (VT.getSizeInBits() - + N0.getValueType().getSizeInBits()); + unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra; + unsigned NewSraAmt = SraAmt->getZExtValue() + Extra; + EVT ShiftVT = N0.getOperand(1).getValueType(); + SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT, + Inner.getOperand(0)); + SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext, + DAG.getConstant(NewShlAmt, ShiftVT)); + return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, + DAG.getConstant(NewSraAmt, ShiftVT)); + } + } + } + return SDValue(); +} + // Op is an atomic load. Lower it into a normal volatile load. SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const { @@ -2426,6 +2459,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerUDIVREM(Op, DAG); case ISD::OR: return lowerOR(Op, DAG); + case ISD::SIGN_EXTEND: + return lowerSIGN_EXTEND(Op, DAG); case ISD::ATOMIC_SWAP: return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); case ISD::ATOMIC_STORE: diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 2caa0bcb6df..197b1da32c2 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -279,6 +279,7 @@ private: SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const; SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; 
SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG, diff --git a/test/CodeGen/SystemZ/shift-10.ll b/test/CodeGen/SystemZ/shift-10.ll index 46ed2180dfd..bf2f0f1776e 100644 --- a/test/CodeGen/SystemZ/shift-10.ll +++ b/test/CodeGen/SystemZ/shift-10.ll @@ -14,13 +14,14 @@ define i64 @f1(i32 %a) { ret i64 %ext } -; ...and again with the highest shift count. +; ...and again with the highest shift count that doesn't reduce to an +; ashr/sext pair. define i64 @f2(i32 %a) { ; CHECK-LABEL: f2: -; CHECK: sllg [[REG:%r[0-5]]], %r2, 32 +; CHECK: sllg [[REG:%r[0-5]]], %r2, 33 ; CHECK: srag %r2, [[REG]], 63 ; CHECK: br %r14 - %shr = lshr i32 %a, 31 + %shr = lshr i32 %a, 30 %trunc = trunc i32 %shr to i1 %ext = sext i1 %trunc to i64 ret i64 %ext @@ -76,3 +77,15 @@ define i64 @f6(i64 %a) { %and = and i64 %shr, 256 ret i64 %and } + +; Test another form of f1. +define i64 @f7(i32 %a) { +; CHECK-LABEL: f7: +; CHECK: sllg [[REG:%r[0-5]]], %r2, 62 +; CHECK: srag %r2, [[REG]], 63 +; CHECK: br %r14 + %1 = shl i32 %a, 30 + %sext = ashr i32 %1, 31 + %ext = sext i32 %sext to i64 + ret i64 %ext +}