[SystemZ] Optimize (sext (ashr (shl ...), ...))

...into (ashr (shl (anyext X), ...), ...), which requires one fewer
instruction.  The (anyext X) can sometimes be simplified too.

I didn't do this in DAGCombiner because widening shifts isn't a win
on all targets.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199114 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Richard Sandiford 2014-01-13 15:17:53 +00:00
parent 1023a9492f
commit 80a3301b37
3 changed files with 52 additions and 3 deletions

View File

@ -209,6 +209,9 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
// Give LowerOperation the chance to replace 64-bit ORs with subregs.
setOperationAction(ISD::OR, MVT::i64, Custom);
// Give LowerOperation the chance to optimize SIGN_EXTEND sequences.
setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
// FIXME: Can we support these natively?
setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
@ -2174,6 +2177,36 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
MVT::i64, HighOp, Low32);
}
// Custom lowering hook for SIGN_EXTEND.  Looks for
//   (sext (ashr (shl X, C1), C2))
// and rewrites it as
//   (ashr (shl (anyext X), C1'), C2')
// where C1' and C2' are C1 and C2 increased by the number of bits that the
// extension adds.  Wider shifts are as cheap as narrower ones, so folding
// the extension into the shift pair saves the separate sext.
//
// Both shift amounts must be constants and both shift nodes must have a
// single use.  When the pattern is not matched a default-constructed
// SDValue is returned.
SDValue SystemZTargetLowering::lowerSIGN_EXTEND(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDValue Sra = Op.getOperand(0);
  EVT WideVT = Op.getValueType();

  // The extended value has to be a single-use arithmetic right shift...
  if (!Sra.hasOneUse() || Sra.getOpcode() != ISD::SRA)
    return SDValue();

  // ...by a constant amount, whose input is a single-use left shift...
  ConstantSDNode *SraCount = dyn_cast<ConstantSDNode>(Sra.getOperand(1));
  SDValue ShlNode = Sra.getOperand(0);
  if (!SraCount || !ShlNode.hasOneUse() || ShlNode.getOpcode() != ISD::SHL)
    return SDValue();

  // ...that is also by a constant amount.
  ConstantSDNode *ShlCount = dyn_cast<ConstantSDNode>(ShlNode.getOperand(1));
  if (!ShlCount)
    return SDValue();

  // Number of bits gained by the extension; both shift counts grow by this
  // much once the shifts are performed in the wider type.
  unsigned WidenedBits = (WideVT.getSizeInBits() -
                          Sra.getValueType().getSizeInBits());
  unsigned WideShlCount = ShlCount->getZExtValue() + WidenedBits;
  unsigned WideSraCount = SraCount->getZExtValue() + WidenedBits;

  // Reuse the type of the original SRA shift-amount operand for both of
  // the new shift-amount constants.
  EVT CountVT = Sra.getOperand(1).getValueType();

  // The bits introduced by the any-extend are shifted out by the widened
  // SHL, so their value does not matter.
  SDValue WideInput = DAG.getNode(ISD::ANY_EXTEND, SDLoc(ShlNode), WideVT,
                                  ShlNode.getOperand(0));
  SDValue WideShlAmt = DAG.getConstant(WideShlCount, CountVT);
  SDValue WideShl = DAG.getNode(ISD::SHL, SDLoc(ShlNode), WideVT, WideInput,
                                WideShlAmt);
  SDValue WideSraAmt = DAG.getConstant(WideSraCount, CountVT);
  return DAG.getNode(ISD::SRA, SDLoc(Sra), WideVT, WideShl, WideSraAmt);
}
// Op is an atomic load. Lower it into a normal volatile load.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
SelectionDAG &DAG) const {
@ -2426,6 +2459,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerUDIVREM(Op, DAG);
case ISD::OR:
return lowerOR(Op, DAG);
case ISD::SIGN_EXTEND:
return lowerSIGN_EXTEND(Op, DAG);
case ISD::ATOMIC_SWAP:
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
case ISD::ATOMIC_STORE:

View File

@ -279,6 +279,7 @@ private:
SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,

View File

@ -14,13 +14,14 @@ define i64 @f1(i32 %a) {
ret i64 %ext
}
; ...and again with the highest shift count.
; ...and again with the highest shift count that doesn't reduce to an
; ashr/sext pair.
define i64 @f2(i32 %a) {
; CHECK-LABEL: f2:
; CHECK: sllg [[REG:%r[0-5]]], %r2, 32
; CHECK: sllg [[REG:%r[0-5]]], %r2, 33
; CHECK: srag %r2, [[REG]], 63
; CHECK: br %r14
%shr = lshr i32 %a, 31
%shr = lshr i32 %a, 30
%trunc = trunc i32 %shr to i1
%ext = sext i1 %trunc to i64
ret i64 %ext
@ -76,3 +77,15 @@ define i64 @f6(i64 %a) {
%and = and i64 %shr, 256
ret i64 %and
}
; Test another form of f1, with the shl/ashr pair written out explicitly
; in i32 ahead of the sign extension to i64.
define i64 @f7(i32 %a) {
; CHECK-LABEL: f7:
; CHECK: sllg [[REG:%r[0-5]]], %r2, 62
; CHECK: srag %r2, [[REG]], 63
; CHECK: br %r14
  %shl = shl i32 %a, 30
  %shr = ashr i32 %shl, 31
  %ext = sext i32 %shr to i64
  ret i64 %ext
}