diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 17631c0c9d9..9d6a3b40b91 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -635,6 +635,31 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
                           TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(),
                                           NarrowShl));
         }
+        // Repeat the SHL optimization above in cases where an extension
+        // intervenes: (shl (anyext (shr x, c1)), c2) to
+        // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
+        // aren't demanded (as above) and that the shifted upper c1 bits of
+        // x aren't demanded.
+        if (InOp.hasOneUse() &&
+            InnerOp.getOpcode() == ISD::SRL &&
+            InnerOp.hasOneUse() &&
+            isa<ConstantSDNode>(InnerOp.getOperand(1))) {
+          uint64_t InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1))
+            ->getZExtValue();
+          if (InnerShAmt < ShAmt &&
+              InnerShAmt < InnerBits &&
+              NewMask.lshr(InnerBits - InnerShAmt + ShAmt) == 0 &&
+              NewMask.trunc(ShAmt) == 0) {
+            SDValue NewSA =
+              TLO.DAG.getConstant(ShAmt - InnerShAmt,
+                                  Op.getOperand(1).getValueType());
+            EVT VT = Op.getValueType();
+            SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
+                                             InnerOp.getOperand(0));
+            return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
+                                                     NewExt, NewSA));
+          }
+        }
       }
 
       KnownZero <<= SA->getZExtValue();
diff --git a/test/CodeGen/SystemZ/shift-10.ll b/test/CodeGen/SystemZ/shift-10.ll
new file mode 100644
index 00000000000..47cd0027f14
--- /dev/null
+++ b/test/CodeGen/SystemZ/shift-10.ll
@@ -0,0 +1,67 @@
+; Test compound shifts.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; Test a shift right followed by a sign extension. This can use two shifts.
+define i64 @f1(i32 %a) {
+; CHECK-LABEL: f1:
+; CHECK: sllg [[REG:%r[0-5]]], %r2, 62
+; CHECK: srag %r2, [[REG]], 63
+; CHECK: br %r14
+  %shr = lshr i32 %a, 1
+  %trunc = trunc i32 %shr to i1
+  %ext = sext i1 %trunc to i64
+  ret i64 %ext
+}
+
+; ...and again with the highest shift count.
+define i64 @f2(i32 %a) {
+; CHECK-LABEL: f2:
+; CHECK: sllg [[REG:%r[0-5]]], %r2, 32
+; CHECK: srag %r2, [[REG]], 63
+; CHECK: br %r14
+  %shr = lshr i32 %a, 31
+  %trunc = trunc i32 %shr to i1
+  %ext = sext i1 %trunc to i64
+  ret i64 %ext
+}
+
+; Test a left shift of an extended right shift in a case where folding
+; is possible.
+define i64 @f3(i32 %a) {
+; CHECK-LABEL: f3:
+; CHECK: risbg %r2, %r2, 27, 181, 9
+; CHECK: br %r14
+  %shr = lshr i32 %a, 1
+  %ext = zext i32 %shr to i64
+  %shl = shl i64 %ext, 10
+  %and = and i64 %shl, 137438952960
+  ret i64 %and
+}
+
+; ...and again with a larger right shift.
+define i64 @f4(i32 %a) {
+; CHECK-LABEL: f4:
+; CHECK: risbg %r2, %r2, 30, 158, 3
+; CHECK: br %r14
+  %shr = lshr i32 %a, 30
+  %ext = sext i32 %shr to i64
+  %shl = shl i64 %ext, 33
+  %and = and i64 %shl, 8589934592
+  ret i64 %and
+}
+
+; Repeat the previous test in a case where all bits outside the
+; bottom 3 matter. FIXME: can still use RISBG here.
+define i64 @f5(i32 %a) {
+; CHECK-LABEL: f5:
+; CHECK: srl %r2, 30
+; CHECK: sllg %r2, %r2, 33
+; CHECK: lhi %r2, 7
+; CHECK: br %r14
+  %shr = lshr i32 %a, 30
+  %ext = sext i32 %shr to i64
+  %shl = shl i64 %ext, 33
+  %or = or i64 %shl, 7
+  ret i64 %or
+}
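
For illustration only (not part of the patch): a minimal standalone IR sketch of the shape the new fold targets, analogous to f3 above but with hypothetical constants chosen to satisfy the fold's conditions. With c1 = 2, c2 = 8, and only bits 8-37 of the result demanded, the zero extension can be relaxed to an any-extension and the shift pair folded into a single left shift by c2 - c1 = 6 of the extended input.

; Hypothetical example: (shl (zext (lshr %a, 2)), 8) masked to bits 8..37
; can be rewritten as (shl (anyext %a), 6), since neither the low 8 bits
; nor any bit above bit 37 of the result is demanded.
define i64 @example(i32 %a) {
  %shr = lshr i32 %a, 2
  %ext = zext i32 %shr to i64
  %shl = shl i64 %ext, 8
  %and = and i64 %shl, 274877906688    ; bits 8..37
  ret i64 %and
}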