ARM: turn rev + lsr #16 into rev16 when the upper half of the input is zero.

Summary of the changes below:
  * ARMTargetLowering constructor: registers ISD::SRL as a target DAG combine
    when the subtarget has v6 ops.
  * PerformShiftCombine: for an i32 ISD::SRL on a v6 subtarget, rewrites
    (srl (bswap x), 16) as (rotr (bswap x), 16) when the high 16 bits of x
    are known to be zero and the shift amount is the constant 16.
  * test/CodeGen/ARM/rev.ll: test9 now expects a single rev16 in place of
    rev followed by lsr #16.

NOTE(review): this copy was rebuilt from a whitespace-collapsed version of
the patch; the indentation of context lines is reconstructed and should be
checked against the tree before applying.

diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index b0960c5baa3..8a02162665f 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -800,6 +800,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setTargetDAGCombine(ISD::XOR);
   }
 
+  if (Subtarget->hasV6Ops())
+    setTargetDAGCombine(ISD::SRL);
+
   setStackPointerRegisterToSaveRestore(ARM::SP);
 
   if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
@@ -7964,6 +7967,18 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
 static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
                                    const ARMSubtarget *ST) {
   EVT VT = N->getValueType(0);
+  if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
+    // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
+    // 16 bits of x are zero. This optimizes rev + lsr 16 to rev16.
+    SDValue N1 = N->getOperand(1);
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+      SDValue N0 = N->getOperand(0);
+      if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
+          DAG.MaskedValueIsZero(N0.getOperand(0),
+                                APInt::getHighBitsSet(32, 16)))
+        return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, N0, N1);
+    }
+  }
 
   // Nothing to be done for scalar shifts.
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll
index ea44c28fb70..6bb67431198 100644
--- a/test/CodeGen/ARM/rev.ll
+++ b/test/CodeGen/ARM/rev.ll
@@ -112,11 +112,11 @@ entry:
   ret i32 %conv3
 }
 
+; rdar://10750814
 define zeroext i16 @test9(i16 zeroext %v) nounwind readnone {
 entry:
 ; CHECK: test9
-; CHECK: rev r0, r0
-; CHECK: lsr r0, r0, #16
+; CHECK: rev16 r0, r0
   %conv = zext i16 %v to i32
   %shr4 = lshr i32 %conv, 8
   %shl = shl nuw nsw i32 %conv, 8