NOTE(review): line breaks and indentation in this patch were reconstructed
from a whitespace-collapsed copy; hunk line counts were re-checked, but verify
context whitespace against the tree before applying.

NOTE(review): the new (*ext (rot x, y)) folds look unsound unless the bits
above the narrow width are known dead. (shl (zext x), y) keeps bits at
positions >= width(x), while (zext (rotl x, y)) has them clear. Example:
@roldword returns the i64 un-truncated; for x = 0xFFFFFFFF, y = 4 the IR
computes 0xFFFFFFFFF, whereas `roll` yields 0xFFFFFFFF. Confirm (or restrict
the fold to truncated users) before landing.

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0eecd39d247..a565da5299c 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3341,6 +3341,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
   unsigned OpSizeInBits = VT.getSizeInBits();
   SDValue LHSShiftArg = LHSShift.getOperand(0);
   SDValue LHSShiftAmt = LHSShift.getOperand(1);
+  SDValue RHSShiftArg = RHSShift.getOperand(0);
   SDValue RHSShiftAmt = RHSShift.getOperand(1);
 
   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
@@ -3420,10 +3421,27 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
       //   (rotr x, (sub 32, y))
       if (ConstantSDNode *SUBC =
             dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0)))
-        if (SUBC->getAPIntValue() == OpSizeInBits)
+        if (SUBC->getAPIntValue() == OpSizeInBits) {
           return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg,
                              HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
+        } else if (LHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
+                   LHSShiftArg.getOpcode() == ISD::ANY_EXTEND) {
+          // fold (or (shl (*ext x), (*ext y)),
+          //          (srl (*ext x), (*ext (sub 32, y)))) ->
+          //   (*ext (rotl x, y))
+          // fold (or (shl (*ext x), (*ext y)),
+          //          (srl (*ext x), (*ext (sub 32, y)))) ->
+          //   (*ext (rotr x, (sub 32, y)))
+          SDValue LArgExtOp0 = LHSShiftArg.getOperand(0);
+          EVT LArgVT = LArgExtOp0.getValueType();
+          if (LArgVT.getSizeInBits() == SUBC->getAPIntValue()) {
+            SDValue V = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, LArgVT,
+                                    LArgExtOp0,
+                                    HasROTL ? LHSShiftAmt : RHSShiftAmt);
+            return DAG.getNode(LHSShiftArg.getOpcode(), DL, VT, V).getNode();
+          }
+        }
     } else if (LExtOp0.getOpcode() == ISD::SUB &&
                RExtOp0 == LExtOp0.getOperand(1)) {
       // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
@@ -3432,10 +3450,27 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
       //   (rotl x, (sub 32, y))
       if (ConstantSDNode *SUBC =
             dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0)))
-        if (SUBC->getAPIntValue() == OpSizeInBits)
+        if (SUBC->getAPIntValue() == OpSizeInBits) {
           return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
                              LHSShiftArg,
                              HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
+        } else if (RHSShiftArg.getOpcode() == ISD::ZERO_EXTEND ||
+                   RHSShiftArg.getOpcode() == ISD::ANY_EXTEND) {
+          // fold (or (shl (*ext x), (*ext (sub 32, y))),
+          //          (srl (*ext x), (*ext y))) ->
+          //   (*ext (rotl x, y))
+          // fold (or (shl (*ext x), (*ext (sub 32, y))),
+          //          (srl (*ext x), (*ext y))) ->
+          //   (*ext (rotr x, (sub 32, y)))
+          SDValue RArgExtOp0 = RHSShiftArg.getOperand(0);
+          EVT RArgVT = RArgExtOp0.getValueType();
+          if (RArgVT.getSizeInBits() == SUBC->getAPIntValue()) {
+            SDValue V = DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, RArgVT,
+                                    RArgExtOp0,
+                                    HasROTR ? RHSShiftAmt : LHSShiftAmt);
+            return DAG.getNode(RHSShiftArg.getOpcode(), DL, VT, V).getNode();
+          }
+        }
     }
   }
 
diff --git a/test/CodeGen/X86/rotate3.ll b/test/CodeGen/X86/rotate3.ll
new file mode 100644
index 00000000000..b92f7c2a60a
--- /dev/null
+++ b/test/CodeGen/X86/rotate3.ll
@@ -0,0 +1,76 @@
+; Check that (or (shl x, y), (srl x, (sub 32, y))) is folded into (rotl x, y)
+; and (or (shl x, (sub 32, y)), (srl x, y)) into (rotr x, y) even if the
+; argument is zero extended. Fix for PR16726.
+
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+define zeroext i8 @rolbyte(i32 %nBits_arg, i8 %x_arg) nounwind readnone {
+entry:
+  %tmp1 = zext i8 %x_arg to i32
+  %tmp3 = shl i32 %tmp1, %nBits_arg
+  %tmp8 = sub i32 8, %nBits_arg
+  %tmp10 = lshr i32 %tmp1, %tmp8
+  %tmp11 = or i32 %tmp3, %tmp10
+  %tmp12 = trunc i32 %tmp11 to i8
+  ret i8 %tmp12
+}
+; CHECK: rolb %cl, %{{[a-z0-9]+}}
+
+
+define zeroext i8 @rorbyte(i32 %nBits_arg, i8 %x_arg) nounwind readnone {
+entry:
+  %tmp1 = zext i8 %x_arg to i32
+  %tmp3 = lshr i32 %tmp1, %nBits_arg
+  %tmp8 = sub i32 8, %nBits_arg
+  %tmp10 = shl i32 %tmp1, %tmp8
+  %tmp11 = or i32 %tmp3, %tmp10
+  %tmp12 = trunc i32 %tmp11 to i8
+  ret i8 %tmp12
+}
+; CHECK: rorb %cl, %{{[a-z0-9]+}}
+
+define zeroext i16 @rolword(i32 %nBits_arg, i16 %x_arg) nounwind readnone {
+entry:
+  %tmp1 = zext i16 %x_arg to i32
+  %tmp3 = shl i32 %tmp1, %nBits_arg
+  %tmp8 = sub i32 16, %nBits_arg
+  %tmp10 = lshr i32 %tmp1, %tmp8
+  %tmp11 = or i32 %tmp3, %tmp10
+  %tmp12 = trunc i32 %tmp11 to i16
+  ret i16 %tmp12
+}
+; CHECK: rolw %cl, %{{[a-z0-9]+}}
+
+define zeroext i16 @rorword(i32 %nBits_arg, i16 %x_arg) nounwind readnone {
+entry:
+  %tmp1 = zext i16 %x_arg to i32
+  %tmp3 = lshr i32 %tmp1, %nBits_arg
+  %tmp8 = sub i32 16, %nBits_arg
+  %tmp10 = shl i32 %tmp1, %tmp8
+  %tmp11 = or i32 %tmp3, %tmp10
+  %tmp12 = trunc i32 %tmp11 to i16
+  ret i16 %tmp12
+}
+; CHECK: rorw %cl, %{{[a-z0-9]+}}
+
+define i64 @roldword(i64 %nBits_arg, i32 %x_arg) nounwind readnone {
+entry:
+  %tmp1 = zext i32 %x_arg to i64
+  %tmp3 = shl i64 %tmp1, %nBits_arg
+  %tmp8 = sub i64 32, %nBits_arg
+  %tmp10 = lshr i64 %tmp1, %tmp8
+  %tmp11 = or i64 %tmp3, %tmp10
+  ret i64 %tmp11
+}
+; CHECK: roll %cl, %{{[a-z0-9]+}}
+
+define zeroext i64 @rordword(i64 %nBits_arg, i32 %x_arg) nounwind readnone {
+entry:
+  %tmp1 = zext i32 %x_arg to i64
+  %tmp3 = lshr i64 %tmp1, %nBits_arg
+  %tmp8 = sub i64 32, %nBits_arg
+  %tmp10 = shl i64 %tmp1, %tmp8
+  %tmp11 = or i64 %tmp3, %tmp10
+  ret i64 %tmp11
+}
+; CHECK: rorl %cl, %{{[a-z0-9]+}}