diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index de6bbe396e5..2dade858326 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -61,6 +61,8 @@ class VectorLegalizer { // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if // SINT_TO_FLOAT and SHR on vectors isn't legal. SDValue ExpandUINT_TO_FLOAT(SDValue Op); + // Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. + SDValue ExpandSEXTINREG(SDValue Op); // Implement vselect in terms of XOR, AND, OR when blend is not supported // by the target. SDValue ExpandVSELECT(SDValue Op); @@ -262,7 +264,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { // FALL THROUGH } case TargetLowering::Expand: - if (Node->getOpcode() == ISD::VSELECT) + if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG) + Result = ExpandSEXTINREG(Op); + else if (Node->getOpcode() == ISD::VSELECT) Result = ExpandVSELECT(Op); else if (Node->getOpcode() == ISD::SELECT) Result = ExpandSELECT(Op); @@ -501,6 +505,26 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); } +SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { + EVT VT = Op.getValueType(); + + // Make sure that the SRA and SRL instructions are available. + if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); + + DebugLoc DL = Op.getDebugLoc(); + EVT OrigTy = cast(Op->getOperand(1))->getVT(); + + unsigned BW = VT.getScalarType().getSizeInBits(); + unsigned OrigBW = OrigTy.getScalarType().getSizeInBits(); + SDValue ShiftSz = DAG.getConstant(BW - OrigBW, VT); + + Op = Op.getOperand(0); + Op = DAG.getNode(ISD::SRL, DL, VT, Op, ShiftSz); + return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); +} + SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. diff --git a/test/CodeGen/PowerPC/vec_extload.ll b/test/CodeGen/PowerPC/vec_extload.ll index 15a3f9f2959..42334d7030d 100644 --- a/test/CodeGen/PowerPC/vec_extload.ll +++ b/test/CodeGen/PowerPC/vec_extload.ll @@ -15,55 +15,9 @@ define <16 x i8> @v16si8_sext_in_reg(<16 x i8> %a) { ret <16 x i8> %c } ; CHECK: v16si8_sext_in_reg: -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lbz -; CHECK: stb -; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vsrb +; CHECK: vsrab +; CHECK: blr ; The zero extend uses a more clever logic: a vector splat ; and a logic and to set higher bits to 0. @@ -83,31 +37,9 @@ define <8 x i16> @v8si16_sext_in_reg(<8 x i16> %a) { ret <8 x i16> %c } ; CHECK: v8si16_sext_in_reg: -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lhz -; CHECK: sth -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lhz -; CHECK: sth -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lhz -; CHECK: sth -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lhz -; CHECK: sth -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lhz -; CHECK: sth -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lhz -; CHECK: sth -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lhz -; CHECK: sth -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lhz -; CHECK: sth -; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vsrh +; CHECK: vsrah +; CHECK: blr ; Same as v8si16_sext_in_reg, but instead of creating the mask ; with a splat, loads it from memory. @@ -129,19 +61,9 @@ define <4 x i32> @v4si32_sext_in_reg(<4 x i32> %a) { ret <4 x i32> %c } ; CHECK: v4si32_sext_in_reg: -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lha -; CHECK: stw -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lha -; CHECK: stw -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lha -; CHECK: stw -; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} -; CHECK: lha -; CHECK: stw -; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}} +; CHECK: vsrw +; CHECK: vsraw +; CHECK: blr ; Same as v8si16_sext_in_reg. define <4 x i32> @v4si32_zext_in_reg(<4 x i32> %a) {