PPC: Implement efficient lowering of sign_extend_inreg.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172269 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Nadav Rotem 2013-01-11 22:57:48 +00:00
parent 9b1bb05386
commit 66de2af815
2 changed files with 34 additions and 88 deletions

View File

@ -61,6 +61,8 @@ class VectorLegalizer {
// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
// SINT_TO_FLOAT and SHR on vectors isn't legal. // SINT_TO_FLOAT and SHR on vectors isn't legal.
SDValue ExpandUINT_TO_FLOAT(SDValue Op); SDValue ExpandUINT_TO_FLOAT(SDValue Op);
// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
SDValue ExpandSEXTINREG(SDValue Op);
// Implement vselect in terms of XOR, AND, OR when blend is not supported // Implement vselect in terms of XOR, AND, OR when blend is not supported
// by the target. // by the target.
SDValue ExpandVSELECT(SDValue Op); SDValue ExpandVSELECT(SDValue Op);
@ -262,7 +264,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
// FALL THROUGH // FALL THROUGH
} }
case TargetLowering::Expand: case TargetLowering::Expand:
if (Node->getOpcode() == ISD::VSELECT) if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG)
Result = ExpandSEXTINREG(Op);
else if (Node->getOpcode() == ISD::VSELECT)
Result = ExpandVSELECT(Op); Result = ExpandVSELECT(Op);
else if (Node->getOpcode() == ISD::SELECT) else if (Node->getOpcode() == ISD::SELECT)
Result = ExpandSELECT(Op); Result = ExpandSELECT(Op);
@ -501,6 +505,26 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
} }
// Expand a vector SIGN_EXTEND_INREG node into a pair of vector shifts.
//
// Operand 0 is the value to extend; operand 1 is a VTSDNode naming the
// narrower type whose sign bit must be replicated into the upper bits of
// every lane. With BW the lane width and OrigBW the width of the narrow
// type, the expansion is:
//
//   (sra (shl X, BW - OrigBW), BW - OrigBW)
//
// The left shift moves the narrow value's sign bit into the lane's most
// significant bit; the arithmetic right shift then moves the value back
// while filling the upper bits with copies of that sign bit.
//
// Note: the first shift must be SHL. A logical right shift (SRL) would throw
// away the low OrigBW bits -- the very bits being extended -- and zero the
// top of the lane, so the following SRA could never reproduce the sign.
//
// If the target marks either shift as Expand for this vector type, the node
// is handed to DAG.UnrollVectorOp instead.
SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
  EVT VT = Op.getValueType();

  // Make sure that the SRA and SHL instructions are available.
  if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
    return DAG.UnrollVectorOp(Op.getNode());

  DebugLoc DL = Op.getDebugLoc();
  EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();

  // Number of bits between the narrow type's sign bit and the lane's MSB.
  unsigned BW = VT.getScalarType().getSizeInBits();
  unsigned OrigBW = OrigTy.getScalarType().getSizeInBits();
  SDValue ShiftSz = DAG.getConstant(BW - OrigBW, VT);

  Op = Op.getOperand(0);
  Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
  return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
}
SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// Implement VSELECT in terms of XOR, AND, OR // Implement VSELECT in terms of XOR, AND, OR
// on platforms which do not support blend natively. // on platforms which do not support blend natively.

View File

@ -15,55 +15,9 @@ define <16 x i8> @v16si8_sext_in_reg(<16 x i8> %a) {
ret <16 x i8> %c ret <16 x i8> %c
} }
; CHECK: v16si8_sext_in_reg: ; CHECK: v16si8_sext_in_reg:
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} ; CHECK: vsrb
; CHECK: lbz ; CHECK: vsrab
; CHECK: stb ; CHECK: blr
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lbz
; CHECK: stb
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lbz
; CHECK: stb
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lbz
; CHECK: stb
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lbz
; CHECK: stb
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lbz
; CHECK: stb
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lbz
; CHECK: stb
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lbz
; CHECK: stb
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lbz
; CHECK: stb
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lbz
; CHECK: stb
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lbz
; CHECK: stb
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lbz
; CHECK: stb
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lbz
; CHECK: stb
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lbz
; CHECK: stb
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lbz
; CHECK: stb
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lbz
; CHECK: stb
; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
; The zero extend uses a more clever logic: a vector splat ; The zero extend uses a more clever logic: a vector splat
; and a logic and to set higher bits to 0. ; and a logic and to set higher bits to 0.
@ -83,31 +37,9 @@ define <8 x i16> @v8si16_sext_in_reg(<8 x i16> %a) {
ret <8 x i16> %c ret <8 x i16> %c
} }
; CHECK: v8si16_sext_in_reg: ; CHECK: v8si16_sext_in_reg:
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} ; CHECK: vsrh
; CHECK: lhz ; CHECK: vsrah
; CHECK: sth ; CHECK: blr
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lhz
; CHECK: sth
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lhz
; CHECK: sth
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lhz
; CHECK: sth
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lhz
; CHECK: sth
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lhz
; CHECK: sth
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lhz
; CHECK: sth
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lhz
; CHECK: sth
; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
; Same as v8si16_sext_in_reg, but instead of creating the mask ; Same as v8si16_sext_in_reg, but instead of creating the mask
; with a splat, loads it from memory. ; with a splat, loads it from memory.
@ -129,19 +61,9 @@ define <4 x i32> @v4si32_sext_in_reg(<4 x i32> %a) {
ret <4 x i32> %c ret <4 x i32> %c
} }
; CHECK: v4si32_sext_in_reg: ; CHECK: v4si32_sext_in_reg:
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}} ; CHECK: vsrw
; CHECK: lha ; CHECK: vsraw
; CHECK: stw ; CHECK: blr
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lha
; CHECK: stw
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lha
; CHECK: stw
; CHECK: stvx 2, {{[0-9]+}}, {{[0-9]+}}
; CHECK: lha
; CHECK: stw
; CHECK: lvx 2, {{[0-9]+}}, {{[0-9]+}}
; Same as v8si16_sext_in_reg. ; Same as v8si16_sext_in_reg.
define <4 x i32> @v4si32_zext_in_reg(<4 x i32> %a) { define <4 x i32> @v4si32_zext_in_reg(<4 x i32> %a) {