diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td index ec2f663908f..047eeb4f1b7 100644 --- a/lib/Target/CellSPU/SPUCallingConv.td +++ b/lib/Target/CellSPU/SPUCallingConv.td @@ -37,7 +37,7 @@ def RetCC_SPU : CallingConv<[ //===----------------------------------------------------------------------===// def CCC_SPU : CallingConv<[ CCIfType<[i8, i16, i32, i64, i128, f32, f64, - v16i8, v8i16, v4i32, v4f32, v2i64, v2f64], + v16i8, v8i16, v4i32, v4f32, v2i64, v2f64, v2i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index ece19b9b89f..bcde5794af8 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -1067,6 +1067,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, case MVT::v4i32: case MVT::v8i16: case MVT::v16i8: + case MVT::v2i32: ArgRegClass = &SPU::VECREGRegClass; break; } @@ -1622,8 +1623,7 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T); } case MVT::v2i32: { - SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T); + return SDValue(); } case MVT::v2i64: { return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl); @@ -1768,6 +1768,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { } else if (EltVT == MVT::i16) { V2EltIdx0 = 8; maskVT = MVT::v8i16; + } else if (VecVT == MVT::v2i32 || VecVT == MVT::v2f32 ) { + V2EltIdx0 = 2; + maskVT = MVT::v4i32; } else if (EltVT == MVT::i32 || EltVT == MVT::f32) { V2EltIdx0 = 4; maskVT = MVT::v4i32; @@ -1847,6 +1850,15 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { for (unsigned j = 0; j < BytesPerElement; ++j) 
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8)); } + // For half vectors pad the mask with zeros for the second half. + // This is needed because mask is assumed to be full vector elsewhere in + // the SPU backend. + if(VecVT == MVT::v2i32 || VecVT == MVT::v2f32) + for( unsigned i = 0; i < 2; ++i ) + { + for (unsigned j = 0; j < BytesPerElement; ++j) + ResultMask.push_back(DAG.getConstant(0,MVT::i8)); + } SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, &ResultMask[0], ResultMask.size()); @@ -1877,6 +1889,7 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { case MVT::v4f32: n_copies = 4; VT = MVT::f32; break; case MVT::v2i64: n_copies = 2; VT = MVT::i64; break; case MVT::v2f64: n_copies = 2; VT = MVT::f64; break; + case MVT::v2i32: n_copies = 2; VT = MVT::i32; break; } SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT); @@ -1997,7 +2010,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // Variable index: Rotate the requested element into slot 0, then replicate // slot 0 across the vector EVT VecVT = N.getValueType(); - if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) { + if (!VecVT.isSimple() || !VecVT.isVector()) { report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit" "vector type!"); } diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index a7fb14c26a7..bc9668a8097 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -607,7 +607,7 @@ class ARegInst: multiclass AddInstruction { def v4i32: AVecInst; def v16i8: AVecInst; - + def v2i32: AVecInst; def r32: ARegInst; } @@ -672,6 +672,12 @@ def SFvec : RRForm<0b00000010000, (outs VECREG:$rT), "sf\t$rT, $rA, $rB", IntegerOp, [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>; +def SF2vec : RRForm<0b00000010000, (outs VECREG:$rT), + (ins VECREG:$rA, VECREG:$rB), + "sf\t$rT, $rA, $rB", 
IntegerOp, + [(set (v2i32 VECREG:$rT), (sub (v2i32 VECREG:$rB), (v2i32 VECREG:$rA)))]>; + + def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), "sf\t$rT, $rA, $rB", IntegerOp, [(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>; @@ -829,6 +835,10 @@ def MPYUv4i32: MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), [/* no pattern */]>; +def MPYUv2i32: + MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [/* no pattern */]>; + def MPYUr16: MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB), [(set R32C:$rT, (mul (zext R16C:$rA), (zext R16C:$rB)))]>; @@ -908,6 +918,10 @@ def MPYHv4i32: MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), [/* no pattern */]>; +def MPYHv2i32: + MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [/* no pattern */]>; + def MPYHr32: MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), [/* no pattern */]>; @@ -1561,6 +1575,9 @@ def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)), def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)), (ORv4i32_i32 R32C:$rA)>; +def : Pat<(v2i32 (SPUprefslot2vec R32C:$rA)), + (ORv4i32_i32 R32C:$rA)>; + def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)), (ORv2i64_i64 R64C:$rA)>; @@ -1582,6 +1599,9 @@ def : Pat<(SPUvec2prefslot (v8i16 VECREG:$rA)), def : Pat<(SPUvec2prefslot (v4i32 VECREG:$rA)), (ORi32_v4i32 VECREG:$rA)>; +def : Pat<(SPUvec2prefslot (v2i32 VECREG:$rA)), + (ORi32_v4i32 VECREG:$rA)>; + def : Pat<(SPUvec2prefslot (v2i64 VECREG:$rA)), (ORi64_v2i64 VECREG:$rA)>; @@ -2123,6 +2143,8 @@ multiclass ShuffleBytes def v8i16_m32 : SHUFBVecInst; def v4i32 : SHUFBVecInst; def v4i32_m32 : SHUFBVecInst; + def v2i32 : SHUFBVecInst; + def v2i32_m32 : SHUFBVecInst; def v2i64 : SHUFBVecInst; def v2i64_m32 : SHUFBVecInst; diff --git a/lib/Target/CellSPU/SPUMathInstr.td b/lib/Target/CellSPU/SPUMathInstr.td index ed7129e3329..7205593b040 100644 --- a/lib/Target/CellSPU/SPUMathInstr.td +++ b/lib/Target/CellSPU/SPUMathInstr.td @@ -39,7 +39,7 @@ def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 
VECREG:$rB)), (FSMBIv8i16 0xcccc))>; //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ -// v4i32, i32 multiply instruction sequence: +// v4i32, v2i32, i32 multiply instruction sequence: //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ def MPYv4i32: @@ -49,6 +49,14 @@ def MPYv4i32: (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))), (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>; +def MPYv2i32: + Pat<(mul (v2i32 VECREG:$rA), (v2i32 VECREG:$rB)), + (Av2i32 + (v2i32 (Av2i32 (v2i32 (MPYHv2i32 VECREG:$rA, VECREG:$rB)), + (v2i32 (MPYHv2i32 VECREG:$rB, VECREG:$rA)))), + (v2i32 (MPYUv2i32 VECREG:$rA, VECREG:$rB)))>; + + def MPYi32: Pat<(mul R32C:$rA, R32C:$rB), (Ar32 diff --git a/test/CodeGen/CellSPU/v2i32.ll b/test/CodeGen/CellSPU/v2i32.ll new file mode 100644 index 00000000000..be3822a8d0c --- /dev/null +++ b/test/CodeGen/CellSPU/v2i32.ll @@ -0,0 +1,57 @@ +;RUN: llc --march=cellspu %s -o - | FileCheck %s +%vec = type <2 x i32> + +define %vec @test_ret(%vec %param) +{ +;CHECK: bi $lr + ret %vec %param +} + +define %vec @test_add(%vec %param) +{ +;CHECK: a $3, $3, $3 + %1 = add %vec %param, %param +;CHECK: bi $lr + ret %vec %1 +} + +define %vec @test_sub(%vec %param) +{ +;CHECK: sf $3, $4, $3 + %1 = sub %vec %param, + +;CHECK: bi $lr + ret %vec %1 +} + +define %vec @test_mul(%vec %param) +{ +;CHECK: mpyu +;CHECK: mpyh +;CHECK: a +;CHECK: a $3 + %1 = mul %vec %param, %param + +;CHECK: bi $lr + ret %vec %1 +} + +define <2 x i32> @test_splat(i32 %param ) { +;TODO insertelement transforms to a PREFSLOT2VEC, that transforms to the +; somewhat redundant (NOTE(review): the directive below has no ':', so FileCheck ignores it - confirm intent): +;CHECK-NOT or $3, $3, $3 +;CHECK: lqa +;CHECK: shufb + %sv = insertelement <1 x i32> undef, i32 %param, i32 0 + %rv = shufflevector <1 x i32> %sv, <1 x i32> undef, <2 x i32> zeroinitializer +;CHECK: bi $lr + ret <2 x i32> %rv +} + +define i32 @test_extract() { +;CHECK: shufb $3 + %rv = extractelement <2 x i32> zeroinitializer, i32 undef ; [#uses=1] +;CHECK: bi $lr + ret i32 %rv +} +