From 99534bb81a0d945edd61d4db17549bde8449f94c Mon Sep 17 00:00:00 2001 From: Kalle Raiskila Date: Mon, 9 Aug 2010 16:33:00 +0000 Subject: [PATCH] Have SPU handle halfvec stores aligned by 8 bytes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@110576 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/CellSPU/SPUISelLowering.cpp | 35 +++++++++++++++++++------- lib/Target/CellSPU/SPUISelLowering.h | 2 ++ lib/Target/CellSPU/SPUInstrInfo.td | 22 ++++++++++++++++ lib/Target/CellSPU/SPUNodes.td | 6 +++++ lib/Target/CellSPU/SPURegisterInfo.cpp | 1 + test/CodeGen/CellSPU/bigstack.ll | 6 ++--- test/CodeGen/CellSPU/v2f32.ll | 12 +++++++++ 7 files changed, 72 insertions(+), 12 deletions(-) diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 7a6bb0f6efe..1dc969b4b69 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -470,6 +470,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::FDIV, MVT::v4f32, Legal); + setOperationAction(ISD::STORE, MVT::v2i32, Custom); + setOperationAction(ISD::STORE, MVT::v2f32, Custom); + setShiftAmountType(MVT::i32); setBooleanContents(ZeroOrNegativeOneBooleanContent); @@ -518,6 +521,8 @@ SPUTargetLowering::getTargetNodeName(unsigned Opcode) const node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER"; node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER"; node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER"; + node_names[(unsigned) SPUISD::HALF2VEC] = "SPUISD::HALF2VEC"; + node_names[(unsigned) SPUISD::VEC2HALF] = "SPUISD::VEC2HALF"; } std::map::iterator i = node_names.find(Opcode); @@ -738,12 +743,14 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); DebugLoc dl = Op.getDebugLoc(); unsigned alignment = SN->getAlignment(); + const bool isVec = VT.isVector(); + EVT eltTy = isVec ? VT.getVectorElementType(): VT; switch (SN->getAddressingMode()) { case ISD::UNINDEXED: { // The vector type we really want to load from the 16-byte chunk. EVT vecVT = EVT::getVectorVT(*DAG.getContext(), - VT, (128 / VT.getSizeInBits())); + eltTy, (128 / eltTy.getSizeInBits())); SDValue alignLoadVec; SDValue basePtr = SN->getBasePtr(); @@ -752,7 +759,6 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { if (alignment == 16) { ConstantSDNode *CN; - // Special cases for a known aligned load to simplify the base pointer // and insertion byte: if (basePtr.getOpcode() == ISD::ADD @@ -776,6 +782,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, basePtr, DAG.getConstant(0, PtrVT)); + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); } } else { // Unaligned load: must be more pessimistic about addressing modes: @@ -812,8 +821,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { DAG.getConstant(0, PtrVT)); } - // Re-emit as a v16i8 vector load - alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr, + // Load the memory to which to store. + alignLoadVec = DAG.getLoad(vecVT, dl, the_chain, basePtr, SN->getSrcValue(), SN->getSrcValueOffset(), SN->isVolatile(), SN->isNonTemporal(), 16); @@ -844,11 +853,19 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { } #endif - SDValue insertEltOp = - DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs); - SDValue vectorizeOp = - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue); - + SDValue insertEltOp; + SDValue vectorizeOp; + if (isVec) + { + // FIXME: this works only if the vector is 64bit! + insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v2i64, insertEltOffs); + vectorizeOp = DAG.getNode(SPUISD::HALF2VEC, dl, vecVT, theValue); + } + else + { + insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs); + vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue); + } result = DAG.getNode(SPUISD::SHUFB, dl, vecVT, vectorizeOp, alignLoadVec, DAG.getNode(ISD::BIT_CONVERT, dl, diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index 6d3c90b7512..4022679c9ec 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -54,6 +54,8 @@ namespace llvm { ADD64_MARKER, ///< i64 addition marker SUB64_MARKER, ///< i64 subtraction marker MUL64_MARKER, ///< i64 multiply marker + HALF2VEC, ///< Promote 64 bit vector to 128 bits + VEC2HALF, ///< Extract first 64 bits from 128 bit vector LAST_SPUISD ///< Last user-defined instruction }; } diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index f1d08867180..ff776fbe231 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -1468,6 +1468,9 @@ class ORCvtGPRCVec: class ORCvtVecGPRC: ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>; +class ORCvtVecVec: + ORCvtForm<(outs VECREG:$rT), (ins VECREG:$rA)>; + multiclass BitwiseOr { def v16i8: ORVecInst; @@ -1514,6 +1517,13 @@ multiclass BitwiseOr def f32_v4f32: ORExtractElt; def f64_v2f64: ORExtractElt; + // half <-> full vector mappings + def v2i32_v4i32: ORCvtVecVec; + def v4i32_v2i32: ORCvtVecVec; + def v2f32_v4f32: ORCvtVecVec; + def v4f32_v2f32: ORCvtVecVec; + + // Conversion from vector to GPRC def i128_vec: ORCvtVecGPRC; @@ -1623,6 +1633,18 @@ def : Pat<(SPUvec2prefslot (v2f32 VECREG:$rA)), def : Pat<(SPUvec2prefslot (v2f64 VECREG:$rA)), (ORf64_v2f64 VECREG:$rA)>; +// Conversions between 64 bit and 128 bit vectors. + +def : Pat<(v4i32 (SPUhalf2vec (v2i32 VECREG:$rA))), + (ORv4i32_v2i32 (v2i32 VECREG:$rA))>; +def : Pat<(v4f32 (SPUhalf2vec (v2f32 VECREG:$rA))), + (ORv4f32_v2f32 (v2f32 VECREG:$rA))>; + +def : Pat<(v2i32 (SPUvec2half (v4i32 VECREG:$rA))), + (ORv2i32_v4i32 VECREG:$rA)>; +def : Pat<(v2f32 (SPUvec2half (v4f32 VECREG:$rA))), + (ORv2f32_v4f32 VECREG:$rA)>; + // Load Register: This is an assembler alias for a bitwise OR of a register // against itself. It's here because it brings some clarity to assembly // language output. diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td index 647da3051d3..dbacfc856d4 100644 --- a/lib/Target/CellSPU/SPUNodes.td +++ b/lib/Target/CellSPU/SPUNodes.td @@ -117,6 +117,12 @@ def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>; def SPU_vec_demote : SDTypeProfile<1, 1, []>; def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>; +def SPU_half_2_vec : SDTypeProfile<1, 1, []>; +def SPUhalf2vec: SDNode<"SPUISD::HALF2VEC", SPU_half_2_vec, []>; + +def SPU_vec_2_half : SDTypeProfile<1, 1, []>; +def SPUvec2half: SDNode<"SPUISD::VEC2HALF", SPU_vec_2_half, []>; + // Address high and low components, used for [r+r] type addressing def SPUhi : SDNode<"SPUISD::Hi", SDTIntBinOp, []>; def SPUlo : SDNode<"SPUISD::Lo", SDTIntBinOp, []>; diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index a98c1a14ac8..9927d55af2e 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -587,6 +587,7 @@ SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const case SPU::LQDr32: return SPU::LQXr32; case SPU::LQDr128: return SPU::LQXr128; case SPU::LQDv16i8: return SPU::LQXv16i8; + case SPU::LQDv4i32: return SPU::LQXv4i32; case SPU::LQDv4f32: return SPU::LQXv4f32; case SPU::STQDr32: return SPU::STQXr32; case SPU::STQDr128: return SPU::STQXr128; diff --git a/test/CodeGen/CellSPU/bigstack.ll b/test/CodeGen/CellSPU/bigstack.ll index 5483f463732..63293e2aecb 100644 --- a/test/CodeGen/CellSPU/bigstack.ll +++ b/test/CodeGen/CellSPU/bigstack.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=cellspu -o %t1.s -; RUN: grep lqx %t1.s | count 4 -; RUN: grep il %t1.s | grep -v file | count 7 -; RUN: grep stqx %t1.s | count 2 +; RUN: grep lqx %t1.s | count 3 +; RUN: grep il %t1.s | grep -v file | count 5 +; RUN: grep stqx %t1.s | count 1 define i32 @bigstack() nounwind { entry: diff --git a/test/CodeGen/CellSPU/v2f32.ll b/test/CodeGen/CellSPU/v2f32.ll index b32c23b4280..3249631a3c7 100644 --- a/test/CodeGen/CellSPU/v2f32.ll +++ b/test/CodeGen/CellSPU/v2f32.ll @@ -61,3 +61,15 @@ define %vec @test_insert(){ ret %vec %rv } +define void @test_unaligned_store() { +;CHECK: cdd $3, 8($3) +;CHECK: lqd +;CHECK: shufb +;CHECK: stqd + %data = alloca [4 x float], align 16 ; <[4 x float]*> [#uses=1] + %ptr = getelementptr [4 x float]* %data, i32 0, i32 2 ; [#uses=1] + %vptr = bitcast float* %ptr to <2 x float>* ; <[1 x <2 x float>]*> [#uses=1] + store <2 x float> undef, <2 x float>* %vptr + ret void +} +