From f1d0b2bedaa065972a5ba17259055c1176cd1497 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 20 Mar 2006 01:53:53 +0000 Subject: [PATCH] Custom lower arbitrary VECTOR_SHUFFLE's to VPERM. TODO: leave specific ones as VECTOR_SHUFFLE's and turn them into specialized operations like vsplt* git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@26887 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelLowering.cpp | 46 +++++++++++++++++++++++--- lib/Target/PowerPC/PPCISelLowering.h | 4 +++ lib/Target/PowerPC/PPCInstrInfo.td | 25 ++++++++------ 3 files changed, 60 insertions(+), 15 deletions(-) diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 5a2acd46d98..258de780eb6 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -167,6 +167,11 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM) setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand); setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand); setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Expand); + + // FIXME: We don't support any BUILD_VECTOR's yet. We should custom expand + // the ones we do, like splat(0.0) and splat(-0.0). + setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand); } if (TM.getSubtarget().hasAltivec()) { @@ -179,11 +184,11 @@ PPCTargetLowering::PPCTargetLowering(TargetMachine &TM) setOperationAction(ISD::LOAD , MVT::v4f32, Legal); setOperationAction(ISD::ADD , MVT::v4i32, Legal); setOperationAction(ISD::LOAD , MVT::v4i32, Legal); - // FIXME: We don't support any BUILD_VECTOR's yet. We should custom expand - // the ones we do! 
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Expand); - setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Expand); - + setOperationAction(ISD::LOAD , MVT::v16i8, Legal); + + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); } @@ -209,6 +214,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::VMADDFP: return "PPCISD::VMADDFP"; case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; case PPCISD::LVE_X: return "PPCISD::LVE_X"; + case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::Hi: return "PPCISD::Hi"; case PPCISD::Lo: return "PPCISD::Lo"; case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; @@ -566,6 +572,36 @@ SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { return DAG.getNode(PPCISD::LVE_X, Op.getValueType(), Store, FIdx, DAG.getSrcValue(NULL)); } + case ISD::VECTOR_SHUFFLE: { + // FIXME: Cases that are handled by instructions that take permute + // immediates (such as vsplt*) shouldn't be lowered here! Also handle cases + // that are cheaper to do as multiple such instructions than as a constant + // pool load/vperm pair. + + // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant + // vector that will get spilled to the constant pool. + SDOperand V1 = Op.getOperand(0); + SDOperand V2 = Op.getOperand(1); + if (V2.getOpcode() == ISD::UNDEF) V2 = V1; + SDOperand PermMask = Op.getOperand(2); + + // The VECTOR_SHUFFLE mask is almost exactly what we want for vperm, except + // that it is in input element units, not in bytes. Convert now. 
+ MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType()); + unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8; + + std::vector ResultMask; + for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) { + unsigned SrcElt =cast(PermMask.getOperand(i))->getValue(); + + for (unsigned j = 0; j != BytesPerElement; ++j) + ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, + MVT::i8)); + } + + SDOperand VPermMask =DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask); + return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask); + } } return SDOperand(); } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 058f85bb075..1e883001a62 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -56,6 +56,10 @@ namespace llvm { /// the third is the SRCVALUE node. LVE_X, + /// VPERM - The PPC VPERM Instruction. + /// + VPERM, + /// Hi/Lo - These represent the high and low 16-bit parts of a global /// address respectively. These nodes have two operands, the first of /// which must be a TargetGlobalAddress, and the second of which must be a diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index a7899c8038e..ce308ee5335 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -26,6 +26,10 @@ def SDT_PPCShiftOp : SDTypeProfile<1, 2, [ // PPCshl, PPCsra, PPCsrl def SDT_PPCCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>; def SDT_PPCRetFlag : SDTypeProfile<0, 0, []>; +def SDT_PPCvperm : SDTypeProfile<1, 3, [ + SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2> +]>; + //===----------------------------------------------------------------------===// // PowerPC specific DAG Nodes. 
// @@ -46,6 +50,7 @@ def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>; def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>; def PPClve_x : SDNode<"PPCISD::LVE_X", SDTLoad, [SDNPHasChain]>; +def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; // These nodes represent the 32-bit PPC shifts that operate on 6-bit shift // amounts. These nodes are generated by the multi-precision shift code. @@ -118,15 +123,6 @@ def imm16Shifted : PatLeaf<(imm), [{ return ((unsigned)N->getValue() & 0xFFFF0000U) == (unsigned)N->getValue(); }], HI16>; -/* -// Example of a legalize expander: Only for PPC64. -def : Expander<(set i64:$dst, (fp_to_sint f64:$src)), - [(set f64:$tmp , (FCTIDZ f64:$src)), - (set i32:$tmpFI, (CreateNewFrameIndex 8, 8)), - (store f64:$tmp, i32:$tmpFI), - (set i64:$dst, (load i32:$tmpFI))], - Subtarget_PPC64>; -*/ //===----------------------------------------------------------------------===// // PowerPC Flag Definitions. @@ -956,7 +952,9 @@ def VNMSUBFP: VAForm_1<47, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC), Requires<[FPContractions]>; def VPERM : VAForm_1<43, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC), - "vperm $vD, $vA, $vC, $vB", VecFP, []>; + "vperm $vD, $vA, $vC, $vB", VecFP, + [(set VRRC:$vD, + (PPCvperm (v4f32 VRRC:$vA), VRRC:$vB, VRRC:$vC))]>; // VX-Form instructions. AltiVec arithmetic ops. @@ -1153,6 +1151,13 @@ def : Pat<(f64 (extload xaddr:$src, f32)), def : Pat<(v4i32 (load xoaddr:$src)), (v4i32 (LVX xoaddr:$src))>; +def : Pat<(v16i8 (load xoaddr:$src)), + (v16i8 (LVX xoaddr:$src))>; + + +def : Pat<(PPCvperm (v4i32 VRRC:$vA), VRRC:$vB, VRRC:$vC), + (v4i32 (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC))>; + def : Pat<(store (v4i32 VRRC:$rS), xoaddr:$dst), (STVX (v4i32 VRRC:$rS), xoaddr:$dst)>; def : Pat<(v4i32 (PPClve_x xoaddr:$src)),