From 4a3d35abefa3a1f6558ef88b25f2a320c76d5328 Mon Sep 17 00:00:00 2001
From: Bob Wilson
Date: Wed, 5 Aug 2009 00:49:09 +0000
Subject: [PATCH] Change DAG nodes for Neon VLD2/3/4 operations to return
 multiple results. Get rid of yesterday's code to fix the register usage
 during isel. Select the new DAG nodes to machine instructions. The new
 pre-alloc pass to choose adjacent registers for these results is not done,
 so the results of this will generally not assemble yet.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78136 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMISelDAGToDAG.cpp  | 58 ++++++++++++++++++++++++++++-
 lib/Target/ARM/ARMISelLowering.cpp  | 29 +++------------
 lib/Target/ARM/ARMInstrNEON.td      | 50 +++++++++++++++++++++----
 lib/Target/ARM/ARMTargetMachine.cpp |  2 +
 4 files changed, 108 insertions(+), 31 deletions(-)

diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 8ef541f621d..a773916eb28 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -1284,7 +1284,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
     MVT HalfVT;
     unsigned Opc = 0;
     switch (VT.getVectorElementType().getSimpleVT()) {
-    default: assert(false && "unhandled VDUP splat type");
+    default: llvm_unreachable("unhandled VDUP splat type");
     case MVT::i8: Opc = ARM::VDUPLN8q; HalfVT = MVT::v8i8; break;
     case MVT::i16: Opc = ARM::VDUPLN16q; HalfVT = MVT::v4i16; break;
     case MVT::i32: Opc = ARM::VDUPLN32q; HalfVT = MVT::v2i32; break;
@@ -1304,6 +1304,62 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
 
     break;
   }
+
+  case ARMISD::VLD2D: {
+    MVT VT = Op.getValueType();
+    SDValue MemAddr, MemUpdate, MemOpc;
+    if (!SelectAddrMode6(Op, N->getOperand(1), MemAddr, MemUpdate, MemOpc))
+      return NULL;
+    unsigned Opc;
+    switch (VT.getSimpleVT()) {
+    default: llvm_unreachable("unhandled VLD2D type");
+    case MVT::v8i8: Opc = ARM::VLD2d8; break;
+    case MVT::v4i16: Opc = ARM::VLD2d16; break;
+    case MVT::v2f32:
+    case MVT::v2i32: Opc = ARM::VLD2d32; break;
+    case MVT::v1i64: Opc = ARM::VLD2d64; break;
+    }
+    const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc };
+    return CurDAG->getTargetNode(Opc, dl, VT, VT, MVT::Other, Ops, 3);
+  }
+
+  case ARMISD::VLD3D: {
+    MVT VT = Op.getValueType();
+    SDValue MemAddr, MemUpdate, MemOpc;
+    if (!SelectAddrMode6(Op, N->getOperand(1), MemAddr, MemUpdate, MemOpc))
+      return NULL;
+    unsigned Opc;
+    switch (VT.getSimpleVT()) {
+    default: llvm_unreachable("unhandled VLD3D type");
+    case MVT::v8i8: Opc = ARM::VLD3d8; break;
+    case MVT::v4i16: Opc = ARM::VLD3d16; break;
+    case MVT::v2f32:
+    case MVT::v2i32: Opc = ARM::VLD3d32; break;
+    case MVT::v1i64: Opc = ARM::VLD3d64; break;
+    }
+    const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc };
+    return CurDAG->getTargetNode(Opc, dl, VT, VT, VT, MVT::Other, Ops, 3);
+  }
+
+  case ARMISD::VLD4D: {
+    MVT VT = Op.getValueType();
+    SDValue MemAddr, MemUpdate, MemOpc;
+    if (!SelectAddrMode6(Op, N->getOperand(1), MemAddr, MemUpdate, MemOpc))
+      return NULL;
+    unsigned Opc;
+    switch (VT.getSimpleVT()) {
+    default: llvm_unreachable("unhandled VLD4D type");
+    case MVT::v8i8: Opc = ARM::VLD4d8; break;
+    case MVT::v4i16: Opc = ARM::VLD4d16; break;
+    case MVT::v2f32:
+    case MVT::v2i32: Opc = ARM::VLD4d32; break;
+    case MVT::v1i64: Opc = ARM::VLD4d64; break;
+    }
+    const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc };
+    std::vector<MVT> ResTys(4, VT);
+    ResTys.push_back(MVT::Other);
+    return CurDAG->getTargetNode(Opc, dl, ResTys, Ops, 3);
+  }
   }
 
   return SelectCode(Op);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index eb7754745cb..987fc7fdc36 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1323,7 +1323,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
 }
 
 static SDValue LowerNeonVLDIntrinsic(SDValue Op, SelectionDAG &DAG,
-                                     unsigned Opcode, unsigned NumVecs) {
+                                     unsigned Opcode) {
   SDNode *Node = Op.getNode();
   MVT VT = Node->getValueType(0);
   DebugLoc dl = Op.getDebugLoc();
@@ -1332,25 +1332,8 @@ static SDValue LowerNeonVLDIntrinsic(SDValue Op, SelectionDAG &DAG,
     return SDValue(); // unimplemented
 
   SDValue Ops[] = { Node->getOperand(0),
-                    Node->getOperand(1) };
-  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
-  SDValue Result = DAG.getNode(Opcode, dl, Tys, Ops, 2);
-
-  static const unsigned VLDRegs[] = {
-    ARM::D0, ARM::D1, ARM::D2, ARM::D3
-  };
-
-  SmallVector<SDValue, 5> ResultVals;
-  SDValue Chain = Result.getValue(0);
-  SDValue Flag = Result.getValue(1);
-  for (unsigned N = 0; N < NumVecs; ++N) {
-    Chain = DAG.getCopyFromReg(Chain, dl, VLDRegs[N], VT, Flag).getValue(1);
-    ResultVals.push_back(Chain.getValue(0));
-    Flag = Chain.getValue(2);
-  }
-  ResultVals.push_back(Chain);
-  return DAG.getNode(ISD::MERGE_VALUES, dl, Node->getVTList(),
-                     ResultVals.data(), NumVecs + 1);
+                    Node->getOperand(2) };
+  return DAG.getNode(Opcode, dl, Node->getVTList(), Ops, 2);
 }
 
 SDValue
@@ -1359,13 +1342,13 @@ ARMTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) {
   switch (IntNo) {
   case Intrinsic::arm_neon_vld2i:
  case Intrinsic::arm_neon_vld2f:
-    return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD2D, 2);
+    return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD2D);
   case Intrinsic::arm_neon_vld3i:
   case Intrinsic::arm_neon_vld3f:
-    return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD3D, 3);
+    return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD3D);
   case Intrinsic::arm_neon_vld4i:
   case Intrinsic::arm_neon_vld4f:
-    return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD4D, 4);
+    return LowerNeonVLDIntrinsic(Op, DAG, ARMISD::VLD4D);
   case Intrinsic::arm_neon_vst2i:
   case Intrinsic::arm_neon_vst2f:
   case Intrinsic::arm_neon_vst3i:
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 540bd2f57f9..e69ae6151ed 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -68,13 +68,18 @@ def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
 def NEONvduplaneq : SDNode<"ARMISD::VDUPLANEQ",
                            SDTypeProfile<1, 2, [SDTCisVT<2, i32>]>>;
 
-def SDTARMVLD : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
-def NEONvld2d : SDNode<"ARMISD::VLD2D", SDTARMVLD,
-                       [SDNPHasChain, SDNPOutFlag, SDNPMayLoad]>;
-def NEONvld3d : SDNode<"ARMISD::VLD3D", SDTARMVLD,
-                       [SDNPHasChain, SDNPOutFlag, SDNPMayLoad]>;
-def NEONvld4d : SDNode<"ARMISD::VLD4D", SDTARMVLD,
-                       [SDNPHasChain, SDNPOutFlag, SDNPMayLoad]>;
+def SDTARMVLD2 : SDTypeProfile<2, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def SDTARMVLD3 : SDTypeProfile<3, 1, [SDTCisSameAs<0, 1>,
+                                      SDTCisSameAs<0, 2>, SDTCisPtrTy<3>]>;
+def SDTARMVLD4 : SDTypeProfile<4, 1, [SDTCisSameAs<0, 1>,
+                                      SDTCisSameAs<0, 2>,
+                                      SDTCisSameAs<0, 3>, SDTCisPtrTy<4>]>;
+def NEONvld2d : SDNode<"ARMISD::VLD2D", SDTARMVLD2,
+                       [SDNPHasChain, SDNPMayLoad]>;
+def NEONvld3d : SDNode<"ARMISD::VLD3D", SDTARMVLD3,
+                       [SDNPHasChain, SDNPMayLoad]>;
+def NEONvld4d : SDNode<"ARMISD::VLD4D", SDTARMVLD4,
+                       [SDNPHasChain, SDNPMayLoad]>;
 
 //===----------------------------------------------------------------------===//
 // NEON operand definitions
@@ -183,6 +188,37 @@ def VST1q32 : VST1Q<"vst1.32", v4i32, int_arm_neon_vst1i>;
 def VST1qf : VST1Q<"vst1.32", v4f32, int_arm_neon_vst1f>;
 def VST1q64 : VST1Q<"vst1.64", v2i64, int_arm_neon_vst1i>;
 
+// VLD2 : Vector Load (multiple 2-element structures)
+class VLD2D<string OpcodeStr>
+  : NLdSt<(outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr),
+          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2\\}, $addr"), []>;
+
+def VLD2d8 : VLD2D<"vld2.8">;
+def VLD2d16 : VLD2D<"vld2.16">;
+def VLD2d32 : VLD2D<"vld2.32">;
+def VLD2d64 : VLD2D<"vld2.64">;
+
+// VLD3 : Vector Load (multiple 3-element structures)
+class VLD3D<string OpcodeStr>
+  : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr),
+          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"), []>;
+
+def VLD3d8 : VLD3D<"vld3.8">;
+def VLD3d16 : VLD3D<"vld3.16">;
+def VLD3d32 : VLD3D<"vld3.32">;
+def VLD3d64 : VLD3D<"vld3.64">;
+
+// VLD4 : Vector Load (multiple 4-element structures)
+class VLD4D<string OpcodeStr>
+  : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+          (ins addrmode6:$addr),
+          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"), []>;
+
+def VLD4d8 : VLD4D<"vld4.8">;
+def VLD4d16 : VLD4D<"vld4.16">;
+def VLD4d32 : VLD4D<"vld4.32">;
+def VLD4d64 : VLD4D<"vld4.64">;
+
 //===----------------------------------------------------------------------===//
 // NEON pattern fragments
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index a6e987b086b..ff0b1d3c712 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -93,6 +93,8 @@ bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
 
 bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
                                           CodeGenOpt::Level OptLevel) {
+  // Call NEON pre-alloc pass here.
+
   // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
   if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb())
     PM.add(createARMLoadStoreOptimizationPass(true));
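For orientation only, and not part of the patch: a rough sketch of how the new multi-result ARMISD::VLD2D node is built and consumed after this change, using only SelectionDAG calls that already appear in the hunks above. The fragment assumes the surrounding context of LowerNeonVLDIntrinsic in ARMISelLowering.cpp (Node, dl, DAG) and is illustrative rather than something to apply on top of the patch.

  // Illustrative sketch only -- mirrors LowerNeonVLDIntrinsic for the VLD2D case.
  // The intrinsic node is (chain, intrinsic-id, pointer); the lowered node
  // keeps just the chain and the pointer operand.
  SDValue Ops[] = { Node->getOperand(0),    // incoming chain
                    Node->getOperand(2) };  // address of the structure

  // Node->getVTList() is (VT, VT, MVT::Other): two D-register-sized vector
  // results plus a chain, matching the SDTARMVLD2 profile in ARMInstrNEON.td.
  SDValue Vld2 = DAG.getNode(ARMISD::VLD2D, dl, Node->getVTList(), Ops, 2);

  // Users now refer to the loaded values by result number instead of copying
  // them out of the fixed registers D0-D3 as the old lowering did.
  SDValue Vec0  = Vld2.getValue(0);  // first vector of the 2-element structure
  SDValue Vec1  = Vld2.getValue(1);  // second vector
  SDValue Chain = Vld2.getValue(2);  // output chain

At isel time the ARMISD::VLD2D case added to ARMDAGToDAGISel::Select turns this node into one of the VLD2d8/16/32/64 instructions with two DPR results; the pre-alloc pass mentioned in the commit message is what will eventually constrain those results to adjacent D registers so the {d0,d1}-style register lists can actually be encoded.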