ARM NEON: select the vld2 intrinsic for quad-register vector types.

Summary of the hunks below:
  * ARMISelDAGToDAG.cpp - adds the PairDRegs helper, which inserts two
    double-register values into an IMPLICIT_DEF (via two INSERT_SUBREG
    nodes) to form a quad-register value.  The vld2 intrinsic selection
    gains v16i8/v8i16/v4f32/v4i32 cases: these emit a VLD2q* node with four
    double-register results plus a chain, pair the results with PairDRegs,
    and replace the three results of the original node.
  * ARMInstrNEON.td - adds the VLD2Q class (four DPR outputs) and the
    VLD2q8/VLD2q16/VLD2q32 instruction definitions.
  * ARMRegisterInfo.h - adds the ARM::SubregIndex enum; per its comment the
    values must stay in sync with ARMRegisterInfo.td.
  * NEONPreAllocPass.cpp - reports VLD2q8/16/32 as multi-register ops with
    FirstOpnd = 0 and NumRegs = 4.

diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 53f2282c4f0..0865ab841ad 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -130,6 +130,10 @@ private:
   virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                             char ConstraintCode,
                                             std::vector<SDValue> &OutOps);
+
+  /// PairDRegs - Insert a pair of double registers into an implicit def to
+  /// form a quad register.
+  SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
 };
 }
 
@@ -923,6 +927,20 @@ SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDValue Op) {
   return 0;
 }
 
+/// PairDRegs - Insert a pair of double registers into an implicit def to
+/// form a quad register.
+SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
+  DebugLoc dl = V0.getNode()->getDebugLoc();
+  SDValue Undef =
+    SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, dl, VT), 0);
+  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::DSUBREG_0, MVT::i32);
+  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::DSUBREG_1, MVT::i32);
+  SDNode *Pair = CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl,
+                                        VT, Undef, V0, SubReg0);
+  return CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl,
+                                VT, SDValue(Pair, 0), V1, SubReg1);
+}
+
 SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
   SDNode *N = Op.getNode();
   DebugLoc dl = N->getDebugLoc();
@@ -1332,16 +1350,33 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
       SDValue MemAddr, MemUpdate, MemOpc;
       if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
         return NULL;
+      EVT RegVT = VT;
       switch (VT.getSimpleVT().SimpleTy) {
       default: llvm_unreachable("unhandled vld2 type");
       case MVT::v8i8:  Opc = ARM::VLD2d8; break;
       case MVT::v4i16: Opc = ARM::VLD2d16; break;
       case MVT::v2f32:
       case MVT::v2i32: Opc = ARM::VLD2d32; break;
+      case MVT::v16i8: Opc = ARM::VLD2q8;  RegVT = MVT::v8i8; break;
+      case MVT::v8i16: Opc = ARM::VLD2q16; RegVT = MVT::v4i16; break;
+      case MVT::v4f32: Opc = ARM::VLD2q32; RegVT = MVT::v2f32; break;
+      case MVT::v4i32: Opc = ARM::VLD2q32; RegVT = MVT::v2i32; break;
       }
       SDValue Chain = N->getOperand(0);
       const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain };
-      return CurDAG->getMachineNode(Opc, dl, VT, VT, MVT::Other, Ops, 4);
+      if (RegVT == VT)
+        return CurDAG->getMachineNode(Opc, dl, VT, VT, MVT::Other, Ops, 4);
+
+      // Quad registers are loaded as pairs of double registers.
+      std::vector<EVT> ResTys(4, RegVT);
+      ResTys.push_back(MVT::Other);
+      SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4);
+      SDNode *Q0 = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1));
+      SDNode *Q1 = PairDRegs(VT, SDValue(VLd, 2), SDValue(VLd, 3));
+      ReplaceUses(SDValue(N, 0), SDValue(Q0, 0));
+      ReplaceUses(SDValue(N, 1), SDValue(Q1, 0));
+      ReplaceUses(SDValue(N, 2), SDValue(VLd, 4));
+      return NULL;
     }
 
     case Intrinsic::arm_neon_vld3: {
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index b9dbd6f3846..3f07d302b03 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -182,11 +182,20 @@ let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
 class VLD2D<string OpcodeStr>
   : NLdSt<(outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr), IIC_VLD2,
           !strconcat(OpcodeStr, "\t\\{$dst1,$dst2\\}, $addr"), "", []>;
+class VLD2Q<string OpcodeStr>
+  : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+          (ins addrmode6:$addr), IIC_VLD2,
+          !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"),
+          "", []>;
 
 def VLD2d8  : VLD2D<"vld2.8">;
 def VLD2d16 : VLD2D<"vld2.16">;
 def VLD2d32 : VLD2D<"vld2.32">;
 
+def VLD2q8  : VLD2Q<"vld2.8">;
+def VLD2q16 : VLD2Q<"vld2.16">;
+def VLD2q32 : VLD2Q<"vld2.32">;
+
 // VLD3 : Vector Load (multiple 3-element structures)
 class VLD3D<string OpcodeStr>
   : NLdSt<(outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr),
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index 8edfb9a2057..041afd04140 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -23,6 +23,16 @@ namespace llvm {
   class ARMBaseInstrInfo;
   class Type;
 
+namespace ARM {
+  /// SubregIndex - The index of various subregister classes. Note that
+  /// these indices must be kept in sync with the class indices in the
+  /// ARMRegisterInfo.td file.
+  enum SubregIndex {
+    SSUBREG_0 = 1, SSUBREG_1 = 2, SSUBREG_2 = 3, SSUBREG_3 = 4,
+    DSUBREG_0 = 5, DSUBREG_1 = 6
+  };
+}
+
 struct ARMRegisterInfo : public ARMBaseRegisterInfo {
 public:
   ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index 9edb44b4daa..da1c662edb6 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -52,6 +52,13 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd,
     NumRegs = 2;
     return true;
 
+  case ARM::VLD2q8:
+  case ARM::VLD2q16:
+  case ARM::VLD2q32:
+    FirstOpnd = 0;
+    NumRegs = 4;
+    return true;
+
   case ARM::VLD3d8:
   case ARM::VLD3d16:
   case ARM::VLD3d32: