diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index edb6c9cfe84..9e7f5466b26 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -183,6 +183,11 @@ private: /// QuadDRegs - Form a quad register pair from a quad of D registers. /// SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); + + /// OctoDRegs - Form 8 consecutive D registers from V0 through V7. + /// + SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3, + SDValue V4, SDValue V5, SDValue V6, SDValue V7); }; } @@ -991,6 +996,26 @@ SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1, return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8); } +/// OctoDRegs - Form 8 consecutive D registers from V0..V7 via one REG_SEQUENCE. +/// +SDNode *ARMDAGToDAGISel::OctoDRegs(EVT VT, SDValue V0, SDValue V1, + SDValue V2, SDValue V3, + SDValue V4, SDValue V5, + SDValue V6, SDValue V7) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::DSUBREG_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::DSUBREG_1, MVT::i32); + SDValue SubReg2 = CurDAG->getTargetConstant(ARM::DSUBREG_2, MVT::i32); + SDValue SubReg3 = CurDAG->getTargetConstant(ARM::DSUBREG_3, MVT::i32); + SDValue SubReg4 = CurDAG->getTargetConstant(ARM::DSUBREG_4, MVT::i32); + SDValue SubReg5 = CurDAG->getTargetConstant(ARM::DSUBREG_5, MVT::i32); + SDValue SubReg6 = CurDAG->getTargetConstant(ARM::DSUBREG_6, MVT::i32); + SDValue SubReg7 = CurDAG->getTargetConstant(ARM::DSUBREG_7, MVT::i32); + const SDValue Ops[] ={ V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3, + V4, SubReg4, V5, SubReg5, V6, SubReg6, V7, SubReg7 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 16); +} + /// GetNEONSubregVT - Given a type for a 128-bit NEON vector, return the type /// for a 64-bit subregister of the vector. 
static EVT GetNEONSubregVT(EVT VT) { @@ -1065,18 +1090,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); } - SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, VT, RegSeq); - ReplaceUses(SDValue(N, 0), D0); - SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, VT, RegSeq); - ReplaceUses(SDValue(N, 1), D1); - - if (NumVecs > 2) { - SDValue D2 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_2, dl, VT, RegSeq); - ReplaceUses(SDValue(N, 2), D2); - } - if (NumVecs > 3) { - SDValue D3 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_3, dl, VT, RegSeq); - ReplaceUses(SDValue(N, 3), D3); + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDValue D = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0+Vec, + dl, VT, RegSeq); + ReplaceUses(SDValue(N, Vec), D); } ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs)); return NULL; @@ -1134,10 +1151,36 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6); Chain = SDValue(VLdB, NumVecs+1); - // Combine the even and odd subregs to produce the result. - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec)); - ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); + if (llvm::ModelWithRegSequence()) { + SDValue V0 = SDValue(VLdA, 0); + SDValue V1 = SDValue(VLdB, 0); + SDValue V2 = SDValue(VLdA, 1); + SDValue V3 = SDValue(VLdB, 1); + SDValue V4 = SDValue(VLdA, 2); + SDValue V5 = SDValue(VLdB, 2); + SDValue V6 = (NumVecs == 3) // With 3 vectors, V6/V7 are IMPLICIT_DEF. + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), + 0) + : SDValue(VLdA, 3); + SDValue V7 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), + 0) + : SDValue(VLdB, 3); + SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3, + V4, V5, V6, V7), 0); + + // Extract the NumVecs (3 or 4) Q registers from RegSeq. 
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDValue Q = CurDAG->getTargetExtractSubreg(ARM::QSUBREG_0+Vec, + dl, VT, RegSeq); + ReplaceUses(SDValue(N, Vec), Q); + } + } else { + // Otherwise, combine the even and odd subregs to produce the result. + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec)); + ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); + } } } ReplaceUses(SDValue(N, NumVecs), Chain); diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index 6c43e698c1b..017e6f74439 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -33,7 +33,8 @@ namespace { private: bool FormsRegSequence(MachineInstr *MI, - unsigned FirstOpnd, unsigned NumRegs) const; + unsigned FirstOpnd, unsigned NumRegs, + unsigned Offset, unsigned Stride) const; bool PreAllocNEONRegisters(MachineBasicBlock &MBB); }; @@ -340,13 +341,16 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, bool NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, - unsigned FirstOpnd, unsigned NumRegs) const { + unsigned FirstOpnd, unsigned NumRegs, + unsigned Offset, unsigned Stride) const { MachineOperand &FMO = MI->getOperand(FirstOpnd); assert(FMO.isReg() && FMO.getSubReg() == 0 && "unexpected operand"); unsigned VirtReg = FMO.getReg(); (void)VirtReg; assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "expected a virtual register"); + + unsigned LastSubIdx = 0; // 0 until the first sub-index is seen. if (FMO.isDef()) { MachineInstr *RegSeq = 0; for (unsigned R = 0; R < NumRegs; ++R) { @@ -363,13 +367,28 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, return false; if (RegSeq && RegSeq != UseMI) return false; + unsigned OpIdx = 1 + (Offset + R * Stride) * 2; // (reg, subidx) pairs + if (UseMI->getOperand(OpIdx).getReg() != VirtReg) + llvm_unreachable("Malformed REG_SEQUENCE instruction!"); + unsigned SubIdx = UseMI->getOperand(OpIdx + 1).getImm(); + if (LastSubIdx) { + if (LastSubIdx != 
SubIdx-Stride) + return false; + } else { + // Must start from arm_dsubreg_0 or arm_qsubreg_0, plus Offset. + if (SubIdx != (ARM::DSUBREG_0+Offset) && + SubIdx != (ARM::QSUBREG_0+Offset)) + return false; + } RegSeq = UseMI; + LastSubIdx = SubIdx; } - // Make sure trailing operands of REG_SEQUENCE are undef. - unsigned NumExps = (RegSeq->getNumOperands() - 1) / 2; - for (unsigned i = NumRegs * 2 + 1; i < NumExps; i += 2) { - const MachineOperand &MO = RegSeq->getOperand(i); + // In the case of vld3, etc., make sure the trailing (4th) operand of + // REG_SEQUENCE is an undef, i.e. defined by an IMPLICIT_DEF. + if (NumRegs == 3) { + unsigned OpIdx = 1 + (Offset + 3 * Stride) * 2; + const MachineOperand &MO = RegSeq->getOperand(OpIdx); unsigned VirtReg = MO.getReg(); MachineInstr *DefMI = MRI->getVRegDef(VirtReg); if (!DefMI || !DefMI->isImplicitDef()) @@ -379,7 +398,6 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, } unsigned LastSrcReg = 0; - unsigned LastSubIdx = 0; SmallVector SubIds; for (unsigned R = 0; R < NumRegs; ++R) { const MachineOperand &MO = MI->getOperand(FirstOpnd + R); @@ -400,11 +418,12 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, return false; unsigned SubIdx = DefMI->getOperand(2).getImm(); if (LastSubIdx) { - if (LastSubIdx != SubIdx-1) + if (LastSubIdx != SubIdx-Stride) // Sub-indices must step by Stride. return false; } else { // Must start from arm_dsubreg_0 or arm_qsubreg_0. - if (SubIdx != ARM::DSUBREG_0 && SubIdx != ARM::QSUBREG_0) + if (SubIdx != (ARM::DSUBREG_0+Offset) && + SubIdx != (ARM::QSUBREG_0+Offset)) return false; } SubIds.push_back(SubIdx); @@ -447,7 +466,7 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) { if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride)) continue; if (llvm::ModelWithRegSequence() && - FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride)) + FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride)) continue; MachineBasicBlock::iterator NextI = llvm::next(MBBI);