diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 3bddca02c3f..235bf36a67f 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -143,6 +143,11 @@ private: unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1); + /// SelectVTBL - Select NEON VTBL intrinsics. NumVecs should be 2, 3 or 4. + /// These are custom-selected so that a REG_SEQUENCE can be generated to + /// force the table registers to be consecutive. + SDNode *SelectVTBL(SDNode *N, unsigned NumVecs, unsigned Opc); + /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM. SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned); @@ -1197,7 +1202,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1) { - assert(NumVecs >=1 && NumVecs <= 4 && "VST NumVecs out-of-range"); + assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); SDValue MemAddr, Align; @@ -1524,6 +1529,42 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, return NULL; } +SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, unsigned NumVecs, unsigned Opc) { + assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range"); + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + // Form a REG_SEQUENCE to force register allocation. + SDValue RegSeq; + SDValue V0 = N->getOperand(1); + SDValue V1 = N->getOperand(2); + if (NumVecs == 2) + RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0); + else { + SDValue V2 = N->getOperand(3); + // If it's a vtbl3, form a quad D-register and leave the last part as + // an undef. + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) + : N->getOperand(4); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + + // Now extract the D registers back out. + SmallVector Ops; + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq)); + if (NumVecs > 2) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, RegSeq)); + if (NumVecs > 3) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, RegSeq)); + + Ops.push_back(N->getOperand(NumVecs+1)); + Ops.push_back(getAL(CurDAG)); // predicate + Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register + return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), NumVecs+3); +} + SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { if (!Subtarget->hasV6T2Ops()) @@ -2281,6 +2322,22 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { break; } + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntNo = cast(N->getOperand(0))->getZExtValue(); + switch (IntNo) { + default: + break; + + case Intrinsic::arm_neon_vtbl2: + return SelectVTBL(N, 2, ARM::VTBL2); + case Intrinsic::arm_neon_vtbl3: + return SelectVTBL(N, 3, ARM::VTBL3); + case Intrinsic::arm_neon_vtbl4: + return SelectVTBL(N, 4, ARM::VTBL4); + } + break; + } + case ISD::CONCAT_VECTORS: return SelectConcatVector(N); } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 3297035deb4..cdd8d5ed9d0 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -3291,22 +3291,16 @@ let hasExtraSrcRegAllocReq = 1 in { def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTB2, - "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2 - DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>; + "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "", []>; def VTBL3 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NVTBLFrm, IIC_VTB3, - "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3 - DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>; + "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "", []>; def VTBL4 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NVTBLFrm, IIC_VTB4, - "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2, - DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>; + "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "", []>; } // hasExtraSrcRegAllocReq = 1 // VTBX : Vector Table Extension