diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp
index 448e73ac623..255685232d1 100644
--- a/lib/Target/ARM64/ARM64ISelLowering.cpp
+++ b/lib/Target/ARM64/ARM64ISelLowering.cpp
@@ -480,7 +480,6 @@ void ARM64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
   setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
   setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom);
-  setOperationAction(ISD::SCALAR_TO_VECTOR, VT.getSimpleVT(), Custom);
   setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
   setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Custom);
@@ -1973,8 +1972,6 @@ SDValue ARM64TargetLowering::LowerOperation(SDValue Op,
     return LowerINSERT_VECTOR_ELT(Op, DAG);
   case ISD::EXTRACT_VECTOR_ELT:
     return LowerEXTRACT_VECTOR_ELT(Op, DAG);
-  case ISD::SCALAR_TO_VECTOR:
-    return LowerSCALAR_TO_VECTOR(Op, DAG);
   case ISD::BUILD_VECTOR:
     return LowerBUILD_VECTOR(Op, DAG);
   case ISD::VECTOR_SHUFFLE:
@@ -5577,53 +5574,6 @@ SDValue ARM64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                      Op.getOperand(1));
 }
 
-SDValue ARM64TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
-                                                    SelectionDAG &DAG) const {
-  assert(Op.getOpcode() == ISD::SCALAR_TO_VECTOR && "Unknown opcode!");
-  // Some AdvSIMD intrinsics leave their results in the scalar B/H/S/D
-  // registers. The default lowering will copy those to a GPR then back
-  // to a vector register. Instead, just recognize those cases and reference
-  // the vector register they're already a subreg of.
-  SDValue Op0 = Op->getOperand(0);
-  if (Op0->getOpcode() != ISD::INTRINSIC_WO_CHAIN)
-    return Op;
-  unsigned IID = getIntrinsicID(Op0.getNode());
-  // The below list of intrinsics isn't exhaustive. Add cases as-needed.
-  // FIXME: Even better would be if there were an attribute on the node
-  // that we could query and set in the intrinsics definition or something.
-  unsigned SubIdx;
-  switch (IID) {
-  default:
-    // Early exit if this isn't one of the intrinsics we handle.
-    return Op;
-  case Intrinsic::arm64_neon_uaddv:
-  case Intrinsic::arm64_neon_saddv:
-  case Intrinsic::arm64_neon_uaddlv:
-  case Intrinsic::arm64_neon_saddlv:
-    switch (Op0.getValueType().getSizeInBits()) {
-    default:
-      llvm_unreachable("Illegal result size from ARM64 vector intrinsic!");
-    case 8:
-      SubIdx = ARM64::bsub;
-      break;
-    case 16:
-      SubIdx = ARM64::hsub;
-      break;
-    case 32:
-      SubIdx = ARM64::ssub;
-      break;
-    case 64:
-      SubIdx = ARM64::dsub;
-      break;
-    }
-  }
-  MachineSDNode *N =
-      DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(Op),
-                         Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
-                         Op0, DAG.getTargetConstant(SubIdx, MVT::i32));
-  return SDValue(N, 0);
-}
-
 SDValue ARM64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
                                                      SelectionDAG &DAG) const {
   EVT VT = Op.getOperand(0).getValueType();
diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td
index 2fe17204abc..71910a65286 100644
--- a/lib/Target/ARM64/ARM64InstrInfo.td
+++ b/lib/Target/ARM64/ARM64InstrInfo.td
@@ -1890,31 +1890,39 @@ defm FMOV : UnscaledConversion<"fmov">;
 def : Pat<(f32 (fpimm0)), (FMOVWSr WZR)>, Requires<[NoZCZ]>;
 def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>;
 
-def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
+def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)),
+          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)),
+          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
 def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;
 
-def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), (FMOVDXr V64:$Vn)>;
-def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), (FMOVDXr V64:$Vn)>;
-def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), (FMOVDXr V64:$Vn)>;
-def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))), (FMOVDXr V64:$Vn)>;
-def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), (FMOVDXr V64:$Vn)>;
-def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), (FMOVDXr V64:$Vn)>;
+def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
+          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
+          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
+          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))),
+          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
+          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
+          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
 
-def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))), (COPY_TO_REGCLASS GPR32:$Xn,
-                                               FPR32)>;
-def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))), (COPY_TO_REGCLASS FPR32:$Xn,
-                                               GPR32)>;
-def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))), (COPY_TO_REGCLASS GPR64:$Xn,
-                                               FPR64)>;
-def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))), (COPY_TO_REGCLASS FPR64:$Xn,
-                                               GPR64)>;
+def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
+          (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;
+def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))),
+          (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>;
+def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))),
+          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))),
+          (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>;
 
 //===----------------------------------------------------------------------===//
 // Floating point conversion instruction.
@@ -2971,16 +2979,18 @@ def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
 defm INS : SIMDIns;
 
 def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
-          (INSvi8gpr (v16i8 (IMPLICIT_DEF)), (i64 0), GPR32:$Rn)>;
+          (SUBREG_TO_REG (i32 0),
+                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
 def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
-          (EXTRACT_SUBREG
-            (INSvi8gpr (v16i8 (IMPLICIT_DEF)), (i64 0), GPR32:$Rn), dsub)>;
+          (SUBREG_TO_REG (i32 0),
+                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
 def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
-          (INSvi16gpr (v8i16 (IMPLICIT_DEF)), (i64 0), GPR32:$Rn)>;
+          (SUBREG_TO_REG (i32 0),
+                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
 def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
-          (EXTRACT_SUBREG
-            (INSvi16gpr (v8i16 (IMPLICIT_DEF)), (i64 0), GPR32:$Rn), dsub)>;
+          (SUBREG_TO_REG (i32 0),
+                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
 
 def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
           (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
diff --git a/test/CodeGen/ARM64/vaddv.ll b/test/CodeGen/ARM64/vaddv.ll
index 44bfa845db8..3988e0c120e 100644
--- a/test/CodeGen/ARM64/vaddv.ll
+++ b/test/CodeGen/ARM64/vaddv.ll
@@ -136,6 +136,18 @@ entry:
   ret i64 %vaddv.i
 }
 
+define <1 x i64> @test_vaddv_u64_to_vec(<2 x i64> %a1) {
+; CHECK-LABEL: test_vaddv_u64_to_vec:
+; CHECK: addp.2d d0, v0
+; CHECK-NOT: fmov
+; CHECK-NOT: ins
+; CHECK: ret
+entry:
+  %vaddv.i = tail call i64 @llvm.arm64.neon.uaddv.i64.v2i64(<2 x i64> %a1)
+  %vec = insertelement <1 x i64> undef, i64 %vaddv.i, i32 0
+  ret <1 x i64> %vec
+}
+
 define signext i8 @test_vaddvq_s8(<16 x i8> %a1) {
 ; CHECK-LABEL: test_vaddvq_s8:
 ; CHECK: addv.16b b[[REGNUM:[0-9]+]], v0