diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td index 9849fe44b22..05fe22bed07 100644 --- a/lib/Target/AArch64/AArch64InstrNEON.td +++ b/lib/Target/AArch64/AArch64InstrNEON.td @@ -5954,23 +5954,28 @@ def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> { let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; } -multiclass NeonI_Scalar_DUP_Elt_pattern { - def : Pat<(ResTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)), - (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>; +def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 0)), + (f32 (EXTRACT_SUBREG (v4f32 VPR128:$Rn), sub_32))>; +def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 1)), + (f32 (DUPsv_S (v4f32 VPR128:$Rn), 1))>; +def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 2)), + (f32 (DUPsv_S (v4f32 VPR128:$Rn), 2))>; +def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 3)), + (f32 (DUPsv_S (v4f32 VPR128:$Rn), 3))>; - def : Pat<(ResTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)), - (ResTy (DUPI - (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), - OpNImm:$Imm))>; -} +def : Pat<(f64 (vector_extract (v2f64 VPR128:$Rn), 0)), + (f64 (EXTRACT_SUBREG (v2f64 VPR128:$Rn), sub_64))>; +def : Pat<(f64 (vector_extract (v2f64 VPR128:$Rn), 1)), + (f64 (DUPdv_D (v2f64 VPR128:$Rn), 1))>; -// Patterns for vector extract of FP data using scalar DUP instructions -defm : NeonI_Scalar_DUP_Elt_pattern; -defm : NeonI_Scalar_DUP_Elt_pattern; +def : Pat<(f32 (vector_extract (v2f32 VPR64:$Rn), 0)), + (f32 (EXTRACT_SUBREG (v2f32 VPR64:$Rn), sub_32))>; +def : Pat<(f32 (vector_extract (v2f32 VPR64:$Rn), 1)), + (f32 (DUPsv_S (v4f32 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + 1))>; + +def : Pat<(f64 (vector_extract (v1f64 VPR64:$Rn), 0)), + (f64 (EXTRACT_SUBREG (v1f64 VPR64:$Rn), sub_64))>; multiclass NeonI_Scalar_DUP_Ext_Vec_pattern %v) { ret float %tmp1 } +define float @test_dup_sv2S_0(<2 x float> %v) { + ;CHECK-LABEL: test_dup_sv2S_0 + ;CHECK-NOT: dup {{s[0-9]+}}, 
{{v[0-9]+}}.s[0] + ;CHECK: ret + %tmp1 = extractelement <2 x float> %v, i32 0 + ret float %tmp1 +} + define float @test_dup_sv4S(<4 x float> %v) { - ;CHECK: test_dup_sv4S - ;CHECK: dup {{s[0-9]+}}, {{v[0-9]+}}.s[0] + ;CHECK-LABEL: test_dup_sv4S + ;CHECK-NOT: dup {{s[0-9]+}}, {{v[0-9]+}}.s[0] + ;CHECK: ret %tmp1 = extractelement <4 x float> %v, i32 0 ret float %tmp1 } @@ -29,6 +38,14 @@ define double @test_dup_dv2D(<2 x double> %v) { ret double %tmp1 } +define double @test_dup_dv2D_0(<2 x double> %v) { + ;CHECK-LABEL: test_dup_dv2D_0 + ;CHECK-NOT: dup {{d[0-9]+}}, {{v[0-9]+}}.d[0] + ;CHECK: ret + %tmp1 = extractelement <2 x double> %v, i32 0 + ret double %tmp1 +} + define <1 x i8> @test_vector_dup_bv16B(<16 x i8> %v1) { ;CHECK: test_vector_dup_bv16B ;CHECK: dup {{b[0-9]+}}, {{v[0-9]+}}.b[14]