diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td
index 0bff9876e4f..4d54a2303d6 100644
--- a/include/llvm/IR/IntrinsicsAArch64.td
+++ b/include/llvm/IR/IntrinsicsAArch64.td
@@ -255,10 +255,10 @@ def int_aarch64_neon_vqshlu_n : Neon_N2V_Intrinsic;
 def int_aarch64_neon_vqshlus_n : Neon_N2V_Intrinsic;
 
 // Shift Right And Insert (Immediate)
-def int_aarch64_neon_vsrid_n : Neon_2Arg_ShiftImm_Intrinsic;
+def int_aarch64_neon_vsrid_n : Neon_3Arg_ShiftImm_Intrinsic;
 
 // Shift Left And Insert (Immediate)
-def int_aarch64_neon_vslid_n : Neon_2Arg_ShiftImm_Intrinsic;
+def int_aarch64_neon_vslid_n : Neon_3Arg_ShiftImm_Intrinsic;
 
 // Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
 def int_aarch64_neon_vcvtf32_n_s32 :
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
index c53909edce6..95e54f2fcb3 100644
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ b/lib/Target/AArch64/AArch64InstrNEON.td
@@ -4017,7 +4017,7 @@ multiclass NeonI_ScalarShiftLeftImm_BHSD_size<bit u, bits<5> opcode,
   }
 }
 
-class NeonI_ScalarShiftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
+class NeonI_ScalarShiftRightImm_accum_D_size<bit u, bits<5> opcode, string asmop>
   : NeonI_ScalarShiftImm
@@ -4028,6 +4028,17 @@ class NeonI_ScalarShiftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
   let Constraints = "$Src = $Rd";
 }
 
+class NeonI_ScalarShiftLeftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
+  : NeonI_ScalarShiftImm {
+  bits<6> Imm;
+  let Inst{22} = 0b1; // immh:immb = 1xxxxxx
+  let Inst{21-16} = Imm;
+  let Constraints = "$Src = $Rd";
+}
+
 class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop,
                                        RegisterClass FPRCD, RegisterClass FPRCS,
                                        Operand ImmTy>
@@ -4092,7 +4103,7 @@ multiclass Neon_ScalarShiftImm_BHSD_size_patterns
+                                                Instruction INSTD>
   : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn), (i32 imm:$Imm))),
         (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
@@ -4146,19 +4157,19 @@ defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
 defm : Neon_ScalarShiftImm_D_size_patterns;
 
 // Scalar Signed Shift Right and Accumulate (Immediate)
-def SSRA : NeonI_ScalarShiftImm_accum_D_size<0b0, 0b00010, "ssra">;
+def SSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00010, "ssra">;
 def : Neon_ScalarShiftImm_accum_D_size_patterns;
 
 // Scalar Unsigned Shift Right and Accumulate (Immediate)
-def USRA : NeonI_ScalarShiftImm_accum_D_size<0b1, 0b00010, "usra">;
+def USRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00010, "usra">;
 def : Neon_ScalarShiftImm_accum_D_size_patterns;
 
 // Scalar Signed Rounding Shift Right and Accumulate (Immediate)
-def SRSRA : NeonI_ScalarShiftImm_accum_D_size<0b0, 0b00110, "srsra">;
+def SRSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00110, "srsra">;
 def : Neon_ScalarShiftImm_accum_D_size_patterns;
 
 // Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
-def URSRA : NeonI_ScalarShiftImm_accum_D_size<0b1, 0b00110, "ursra">;
+def URSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00110, "ursra">;
 def : Neon_ScalarShiftImm_accum_D_size_patterns;
 
 // Scalar Shift Left (Immediate)
@@ -4184,12 +4195,12 @@ defm : Neon_ScalarShiftImm_BHSD_size_patterns;
 
 // Shift Right And Insert (Immediate)
-defm SRI : NeonI_ScalarShiftRightImm_D_size<0b1, 0b01000, "sri">;
-defm : Neon_ScalarShiftImm_D_size_patterns;
+def SRI : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b01000, "sri">;
+def : Neon_ScalarShiftImm_accum_D_size_patterns;
 
 // Shift Left And Insert (Immediate)
-defm SLI : NeonI_ScalarShiftLeftImm_D_size<0b1, 0b01010, "sli">;
-defm : Neon_ScalarShiftImm_D_size_patterns;
+def SLI : NeonI_ScalarShiftLeftImm_accum_D_size<0b1, 0b01010, "sli">;
+def : Neon_ScalarShiftImm_accum_D_size_patterns;
 
 // Signed Saturating Shift Right Narrow (Immediate)
 defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">;
diff --git a/test/CodeGen/AArch64/neon-scalar-shift-imm.ll b/test/CodeGen/AArch64/neon-scalar-shift-imm.ll
index b11540f80ae..7e099a31a3b 100644
--- a/test/CodeGen/AArch64/neon-scalar-shift-imm.ll
+++ b/test/CodeGen/AArch64/neon-scalar-shift-imm.ll
@@ -266,47 +266,51 @@ entry:
 
 declare <1 x i64> @llvm.aarch64.neon.vqshlus.n.v1i64(<1 x i64>, i32)
 
-define i64 @test_vsrid_n_s64(i64 %a) {
+define i64 @test_vsrid_n_s64(i64 %a, i64 %b) {
 ; CHECK: test_vsrid_n_s64
 ; CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63
 entry:
   %vsri = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vsri1 = call <1 x i64> @llvm.aarch64.neon.vsrid.n(<1 x i64> %vsri, i32 63)
-  %0 = extractelement <1 x i64> %vsri1, i32 0
+  %vsri1 = insertelement <1 x i64> undef, i64 %b, i32 0
+  %vsri2 = call <1 x i64> @llvm.aarch64.neon.vsrid.n(<1 x i64> %vsri, <1 x i64> %vsri1, i32 63)
+  %0 = extractelement <1 x i64> %vsri2, i32 0
   ret i64 %0
 }
 
-declare <1 x i64> @llvm.aarch64.neon.vsrid.n(<1 x i64>, i32)
+declare <1 x i64> @llvm.aarch64.neon.vsrid.n(<1 x i64>, <1 x i64>, i32)
 
-define i64 @test_vsrid_n_u64(i64 %a) {
+define i64 @test_vsrid_n_u64(i64 %a, i64 %b) {
 ; CHECK: test_vsrid_n_u64
 ; CHECK: sri {{d[0-9]+}}, {{d[0-9]+}}, #63
 entry:
   %vsri = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vsri1 = call <1 x i64> @llvm.aarch64.neon.vsrid.n(<1 x i64> %vsri, i32 63)
-  %0 = extractelement <1 x i64> %vsri1, i32 0
+  %vsri1 = insertelement <1 x i64> undef, i64 %b, i32 0
+  %vsri2 = call <1 x i64> @llvm.aarch64.neon.vsrid.n(<1 x i64> %vsri, <1 x i64> %vsri1, i32 63)
+  %0 = extractelement <1 x i64> %vsri2, i32 0
   ret i64 %0
 }
 
-define i64 @test_vslid_n_s64(i64 %a) {
+define i64 @test_vslid_n_s64(i64 %a, i64 %b) {
 ; CHECK: test_vslid_n_s64
 ; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63
 entry:
   %vsli = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vsli1 = call <1 x i64> @llvm.aarch64.neon.vslid.n(<1 x i64> %vsli, i32 63)
-  %0 = extractelement <1 x i64> %vsli1, i32 0
+  %vsli1 = insertelement <1 x i64> undef, i64 %b, i32 0
+  %vsli2 = call <1 x i64> @llvm.aarch64.neon.vslid.n(<1 x i64> %vsli, <1 x i64> %vsli1, i32 63)
+  %0 = extractelement <1 x i64> %vsli2, i32 0
   ret i64 %0
 }
 
-declare <1 x i64> @llvm.aarch64.neon.vslid.n(<1 x i64>, i32)
+declare <1 x i64> @llvm.aarch64.neon.vslid.n(<1 x i64>, <1 x i64>, i32)
 
-define i64 @test_vslid_n_u64(i64 %a) {
+define i64 @test_vslid_n_u64(i64 %a, i64 %b) {
 ; CHECK: test_vslid_n_u64
 ; CHECK: sli {{d[0-9]+}}, {{d[0-9]+}}, #63
 entry:
   %vsli = insertelement <1 x i64> undef, i64 %a, i32 0
-  %vsli1 = call <1 x i64> @llvm.aarch64.neon.vslid.n(<1 x i64> %vsli, i32 63)
-  %0 = extractelement <1 x i64> %vsli1, i32 0
+  %vsli1 = insertelement <1 x i64> undef, i64 %b, i32 0
+  %vsli2 = call <1 x i64> @llvm.aarch64.neon.vslid.n(<1 x i64> %vsli, <1 x i64> %vsli1, i32 63)
+  %0 = extractelement <1 x i64> %vsli2, i32 0
   ret i64 %0
 }
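
Illustration only, not part of the patch: SRI and SLI replace only part of their destination register, so the previous destination value is a genuine data input. That is why int_aarch64_neon_vsrid_n and int_aarch64_neon_vslid_n gain a third operand above and why the new accumulate-style TableGen classes tie $Src to $Rd. Below is a minimal C sketch of the same behaviour at the ACLE level, assuming an AArch64 toolchain that provides arm_neon.h; the shift-and-mask reference expressions are the usual equivalents, not something taken from this patch.

/* Not part of the patch: user-level view of scalar SRI/SLI, showing why
 * three operands are needed (destination, source, shift amount).
 * Assumes an AArch64 compiler with <arm_neon.h>. */
#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  uint64_t a = 0xffffffffffffffffULL; /* destination: bits outside the insert window survive */
  uint64_t b = 0x8000000000000000ULL; /* source: shifted before being inserted */

  /* SRI #63: insert (b >> 63) into the low bit of a, keep a's upper 63 bits. */
  uint64_t sri = vsrid_n_u64(a, b, 63);
  /* SLI #63: insert (b << 63) into the top bit of a, keep a's lower 63 bits. */
  uint64_t sli = vslid_n_u64(a, b, 63);

  /* The same results expressed with plain shifts and masks. */
  uint64_t sri_ref = (a & ~(UINT64_MAX >> 63)) | (b >> 63);
  uint64_t sli_ref = (a & ~(UINT64_MAX << 63)) | (b << 63);

  printf("sri=%016llx (%s)\n", (unsigned long long)sri,
         sri == sri_ref ? "ok" : "mismatch");
  printf("sli=%016llx (%s)\n", (unsigned long long)sli,
         sli == sli_ref ? "ok" : "mismatch");
  return 0;
}

Compiled for AArch64, the two intrinsic calls are expected to lower to the sri/sli forms exercised by the updated tests above.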