mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-03-25 16:30:05 +00:00
[AArch64] Add support for NEON scalar arithmetic instructions:
SQDMULH, SQRDMULH, FMULX, FRECPS, and FRSQRTS. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192107 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
fbe4f5afce
commit
2aeb4771a6
@ -318,9 +318,12 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
|
||||
setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v1i64, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v1f32, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v2f32, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v1f64, Custom);
|
||||
setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
|
||||
}
|
||||
}
|
||||
|
@ -2991,6 +2991,40 @@ class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
|
||||
[],
|
||||
NoItinerary>;
|
||||
|
||||
multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode,
|
||||
string asmop, bit Commutable = 0>
|
||||
{
|
||||
let isCommutable = Commutable in {
|
||||
def hhh : NeonI_Scalar3Same<u, 0b01, opcode,
|
||||
(outs FPR16:$Rd), (ins FPR16:$Rn, FPR16:$Rm),
|
||||
!strconcat(asmop, " $Rd, $Rn, $Rm"),
|
||||
[],
|
||||
NoItinerary>;
|
||||
def sss : NeonI_Scalar3Same<u, 0b10, opcode,
|
||||
(outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
|
||||
!strconcat(asmop, " $Rd, $Rn, $Rm"),
|
||||
[],
|
||||
NoItinerary>;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode,
|
||||
string asmop, bit Commutable = 0>
|
||||
{
|
||||
let isCommutable = Commutable in {
|
||||
def sss : NeonI_Scalar3Same<u, {size_high, 0b0}, opcode,
|
||||
(outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
|
||||
!strconcat(asmop, " $Rd, $Rn, $Rm"),
|
||||
[],
|
||||
NoItinerary>;
|
||||
def ddd : NeonI_Scalar3Same<u, {size_high, 0b1}, opcode,
|
||||
(outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
|
||||
!strconcat(asmop, " $Rd, $Rn, $Rm"),
|
||||
[],
|
||||
NoItinerary>;
|
||||
}
|
||||
}
|
||||
|
||||
multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
|
||||
string asmop, bit Commutable = 0>
|
||||
{
|
||||
@ -3018,16 +3052,18 @@ multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
|
||||
}
|
||||
}
|
||||
|
||||
multiclass Neon_Scalar_D_size_patterns<SDPatternOperator opnode,
|
||||
Instruction INSTD> {
|
||||
multiclass Neon_Scalar3Same_D_size_patterns<SDPatternOperator opnode,
|
||||
Instruction INSTD> {
|
||||
def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
|
||||
(INSTD FPR64:$Rn, FPR64:$Rm)>;
|
||||
}
|
||||
|
||||
multiclass Neon_Scalar_BHSD_size_patterns<SDPatternOperator opnode,
|
||||
Instruction INSTB, Instruction INSTH,
|
||||
Instruction INSTS, Instruction INSTD>
|
||||
: Neon_Scalar_D_size_patterns<opnode, INSTD> {
|
||||
multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode,
|
||||
Instruction INSTB,
|
||||
Instruction INSTH,
|
||||
Instruction INSTS,
|
||||
Instruction INSTD>
|
||||
: Neon_Scalar3Same_D_size_patterns<opnode, INSTD> {
|
||||
def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
|
||||
(INSTB FPR8:$Rn, FPR8:$Rm)>;
|
||||
|
||||
@ -3038,6 +3074,24 @@ multiclass Neon_Scalar_BHSD_size_patterns<SDPatternOperator opnode,
|
||||
(INSTS FPR32:$Rn, FPR32:$Rm)>;
|
||||
}
|
||||
|
||||
multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode,
|
||||
Instruction INSTH,
|
||||
Instruction INSTS> {
|
||||
def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
|
||||
(INSTH FPR16:$Rn, FPR16:$Rm)>;
|
||||
def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
|
||||
(INSTS FPR32:$Rn, FPR32:$Rm)>;
|
||||
}
|
||||
|
||||
multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
|
||||
Instruction INSTS,
|
||||
Instruction INSTD> {
|
||||
def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))),
|
||||
(INSTS FPR32:$Rn, FPR32:$Rm)>;
|
||||
def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
|
||||
(INSTD FPR64:$Rn, FPR64:$Rm)>;
|
||||
}
|
||||
|
||||
// Scalar Integer Add
|
||||
let isCommutable = 1 in {
|
||||
def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
|
||||
@ -3047,14 +3101,14 @@ def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
|
||||
def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
|
||||
|
||||
// Pattern for Scalar Integer Add and Sub with D register only
|
||||
defm : Neon_Scalar_D_size_patterns<add, ADDddd>;
|
||||
defm : Neon_Scalar_D_size_patterns<sub, SUBddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<add, ADDddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<sub, SUBddd>;
|
||||
|
||||
// Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
|
||||
defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
|
||||
defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
|
||||
defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
|
||||
defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
|
||||
|
||||
// Scalar Integer Saturating Add (Signed, Unsigned)
|
||||
defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
|
||||
@ -3066,21 +3120,57 @@ defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
|
||||
|
||||
// Patterns to match llvm.arm.* intrinsic for
|
||||
// Scalar Integer Saturating Add, Sub (Signed, Unsigned)
|
||||
defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
|
||||
defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
|
||||
defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
|
||||
defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
|
||||
|
||||
// Patterns to match llvm.aarch64.* intrinsic for
|
||||
// Scalar Integer Saturating Add, Sub (Signed, Unsigned)
|
||||
defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqadds, SQADDbbb, SQADDhhh,
|
||||
SQADDsss, SQADDddd>;
|
||||
defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqaddu, UQADDbbb, UQADDhhh,
|
||||
UQADDsss, UQADDddd>;
|
||||
defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqsubs, SQSUBbbb, SQSUBhhh,
|
||||
SQSUBsss, SQSUBddd>;
|
||||
defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqsubu, UQSUBbbb, UQSUBhhh,
|
||||
UQSUBsss, UQSUBddd>;
|
||||
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqadds, SQADDbbb,
|
||||
SQADDhhh, SQADDsss, SQADDddd>;
|
||||
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqaddu, UQADDbbb,
|
||||
UQADDhhh, UQADDsss, UQADDddd>;
|
||||
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubs, SQSUBbbb,
|
||||
SQSUBhhh, SQSUBsss, SQSUBddd>;
|
||||
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqsubu, UQSUBbbb,
|
||||
UQSUBhhh, UQSUBsss, UQSUBddd>;
|
||||
|
||||
// Scalar Integer Saturating Doubling Multiply Half High
|
||||
defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;
|
||||
|
||||
// Scalar Integer Saturating Rounding Doubling Multiply Half High
|
||||
defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;
|
||||
|
||||
// Patterns to match llvm.arm.* intrinsic for
|
||||
// Scalar Integer Saturating Doubling Multiply Half High and
|
||||
// Scalar Integer Saturating Rounding Doubling Multiply Half High
|
||||
defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
|
||||
SQDMULHsss>;
|
||||
defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
|
||||
SQRDMULHsss>;
|
||||
|
||||
// Scalar Floating-point Multiply Extended
|
||||
defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
|
||||
|
||||
// Scalar Floating-point Reciprocal Step
|
||||
defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
|
||||
|
||||
// Scalar Floating-point Reciprocal Square Root Step
|
||||
defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
|
||||
|
||||
// Patterns to match llvm.arm.* intrinsic for
|
||||
// Scalar Floating-point Reciprocal Step and
|
||||
// Scalar Floating-point Reciprocal Square Root Step
|
||||
defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrecps, FRECPSsss,
|
||||
FRECPSddd>;
|
||||
defm : Neon_Scalar3Same_SD_size_patterns<int_arm_neon_vrsqrts, FRSQRTSsss,
|
||||
FRSQRTSddd>;
|
||||
|
||||
// Patterns to match llvm.aarch64.* intrinsic for
|
||||
// Scalar Floating-point Multiply Extended
|
||||
defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vmulx, FMULXsss,
|
||||
FMULXddd>;
|
||||
|
||||
// Scalar Integer Shift Left (Signed, Unsigned)
|
||||
def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
|
||||
@ -3088,13 +3178,13 @@ def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
|
||||
|
||||
// Patterns to match llvm.arm.* intrinsic for
|
||||
// Scalar Integer Shift Left (Signed, Unsigned)
|
||||
defm : Neon_Scalar_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
|
||||
defm : Neon_Scalar_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
|
||||
|
||||
// Patterns to match llvm.aarch64.* intrinsic for
|
||||
// Scalar Integer Shift Left (Signed, Unsigned)
|
||||
defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
|
||||
defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
|
||||
|
||||
// Scalar Integer Saturating Shift Left (Signed, Unsigned)
|
||||
defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
|
||||
@ -3102,15 +3192,15 @@ defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
|
||||
|
||||
// Patterns to match llvm.aarch64.* intrinsic for
|
||||
// Scalar Integer Saturating Shift Left (Signed, Unsigned)
|
||||
defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb, SQSHLhhh,
|
||||
SQSHLsss, SQSHLddd>;
|
||||
defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb, UQSHLhhh,
|
||||
UQSHLsss, UQSHLddd>;
|
||||
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb,
|
||||
SQSHLhhh, SQSHLsss, SQSHLddd>;
|
||||
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb,
|
||||
UQSHLhhh, UQSHLsss, UQSHLddd>;
|
||||
|
||||
// Patterns to match llvm.arm.* intrinsic for
|
||||
// Scalar Integer Saturating Shift Left (Signed, Unsigned)
|
||||
defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
|
||||
defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
|
||||
|
||||
// Scalar Integer Rounding Shift Left (Signed, Unsigned)
|
||||
def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
|
||||
@ -3118,13 +3208,13 @@ def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
|
||||
|
||||
// Patterns to match llvm.aarch64.* intrinsic for
|
||||
// Scalar Integer Rounding Shift Left (Signed, Unsigned)
|
||||
defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
|
||||
defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
|
||||
|
||||
// Patterns to match llvm.arm.* intrinsic for
|
||||
// Scalar Integer Rounding Shift Left (Signed, Unsigned)
|
||||
defm : Neon_Scalar_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
|
||||
defm : Neon_Scalar_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
|
||||
|
||||
// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
|
||||
defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
|
||||
@ -3132,15 +3222,15 @@ defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
|
||||
|
||||
// Patterns to match llvm.aarch64.* intrinsic for
|
||||
// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
|
||||
defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb, SQRSHLhhh,
|
||||
SQRSHLsss, SQRSHLddd>;
|
||||
defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb, UQRSHLhhh,
|
||||
UQRSHLsss, UQRSHLddd>;
|
||||
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb,
|
||||
SQRSHLhhh, SQRSHLsss, SQRSHLddd>;
|
||||
defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
|
||||
UQRSHLhhh, UQRSHLsss, UQRSHLddd>;
|
||||
|
||||
// Patterns to match llvm.arm.* intrinsic for
|
||||
// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
|
||||
defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
|
||||
defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
|
||||
defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
|
||||
|
||||
// Scalar Reduce Pairwise
|
||||
|
||||
@ -4507,3 +4597,7 @@ def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
|
||||
def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
|
||||
(FMOVdx $src)>;
|
||||
|
||||
def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$Rn))),
|
||||
(v1f32 FPR32:$Rn)>;
|
||||
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
|
||||
(v1f64 FPR64:$Rn)>;
|
70
test/CodeGen/AArch64/neon-scalar-mul.ll
Normal file
70
test/CodeGen/AArch64/neon-scalar-mul.ll
Normal file
@ -0,0 +1,70 @@
|
||||
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
|
||||
|
||||
define i16 @test_vqdmulhh_s16(i16 %a, i16 %b) {
|
||||
; CHECK: test_vqdmulhh_s16
|
||||
; CHECK: sqdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
|
||||
%1 = insertelement <1 x i16> undef, i16 %a, i32 0
|
||||
%2 = insertelement <1 x i16> undef, i16 %b, i32 0
|
||||
%3 = call <1 x i16> @llvm.arm.neon.vqdmulh.v1i16(<1 x i16> %1, <1 x i16> %2)
|
||||
%4 = extractelement <1 x i16> %3, i32 0
|
||||
ret i16 %4
|
||||
}
|
||||
|
||||
define i32 @test_vqdmulhs_s32(i32 %a, i32 %b) {
|
||||
; CHECK: test_vqdmulhs_s32
|
||||
; CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
|
||||
%1 = insertelement <1 x i32> undef, i32 %a, i32 0
|
||||
%2 = insertelement <1 x i32> undef, i32 %b, i32 0
|
||||
%3 = call <1 x i32> @llvm.arm.neon.vqdmulh.v1i32(<1 x i32> %1, <1 x i32> %2)
|
||||
%4 = extractelement <1 x i32> %3, i32 0
|
||||
ret i32 %4
|
||||
}
|
||||
|
||||
declare <1 x i16> @llvm.arm.neon.vqdmulh.v1i16(<1 x i16>, <1 x i16>)
|
||||
declare <1 x i32> @llvm.arm.neon.vqdmulh.v1i32(<1 x i32>, <1 x i32>)
|
||||
|
||||
define i16 @test_vqrdmulhh_s16(i16 %a, i16 %b) {
|
||||
; CHECK: test_vqrdmulhh_s16
|
||||
; CHECK: sqrdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
|
||||
%1 = insertelement <1 x i16> undef, i16 %a, i32 0
|
||||
%2 = insertelement <1 x i16> undef, i16 %b, i32 0
|
||||
%3 = call <1 x i16> @llvm.arm.neon.vqrdmulh.v1i16(<1 x i16> %1, <1 x i16> %2)
|
||||
%4 = extractelement <1 x i16> %3, i32 0
|
||||
ret i16 %4
|
||||
}
|
||||
|
||||
define i32 @test_vqrdmulhs_s32(i32 %a, i32 %b) {
|
||||
; CHECK: test_vqrdmulhs_s32
|
||||
; CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
|
||||
%1 = insertelement <1 x i32> undef, i32 %a, i32 0
|
||||
%2 = insertelement <1 x i32> undef, i32 %b, i32 0
|
||||
%3 = call <1 x i32> @llvm.arm.neon.vqrdmulh.v1i32(<1 x i32> %1, <1 x i32> %2)
|
||||
%4 = extractelement <1 x i32> %3, i32 0
|
||||
ret i32 %4
|
||||
}
|
||||
|
||||
declare <1 x i16> @llvm.arm.neon.vqrdmulh.v1i16(<1 x i16>, <1 x i16>)
|
||||
declare <1 x i32> @llvm.arm.neon.vqrdmulh.v1i32(<1 x i32>, <1 x i32>)
|
||||
|
||||
define float @test_vmulxs_f32(float %a, float %b) {
|
||||
; CHECK: test_vmulxs_f32
|
||||
; CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
|
||||
%1 = insertelement <1 x float> undef, float %a, i32 0
|
||||
%2 = insertelement <1 x float> undef, float %b, i32 0
|
||||
%3 = call <1 x float> @llvm.aarch64.neon.vmulx.v1f32(<1 x float> %1, <1 x float> %2)
|
||||
%4 = extractelement <1 x float> %3, i32 0
|
||||
ret float %4
|
||||
}
|
||||
|
||||
define double @test_vmulxd_f64(double %a, double %b) {
|
||||
; CHECK: test_vmulxd_f64
|
||||
; CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
|
||||
%1 = insertelement <1 x double> undef, double %a, i32 0
|
||||
%2 = insertelement <1 x double> undef, double %b, i32 0
|
||||
%3 = call <1 x double> @llvm.aarch64.neon.vmulx.v1f64(<1 x double> %1, <1 x double> %2)
|
||||
%4 = extractelement <1 x double> %3, i32 0
|
||||
ret double %4
|
||||
}
|
||||
|
||||
declare <1 x float> @llvm.aarch64.neon.vmulx.v1f32(<1 x float>, <1 x float>)
|
||||
declare <1 x double> @llvm.aarch64.neon.vmulx.v1f64(<1 x double>, <1 x double>)
|
47
test/CodeGen/AArch64/neon-scalar-recip.ll
Normal file
47
test/CodeGen/AArch64/neon-scalar-recip.ll
Normal file
@ -0,0 +1,47 @@
|
||||
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
|
||||
|
||||
define float @test_vrecpss_f32(float %a, float %b) {
|
||||
; CHECK: test_vrecpss_f32
|
||||
; CHECK: frecps {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
|
||||
%1 = insertelement <1 x float> undef, float %a, i32 0
|
||||
%2 = insertelement <1 x float> undef, float %b, i32 0
|
||||
%3 = call <1 x float> @llvm.arm.neon.vrecps.v1f32(<1 x float> %1, <1 x float> %2)
|
||||
%4 = extractelement <1 x float> %3, i32 0
|
||||
ret float %4
|
||||
}
|
||||
|
||||
define double @test_vrecpsd_f64(double %a, double %b) {
|
||||
; CHECK: test_vrecpsd_f64
|
||||
; CHECK: frecps {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
|
||||
%1 = insertelement <1 x double> undef, double %a, i32 0
|
||||
%2 = insertelement <1 x double> undef, double %b, i32 0
|
||||
%3 = call <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double> %1, <1 x double> %2)
|
||||
%4 = extractelement <1 x double> %3, i32 0
|
||||
ret double %4
|
||||
}
|
||||
|
||||
declare <1 x float> @llvm.arm.neon.vrecps.v1f32(<1 x float>, <1 x float>)
|
||||
declare <1 x double> @llvm.arm.neon.vrecps.v1f64(<1 x double>, <1 x double>)
|
||||
|
||||
define float @test_vrsqrtss_f32(float %a, float %b) {
|
||||
; CHECK: test_vrsqrtss_f32
|
||||
; CHECK: frsqrts {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
|
||||
%1 = insertelement <1 x float> undef, float %a, i32 0
|
||||
%2 = insertelement <1 x float> undef, float %b, i32 0
|
||||
%3 = call <1 x float> @llvm.arm.neon.vrsqrts.v1f32(<1 x float> %1, <1 x float> %2)
|
||||
%4 = extractelement <1 x float> %3, i32 0
|
||||
ret float %4
|
||||
}
|
||||
|
||||
define double @test_vrsqrtsd_f64(double %a, double %b) {
|
||||
; CHECK: test_vrsqrtsd_f64
|
||||
; CHECK: frsqrts {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
|
||||
%1 = insertelement <1 x double> undef, double %a, i32 0
|
||||
%2 = insertelement <1 x double> undef, double %b, i32 0
|
||||
%3 = call <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double> %1, <1 x double> %2)
|
||||
%4 = extractelement <1 x double> %3, i32 0
|
||||
ret double %4
|
||||
}
|
||||
|
||||
declare <1 x float> @llvm.arm.neon.vrsqrts.v1f32(<1 x float>, <1 x float>)
|
||||
declare <1 x double> @llvm.arm.neon.vrsqrts.v1f64(<1 x double>, <1 x double>)
|
@ -826,6 +826,33 @@
|
||||
// CHECK-ERROR: uqsub h1, h2, d2
|
||||
// CHECK-ERROR: ^
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Scalar Integer Saturating Doubling Multiply Half High (Signed)
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
sqdmulh h10, s11, h12
|
||||
sqdmulh s20, h21, s2
|
||||
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: sqdmulh h10, s11, h12
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: sqdmulh s20, h21, s2
|
||||
// CHECK-ERROR: ^
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// Scalar Integer Saturating Rounding Doubling Multiply Half High (Signed)
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
sqrdmulh h10, s11, h12
|
||||
sqrdmulh s20, h21, s2
|
||||
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: sqrdmulh h10, s11, h12
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: sqrdmulh s20, h21, s2
|
||||
// CHECK-ERROR: ^
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Shift Left (Signed and Unsigned Integer)
|
||||
@ -3771,3 +3798,44 @@
|
||||
// CHECK-ERROR: fminv d0, v1.2d
|
||||
// CHECK-ERROR: ^
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Floating-point Multiply Extended
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
fmulx s20, h22, s15
|
||||
fmulx d23, d11, s1
|
||||
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: fmulx s20, h22, s15
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: fmulx d23, d11, s1
|
||||
// CHECK-ERROR: ^
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Floating-point Reciprocal Step
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
frecps s21, s16, h13
|
||||
frecps d22, s30, d21
|
||||
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: frecps s21, s16, h13
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: frecps d22, s30, d21
|
||||
// CHECK-ERROR: ^
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Floating-point Reciprocal Square Root Step
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
frsqrts s21, h5, s12
|
||||
frsqrts d8, s22, d18
|
||||
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: frsqrts s21, h5, s12
|
||||
// CHECK-ERROR: ^
|
||||
// CHECK-ERROR: error: invalid operand for instruction
|
||||
// CHECK-ERROR: frsqrts d8, s22, d18
|
||||
// CHECK-ERROR: ^
|
||||
|
33
test/MC/AArch64/neon-scalar-mul.s
Normal file
33
test/MC/AArch64/neon-scalar-mul.s
Normal file
@ -0,0 +1,33 @@
|
||||
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Scalar Integer Saturating Doubling Multiply Half High
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
sqdmulh h10, h11, h12
|
||||
sqdmulh s20, s21, s2
|
||||
|
||||
// CHECK: sqdmulh h10, h11, h12 // encoding: [0x6a,0xb5,0x6c,0x5e]
|
||||
// CHECK: sqdmulh s20, s21, s2 // encoding: [0xb4,0xb6,0xa2,0x5e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Scalar Integer Saturating Rounding Doubling Multiply Half High
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
sqrdmulh h10, h11, h12
|
||||
sqrdmulh s20, s21, s2
|
||||
|
||||
// CHECK: sqrdmulh h10, h11, h12 // encoding: [0x6a,0xb5,0x6c,0x7e]
|
||||
// CHECK: sqrdmulh s20, s21, s2 // encoding: [0xb4,0xb6,0xa2,0x7e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Floating-point Multiply Extended
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
fmulx s20, s22, s15
|
||||
fmulx d23, d11, d1
|
||||
|
||||
// CHECK: fmulx s20, s22, s15 // encoding: [0xd4,0xde,0x2f,0x5e]
|
||||
// CHECK: fmulx d23, d11, d1 // encoding: [0x77,0xdd,0x61,0x5e]
|
23
test/MC/AArch64/neon-scalar-recip.s
Normal file
23
test/MC/AArch64/neon-scalar-recip.s
Normal file
@ -0,0 +1,23 @@
|
||||
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
|
||||
|
||||
// Check that the assembler can handle the documented syntax for AArch64
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Floating-point Reciprocal Step
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
frecps s21, s16, s13
|
||||
frecps d22, d30, d21
|
||||
|
||||
// CHECK: frecps s21, s16, s13 // encoding: [0x15,0xfe,0x2d,0x5e]
|
||||
// CHECK: frecps d22, d30, d21 // encoding: [0xd6,0xff,0x75,0x5e]
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
// Floating-point Reciprocal Square Root Step
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
frsqrts s21, s5, s12
|
||||
frsqrts d8, d22, d18
|
||||
|
||||
// CHECK: frsqrts s21, s5, s12 // encoding: [0xb5,0xfc,0xac,0x5e]
|
||||
// CHECK: frsqrts d8, d22, d18 // encoding: [0xc8,0xfe,0xf2,0x5e]
|
@ -1452,3 +1452,43 @@
|
||||
0x20 0x60 0x22 0x6e
|
||||
0x20 0x60 0x62 0x6e
|
||||
0x20 0x60 0xa2 0x6e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Scalar Integer Saturating Doubling Multiply Half High
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: sqdmulh h10, h11, h12
|
||||
# CHECK: sqdmulh s20, s21, s2
|
||||
0x6a,0xb5,0x6c,0x5e
|
||||
0xb4,0xb6,0xa2,0x5e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Scalar Integer Saturating Rounding Doubling Multiply Half High
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: sqrdmulh h10, h11, h12
|
||||
# CHECK: sqrdmulh s20, s21, s2
|
||||
0x6a,0xb5,0x6c,0x7e
|
||||
0xb4,0xb6,0xa2,0x7e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Floating-point Multiply Extended
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: fmulx s20, s22, s15
|
||||
# CHECK: fmulx d23, d11, d1
|
||||
0xd4,0xde,0x2f,0x5e
|
||||
0x77,0xdd,0x61,0x5e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Floating-point Reciprocal Step
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: frecps s21, s16, s13
|
||||
# CHECK: frecps d22, d30, d21
|
||||
0x15,0xfe,0x2d,0x5e
|
||||
0xd6,0xff,0x75,0x5e
|
||||
|
||||
#----------------------------------------------------------------------
|
||||
# Floating-point Reciprocal Square Root Step
|
||||
#----------------------------------------------------------------------
|
||||
# CHECK: frsqrts s21, s5, s12
|
||||
# CHECK: frsqrts d8, d22, d18
|
||||
0xb5,0xfc,0xac,0x5e
|
||||
0xc8,0xfe,0xf2,0x5e
|
||||
|
Loading…
x
Reference in New Issue
Block a user