Initial support for Neon scalar instructions.

Patch by Ana Pazos.

1. Added support for v1ix and v1fx types.
2. Added Scalar Pairwise Reduce instructions.
3. Added initial implementation of Scalar Arithmetic instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191263 91177308-0d34-0410-b5e6-96231b3b80d8

commit 477fc628b3 (parent 44e8441767)
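For orientation: the heart of the change is that NEON scalar operations are modelled on one-element vector types (v1i8/v1i16/v1i32/v1i64, v1f32/v1f64) so operands stay in the FP/SIMD register file. A minimal IR sketch of what this enables, using one of the intrinsics declared by this patch (the function name is illustrative; the expected selection mirrors the tests added below):

declare <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64>, <1 x i64>)

define <1 x i64> @scalar_add_example(<1 x i64> %lhs, <1 x i64> %rhs) {
  ; expected to select the AdvSIMD scalar form: add dN, dN, dM
  %sum = call <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64> %lhs, <1 x i64> %rhs)
  ret <1 x i64> %sum
}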
@@ -67,41 +67,44 @@ namespace llvm {
       v32i1 = 17, // 32 x i1
       v64i1 = 18, // 64 x i1

-      v2i8 = 19, // 2 x i8
-      v4i8 = 20, // 4 x i8
-      v8i8 = 21, // 8 x i8
-      v16i8 = 22, // 16 x i8
-      v32i8 = 23, // 32 x i8
-      v64i8 = 24, // 64 x i8
-      v1i16 = 25, // 1 x i16
-      v2i16 = 26, // 2 x i16
-      v4i16 = 27, // 4 x i16
-      v8i16 = 28, // 8 x i16
-      v16i16 = 29, // 16 x i16
-      v32i16 = 30, // 32 x i16
-      v1i32 = 31, // 1 x i32
-      v2i32 = 32, // 2 x i32
-      v4i32 = 33, // 4 x i32
-      v8i32 = 34, // 8 x i32
-      v16i32 = 35, // 16 x i32
-      v1i64 = 36, // 1 x i64
-      v2i64 = 37, // 2 x i64
-      v4i64 = 38, // 4 x i64
-      v8i64 = 39, // 8 x i64
-      v16i64 = 40, // 16 x i64
+      v1i8 = 19, // 1 x i8
+      v2i8 = 20, // 2 x i8
+      v4i8 = 21, // 4 x i8
+      v8i8 = 22, // 8 x i8
+      v16i8 = 23, // 16 x i8
+      v32i8 = 24, // 32 x i8
+      v64i8 = 25, // 64 x i8
+      v1i16 = 26, // 1 x i16
+      v2i16 = 27, // 2 x i16
+      v4i16 = 28, // 4 x i16
+      v8i16 = 29, // 8 x i16
+      v16i16 = 30, // 16 x i16
+      v32i16 = 31, // 32 x i16
+      v1i32 = 32, // 1 x i32
+      v2i32 = 33, // 2 x i32
+      v4i32 = 34, // 4 x i32
+      v8i32 = 35, // 8 x i32
+      v16i32 = 36, // 16 x i32
+      v1i64 = 37, // 1 x i64
+      v2i64 = 38, // 2 x i64
+      v4i64 = 39, // 4 x i64
+      v8i64 = 40, // 8 x i64
+      v16i64 = 41, // 16 x i64

       FIRST_INTEGER_VECTOR_VALUETYPE = v2i1,
       LAST_INTEGER_VECTOR_VALUETYPE = v16i64,

-      v2f16 = 41, // 2 x f16
-      v8f16 = 42, // 8 x f16
-      v2f32 = 43, // 2 x f32
-      v4f32 = 44, // 4 x f32
-      v8f32 = 45, // 8 x f32
-      v16f32 = 46, // 16 x f32
-      v2f64 = 47, // 2 x f64
-      v4f64 = 48, // 4 x f64
-      v8f64 = 49, // 8 x f64
+      v2f16 = 42, // 2 x f16
+      v8f16 = 43, // 8 x f16
+      v1f32 = 44, // 1 x f32
+      v2f32 = 45, // 2 x f32
+      v4f32 = 46, // 4 x f32
+      v8f32 = 47, // 8 x f32
+      v16f32 = 48, // 16 x f32
+      v1f64 = 49, // 1 x f64
+      v2f64 = 50, // 2 x f64
+      v4f64 = 51, // 4 x f64
+      v8f64 = 52, // 8 x f64

       FIRST_FP_VECTOR_VALUETYPE = v2f16,
       LAST_FP_VECTOR_VALUETYPE = v8f64,

@@ -109,17 +112,17 @@ namespace llvm {
       FIRST_VECTOR_VALUETYPE = v2i1,
       LAST_VECTOR_VALUETYPE = v8f64,

-      x86mmx = 50, // This is an X86 MMX value
+      x86mmx = 53, // This is an X86 MMX value

-      Glue = 51, // This glues nodes together during pre-RA sched
+      Glue = 54, // This glues nodes together during pre-RA sched

-      isVoid = 52, // This has no value
+      isVoid = 55, // This has no value

-      Untyped = 53, // This value takes a register, but has
+      Untyped = 56, // This value takes a register, but has
                     // unspecified type. The register class
                     // will be determined by the opcode.

-      LAST_VALUETYPE = 54, // This always remains at the end of the list.
+      LAST_VALUETYPE = 57, // This always remains at the end of the list.

       // This is the current maximum for LAST_VALUETYPE.
       // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors

@@ -266,6 +269,7 @@ namespace llvm {
       case v16i1 :
       case v32i1 :
       case v64i1: return i1;
+      case v1i8 :
       case v2i8 :
       case v4i8 :
       case v8i8 :

@@ -290,10 +294,12 @@ namespace llvm {
       case v16i64: return i64;
       case v2f16:
       case v8f16: return f16;
+      case v1f32:
       case v2f32:
       case v4f32:
       case v8f32:
       case v16f32: return f32;
+      case v1f64:
       case v2f64:
       case v4f64:
       case v8f64: return f64;

@@ -338,9 +344,12 @@ namespace llvm {
       case v2f16:
       case v2f32:
       case v2f64: return 2;
+      case v1i8:
       case v1i16:
       case v1i32:
-      case v1i64: return 1;
+      case v1i64:
+      case v1f32:
+      case v1f64: return 1;
       }
     }

@@ -363,6 +372,7 @@ namespace llvm {
       case v2i1: return 2;
       case v4i1: return 4;
       case i8 :
+      case v1i8:
       case v8i1: return 8;
       case i16 :
       case f16:

@@ -375,6 +385,7 @@ namespace llvm {
       case v4i8:
       case v2i16:
       case v2f16:
+      case v1f32:
       case v1i32: return 32;
       case x86mmx:
       case f64 :

@@ -384,7 +395,8 @@ namespace llvm {
       case v4i16:
       case v2i32:
       case v1i64:
-      case v2f32: return 64;
+      case v2f32:
+      case v1f64: return 64;
       case f80 : return 80;
       case f128:
       case ppcf128:

@@ -494,6 +506,7 @@ namespace llvm {
       if (NumElements == 64) return MVT::v64i1;
       break;
     case MVT::i8:
+      if (NumElements == 1) return MVT::v1i8;
       if (NumElements == 2) return MVT::v2i8;
       if (NumElements == 4) return MVT::v4i8;
       if (NumElements == 8) return MVT::v8i8;

@@ -528,12 +541,14 @@ namespace llvm {
       if (NumElements == 8) return MVT::v8f16;
       break;
     case MVT::f32:
+      if (NumElements == 1) return MVT::v1f32;
       if (NumElements == 2) return MVT::v2f32;
       if (NumElements == 4) return MVT::v4f32;
       if (NumElements == 8) return MVT::v8f32;
       if (NumElements == 16) return MVT::v16f32;
       break;
     case MVT::f64:
+      if (NumElements == 1) return MVT::v1f64;
       if (NumElements == 2) return MVT::v2f64;
       if (NumElements == 4) return MVT::v4f64;
       if (NumElements == 8) return MVT::v8f64;
@@ -39,44 +39,47 @@ def v8i1 : ValueType<8 , 15>; // 8 x i1 vector value
 def v16i1 : ValueType<16, 16>; // 16 x i1 vector value
 def v32i1 : ValueType<32 , 17>; // 32 x i1 vector value
 def v64i1 : ValueType<64 , 18>; // 64 x i1 vector value
-def v2i8 : ValueType<16 , 19>; // 2 x i8 vector value
-def v4i8 : ValueType<32 , 20>; // 4 x i8 vector value
-def v8i8 : ValueType<64 , 21>; // 8 x i8 vector value
-def v16i8 : ValueType<128, 22>; // 16 x i8 vector value
-def v32i8 : ValueType<256, 23>; // 32 x i8 vector value
-def v64i8 : ValueType<512, 24>; // 64 x i8 vector value
-def v1i16 : ValueType<16 , 25>; // 1 x i16 vector value
-def v2i16 : ValueType<32 , 26>; // 2 x i16 vector value
-def v4i16 : ValueType<64 , 27>; // 4 x i16 vector value
-def v8i16 : ValueType<128, 28>; // 8 x i16 vector value
-def v16i16 : ValueType<256, 29>; // 16 x i16 vector value
-def v32i16 : ValueType<512, 30>; // 32 x i16 vector value
-def v1i32 : ValueType<32 , 31>; // 1 x i32 vector value
-def v2i32 : ValueType<64 , 32>; // 2 x i32 vector value
-def v4i32 : ValueType<128, 33>; // 4 x i32 vector value
-def v8i32 : ValueType<256, 34>; // 8 x i32 vector value
-def v16i32 : ValueType<512, 35>; // 16 x i32 vector value
-def v1i64 : ValueType<64 , 36>; // 1 x i64 vector value
-def v2i64 : ValueType<128, 37>; // 2 x i64 vector value
-def v4i64 : ValueType<256, 38>; // 4 x i64 vector value
-def v8i64 : ValueType<512, 39>; // 8 x i64 vector value
-def v16i64 : ValueType<1024,40>; // 16 x i64 vector value
+def v1i8 : ValueType<16, 19>; // 1 x i8 vector value
+def v2i8 : ValueType<16 , 20>; // 2 x i8 vector value
+def v4i8 : ValueType<32 , 21>; // 4 x i8 vector value
+def v8i8 : ValueType<64 , 22>; // 8 x i8 vector value
+def v16i8 : ValueType<128, 23>; // 16 x i8 vector value
+def v32i8 : ValueType<256, 24>; // 32 x i8 vector value
+def v64i8 : ValueType<512, 25>; // 64 x i8 vector value
+def v1i16 : ValueType<16 , 26>; // 1 x i16 vector value
+def v2i16 : ValueType<32 , 27>; // 2 x i16 vector value
+def v4i16 : ValueType<64 , 28>; // 4 x i16 vector value
+def v8i16 : ValueType<128, 29>; // 8 x i16 vector value
+def v16i16 : ValueType<256, 30>; // 16 x i16 vector value
+def v32i16 : ValueType<512, 31>; // 32 x i16 vector value
+def v1i32 : ValueType<32 , 32>; // 1 x i32 vector value
+def v2i32 : ValueType<64 , 33>; // 2 x i32 vector value
+def v4i32 : ValueType<128, 34>; // 4 x i32 vector value
+def v8i32 : ValueType<256, 35>; // 8 x i32 vector value
+def v16i32 : ValueType<512, 36>; // 16 x i32 vector value
+def v1i64 : ValueType<64 , 37>; // 1 x i64 vector value
+def v2i64 : ValueType<128, 38>; // 2 x i64 vector value
+def v4i64 : ValueType<256, 39>; // 4 x i64 vector value
+def v8i64 : ValueType<512, 40>; // 8 x i64 vector value
+def v16i64 : ValueType<1024,41>; // 16 x i64 vector value

-def v2f16 : ValueType<32 , 41>; // 2 x f16 vector value
-def v8f16 : ValueType<128, 42>; // 8 x f16 vector value
-def v2f32 : ValueType<64 , 43>; // 2 x f32 vector value
-def v4f32 : ValueType<128, 44>; // 4 x f32 vector value
-def v8f32 : ValueType<256, 45>; // 8 x f32 vector value
-def v16f32 : ValueType<512, 46>; // 16 x f32 vector value
-def v2f64 : ValueType<128, 47>; // 2 x f64 vector value
-def v4f64 : ValueType<256, 48>; // 4 x f64 vector value
-def v8f64 : ValueType<512, 49>; // 8 x f64 vector value
+def v2f16 : ValueType<32 , 42>; // 2 x f16 vector value
+def v8f16 : ValueType<128, 43>; // 8 x f16 vector value
+def v1f32 : ValueType<32 , 44>; // 1 x f32 vector value
+def v2f32 : ValueType<64 , 45>; // 2 x f32 vector value
+def v4f32 : ValueType<128, 46>; // 4 x f32 vector value
+def v8f32 : ValueType<256, 47>; // 8 x f32 vector value
+def v16f32 : ValueType<512, 48>; // 16 x f32 vector value
+def v1f64 : ValueType<64, 49>; // 1 x f64 vector value
+def v2f64 : ValueType<128, 50>; // 2 x f64 vector value
+def v4f64 : ValueType<256, 51>; // 4 x f64 vector value
+def v8f64 : ValueType<512, 52>; // 8 x f64 vector value

-def x86mmx : ValueType<64 , 50>; // X86 MMX value
-def FlagVT : ValueType<0 , 51>; // Pre-RA sched glue
-def isVoid : ValueType<0 , 52>; // Produces no value
-def untyped: ValueType<8 , 53>; // Produces an untyped value
+def x86mmx : ValueType<64 , 53>; // X86 MMX value
+def FlagVT : ValueType<0 , 54>; // Pre-RA sched glue
+def isVoid : ValueType<0 , 55>; // Produces no value
+def untyped: ValueType<8 , 56>; // Produces an untyped value
 def MetadataVT: ValueType<0, 250>; // Metadata

 // Pseudo valuetype mapped to the current pointer size to any address space.
@@ -140,6 +140,7 @@ def llvm_v8i1_ty : LLVMType<v8i1>; // 8 x i1
 def llvm_v16i1_ty : LLVMType<v16i1>; // 16 x i1
 def llvm_v32i1_ty : LLVMType<v32i1>; // 32 x i1
 def llvm_v64i1_ty : LLVMType<v64i1>; // 64 x i1
+def llvm_v1i8_ty : LLVMType<v1i8>; // 1 x i8
 def llvm_v2i8_ty : LLVMType<v2i8>; // 2 x i8
 def llvm_v4i8_ty : LLVMType<v4i8>; // 4 x i8
 def llvm_v8i8_ty : LLVMType<v8i8>; // 8 x i8

@@ -166,10 +167,12 @@ def llvm_v8i64_ty : LLVMType<v8i64>; // 8 x i64
 def llvm_v16i64_ty : LLVMType<v16i64>; // 16 x i64

 def llvm_v8f16_ty : LLVMType<v8f16>; // 8 x half (__fp16)
+def llvm_v1f32_ty : LLVMType<v1f32>; // 1 x float
 def llvm_v2f32_ty : LLVMType<v2f32>; // 2 x float
 def llvm_v4f32_ty : LLVMType<v4f32>; // 4 x float
 def llvm_v8f32_ty : LLVMType<v8f32>; // 8 x float
 def llvm_v16f32_ty : LLVMType<v16f32>; // 16 x float
+def llvm_v1f64_ty : LLVMType<v1f64>; // 1 x double
 def llvm_v2f64_ty : LLVMType<v2f64>; // 2 x double
 def llvm_v4f64_ty : LLVMType<v4f64>; // 4 x double
 def llvm_v8f64_ty : LLVMType<v8f64>; // 8 x double
@@ -17,12 +17,10 @@
 let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".

 // Vector Absolute Compare (Floating Point)
-def int_aarch64_neon_vacgeq : Intrinsic<[llvm_v2i64_ty],
-                                        [llvm_v2f64_ty, llvm_v2f64_ty],
-                                        [IntrNoMem]>;
-def int_aarch64_neon_vacgtq : Intrinsic<[llvm_v2i64_ty],
-                                        [llvm_v2f64_ty, llvm_v2f64_ty],
-                                        [IntrNoMem]>;
+def int_aarch64_neon_vacgeq :
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
+def int_aarch64_neon_vacgtq :
+  Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;

 // Vector maxNum (Floating Point)
 def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic;

@@ -66,4 +64,73 @@ def int_aarch64_neon_vsqshrn : Neon_N2V_Narrow_Intrinsic;
 def int_aarch64_neon_vuqshrn : Neon_N2V_Narrow_Intrinsic;
 def int_aarch64_neon_vsqrshrn : Neon_N2V_Narrow_Intrinsic;
 def int_aarch64_neon_vuqrshrn : Neon_N2V_Narrow_Intrinsic;
+
+// Scalar Add
+def int_aarch64_neon_vaddds :
+  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
+def int_aarch64_neon_vadddu :
+  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
+
+// Scalar Saturating Add (Signed, Unsigned)
+def int_aarch64_neon_vqadds : Neon_2Arg_Intrinsic;
+def int_aarch64_neon_vqaddu : Neon_2Arg_Intrinsic;
+
+// Scalar Sub
+def int_aarch64_neon_vsubds :
+  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
+def int_aarch64_neon_vsubdu :
+  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
+
+// Scalar Saturating Sub (Signed, Unsigned)
+def int_aarch64_neon_vqsubs : Neon_2Arg_Intrinsic;
+def int_aarch64_neon_vqsubu : Neon_2Arg_Intrinsic;
+
+// Scalar Shift
+// Scalar Shift Left
+def int_aarch64_neon_vshlds :
+  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
+def int_aarch64_neon_vshldu :
+  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
+
+// Scalar Saturating Shift Left
+def int_aarch64_neon_vqshls : Neon_2Arg_Intrinsic;
+def int_aarch64_neon_vqshlu : Neon_2Arg_Intrinsic;
+
+// Scalar Shift Rouding Left
+def int_aarch64_neon_vrshlds :
+  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
+def int_aarch64_neon_vrshldu :
+  Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
+
+// Scalar Saturating Rounding Shift Left
+def int_aarch64_neon_vqrshls : Neon_2Arg_Intrinsic;
+def int_aarch64_neon_vqrshlu : Neon_2Arg_Intrinsic;
+
+// Scalar Reduce Pairwise Add.
+def int_aarch64_neon_vpadd :
+  Intrinsic<[llvm_v1i64_ty], [llvm_v2i64_ty],[IntrNoMem]>;
+def int_aarch64_neon_vpfadd :
+  Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
+def int_aarch64_neon_vpfaddq :
+  Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+// Scalar Reduce Pairwise Floating Point Max/Min.
+def int_aarch64_neon_vpmax :
+  Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
+def int_aarch64_neon_vpmaxq :
+  Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+def int_aarch64_neon_vpmin :
+  Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
+def int_aarch64_neon_vpminq :
+  Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+
+// Scalar Reduce Pairwise Floating Point Maxnm/Minnm.
+def int_aarch64_neon_vpfmaxnm :
+  Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
+def int_aarch64_neon_vpfmaxnmq :
+  Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+def int_aarch64_neon_vpfminnm :
+  Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
+def int_aarch64_neon_vpfminnmq :
+  Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
 }
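A usage sketch for the pairwise-reduce intrinsics defined above: each consumes a two-element vector and yields a one-element vector, matching the AdvSIMD scalar pairwise instructions (illustrative function name; the expected selection is taken from the tests added later in this commit):

declare <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float>)

define <1 x float> @reduce_fadd_example(<2 x float> %a) {
  ; expected to select: faddp s0, v0.2s
  %r = call <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float> %a)
  ret <1 x float> %r
}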
@@ -919,7 +919,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
   // type does not have a strange size (eg: it is not i1).
   EVT VecVT = N->getValueType(0);
   unsigned NumElts = VecVT.getVectorNumElements();
-  assert(!(NumElts & 1) && "Legal vector of one illegal element?");
+  assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) &&
+         "Legal vector of one illegal element?");

   // Promote the inserted value. The type does not need to match the
   // vector element type. Check that any extra bits introduced will be
@@ -453,7 +453,8 @@ enum IIT_Info {
   IIT_STRUCT5 = 22,
   IIT_EXTEND_VEC_ARG = 23,
   IIT_TRUNC_VEC_ARG = 24,
-  IIT_ANYPTR = 25
+  IIT_ANYPTR = 25,
+  IIT_V1 = 26
 };

@@ -497,6 +498,10 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
   case IIT_I64:
     OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 64));
     return;
+  case IIT_V1:
+    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 1));
+    DecodeIITType(NextElt, Infos, OutputTable);
+    return;
   case IIT_V2:
     OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 2));
     DecodeIITType(NextElt, Infos, OutputTable);
@@ -134,6 +134,7 @@ std::string EVT::getEVTString() const {
   case MVT::v16i1: return "v16i1";
   case MVT::v32i1: return "v32i1";
   case MVT::v64i1: return "v64i1";
+  case MVT::v1i8: return "v1i8";
   case MVT::v2i8: return "v2i8";
   case MVT::v4i8: return "v4i8";
   case MVT::v8i8: return "v8i8";

@@ -156,12 +157,14 @@ std::string EVT::getEVTString() const {
   case MVT::v4i64: return "v4i64";
   case MVT::v8i64: return "v8i64";
   case MVT::v16i64: return "v16i64";
+  case MVT::v1f32: return "v1f32";
   case MVT::v2f32: return "v2f32";
   case MVT::v2f16: return "v2f16";
   case MVT::v8f16: return "v8f16";
   case MVT::v4f32: return "v4f32";
   case MVT::v8f32: return "v8f32";
   case MVT::v16f32: return "v16f32";
+  case MVT::v1f64: return "v1f64";
   case MVT::v2f64: return "v2f64";
   case MVT::v4f64: return "v4f64";
   case MVT::v8f64: return "v8f64";

@@ -198,6 +201,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
   case MVT::v16i1: return VectorType::get(Type::getInt1Ty(Context), 16);
   case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32);
   case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64);
+  case MVT::v1i8: return VectorType::get(Type::getInt8Ty(Context), 1);
   case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2);
   case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4);
   case MVT::v8i8: return VectorType::get(Type::getInt8Ty(Context), 8);

@@ -222,10 +226,12 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
   case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16);
   case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2);
   case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8);
+  case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1);
   case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
   case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4);
   case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8);
   case MVT::v16f32: return VectorType::get(Type::getFloatTy(Context), 16);
+  case MVT::v1f64: return VectorType::get(Type::getDoubleTy(Context), 1);
   case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2);
   case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4);
   case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8);
@@ -59,9 +59,9 @@ def CC_A64_APCS : CallingConv<[
   // Canonicalise the various types that live in different floating-point
   // registers. This makes sense because the PCS does not distinguish Short
   // Vectors and Floating-point types.
-  CCIfType<[v2i8], CCBitConvertToType<f16>>,
-  CCIfType<[v4i8, v2i16], CCBitConvertToType<f32>>,
-  CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64], CCBitConvertToType<f64>>,
+  CCIfType<[v1i16, v2i8], CCBitConvertToType<f16>>,
+  CCIfType<[v1i32, v4i8, v2i16, v1f32], CCBitConvertToType<f32>>,
+  CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64, v1f64], CCBitConvertToType<f64>>,
   CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
            CCBitConvertToType<f128>>,

@@ -70,7 +70,8 @@ def CC_A64_APCS : CallingConv<[
   // argument is allocated to the least significant bits of register
   // v[NSRN]. The NSRN is incremented by one. The argument has now been
   // allocated."
-  CCIfType<[f16], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>,
+  CCIfType<[v1i8], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>,
+  CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
   CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
   CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
@@ -57,6 +57,12 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)

   if (Subtarget->hasNEON()) {
     // And the vectors
+    addRegisterClass(MVT::v1i8, &AArch64::FPR8RegClass);
+    addRegisterClass(MVT::v1i16, &AArch64::FPR16RegClass);
+    addRegisterClass(MVT::v1i32, &AArch64::FPR32RegClass);
+    addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass);
+    addRegisterClass(MVT::v1f32, &AArch64::FPR32RegClass);
+    addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass);
     addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass);
     addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass);
     addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass);

@@ -274,16 +280,21 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
   setExceptionSelectorRegister(AArch64::X1);

   if (Subtarget->hasNEON()) {
+    setOperationAction(ISD::BUILD_VECTOR, MVT::v1i8, Custom);
     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
+    setOperationAction(ISD::BUILD_VECTOR, MVT::v1i16, Custom);
     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
+    setOperationAction(ISD::BUILD_VECTOR, MVT::v1i32, Custom);
     setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
     setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
     setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
+    setOperationAction(ISD::BUILD_VECTOR, MVT::v1f32, Custom);
     setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+    setOperationAction(ISD::BUILD_VECTOR, MVT::v1f64, Custom);
     setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);

     setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal);
@@ -1074,8 +1074,7 @@ class NeonI_2VMisc<bit q, bit u, bits<2> size, bits<5> opcode,
 class NeonI_2VShiftImm<bit q, bit u, bits<5> opcode,
                        dag outs, dag ins, string asmstr,
                        list<dag> patterns, InstrItinClass itin>
-  : A64InstRdn<outs, ins, asmstr, patterns, itin>
-{
+  : A64InstRdn<outs, ins, asmstr, patterns, itin> {
   bits<7> Imm;
   let Inst{31} = 0b0;
   let Inst{30} = q;

@@ -1129,5 +1128,23 @@ class NeonI_insert<bit q, bit op,
   // Inherit Rd in 4-0
 }

+// Format AdvSIMD scalar pairwise
+class NeonI_ScalarPair<bit u, bits<2> size, bits<5> opcode,
+                       dag outs, dag ins, string asmstr,
+                       list<dag> patterns, InstrItinClass itin>
+  : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+  let Inst{31} = 0b0;
+  let Inst{30} = 0b1;
+  let Inst{29} = u;
+  let Inst{28-24} = 0b11110;
+  let Inst{23-22} = size;
+  let Inst{21-17} = 0b11000;
+  let Inst{16-12} = opcode;
+  let Inst{11-10} = 0b10;
+
+  // Inherit Rn in 9-5
+  // Inherit Rd in 4-0
+}
+
 }
@@ -2189,22 +2189,22 @@ def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>;
 // Extra patterns for when we're allowed to optimise separate multiplication and
 // addition.
 let Predicates = [UseFusedMAC] in {
-def : Pat<(fadd FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)),
+def : Pat<(f32 (fadd FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))),
           (FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(fsub FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)),
+def : Pat<(f32 (fsub FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))),
          (FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(fsub (fmul FPR32:$Rn, FPR32:$Rm), FPR32:$Ra),
+def : Pat<(f32 (fsub (f32 (fmul FPR32:$Rn, FPR32:$Rm)), FPR32:$Ra)),
          (FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(fsub (fneg FPR32:$Ra), (fmul FPR32:$Rn, FPR32:$Rm)),
+def : Pat<(f32 (fsub (f32 (fneg FPR32:$Ra)), (f32 (fmul FPR32:$Rn, FPR32:$Rm)))),
          (FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

-def : Pat<(fadd FPR64:$Ra, (fmul (f64 FPR64:$Rn), FPR64:$Rm)),
+def : Pat<(f64 (fadd FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))),
          (FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def : Pat<(fsub FPR64:$Ra, (fmul (f64 FPR64:$Rn), FPR64:$Rm)),
+def : Pat<(f64 (fsub FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))),
          (FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def : Pat<(fsub (fmul (f64 FPR64:$Rn), FPR64:$Rm), FPR64:$Ra),
+def : Pat<(f64 (fsub (f64 (fmul FPR64:$Rn, FPR64:$Rm)), FPR64:$Ra)),
          (FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def : Pat<(fsub (fneg (f64 FPR64:$Ra)), (fmul FPR64:$Rn, FPR64:$Rm)),
+def : Pat<(f64 (fsub (f64 (fneg FPR64:$Ra)), (f64 (fmul FPR64:$Rn, FPR64:$Rm)))),
          (FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
 }

@@ -2504,11 +2504,12 @@ defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
 multiclass NeonI_Op_High<SDPatternOperator op>
 {
   def _16B : PatFrag<(ops node:$Rn, node:$Rm),
-                     (op (Neon_top16B node:$Rn), (Neon_top16B node:$Rm))>;
+                     (op (v8i8 (Neon_top16B node:$Rn)), (v8i8 (Neon_top16B node:$Rm)))>;
   def _8H : PatFrag<(ops node:$Rn, node:$Rm),
-                    (op (Neon_top8H node:$Rn), (Neon_top8H node:$Rm))>;
+                    (op (v4i16 (Neon_top8H node:$Rn)), (v4i16 (Neon_top8H node:$Rm)))>;
   def _4S : PatFrag<(ops node:$Rn, node:$Rm),
-                    (op (Neon_top4S node:$Rn), (Neon_top4S node:$Rm))>;
+                    (op (v2i32 (Neon_top4S node:$Rn)), (v2i32 (Neon_top4S node:$Rm)))>;
+
 }

 defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;

@@ -2868,9 +2869,25 @@ multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
   }
 }

-class Neon_Scalar_D_size_patterns<SDPatternOperator opnode, Instruction INSTD>
-  : Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
-        (INSTD VPR64:$Rn, VPR64:$Rm)>;
+multiclass Neon_Scalar_D_size_patterns<SDPatternOperator opnode,
+                                       Instruction INSTD> {
+  def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
+            (INSTD FPR64:$Rn, FPR64:$Rm)>;
+}
+
+multiclass Neon_Scalar_BHSD_size_patterns<SDPatternOperator opnode,
+                                          Instruction INSTB, Instruction INSTH,
+                                          Instruction INSTS, Instruction INSTD>
+  : Neon_Scalar_D_size_patterns<opnode, INSTD> {
+  def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
+           (INSTB FPR8:$Rn, FPR8:$Rm)>;
+
+  def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
+           (INSTH FPR16:$Rn, FPR16:$Rm)>;
+
+  def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
+           (INSTS FPR32:$Rn, FPR32:$Rm)>;
+}

 // Scalar Integer Add
 let isCommutable = 1 in {

@@ -2880,9 +2897,15 @@ def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
 // Scalar Integer Sub
 def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;

-// Pattern for Scalar Integer Add and Sub with D register
-def : Neon_Scalar_D_size_patterns<add, ADDddd>;
-def : Neon_Scalar_D_size_patterns<sub, SUBddd>;
+// Pattern for Scalar Integer Add and Sub with D register only
+defm : Neon_Scalar_D_size_patterns<add, ADDddd>;
+defm : Neon_Scalar_D_size_patterns<sub, SUBddd>;
+
+// Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
+defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
+defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
+defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
+defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;

 // Scalar Integer Saturating Add (Signed, Unsigned)
 defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;

@@ -2892,40 +2915,160 @@ defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
 defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
 defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;

-// Patterns for Scalar Integer Saturating Add, Sub with D register only
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Saturating Add, Sub (Signed, Unsigned)
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
+
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Saturating Add, Sub (Signed, Unsigned)
+defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqadds, SQADDbbb, SQADDhhh,
+                                      SQADDsss, SQADDddd>;
+defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqaddu, UQADDbbb, UQADDhhh,
+                                      UQADDsss, UQADDddd>;
+defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqsubs, SQSUBbbb, SQSUBhhh,
+                                      SQSUBsss, SQSUBddd>;
+defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqsubu, UQSUBbbb, UQSUBhhh,
+                                      UQSUBsss, UQSUBddd>;

 // Scalar Integer Shift Left (Signed, Unsigned)
 def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
 def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;

+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Shift Left (Signed, Unsigned)
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
+
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Shift Left (Signed, Unsigned)
+defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
+defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
+
 // Scalar Integer Saturating Shift Left (Signed, Unsigned)
 defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
 defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;

-// Scalar Integer Rouding Shift Left (Signed, Unsigned)
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Saturating Shift Letf (Signed, Unsigned)
+defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb, SQSHLhhh,
+                                      SQSHLsss, SQSHLddd>;
+defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb, UQSHLhhh,
+                                      UQSHLsss, UQSHLddd>;
+
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Saturating Shift Letf (Signed, Unsigned)
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
+
+// Scalar Integer Rounding Shift Left (Signed, Unsigned)
 def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
 def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;

+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Rounding Shift Left (Signed, Unsigned)
+defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
+defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
+
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Rounding Shift Left (Signed, Unsigned)
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
+
 // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
 defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
 defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;

-// Patterns for Scalar Integer Shift Lef, Saturating Shift Left,
-// Rounding Shift Left, Rounding Saturating Shift Left with D register only
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
-def : Neon_Scalar_D_size_patterns<shl, SSHLddd>;
-def : Neon_Scalar_D_size_patterns<shl, USHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
-def : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
+defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb, SQRSHLhhh,
+                                      SQRSHLsss, SQRSHLddd>;
+defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb, UQRSHLhhh,
+                                      UQRSHLsss, UQRSHLddd>;
+
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
+defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
+
+// Scalar Reduce Pairwise
+
+multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
+                                    string asmop, bit Commutable = 0> {
+  let isCommutable = Commutable in {
+    def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
+                                 (outs FPR64:$Rd), (ins VPR128:$Rn),
+                                 !strconcat(asmop, " $Rd, $Rn.2d"),
+                                 [],
+                                 NoItinerary>;
+  }
+}
+
+multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
+                                     string asmop, bit Commutable = 0>
+  : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
+  let isCommutable = Commutable in {
+    def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
+                                 (outs FPR32:$Rd), (ins VPR64:$Rn),
+                                 !strconcat(asmop, " $Rd, $Rn.2s"),
+                                 [],
+                                 NoItinerary>;
+  }
+}
+
+// Scalar Reduce Addition Pairwise (Integer) with
+// Pattern to match llvm.arm.* intrinsic
+defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;
+
+// Pattern to match llvm.aarch64.* intrinsic for
+// Scalar Reduce Addition Pairwise (Integer)
+def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
+          (ADDPvv_D_2D VPR128:$Rn)>;
+
+// Scalar Reduce Addition Pairwise (Floating Point)
+defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;
+
+// Scalar Reduce Maximum Pairwise (Floating Point)
+defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;
+
+// Scalar Reduce Minimum Pairwise (Floating Point)
+defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;
+
+// Scalar Reduce maxNum Pairwise (Floating Point)
+defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;
+
+// Scalar Reduce minNum Pairwise (Floating Point)
+defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;
+
+multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnodeS,
+                                            SDPatternOperator opnodeD,
+                                            Instruction INSTS,
+                                            Instruction INSTD> {
+  def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))),
+            (INSTS VPR64:$Rn)>;
+  def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))),
+            (INSTD VPR128:$Rn)>;
+}
+
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Reduce Add, Max, Min, MaxiNum, MinNum Pairwise (Floating Point)
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
+  int_aarch64_neon_vpfaddq, FADDPvv_S_2S, FADDPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
+  int_aarch64_neon_vpmaxq, FMAXPvv_S_2S, FMAXPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
+  int_aarch64_neon_vpminq, FMINPvv_S_2S, FMINPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
+  int_aarch64_neon_vpfmaxnmq, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
+
+defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
+  int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>;

 //===----------------------------------------------------------------------===//

@@ -2999,6 +3142,14 @@ def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>;

 // ...and scalar bitcasts...
+def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>;
+def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>;
+def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
+def : Pat<(f32 (bitconvert (v1f32 FPR32:$src))), (f32 FPR32:$src)>;
+def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
+
+def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>;
+def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>;

 def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
 def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;

@@ -3017,6 +3168,15 @@ def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>;
 def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>;
 def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>;

+def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>;
+def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>;
+def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1f32 (bitconvert (f32 FPR32:$src))), (v1f32 FPR32:$src)>;
+def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
+
+def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
+def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>;
+
 def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
 def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
 def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;

@@ -3349,8 +3509,6 @@ def UMOVwh_pattern : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
                                        neon_uimm2_bare, UMOVwh>;
 def UMOVws_pattern : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
                                        neon_uimm1_bare, UMOVws>;
-def UMOVxd_pattern : Neon_UMOV_pattern<v2i64, v1i64, i64, neon_uimm1_bare,
-                                       neon_uimm0_bare, UMOVxd>;

 def : Pat<(i32 (and
             (i32 (vector_extract

@@ -3389,3 +3547,40 @@ def : Pat<(i64 (zext
           (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
                   neon_uimm0_bare:$Imm)>;

+// Additional copy patterns for scalar types
+def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
+          (UMOVwb (v16i8
+            (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;
+
+def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
+          (UMOVwh (v8i16
+            (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;
+
+def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
+          (FMOVws FPR32:$Rn)>;
+
+def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
+          (FMOVxd FPR64:$Rn)>;
+
+def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
+          (f64 FPR64:$Rn)>;
+
+def : Pat<(f32 (vector_extract (v1f32 FPR32:$Rn), (i64 0))),
+          (f32 FPR32:$Rn)>;
+
+def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
+          (v1i8 (EXTRACT_SUBREG (v16i8
+            (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
+            sub_8))>;
+
+def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
+          (v1i16 (EXTRACT_SUBREG (v8i16
+            (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
+            sub_16))>;
+
+def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
+          (FMOVsw $src)>;
+
+def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
+          (FMOVdx $src)>;
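With the Neon_Scalar_D_size_patterns instantiations above in place, plain IR add/sub on <1 x i64> also selects the scalar D-register instructions, no intrinsic needed; a sketch in the style of the new tests (illustrative function name):

define <1 x i64> @plain_scalar_add_example(<1 x i64> %A, <1 x i64> %B) {
  ; expected to select: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
  %sum = add <1 x i64> %A, %B
  ret <1 x i64> %sum
}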
@@ -133,19 +133,19 @@ foreach Index = 0-31 in {
 }

-def FPR8 : RegisterClass<"AArch64", [i8], 8,
+def FPR8 : RegisterClass<"AArch64", [i8, v1i8], 8,
                          (sequence "B%u", 0, 31)> {
 }

-def FPR16 : RegisterClass<"AArch64", [f16], 16,
+def FPR16 : RegisterClass<"AArch64", [f16, v1i16], 16,
                           (sequence "H%u", 0, 31)> {
 }

-def FPR32 : RegisterClass<"AArch64", [f32], 32,
+def FPR32 : RegisterClass<"AArch64", [f32, v1i32, v1f32], 32,
                           (sequence "S%u", 0, 31)> {
 }

-def FPR64 : RegisterClass<"AArch64", [f64, v2f32, v2i32, v4i16, v8i8, v1i64],
+def FPR64 : RegisterClass<"AArch64", [f64, v2f32, v2i32, v4i16, v8i8, v1i64, v1f64],
                           64, (sequence "D%u", 0, 31)>;

 def FPR128 : RegisterClass<"AArch64",
@@ -118,15 +118,3 @@ define <2 x double> @sub2xdouble(<2 x double> %A, <2 x double> %B) {
   ret <2 x double> %tmp3
 }

-define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) {
-;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
-  %tmp3 = add <1 x i64> %A, %B;
-  ret <1 x i64> %tmp3
-}
-
-define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) {
-;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
-  %tmp3 = sub <1 x i64> %A, %B;
-  ret <1 x i64> %tmp3
-}
@@ -146,7 +146,7 @@ define i32 @umovw2s(<2 x i32> %tmp1) {
 }

 define i64 @umovx1d(<1 x i64> %tmp1) {
-;CHECK: umov {{x[0-31]+}}, {{v[0-31]+}}.d[0]
+;CHECK: fmov {{x[0-31]+}}, {{d[0-31]+}}
   %tmp3 = extractelement <1 x i64> %tmp1, i32 0
   ret i64 %tmp3
 }
@@ -102,23 +102,6 @@ define <4 x i32> @test_srshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
   ret <4 x i32> %tmp1
 }

-declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_urshl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: urshl d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_srshl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: srshl d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
 declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>)
 declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>)
@@ -102,22 +102,7 @@ define <4 x i32> @test_sqadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
   ret <4 x i32> %tmp1
 }

-declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqadd_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: uqadd d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqadd_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sqadd d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
 declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>)
 declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>)

@@ -254,21 +239,3 @@ define <2 x i64> @test_sqsub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
 ; CHECK: sqsub v0.2d, v0.2d, v1.2d
   ret <2 x i64> %tmp1
 }

-declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqsub_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: uqsub d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqsub_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sqsub d0, d0, d1
-  ret <1 x i64> %tmp1
-}
@@ -102,23 +102,6 @@ define <4 x i32> @test_sqrshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
   ret <4 x i32> %tmp1
 }

-declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqrshl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: uqrshl d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqrshl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sqrshl d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
 declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>)
 declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>)
@@ -102,23 +102,6 @@ define <4 x i32> @test_sqshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
   ret <4 x i32> %tmp1
 }

-declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>)
-declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>)
-
-define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_uqshl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: uqshl d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
-define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
-; CHECK: test_sqshl_v1i64:
-  %tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
-; CHECK: sqshl d0, d0, d1
-  ret <1 x i64> %tmp1
-}
-
 declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>)
 declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>)
test/CodeGen/AArch64/neon-scalar-add-sub.ll (new file, 50 lines)
@@ -0,0 +1,50 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+
+define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) {
+;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  %tmp3 = add <1 x i64> %A, %B;
+  ret <1 x i64> %tmp3
+}
+
+define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) {
+;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  %tmp3 = sub <1 x i64> %A, %B;
+  ret <1 x i64> %tmp3
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_add_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_add_v1i64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_uadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_uadd_v1i64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+declare <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64>, <1 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64>, <1 x i64>)
+
+define <1 x i64> @test_sub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_sub_v1i64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64> %lhs, <1 x i64> %rhs)
+; CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
+
+define <1 x i64> @test_usub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK: test_usub_v1i64:
+  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64> %lhs, <1 x i64> %rhs)
+;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
+  ret <1 x i64> %tmp1
+}
test/CodeGen/AArch64/neon-scalar-reduce-pairwise.ll (new file, 103 lines)
@@ -0,0 +1,103 @@
+; RUN: llc -march=aarch64 -mattr=+neon < %s | FileCheck %s
+
+declare <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64>)
+
+define <1 x i64> @test_addp_v1i64(<2 x i64> %a) {
+; CHECK: test_addp_v1i64:
+  %val = call <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64> %a)
+; CHECK: addp d0, v0.2d
+  ret <1 x i64> %val
+}
+
+declare <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float>)
+
+define <1 x float> @test_faddp_v1f32(<2 x float> %a) {
+; CHECK: test_faddp_v1f32:
+  %val = call <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float> %a)
+; CHECK: faddp s0, v0.2s
+  ret <1 x float> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double>)
+
+define <1 x double> @test_faddp_v1f64(<2 x double> %a) {
+; CHECK: test_faddp_v1f64:
+  %val = call <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double> %a)
+; CHECK: faddp d0, v0.2d
+  ret <1 x double> %val
+}
+
+declare <1 x float> @llvm.aarch64.neon.vpmax(<2 x float>)
+
+define <1 x float> @test_fmaxp_v1f32(<2 x float> %a) {
+; CHECK: test_fmaxp_v1f32:
+  %val = call <1 x float> @llvm.aarch64.neon.vpmax(<2 x float> %a)
+; CHECK: fmaxp s0, v0.2s
+  ret <1 x float> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double>)
+
+define <1 x double> @test_fmaxp_v1f64(<2 x double> %a) {
+; CHECK: test_fmaxp_v1f64:
+  %val = call <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double> %a)
+; CHECK: fmaxp d0, v0.2d
+  ret <1 x double> %val
+}
+
+declare <1 x float> @llvm.aarch64.neon.vpmin(<2 x float>)
+
+define <1 x float> @test_fminp_v1f32(<2 x float> %a) {
+; CHECK: test_fminp_v1f32:
+  %val = call <1 x float> @llvm.aarch64.neon.vpmin(<2 x float> %a)
+; CHECK: fminp s0, v0.2s
+  ret <1 x float> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.vpminq(<2 x double>)
+
+define <1 x double> @test_fminp_v1f64(<2 x double> %a) {
+; CHECK: test_fminp_v1f64:
+  %val = call <1 x double> @llvm.aarch64.neon.vpminq(<2 x double> %a)
+; CHECK: fminp d0, v0.2d
+  ret <1 x double> %val
+}
+
+declare <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float>)
+
+define <1 x float> @test_fmaxnmp_v1f32(<2 x float> %a) {
+; CHECK: test_fmaxnmp_v1f32:
+  %val = call <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float> %a)
+; CHECK: fmaxnmp s0, v0.2s
+  ret <1 x float> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double>)
+
+define <1 x double> @test_fmaxnmp_v1f64(<2 x double> %a) {
+; CHECK: test_fmaxnmp_v1f64:
+  %val = call <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double> %a)
+; CHECK: fmaxnmp d0, v0.2d
+  ret <1 x double> %val
+}
+
+declare <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float>)
+
+define <1 x float> @test_fminnmp_v1f32(<2 x float> %a) {
+; CHECK: test_fminnmp_v1f32:
+  %val = call <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float> %a)
+; CHECK: fminnmp s0, v0.2s
+  ret <1 x float> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double>)
+
+define <1 x double> @test_fminnmp_v1f64(<2 x double> %a) {
+; CHECK: test_fminnmp_v1f64:
+  %val = call <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double> %a)
+; CHECK: fminnmp d0, v0.2d
+  ret <1 x double> %val
+}
39
test/CodeGen/AArch64/neon-scalar-rounding-shift.ll
Normal file
39
test/CodeGen/AArch64/neon-scalar-rounding-shift.ll
Normal file
@ -0,0 +1,39 @@
|
||||
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s

declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>)

define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_urshl_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}

define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_srshl_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}

declare <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64>, <1 x i64>)

define <1 x i64> @test_urshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_urshl_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}

define <1 x i64> @test_srshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_srshl_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
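
These v1i64 intrinsics take one-element vectors rather than plain i64, so a caller working with scalars has to round-trip through insertelement/extractelement. A sketch under that assumption, not part of the patch (@urshl_scalar is a hypothetical name; the intrinsic is declared in the file above):

declare <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64>, <1 x i64>)

define i64 @urshl_scalar(i64 %x, i64 %shift) {
  ; Pack the scalar operands into one-element vectors.
  %xv = insertelement <1 x i64> undef, i64 %x, i32 0
  %sv = insertelement <1 x i64> undef, i64 %shift, i32 0
  ; The whole sequence should lower to a single d-register urshl.
  %rv = call <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64> %xv, <1 x i64> %sv)
  %r = extractelement <1 x i64> %rv, i32 0
  ret i64 %r
}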
171 test/CodeGen/AArch64/neon-scalar-saturating-add-sub.ll Normal file
@ -0,0 +1,171 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s

declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)

define <1 x i64> @test_uqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqadd_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: uqadd d0, d0, d1
  ret <1 x i64> %tmp1
}

define <1 x i64> @test_sqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqadd_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sqadd d0, d0, d1
  ret <1 x i64> %tmp1
}

declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)

define <1 x i64> @test_uqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqsub_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: uqsub d0, d0, d1
  ret <1 x i64> %tmp1
}

define <1 x i64> @test_sqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqsub_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sqsub d0, d0, d1
  ret <1 x i64> %tmp1
}

declare <1 x i8> @llvm.aarch64.neon.vqaddu.v1i8(<1 x i8>, <1 x i8>)
declare <1 x i8> @llvm.aarch64.neon.vqadds.v1i8(<1 x i8>, <1 x i8>)

define <1 x i8> @test_uqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
; CHECK: test_uqadd_v1i8_aarch64:
  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqaddu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
; CHECK: uqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
  ret <1 x i8> %tmp1
}

define <1 x i8> @test_sqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
; CHECK: test_sqadd_v1i8_aarch64:
  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqadds.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
; CHECK: sqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
  ret <1 x i8> %tmp1
}

declare <1 x i8> @llvm.aarch64.neon.vqsubu.v1i8(<1 x i8>, <1 x i8>)
declare <1 x i8> @llvm.aarch64.neon.vqsubs.v1i8(<1 x i8>, <1 x i8>)

define <1 x i8> @test_uqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
; CHECK: test_uqsub_v1i8_aarch64:
  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqsubu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
; CHECK: uqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
  ret <1 x i8> %tmp1
}

define <1 x i8> @test_sqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
; CHECK: test_sqsub_v1i8_aarch64:
  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqsubs.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
; CHECK: sqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
  ret <1 x i8> %tmp1
}

declare <1 x i16> @llvm.aarch64.neon.vqaddu.v1i16(<1 x i16>, <1 x i16>)
declare <1 x i16> @llvm.aarch64.neon.vqadds.v1i16(<1 x i16>, <1 x i16>)

define <1 x i16> @test_uqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
; CHECK: test_uqadd_v1i16_aarch64:
  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqaddu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
; CHECK: uqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
  ret <1 x i16> %tmp1
}

define <1 x i16> @test_sqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
; CHECK: test_sqadd_v1i16_aarch64:
  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqadds.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
; CHECK: sqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
  ret <1 x i16> %tmp1
}

declare <1 x i16> @llvm.aarch64.neon.vqsubu.v1i16(<1 x i16>, <1 x i16>)
declare <1 x i16> @llvm.aarch64.neon.vqsubs.v1i16(<1 x i16>, <1 x i16>)

define <1 x i16> @test_uqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
; CHECK: test_uqsub_v1i16_aarch64:
  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqsubu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
; CHECK: uqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
  ret <1 x i16> %tmp1
}

define <1 x i16> @test_sqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
; CHECK: test_sqsub_v1i16_aarch64:
  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqsubs.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
; CHECK: sqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
  ret <1 x i16> %tmp1
}

declare <1 x i32> @llvm.aarch64.neon.vqaddu.v1i32(<1 x i32>, <1 x i32>)
declare <1 x i32> @llvm.aarch64.neon.vqadds.v1i32(<1 x i32>, <1 x i32>)

define <1 x i32> @test_uqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
; CHECK: test_uqadd_v1i32_aarch64:
  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqaddu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
; CHECK: uqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
  ret <1 x i32> %tmp1
}

define <1 x i32> @test_sqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
; CHECK: test_sqadd_v1i32_aarch64:
  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqadds.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
; CHECK: sqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
  ret <1 x i32> %tmp1
}

declare <1 x i32> @llvm.aarch64.neon.vqsubu.v1i32(<1 x i32>, <1 x i32>)
declare <1 x i32> @llvm.aarch64.neon.vqsubs.v1i32(<1 x i32>, <1 x i32>)

define <1 x i32> @test_uqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
; CHECK: test_uqsub_v1i32_aarch64:
  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqsubu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
; CHECK: uqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
  ret <1 x i32> %tmp1
}

define <1 x i32> @test_sqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
; CHECK: test_sqsub_v1i32_aarch64:
  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqsubs.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
; CHECK: sqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
  ret <1 x i32> %tmp1
}

declare <1 x i64> @llvm.aarch64.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.aarch64.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)

define <1 x i64> @test_uqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqadd_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: uqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}

define <1 x i64> @test_sqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqadd_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}

declare <1 x i64> @llvm.aarch64.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.aarch64.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)

define <1 x i64> @test_uqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqsub_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: uqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}

define <1 x i64> @test_sqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqsub_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
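
Saturating arithmetic clamps at the type's limits instead of wrapping. A minimal sketch of calling one of the intrinsics above with a constant operand, not part of the patch (@sqadd_one is a hypothetical name):

declare <1 x i64> @llvm.aarch64.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)

define <1 x i64> @sqadd_one(<1 x i64> %a) {
  ; Signed saturating add of the constant 1; on overflow the result
  ; clamps at the maximum i64 value rather than wrapping.
  %r = call <1 x i64> @llvm.aarch64.neon.vqadds.v1i64(<1 x i64> %a, <1 x i64> <i64 1>)
  ret <1 x i64> %r
}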
94 test/CodeGen/AArch64/neon-scalar-saturating-rounding-shift.ll Normal file
@ -0,0 +1,94 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s

declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>)

define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqrshl_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}

define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqrshl_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}

declare <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8>, <1 x i8>)
declare <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8>, <1 x i8>)

define <1 x i8> @test_uqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
; CHECK: test_uqrshl_v1i8_aarch64:
  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
; CHECK: uqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
  ret <1 x i8> %tmp1
}

define <1 x i8> @test_sqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
; CHECK: test_sqrshl_v1i8_aarch64:
  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
; CHECK: sqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
  ret <1 x i8> %tmp1
}

declare <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16>, <1 x i16>)
declare <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16>, <1 x i16>)

define <1 x i16> @test_uqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
; CHECK: test_uqrshl_v1i16_aarch64:
  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
; CHECK: uqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
  ret <1 x i16> %tmp1
}

define <1 x i16> @test_sqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
; CHECK: test_sqrshl_v1i16_aarch64:
  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
; CHECK: sqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
  ret <1 x i16> %tmp1
}

declare <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32>, <1 x i32>)
declare <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32>, <1 x i32>)

define <1 x i32> @test_uqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
; CHECK: test_uqrshl_v1i32_aarch64:
  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
; CHECK: uqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
  ret <1 x i32> %tmp1
}

define <1 x i32> @test_sqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
; CHECK: test_sqrshl_v1i32_aarch64:
  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
; CHECK: sqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
  ret <1 x i32> %tmp1
}

declare <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64>, <1 x i64>)

define <1 x i64> @test_uqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqrshl_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}

define <1 x i64> @test_sqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqrshl_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
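
The sub-64-bit scalar types (v1i8, v1i16, v1i32) introduced by this patch follow the same wrapping pattern. A sketch for the i8 case, not part of the patch (@sqrshl_scalar_i8 is a hypothetical name):

declare <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8>, <1 x i8>)

define i8 @sqrshl_scalar_i8(i8 %x, i8 %amt) {
  %xv = insertelement <1 x i8> undef, i8 %x, i32 0
  %av = insertelement <1 x i8> undef, i8 %amt, i32 0
  ; Expected to select the b-register form checked in the tests above.
  %rv = call <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8> %xv, <1 x i8> %av)
  %r = extractelement <1 x i8> %rv, i32 0
  ret i8 %r
}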
88 test/CodeGen/AArch64/neon-scalar-saturating-shift.ll Normal file
@ -0,0 +1,88 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s

declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>)

define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqshl_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}

define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqshl_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}

declare <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8>, <1 x i8>)
declare <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8>, <1 x i8>)

define <1 x i8> @test_uqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
; CHECK: test_uqshl_v1i8_aarch64:
  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
; CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
  ret <1 x i8> %tmp1
}

define <1 x i8> @test_sqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
; CHECK: test_sqshl_v1i8_aarch64:
  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
; CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
  ret <1 x i8> %tmp1
}

declare <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16>, <1 x i16>)
declare <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16>, <1 x i16>)

define <1 x i16> @test_uqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
; CHECK: test_uqshl_v1i16_aarch64:
  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
; CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
  ret <1 x i16> %tmp1
}

define <1 x i16> @test_sqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
; CHECK: test_sqshl_v1i16_aarch64:
  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
; CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
  ret <1 x i16> %tmp1
}

declare <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32>, <1 x i32>)
declare <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32>, <1 x i32>)

define <1 x i32> @test_uqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
; CHECK: test_uqshl_v1i32_aarch64:
  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
; CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
  ret <1 x i32> %tmp1
}

define <1 x i32> @test_sqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
; CHECK: test_sqshl_v1i32_aarch64:
  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
; CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
  ret <1 x i32> %tmp1
}

declare <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64>, <1 x i64>)

define <1 x i64> @test_uqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqshl_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}

define <1 x i64> @test_sqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqshl_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
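
In the register form exercised here, the shift amount comes from the second operand. A sketch with a constant amount, not part of the patch (@uqshl_by_three is a hypothetical name):

declare <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64>, <1 x i64>)

define <1 x i64> @uqshl_by_three(<1 x i64> %a) {
  ; Unsigned saturating left shift by the constant 3.
  %r = call <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64> %a, <1 x i64> <i64 3>)
  ret <1 x i64> %r
}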
38 test/CodeGen/AArch64/neon-scalar-shift.ll Normal file
@ -0,0 +1,38 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s

declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>)

define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_ushl_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}

define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sshl_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}

declare <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64>, <1 x i64>)

define <1 x i64> @test_ushl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_ushl_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}

define <1 x i64> @test_sshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sshl_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
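
SSHL treats its second operand as a signed shift count, so a negative amount behaves as an arithmetic right shift. A sketch under that assumption, not part of the patch (@sshl_right_by_two is a hypothetical name):

declare <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64>, <1 x i64>)

define <1 x i64> @sshl_right_by_two(<1 x i64> %a) {
  ; A shift count of -2 shifts right arithmetically by 2.
  %r = call <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64> %a, <1 x i64> <i64 -2>)
  ret <1 x i64> %r
}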
@ -102,23 +102,6 @@ define <4 x i32> @test_sshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
  ret <4 x i32> %tmp1
}

declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>)

define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_ushl_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: ushl d0, d0, d1
  ret <1 x i64> %tmp1
}

define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sshl_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sshl d0, d0, d1
  ret <1 x i64> %tmp1
}

declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64>, <2 x i64>)
@ -32,4 +32,3 @@
// CHECK: faddp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0x22,0x2e]
// CHECK: faddp v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0x22,0x6e]
// CHECK: faddp v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0x62,0x6e]
@ -64,19 +64,5 @@
// CHECK: fsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0xa2,0x4e]
// CHECK: fsub v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0xe2,0x4e]

//------------------------------------------------------------------------------
// Scalar Integer Add
//------------------------------------------------------------------------------
add d31, d0, d16

// CHECK: add d31, d0, d16 // encoding: [0x1f,0x84,0xf0,0x5e]

//------------------------------------------------------------------------------
// Scalar Integer Sub
//------------------------------------------------------------------------------
sub d1, d7, d8

// CHECK: sub d1, d7, d8 // encoding: [0xe1,0x84,0xe8,0x7e]
@ -2747,3 +2747,105 @@
// CHECK-ERROR: rsubhn2 v0.4s, v1.2d, v2.2s
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Reduce Add Pairwise (Integer)
//----------------------------------------------------------------------
// invalid vector types
addp s0, d1.2d
addp d0, d1.2s

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: addp s0, d1.2d
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: addp d0, d1.2s
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Reduce Add Pairwise (Floating Point)
//----------------------------------------------------------------------
// invalid vector types
faddp s0, d1.2d
faddp d0, d1.2s

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: faddp s0, d1.2d
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: faddp d0, d1.2s
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Reduce Maximum Pairwise (Floating Point)
//----------------------------------------------------------------------
// mismatched and invalid vector types
fmaxp s0, v1.2d
fmaxp d31, v2.2s
fmaxp h3, v2.2s

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmaxp s0, v1.2d
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmaxp d31, v2.2s
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmaxp h3, v2.2s
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Reduce Minimum Pairwise (Floating Point)
//----------------------------------------------------------------------
// mismatched and invalid vector types
fminp s0, v1.4h
fminp d31, v2.8h
fminp b3, v2.2s

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fminp s0, v1.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fminp d31, v2.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fminp b3, v2.2s
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Reduce maxNum Pairwise (Floating Point)
//----------------------------------------------------------------------
// mismatched and invalid vector types
fmaxnmp s0, v1.8b
fmaxnmp d31, v2.16b
fmaxnmp v1.2s, v2.2s

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmaxnmp s0, v1.8b
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmaxnmp d31, v2.16b
// CHECK-ERROR: ^
// CHECK-ERROR: error: too few operands for instruction
// CHECK-ERROR: fmaxnmp v1.2s, v2.2s
// CHECK-ERROR: ^

//----------------------------------------------------------------------
// Scalar Reduce minNum Pairwise (Floating Point)
//----------------------------------------------------------------------
// mismatched and invalid vector types
fminnmp s0, v1.2d
fminnmp d31, v2.4s
fminnmp v1.4s, v2.2d

// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fminnmp s0, v1.2d
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fminnmp d31, v2.4s
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fminnmp v1.4s, v2.2d
// CHECK-ERROR: ^
@ -41,17 +41,5 @@
// CHECK: urshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x54,0xa2,0x6e]
// CHECK: urshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x54,0xe2,0x6e]

//------------------------------------------------------------------------------
// Scalar Integer Rounding Shift Left (Signed)
//------------------------------------------------------------------------------
srshl d17, d31, d8

// CHECK: srshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x5e]

//------------------------------------------------------------------------------
// Scalar Integer Rounding Shift Left (Unsigned)
//------------------------------------------------------------------------------
urshl d17, d31, d8

// CHECK: urshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x7e]
@ -79,55 +79,4 @@
// CHECK: uqsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0x2c,0xa2,0x6e]
// CHECK: uqsub v0.2d, v1.2d, v2.2d // encoding: [0x20,0x2c,0xe2,0x6e]

//------------------------------------------------------------------------------
// Scalar Integer Saturating Add (Signed)
//------------------------------------------------------------------------------
sqadd b0, b1, b2
sqadd h10, h11, h12
sqadd s20, s21, s2
sqadd d17, d31, d8

// CHECK: sqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x5e]
// CHECK: sqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x5e]
// CHECK: sqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x5e]
// CHECK: sqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x5e]

//------------------------------------------------------------------------------
// Scalar Integer Saturating Add (Unsigned)
//------------------------------------------------------------------------------
uqadd b0, b1, b2
uqadd h10, h11, h12
uqadd s20, s21, s2
uqadd d17, d31, d8

// CHECK: uqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x7e]
// CHECK: uqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x7e]
// CHECK: uqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x7e]
// CHECK: uqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x7e]

//------------------------------------------------------------------------------
// Scalar Integer Saturating Sub (Signed)
//------------------------------------------------------------------------------
sqsub b0, b1, b2
sqsub h10, h11, h12
sqsub s20, s21, s2
sqsub d17, d31, d8

// CHECK: sqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x5e]
// CHECK: sqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x5e]
// CHECK: sqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x5e]
// CHECK: sqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x5e]

//------------------------------------------------------------------------------
// Scalar Integer Saturating Sub (Unsigned)
//------------------------------------------------------------------------------
uqsub b0, b1, b2
uqsub h10, h11, h12
uqsub s20, s21, s2
uqsub d17, d31, d8

// CHECK: uqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x7e]
// CHECK: uqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x7e]
// CHECK: uqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x7e]
// CHECK: uqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x7e]
@ -41,30 +41,3 @@
// CHECK: uqrshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x5c,0xa2,0x6e]
// CHECK: uqrshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x5c,0xe2,0x6e]

//------------------------------------------------------------------------------
// Scalar Integer Saturating Rounding Shift Left (Signed)
//------------------------------------------------------------------------------
sqrshl b0, b1, b2
sqrshl h10, h11, h12
sqrshl s20, s21, s2
sqrshl d17, d31, d8

// CHECK: sqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x5e]
// CHECK: sqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x5e]
// CHECK: sqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x5e]
// CHECK: sqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x5e]

//------------------------------------------------------------------------------
// Scalar Integer Saturating Rounding Shift Left (Unsigned)
//------------------------------------------------------------------------------
uqrshl b0, b1, b2
uqrshl h10, h11, h12
uqrshl s20, s21, s2
uqrshl d17, d31, d8

// CHECK: uqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x7e]
// CHECK: uqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x7e]
// CHECK: uqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x7e]
// CHECK: uqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x7e]
@ -41,29 +41,3 @@
// CHECK: uqshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x4c,0xa2,0x6e]
// CHECK: uqshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x4c,0xe2,0x6e]

//------------------------------------------------------------------------------
// Scalar Integer Saturating Shift Left (Signed)
//------------------------------------------------------------------------------
sqshl b0, b1, b2
sqshl h10, h11, h12
sqshl s20, s21, s2
sqshl d17, d31, d8

// CHECK: sqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x5e]
// CHECK: sqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x5e]
// CHECK: sqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x5e]
// CHECK: sqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x5e]

//------------------------------------------------------------------------------
// Scalar Integer Saturating Shift Left (Unsigned)
//------------------------------------------------------------------------------
uqshl b0, b1, b2
uqshl h10, h11, h12
uqshl s20, s21, s2
uqshl d17, d31, d8

// CHECK: uqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x7e]
// CHECK: uqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x7e]
// CHECK: uqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x7e]
// CHECK: uqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x7e]
16 test/MC/AArch64/neon-scalar-add-sub.s Normal file
@ -0,0 +1,16 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s

//------------------------------------------------------------------------------
// Scalar Integer Add
//------------------------------------------------------------------------------
add d31, d0, d16

// CHECK: add d31, d0, d16 // encoding: [0x1f,0x84,0xf0,0x5e]

//------------------------------------------------------------------------------
// Scalar Integer Sub
//------------------------------------------------------------------------------
sub d1, d7, d8

// CHECK: sub d1, d7, d8 // encoding: [0xe1,0x84,0xe8,0x7e]
16 test/MC/AArch64/neon-scalar-reduce-pairwise.s Normal file
@ -0,0 +1,16 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s

//----------------------------------------------------------------------
// Scalar Reduce Add Pairwise (Integer)
//----------------------------------------------------------------------
addp d0, v1.2d

// CHECK: addp d0, v1.2d // encoding: [0x20,0xb8,0xf1,0x5e]

//----------------------------------------------------------------------
// Scalar Reduce Add Pairwise (Floating Point)
//----------------------------------------------------------------------
faddp d20, v1.2d

// CHECK: faddp d20, v1.2d // encoding: [0x34,0xd8,0x70,0x7e]
17 test/MC/AArch64/neon-scalar-rounding-shift.s Normal file
@ -0,0 +1,17 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s

//------------------------------------------------------------------------------
// Scalar Integer Rounding Shift Left (Signed)
//------------------------------------------------------------------------------
srshl d17, d31, d8

// CHECK: srshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x5e]

//------------------------------------------------------------------------------
// Scalar Integer Rounding Shift Left (Unsigned)
//------------------------------------------------------------------------------
urshl d17, d31, d8

// CHECK: urshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x7e]
54 test/MC/AArch64/neon-scalar-saturating-add-sub.s Normal file
@ -0,0 +1,54 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s

//------------------------------------------------------------------------------
// Scalar Integer Saturating Add (Signed)
//------------------------------------------------------------------------------
sqadd b0, b1, b2
sqadd h10, h11, h12
sqadd s20, s21, s2
sqadd d17, d31, d8

// CHECK: sqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x5e]
// CHECK: sqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x5e]
// CHECK: sqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x5e]
// CHECK: sqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x5e]

//------------------------------------------------------------------------------
// Scalar Integer Saturating Add (Unsigned)
//------------------------------------------------------------------------------
uqadd b0, b1, b2
uqadd h10, h11, h12
uqadd s20, s21, s2
uqadd d17, d31, d8

// CHECK: uqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x7e]
// CHECK: uqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x7e]
// CHECK: uqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x7e]
// CHECK: uqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x7e]

//------------------------------------------------------------------------------
// Scalar Integer Saturating Sub (Signed)
//------------------------------------------------------------------------------
sqsub b0, b1, b2
sqsub h10, h11, h12
sqsub s20, s21, s2
sqsub d17, d31, d8

// CHECK: sqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x5e]
// CHECK: sqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x5e]
// CHECK: sqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x5e]
// CHECK: sqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x5e]

//------------------------------------------------------------------------------
// Scalar Integer Saturating Sub (Unsigned)
//------------------------------------------------------------------------------
uqsub b0, b1, b2
uqsub h10, h11, h12
uqsub s20, s21, s2
uqsub d17, d31, d8

// CHECK: uqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x7e]
// CHECK: uqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x7e]
// CHECK: uqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x7e]
// CHECK: uqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x7e]
28 test/MC/AArch64/neon-scalar-saturating-rounding-shift.s Normal file
@ -0,0 +1,28 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s

//------------------------------------------------------------------------------
// Scalar Integer Saturating Rounding Shift Left (Signed)
//------------------------------------------------------------------------------
sqrshl b0, b1, b2
sqrshl h10, h11, h12
sqrshl s20, s21, s2
sqrshl d17, d31, d8

// CHECK: sqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x5e]
// CHECK: sqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x5e]
// CHECK: sqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x5e]
// CHECK: sqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x5e]

//------------------------------------------------------------------------------
// Scalar Integer Saturating Rounding Shift Left (Unsigned)
//------------------------------------------------------------------------------
uqrshl b0, b1, b2
uqrshl h10, h11, h12
uqrshl s20, s21, s2
uqrshl d17, d31, d8

// CHECK: uqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x7e]
// CHECK: uqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x7e]
// CHECK: uqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x7e]
// CHECK: uqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x7e]
29 test/MC/AArch64/neon-scalar-saturating-shift.s Normal file
@ -0,0 +1,29 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s

//------------------------------------------------------------------------------
// Scalar Integer Saturating Shift Left (Signed)
//------------------------------------------------------------------------------
sqshl b0, b1, b2
sqshl h10, h11, h12
sqshl s20, s21, s2
sqshl d17, d31, d8

// CHECK: sqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x5e]
// CHECK: sqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x5e]
// CHECK: sqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x5e]
// CHECK: sqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x5e]

//------------------------------------------------------------------------------
// Scalar Integer Saturating Shift Left (Unsigned)
//------------------------------------------------------------------------------
uqshl b0, b1, b2
uqshl h10, h11, h12
uqshl s20, s21, s2
uqshl d17, d31, d8

// CHECK: uqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x7e]
// CHECK: uqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x7e]
// CHECK: uqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x7e]
// CHECK: uqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x7e]
16 test/MC/AArch64/neon-scalar-shift.s Normal file
@ -0,0 +1,16 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s

//------------------------------------------------------------------------------
// Scalar Integer Shift Left (Signed)
//------------------------------------------------------------------------------
sshl d17, d31, d8

// CHECK: sshl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x5e]

//------------------------------------------------------------------------------
// Scalar Integer Shift Left (Unsigned)
//------------------------------------------------------------------------------
ushl d17, d31, d8

// CHECK: ushl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x7e]
@ -41,20 +41,6 @@
// CHECK: ushl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x44,0xa2,0x6e]
// CHECK: ushl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x44,0xe2,0x6e]

//------------------------------------------------------------------------------
// Scalar Integer Shift Left (Signed)
//------------------------------------------------------------------------------
sshl d17, d31, d8

// CHECK: sshl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x5e]

//------------------------------------------------------------------------------
// Scalar Integer Shift Left (Unsigned)
//------------------------------------------------------------------------------
ushl d17, d31, d8

// CHECK: ushl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x7e]

//------------------------------------------------------------------------------
// Vector Integer Shift Left by Immediate
//------------------------------------------------------------------------------
@ -75,6 +75,7 @@ std::string llvm::getEnumName(MVT::SimpleValueType T) {
  case MVT::v16i1: return "MVT::v16i1";
  case MVT::v32i1: return "MVT::v32i1";
  case MVT::v64i1: return "MVT::v64i1";
  case MVT::v1i8: return "MVT::v1i8";
  case MVT::v2i8: return "MVT::v2i8";
  case MVT::v4i8: return "MVT::v4i8";
  case MVT::v8i8: return "MVT::v8i8";
@ -99,10 +100,12 @@ std::string llvm::getEnumName(MVT::SimpleValueType T) {
  case MVT::v16i64: return "MVT::v16i64";
  case MVT::v2f16: return "MVT::v2f16";
  case MVT::v8f16: return "MVT::v8f16";
  case MVT::v1f32: return "MVT::v1f32";
  case MVT::v2f32: return "MVT::v2f32";
  case MVT::v4f32: return "MVT::v4f32";
  case MVT::v8f32: return "MVT::v8f32";
  case MVT::v16f32: return "MVT::v16f32";
  case MVT::v1f64: return "MVT::v1f64";
  case MVT::v2f64: return "MVT::v2f64";
  case MVT::v4f64: return "MVT::v4f64";
  case MVT::v8f64: return "MVT::v8f64";
@ -260,7 +260,8 @@ enum IIT_Info {
  IIT_STRUCT5 = 22,
  IIT_EXTEND_VEC_ARG = 23,
  IIT_TRUNC_VEC_ARG = 24,
  IIT_ANYPTR = 25
  IIT_ANYPTR = 25,
  IIT_V1 = 26
};

@ -350,6 +351,7 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
  EVT VVT = VT;
  switch (VVT.getVectorNumElements()) {
  default: PrintFatalError("unhandled vector type width in intrinsic!");
  case 1: Sig.push_back(IIT_V1); break;
  case 2: Sig.push_back(IIT_V2); break;
  case 4: Sig.push_back(IIT_V4); break;
  case 8: Sig.push_back(IIT_V8); break;