Initial support for Neon scalar instructions.

Patch by Ana Pazos.

1. Added support for v1ix and v1fx types.
2. Added Scalar Pairwise Reduce instructions.
3. Added initial implementation of Scalar Arithmetic instructions.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191263 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jiangning Liu 2013-09-24 02:47:27 +00:00
parent 44e8441767
commit 477fc628b3
44 changed files with 1320 additions and 388 deletions

View File

@ -67,41 +67,44 @@ namespace llvm {
v32i1 = 17, // 32 x i1 v32i1 = 17, // 32 x i1
v64i1 = 18, // 64 x i1 v64i1 = 18, // 64 x i1
v2i8 = 19, // 2 x i8 v1i8 = 19, // 1 x i8
v4i8 = 20, // 4 x i8 v2i8 = 20, // 2 x i8
v8i8 = 21, // 8 x i8 v4i8 = 21, // 4 x i8
v16i8 = 22, // 16 x i8 v8i8 = 22, // 8 x i8
v32i8 = 23, // 32 x i8 v16i8 = 23, // 16 x i8
v64i8 = 24, // 64 x i8 v32i8 = 24, // 32 x i8
v1i16 = 25, // 1 x i16 v64i8 = 25, // 64 x i8
v2i16 = 26, // 2 x i16 v1i16 = 26, // 1 x i16
v4i16 = 27, // 4 x i16 v2i16 = 27, // 2 x i16
v8i16 = 28, // 8 x i16 v4i16 = 28, // 4 x i16
v16i16 = 29, // 16 x i16 v8i16 = 29, // 8 x i16
v32i16 = 30, // 32 x i16 v16i16 = 30, // 16 x i16
v1i32 = 31, // 1 x i32 v32i16 = 31, // 32 x i16
v2i32 = 32, // 2 x i32 v1i32 = 32, // 1 x i32
v4i32 = 33, // 4 x i32 v2i32 = 33, // 2 x i32
v8i32 = 34, // 8 x i32 v4i32 = 34, // 4 x i32
v16i32 = 35, // 16 x i32 v8i32 = 35, // 8 x i32
v1i64 = 36, // 1 x i64 v16i32 = 36, // 16 x i32
v2i64 = 37, // 2 x i64 v1i64 = 37, // 1 x i64
v4i64 = 38, // 4 x i64 v2i64 = 38, // 2 x i64
v8i64 = 39, // 8 x i64 v4i64 = 39, // 4 x i64
v16i64 = 40, // 16 x i64 v8i64 = 40, // 8 x i64
v16i64 = 41, // 16 x i64
FIRST_INTEGER_VECTOR_VALUETYPE = v2i1, FIRST_INTEGER_VECTOR_VALUETYPE = v2i1,
LAST_INTEGER_VECTOR_VALUETYPE = v16i64, LAST_INTEGER_VECTOR_VALUETYPE = v16i64,
v2f16 = 41, // 2 x f16 v2f16 = 42, // 2 x f16
v8f16 = 42, // 8 x f16 v8f16 = 43, // 8 x f16
v2f32 = 43, // 2 x f32 v1f32 = 44, // 1 x f32
v4f32 = 44, // 4 x f32 v2f32 = 45, // 2 x f32
v8f32 = 45, // 8 x f32 v4f32 = 46, // 4 x f32
v16f32 = 46, // 16 x f32 v8f32 = 47, // 8 x f32
v2f64 = 47, // 2 x f64 v16f32 = 48, // 16 x f32
v4f64 = 48, // 4 x f64 v1f64 = 49, // 1 x f64
v8f64 = 49, // 8 x f64 v2f64 = 50, // 2 x f64
v4f64 = 51, // 4 x f64
v8f64 = 52, // 8 x f64
FIRST_FP_VECTOR_VALUETYPE = v2f16, FIRST_FP_VECTOR_VALUETYPE = v2f16,
LAST_FP_VECTOR_VALUETYPE = v8f64, LAST_FP_VECTOR_VALUETYPE = v8f64,
@ -109,17 +112,17 @@ namespace llvm {
FIRST_VECTOR_VALUETYPE = v2i1, FIRST_VECTOR_VALUETYPE = v2i1,
LAST_VECTOR_VALUETYPE = v8f64, LAST_VECTOR_VALUETYPE = v8f64,
x86mmx = 50, // This is an X86 MMX value x86mmx = 53, // This is an X86 MMX value
Glue = 51, // This glues nodes together during pre-RA sched Glue = 54, // This glues nodes together during pre-RA sched
isVoid = 52, // This has no value isVoid = 55, // This has no value
Untyped = 53, // This value takes a register, but has Untyped = 56, // This value takes a register, but has
// unspecified type. The register class // unspecified type. The register class
// will be determined by the opcode. // will be determined by the opcode.
LAST_VALUETYPE = 54, // This always remains at the end of the list. LAST_VALUETYPE = 57, // This always remains at the end of the list.
// This is the current maximum for LAST_VALUETYPE. // This is the current maximum for LAST_VALUETYPE.
// MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
@ -266,6 +269,7 @@ namespace llvm {
case v16i1 : case v16i1 :
case v32i1 : case v32i1 :
case v64i1: return i1; case v64i1: return i1;
case v1i8 :
case v2i8 : case v2i8 :
case v4i8 : case v4i8 :
case v8i8 : case v8i8 :
@ -290,10 +294,12 @@ namespace llvm {
case v16i64: return i64; case v16i64: return i64;
case v2f16: case v2f16:
case v8f16: return f16; case v8f16: return f16;
case v1f32:
case v2f32: case v2f32:
case v4f32: case v4f32:
case v8f32: case v8f32:
case v16f32: return f32; case v16f32: return f32;
case v1f64:
case v2f64: case v2f64:
case v4f64: case v4f64:
case v8f64: return f64; case v8f64: return f64;
@ -338,9 +344,12 @@ namespace llvm {
case v2f16: case v2f16:
case v2f32: case v2f32:
case v2f64: return 2; case v2f64: return 2;
case v1i8:
case v1i16: case v1i16:
case v1i32: case v1i32:
case v1i64: return 1; case v1i64:
case v1f32:
case v1f64: return 1;
} }
} }
@ -363,6 +372,7 @@ namespace llvm {
case v2i1: return 2; case v2i1: return 2;
case v4i1: return 4; case v4i1: return 4;
case i8 : case i8 :
case v1i8:
case v8i1: return 8; case v8i1: return 8;
case i16 : case i16 :
case f16: case f16:
@ -375,6 +385,7 @@ namespace llvm {
case v4i8: case v4i8:
case v2i16: case v2i16:
case v2f16: case v2f16:
case v1f32:
case v1i32: return 32; case v1i32: return 32;
case x86mmx: case x86mmx:
case f64 : case f64 :
@ -384,7 +395,8 @@ namespace llvm {
case v4i16: case v4i16:
case v2i32: case v2i32:
case v1i64: case v1i64:
case v2f32: return 64; case v2f32:
case v1f64: return 64;
case f80 : return 80; case f80 : return 80;
case f128: case f128:
case ppcf128: case ppcf128:
@ -494,6 +506,7 @@ namespace llvm {
if (NumElements == 64) return MVT::v64i1; if (NumElements == 64) return MVT::v64i1;
break; break;
case MVT::i8: case MVT::i8:
if (NumElements == 1) return MVT::v1i8;
if (NumElements == 2) return MVT::v2i8; if (NumElements == 2) return MVT::v2i8;
if (NumElements == 4) return MVT::v4i8; if (NumElements == 4) return MVT::v4i8;
if (NumElements == 8) return MVT::v8i8; if (NumElements == 8) return MVT::v8i8;
@ -528,12 +541,14 @@ namespace llvm {
if (NumElements == 8) return MVT::v8f16; if (NumElements == 8) return MVT::v8f16;
break; break;
case MVT::f32: case MVT::f32:
if (NumElements == 1) return MVT::v1f32;
if (NumElements == 2) return MVT::v2f32; if (NumElements == 2) return MVT::v2f32;
if (NumElements == 4) return MVT::v4f32; if (NumElements == 4) return MVT::v4f32;
if (NumElements == 8) return MVT::v8f32; if (NumElements == 8) return MVT::v8f32;
if (NumElements == 16) return MVT::v16f32; if (NumElements == 16) return MVT::v16f32;
break; break;
case MVT::f64: case MVT::f64:
if (NumElements == 1) return MVT::v1f64;
if (NumElements == 2) return MVT::v2f64; if (NumElements == 2) return MVT::v2f64;
if (NumElements == 4) return MVT::v4f64; if (NumElements == 4) return MVT::v4f64;
if (NumElements == 8) return MVT::v8f64; if (NumElements == 8) return MVT::v8f64;

View File

@ -39,44 +39,47 @@ def v8i1 : ValueType<8 , 15>; // 8 x i1 vector value
def v16i1 : ValueType<16, 16>; // 16 x i1 vector value def v16i1 : ValueType<16, 16>; // 16 x i1 vector value
def v32i1 : ValueType<32 , 17>; // 32 x i1 vector value def v32i1 : ValueType<32 , 17>; // 32 x i1 vector value
def v64i1 : ValueType<64 , 18>; // 64 x i1 vector value def v64i1 : ValueType<64 , 18>; // 64 x i1 vector value
def v2i8 : ValueType<16 , 19>; // 2 x i8 vector value def v1i8 : ValueType<8 , 19>; // 1 x i8 vector value (8 bits wide)
def v4i8 : ValueType<32 , 20>; // 4 x i8 vector value def v2i8 : ValueType<16 , 20>; // 2 x i8 vector value
def v8i8 : ValueType<64 , 21>; // 8 x i8 vector value def v4i8 : ValueType<32 , 21>; // 4 x i8 vector value
def v16i8 : ValueType<128, 22>; // 16 x i8 vector value def v8i8 : ValueType<64 , 22>; // 8 x i8 vector value
def v32i8 : ValueType<256, 23>; // 32 x i8 vector value def v16i8 : ValueType<128, 23>; // 16 x i8 vector value
def v64i8 : ValueType<512, 24>; // 64 x i8 vector value def v32i8 : ValueType<256, 24>; // 32 x i8 vector value
def v1i16 : ValueType<16 , 25>; // 1 x i16 vector value def v64i8 : ValueType<512, 25>; // 64 x i8 vector value
def v2i16 : ValueType<32 , 26>; // 2 x i16 vector value def v1i16 : ValueType<16 , 26>; // 1 x i16 vector value
def v4i16 : ValueType<64 , 27>; // 4 x i16 vector value def v2i16 : ValueType<32 , 27>; // 2 x i16 vector value
def v8i16 : ValueType<128, 28>; // 8 x i16 vector value def v4i16 : ValueType<64 , 28>; // 4 x i16 vector value
def v16i16 : ValueType<256, 29>; // 16 x i16 vector value def v8i16 : ValueType<128, 29>; // 8 x i16 vector value
def v32i16 : ValueType<512, 30>; // 32 x i16 vector value def v16i16 : ValueType<256, 30>; // 16 x i16 vector value
def v1i32 : ValueType<32 , 31>; // 1 x i32 vector value def v32i16 : ValueType<512, 31>; // 32 x i16 vector value
def v2i32 : ValueType<64 , 32>; // 2 x i32 vector value def v1i32 : ValueType<32 , 32>; // 1 x i32 vector value
def v4i32 : ValueType<128, 33>; // 4 x i32 vector value def v2i32 : ValueType<64 , 33>; // 2 x i32 vector value
def v8i32 : ValueType<256, 34>; // 8 x i32 vector value def v4i32 : ValueType<128, 34>; // 4 x i32 vector value
def v16i32 : ValueType<512, 35>; // 16 x i32 vector value def v8i32 : ValueType<256, 35>; // 8 x i32 vector value
def v1i64 : ValueType<64 , 36>; // 1 x i64 vector value def v16i32 : ValueType<512, 36>; // 16 x i32 vector value
def v2i64 : ValueType<128, 37>; // 2 x i64 vector value def v1i64 : ValueType<64 , 37>; // 1 x i64 vector value
def v4i64 : ValueType<256, 38>; // 4 x i64 vector value def v2i64 : ValueType<128, 38>; // 2 x i64 vector value
def v8i64 : ValueType<512, 39>; // 8 x i64 vector value def v4i64 : ValueType<256, 39>; // 4 x i64 vector value
def v16i64 : ValueType<1024,40>; // 16 x i64 vector value def v8i64 : ValueType<512, 40>; // 8 x i64 vector value
def v16i64 : ValueType<1024,41>; // 16 x i64 vector value
def v2f16 : ValueType<32 , 41>; // 2 x f16 vector value def v2f16 : ValueType<32 , 42>; // 2 x f16 vector value
def v8f16 : ValueType<128, 42>; // 8 x f16 vector value def v8f16 : ValueType<128, 43>; // 8 x f16 vector value
def v2f32 : ValueType<64 , 43>; // 2 x f32 vector value def v1f32 : ValueType<32 , 44>; // 1 x f32 vector value
def v4f32 : ValueType<128, 44>; // 4 x f32 vector value def v2f32 : ValueType<64 , 45>; // 2 x f32 vector value
def v8f32 : ValueType<256, 45>; // 8 x f32 vector value def v4f32 : ValueType<128, 46>; // 4 x f32 vector value
def v16f32 : ValueType<512, 46>; // 16 x f32 vector value def v8f32 : ValueType<256, 47>; // 8 x f32 vector value
def v2f64 : ValueType<128, 47>; // 2 x f64 vector value def v16f32 : ValueType<512, 48>; // 16 x f32 vector value
def v4f64 : ValueType<256, 48>; // 4 x f64 vector value def v1f64 : ValueType<64, 49>; // 1 x f64 vector value
def v8f64 : ValueType<512, 49>; // 8 x f64 vector value def v2f64 : ValueType<128, 50>; // 2 x f64 vector value
def v4f64 : ValueType<256, 51>; // 4 x f64 vector value
def v8f64 : ValueType<512, 52>; // 8 x f64 vector value
def x86mmx : ValueType<64 , 50>; // X86 MMX value def x86mmx : ValueType<64 , 53>; // X86 MMX value
def FlagVT : ValueType<0 , 51>; // Pre-RA sched glue def FlagVT : ValueType<0 , 54>; // Pre-RA sched glue
def isVoid : ValueType<0 , 52>; // Produces no value def isVoid : ValueType<0 , 55>; // Produces no value
def untyped: ValueType<8 , 53>; // Produces an untyped value def untyped: ValueType<8 , 56>; // Produces an untyped value
def MetadataVT: ValueType<0, 250>; // Metadata def MetadataVT: ValueType<0, 250>; // Metadata
// Pseudo valuetype mapped to the current pointer size to any address space. // Pseudo valuetype mapped to the current pointer size to any address space.

View File

@ -140,6 +140,7 @@ def llvm_v8i1_ty : LLVMType<v8i1>; // 8 x i1
def llvm_v16i1_ty : LLVMType<v16i1>; // 16 x i1 def llvm_v16i1_ty : LLVMType<v16i1>; // 16 x i1
def llvm_v32i1_ty : LLVMType<v32i1>; // 32 x i1 def llvm_v32i1_ty : LLVMType<v32i1>; // 32 x i1
def llvm_v64i1_ty : LLVMType<v64i1>; // 64 x i1 def llvm_v64i1_ty : LLVMType<v64i1>; // 64 x i1
def llvm_v1i8_ty : LLVMType<v1i8>; // 1 x i8
def llvm_v2i8_ty : LLVMType<v2i8>; // 2 x i8 def llvm_v2i8_ty : LLVMType<v2i8>; // 2 x i8
def llvm_v4i8_ty : LLVMType<v4i8>; // 4 x i8 def llvm_v4i8_ty : LLVMType<v4i8>; // 4 x i8
def llvm_v8i8_ty : LLVMType<v8i8>; // 8 x i8 def llvm_v8i8_ty : LLVMType<v8i8>; // 8 x i8
@ -166,10 +167,12 @@ def llvm_v8i64_ty : LLVMType<v8i64>; // 8 x i64
def llvm_v16i64_ty : LLVMType<v16i64>; // 16 x i64 def llvm_v16i64_ty : LLVMType<v16i64>; // 16 x i64
def llvm_v8f16_ty : LLVMType<v8f16>; // 8 x half (__fp16) def llvm_v8f16_ty : LLVMType<v8f16>; // 8 x half (__fp16)
def llvm_v1f32_ty : LLVMType<v1f32>; // 1 x float
def llvm_v2f32_ty : LLVMType<v2f32>; // 2 x float def llvm_v2f32_ty : LLVMType<v2f32>; // 2 x float
def llvm_v4f32_ty : LLVMType<v4f32>; // 4 x float def llvm_v4f32_ty : LLVMType<v4f32>; // 4 x float
def llvm_v8f32_ty : LLVMType<v8f32>; // 8 x float def llvm_v8f32_ty : LLVMType<v8f32>; // 8 x float
def llvm_v16f32_ty : LLVMType<v16f32>; // 16 x float def llvm_v16f32_ty : LLVMType<v16f32>; // 16 x float
def llvm_v1f64_ty : LLVMType<v1f64>; // 1 x double
def llvm_v2f64_ty : LLVMType<v2f64>; // 2 x double def llvm_v2f64_ty : LLVMType<v2f64>; // 2 x double
def llvm_v4f64_ty : LLVMType<v4f64>; // 4 x double def llvm_v4f64_ty : LLVMType<v4f64>; // 4 x double
def llvm_v8f64_ty : LLVMType<v8f64>; // 8 x double def llvm_v8f64_ty : LLVMType<v8f64>; // 8 x double

View File

@ -17,12 +17,10 @@
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
// Vector Absolute Compare (Floating Point) // Vector Absolute Compare (Floating Point)
def int_aarch64_neon_vacgeq : Intrinsic<[llvm_v2i64_ty], def int_aarch64_neon_vacgeq :
[llvm_v2f64_ty, llvm_v2f64_ty], Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
[IntrNoMem]>; def int_aarch64_neon_vacgtq :
def int_aarch64_neon_vacgtq : Intrinsic<[llvm_v2i64_ty], Intrinsic<[llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>;
[llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
// Vector maxNum (Floating Point) // Vector maxNum (Floating Point)
def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic; def int_aarch64_neon_vmaxnm : Neon_2Arg_Intrinsic;
@ -66,4 +64,73 @@ def int_aarch64_neon_vsqshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vuqshrn : Neon_N2V_Narrow_Intrinsic; def int_aarch64_neon_vuqshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vsqrshrn : Neon_N2V_Narrow_Intrinsic; def int_aarch64_neon_vsqrshrn : Neon_N2V_Narrow_Intrinsic;
def int_aarch64_neon_vuqrshrn : Neon_N2V_Narrow_Intrinsic; def int_aarch64_neon_vuqrshrn : Neon_N2V_Narrow_Intrinsic;
// Scalar Add
def int_aarch64_neon_vaddds :
Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vadddu :
Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
// Scalar Saturating Add (Signed, Unsigned)
def int_aarch64_neon_vqadds : Neon_2Arg_Intrinsic;
def int_aarch64_neon_vqaddu : Neon_2Arg_Intrinsic;
// Scalar Sub
def int_aarch64_neon_vsubds :
Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vsubdu :
Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
// Scalar Saturating Sub (Signed, Unsigned)
def int_aarch64_neon_vqsubs : Neon_2Arg_Intrinsic;
def int_aarch64_neon_vqsubu : Neon_2Arg_Intrinsic;
// Scalar Shift
// Scalar Shift Left
def int_aarch64_neon_vshlds :
Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vshldu :
Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
// Scalar Saturating Shift Left
def int_aarch64_neon_vqshls : Neon_2Arg_Intrinsic;
def int_aarch64_neon_vqshlu : Neon_2Arg_Intrinsic;
// Scalar Rounding Shift Left
def int_aarch64_neon_vrshlds :
Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
def int_aarch64_neon_vrshldu :
Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, llvm_v1i64_ty], [IntrNoMem]>;
// Scalar Saturating Rounding Shift Left
def int_aarch64_neon_vqrshls : Neon_2Arg_Intrinsic;
def int_aarch64_neon_vqrshlu : Neon_2Arg_Intrinsic;
// Scalar Reduce Pairwise Add.
def int_aarch64_neon_vpadd :
Intrinsic<[llvm_v1i64_ty], [llvm_v2i64_ty],[IntrNoMem]>;
def int_aarch64_neon_vpfadd :
Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
def int_aarch64_neon_vpfaddq :
Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
// Scalar Reduce Pairwise Floating Point Max/Min.
def int_aarch64_neon_vpmax :
Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
def int_aarch64_neon_vpmaxq :
Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_aarch64_neon_vpmin :
Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
def int_aarch64_neon_vpminq :
Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
// Scalar Reduce Pairwise Floating Point Maxnm/Minnm.
def int_aarch64_neon_vpfmaxnm :
Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
def int_aarch64_neon_vpfmaxnmq :
Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_aarch64_neon_vpfminnm :
Intrinsic<[llvm_v1f32_ty], [llvm_v2f32_ty], [IntrNoMem]>;
def int_aarch64_neon_vpfminnmq :
Intrinsic<[llvm_v1f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
} }

View File

@ -919,7 +919,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
// type does not have a strange size (eg: it is not i1). // type does not have a strange size (eg: it is not i1).
EVT VecVT = N->getValueType(0); EVT VecVT = N->getValueType(0);
unsigned NumElts = VecVT.getVectorNumElements(); unsigned NumElts = VecVT.getVectorNumElements();
assert(!(NumElts & 1) && "Legal vector of one illegal element?"); assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) &&
"Legal vector of one illegal element?");
// Promote the inserted value. The type does not need to match the // Promote the inserted value. The type does not need to match the
// vector element type. Check that any extra bits introduced will be // vector element type. Check that any extra bits introduced will be

View File

@ -453,7 +453,8 @@ enum IIT_Info {
IIT_STRUCT5 = 22, IIT_STRUCT5 = 22,
IIT_EXTEND_VEC_ARG = 23, IIT_EXTEND_VEC_ARG = 23,
IIT_TRUNC_VEC_ARG = 24, IIT_TRUNC_VEC_ARG = 24,
IIT_ANYPTR = 25 IIT_ANYPTR = 25,
IIT_V1 = 26
}; };
@ -497,6 +498,10 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
case IIT_I64: case IIT_I64:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 64)); OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 64));
return; return;
case IIT_V1:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 1));
DecodeIITType(NextElt, Infos, OutputTable);
return;
case IIT_V2: case IIT_V2:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 2)); OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 2));
DecodeIITType(NextElt, Infos, OutputTable); DecodeIITType(NextElt, Infos, OutputTable);

View File

@ -134,6 +134,7 @@ std::string EVT::getEVTString() const {
case MVT::v16i1: return "v16i1"; case MVT::v16i1: return "v16i1";
case MVT::v32i1: return "v32i1"; case MVT::v32i1: return "v32i1";
case MVT::v64i1: return "v64i1"; case MVT::v64i1: return "v64i1";
case MVT::v1i8: return "v1i8";
case MVT::v2i8: return "v2i8"; case MVT::v2i8: return "v2i8";
case MVT::v4i8: return "v4i8"; case MVT::v4i8: return "v4i8";
case MVT::v8i8: return "v8i8"; case MVT::v8i8: return "v8i8";
@ -156,12 +157,14 @@ std::string EVT::getEVTString() const {
case MVT::v4i64: return "v4i64"; case MVT::v4i64: return "v4i64";
case MVT::v8i64: return "v8i64"; case MVT::v8i64: return "v8i64";
case MVT::v16i64: return "v16i64"; case MVT::v16i64: return "v16i64";
case MVT::v1f32: return "v1f32";
case MVT::v2f32: return "v2f32"; case MVT::v2f32: return "v2f32";
case MVT::v2f16: return "v2f16"; case MVT::v2f16: return "v2f16";
case MVT::v8f16: return "v8f16"; case MVT::v8f16: return "v8f16";
case MVT::v4f32: return "v4f32"; case MVT::v4f32: return "v4f32";
case MVT::v8f32: return "v8f32"; case MVT::v8f32: return "v8f32";
case MVT::v16f32: return "v16f32"; case MVT::v16f32: return "v16f32";
case MVT::v1f64: return "v1f64";
case MVT::v2f64: return "v2f64"; case MVT::v2f64: return "v2f64";
case MVT::v4f64: return "v4f64"; case MVT::v4f64: return "v4f64";
case MVT::v8f64: return "v8f64"; case MVT::v8f64: return "v8f64";
@ -198,6 +201,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v16i1: return VectorType::get(Type::getInt1Ty(Context), 16); case MVT::v16i1: return VectorType::get(Type::getInt1Ty(Context), 16);
case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32); case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32);
case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64); case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64);
case MVT::v1i8: return VectorType::get(Type::getInt8Ty(Context), 1);
case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2); case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2);
case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4); case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4);
case MVT::v8i8: return VectorType::get(Type::getInt8Ty(Context), 8); case MVT::v8i8: return VectorType::get(Type::getInt8Ty(Context), 8);
@ -222,10 +226,12 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16); case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16);
case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2); case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2);
case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8); case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8);
case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1);
case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2); case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4); case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4);
case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8); case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8);
case MVT::v16f32: return VectorType::get(Type::getFloatTy(Context), 16); case MVT::v16f32: return VectorType::get(Type::getFloatTy(Context), 16);
case MVT::v1f64: return VectorType::get(Type::getDoubleTy(Context), 1);
case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2); case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2);
case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4); case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4);
case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8); case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8);

View File

@ -59,9 +59,9 @@ def CC_A64_APCS : CallingConv<[
// Canonicalise the various types that live in different floating-point // Canonicalise the various types that live in different floating-point
// registers. This makes sense because the PCS does not distinguish Short // registers. This makes sense because the PCS does not distinguish Short
// Vectors and Floating-point types. // Vectors and Floating-point types.
CCIfType<[v2i8], CCBitConvertToType<f16>>, CCIfType<[v1i16, v2i8], CCBitConvertToType<f16>>,
CCIfType<[v4i8, v2i16], CCBitConvertToType<f32>>, CCIfType<[v1i32, v4i8, v2i16, v1f32], CCBitConvertToType<f32>>,
CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64], CCBitConvertToType<f64>>, CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64, v1f64], CCBitConvertToType<f64>>,
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCBitConvertToType<f128>>, CCBitConvertToType<f128>>,
@ -70,7 +70,8 @@ def CC_A64_APCS : CallingConv<[
// argument is allocated to the least significant bits of register // argument is allocated to the least significant bits of register
// v[NSRN]. The NSRN is incremented by one. The argument has now been // v[NSRN]. The NSRN is incremented by one. The argument has now been
// allocated." // allocated."
CCIfType<[f16], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>, CCIfType<[v1i8], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>,
CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,

View File

@ -57,6 +57,12 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
if (Subtarget->hasNEON()) { if (Subtarget->hasNEON()) {
// And the vectors // And the vectors
addRegisterClass(MVT::v1i8, &AArch64::FPR8RegClass);
addRegisterClass(MVT::v1i16, &AArch64::FPR16RegClass);
addRegisterClass(MVT::v1i32, &AArch64::FPR32RegClass);
addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass);
addRegisterClass(MVT::v1f32, &AArch64::FPR32RegClass);
addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass);
addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass); addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass);
addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass); addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass);
addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass); addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass);
@ -274,16 +280,21 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
setExceptionSelectorRegister(AArch64::X1); setExceptionSelectorRegister(AArch64::X1);
if (Subtarget->hasNEON()) { if (Subtarget->hasNEON()) {
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1f64, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal); setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal);

View File

@ -1074,8 +1074,7 @@ class NeonI_2VMisc<bit q, bit u, bits<2> size, bits<5> opcode,
class NeonI_2VShiftImm<bit q, bit u, bits<5> opcode, class NeonI_2VShiftImm<bit q, bit u, bits<5> opcode,
dag outs, dag ins, string asmstr, dag outs, dag ins, string asmstr,
list<dag> patterns, InstrItinClass itin> list<dag> patterns, InstrItinClass itin>
: A64InstRdn<outs, ins, asmstr, patterns, itin> : A64InstRdn<outs, ins, asmstr, patterns, itin> {
{
bits<7> Imm; bits<7> Imm;
let Inst{31} = 0b0; let Inst{31} = 0b0;
let Inst{30} = q; let Inst{30} = q;
@ -1129,5 +1128,23 @@ class NeonI_insert<bit q, bit op,
// Inherit Rd in 4-0 // Inherit Rd in 4-0
} }
// Format AdvSIMD scalar pairwise
class NeonI_ScalarPair<bit u, bits<2> size, bits<5> opcode,
dag outs, dag ins, string asmstr,
list<dag> patterns, InstrItinClass itin>
: A64InstRdn<outs, ins, asmstr, patterns, itin> {
let Inst{31} = 0b0;
let Inst{30} = 0b1;
let Inst{29} = u;
let Inst{28-24} = 0b11110;
let Inst{23-22} = size;
let Inst{21-17} = 0b11000;
let Inst{16-12} = opcode;
let Inst{11-10} = 0b10;
// Inherit Rn in 9-5
// Inherit Rd in 4-0
}
} }

View File

@ -2189,22 +2189,22 @@ def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>;
// Extra patterns for when we're allowed to optimise separate multiplication and // Extra patterns for when we're allowed to optimise separate multiplication and
// addition. // addition.
let Predicates = [UseFusedMAC] in { let Predicates = [UseFusedMAC] in {
def : Pat<(fadd FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)), def : Pat<(f32 (fadd FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))),
(FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; (FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
def : Pat<(fsub FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)), def : Pat<(f32 (fsub FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))),
(FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; (FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
def : Pat<(fsub (fmul FPR32:$Rn, FPR32:$Rm), FPR32:$Ra), def : Pat<(f32 (fsub (f32 (fmul FPR32:$Rn, FPR32:$Rm)), FPR32:$Ra)),
(FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; (FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
def : Pat<(fsub (fneg FPR32:$Ra), (fmul FPR32:$Rn, FPR32:$Rm)), def : Pat<(f32 (fsub (f32 (fneg FPR32:$Ra)), (f32 (fmul FPR32:$Rn, FPR32:$Rm)))),
(FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; (FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
def : Pat<(fadd FPR64:$Ra, (fmul (f64 FPR64:$Rn), FPR64:$Rm)), def : Pat<(f64 (fadd FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))),
(FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; (FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
def : Pat<(fsub FPR64:$Ra, (fmul (f64 FPR64:$Rn), FPR64:$Rm)), def : Pat<(f64 (fsub FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))),
(FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; (FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
def : Pat<(fsub (fmul (f64 FPR64:$Rn), FPR64:$Rm), FPR64:$Ra), def : Pat<(f64 (fsub (f64 (fmul FPR64:$Rn, FPR64:$Rm)), FPR64:$Ra)),
(FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; (FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
def : Pat<(fsub (fneg (f64 FPR64:$Ra)), (fmul FPR64:$Rn, FPR64:$Rm)), def : Pat<(f64 (fsub (f64 (fneg FPR64:$Ra)), (f64 (fmul FPR64:$Rn, FPR64:$Rm)))),
(FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; (FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
} }

View File

@ -2504,11 +2504,12 @@ defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
multiclass NeonI_Op_High<SDPatternOperator op> multiclass NeonI_Op_High<SDPatternOperator op>
{ {
def _16B : PatFrag<(ops node:$Rn, node:$Rm), def _16B : PatFrag<(ops node:$Rn, node:$Rm),
(op (Neon_top16B node:$Rn), (Neon_top16B node:$Rm))>; (op (v8i8 (Neon_top16B node:$Rn)), (v8i8 (Neon_top16B node:$Rm)))>;
def _8H : PatFrag<(ops node:$Rn, node:$Rm), def _8H : PatFrag<(ops node:$Rn, node:$Rm),
(op (Neon_top8H node:$Rn), (Neon_top8H node:$Rm))>; (op (v4i16 (Neon_top8H node:$Rn)), (v4i16 (Neon_top8H node:$Rm)))>;
def _4S : PatFrag<(ops node:$Rn, node:$Rm), def _4S : PatFrag<(ops node:$Rn, node:$Rm),
(op (Neon_top4S node:$Rn), (Neon_top4S node:$Rm))>; (op (v2i32 (Neon_top4S node:$Rn)), (v2i32 (Neon_top4S node:$Rm)))>;
} }
defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>; defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
@ -2868,9 +2869,25 @@ multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
} }
} }
class Neon_Scalar_D_size_patterns<SDPatternOperator opnode, Instruction INSTD> multiclass Neon_Scalar_D_size_patterns<SDPatternOperator opnode,
: Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))), Instruction INSTD> {
(INSTD VPR64:$Rn, VPR64:$Rm)>; def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
(INSTD FPR64:$Rn, FPR64:$Rm)>;
}
// Selection patterns for a two-operand scalar operation at every element size.
// Inherits from Neon_Scalar_D_size_patterns<opnode, INSTD> for the D-register
// form, and adds patterns that map opnode on v1i8 / v1i16 / v1i32 operands
// held in FPR8 / FPR16 / FPR32 onto INSTB / INSTH / INSTS respectively.
multiclass Neon_Scalar_BHSD_size_patterns<SDPatternOperator opnode,
Instruction INSTB, Instruction INSTH,
Instruction INSTS, Instruction INSTD>
: Neon_Scalar_D_size_patterns<opnode, INSTD> {
// 8-bit element: operands and result in FPR8.
def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
(INSTB FPR8:$Rn, FPR8:$Rm)>;
// 16-bit element: operands and result in FPR16.
def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
(INSTH FPR16:$Rn, FPR16:$Rm)>;
// 32-bit element: operands and result in FPR32.
def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
(INSTS FPR32:$Rn, FPR32:$Rm)>;
}
// Scalar Integer Add // Scalar Integer Add
let isCommutable = 1 in { let isCommutable = 1 in {
@ -2880,9 +2897,15 @@ def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
// Scalar Integer Sub // Scalar Integer Sub
def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">; def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
// Pattern for Scalar Integer Add and Sub with D register // Pattern for Scalar Integer Add and Sub with D register only
def : Neon_Scalar_D_size_patterns<add, ADDddd>; defm : Neon_Scalar_D_size_patterns<add, ADDddd>;
def : Neon_Scalar_D_size_patterns<sub, SUBddd>; defm : Neon_Scalar_D_size_patterns<sub, SUBddd>;
// Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
// Scalar Integer Saturating Add (Signed, Unsigned) // Scalar Integer Saturating Add (Signed, Unsigned)
defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>; defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
@ -2892,40 +2915,160 @@ defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>; defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>; defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
// Patterns for Scalar Integer Saturating Add, Sub with D register only // Patterns to match llvm.arm.* intrinsic for
def : Neon_Scalar_D_size_patterns<int_arm_neon_vqadds, SQADDddd>; // Scalar Integer Saturating Add, Sub (Signed, Unsigned)
def : Neon_Scalar_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>; defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
def : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>; defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
def : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>; defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Integer Saturating Add, Sub (Signed, Unsigned)
defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqadds, SQADDbbb, SQADDhhh,
SQADDsss, SQADDddd>;
defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqaddu, UQADDbbb, UQADDhhh,
UQADDsss, UQADDddd>;
defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqsubs, SQSUBbbb, SQSUBhhh,
SQSUBsss, SQSUBddd>;
defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqsubu, UQSUBbbb, UQSUBhhh,
UQSUBsss, UQSUBddd>;
// Scalar Integer Shift Left (Signed, Unsigned) // Scalar Integer Shift Left (Signed, Unsigned)
def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">; def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">; def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
// Patterns to match llvm.arm.* intrinsic for
// Scalar Integer Shift Left (Signed, Unsigned)
defm : Neon_Scalar_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
defm : Neon_Scalar_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Integer Shift Left (Signed, Unsigned)
defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
// Scalar Integer Saturating Shift Left (Signed, Unsigned) // Scalar Integer Saturating Shift Left (Signed, Unsigned)
defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>; defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>; defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
// Scalar Integer Rouding Shift Left (Signed, Unsigned) // Patterns to match llvm.aarch64.* intrinsic for
// Scalar Integer Saturating Shift Left (Signed, Unsigned)
defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb, SQSHLhhh,
SQSHLsss, SQSHLddd>;
defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb, UQSHLhhh,
UQSHLsss, UQSHLddd>;
// Patterns to match llvm.arm.* intrinsic for
// Scalar Integer Saturating Shift Left (Signed, Unsigned)
defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
// Scalar Integer Rounding Shift Left (Signed, Unsigned)
def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">; def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">; def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Integer Rounding Shift Left (Signed, Unsigned)
defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
defm : Neon_Scalar_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
// Patterns to match llvm.arm.* intrinsic for
// Scalar Integer Rounding Shift Left (Signed, Unsigned)
defm : Neon_Scalar_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
defm : Neon_Scalar_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned) // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>; defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>; defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
// Patterns for Scalar Integer Shift Lef, Saturating Shift Left, // Patterns to match llvm.aarch64.* intrinsic for
// Rounding Shift Left, Rounding Saturating Shift Left with D register only // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
def : Neon_Scalar_D_size_patterns<int_arm_neon_vshifts, SSHLddd>; defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb, SQRSHLhhh,
def : Neon_Scalar_D_size_patterns<int_arm_neon_vshiftu, USHLddd>; SQRSHLsss, SQRSHLddd>;
def : Neon_Scalar_D_size_patterns<shl, SSHLddd>; defm : Neon_Scalar_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb, UQRSHLhhh,
def : Neon_Scalar_D_size_patterns<shl, USHLddd>; UQRSHLsss, UQRSHLddd>;
def : Neon_Scalar_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
def : Neon_Scalar_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>; // Patterns to match llvm.arm.* intrinsic for
def : Neon_Scalar_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>; // Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
def : Neon_Scalar_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>; defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
def : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>; defm : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
def : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
// Scalar Reduce Pairwise
// Scalar pairwise-reduce instruction, 64-bit element form only.
// Defines <prefix>_D_2D, which reads one VPR128 operand (printed as "$Rn.2d")
// and writes an FPR64 result. The size field passed to NeonI_ScalarPair is
// {size, 0b1}. The pattern list is empty, so no selection pattern is attached
// to the instruction definition itself.
multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
string asmop, bit Commutable = 0> {
let isCommutable = Commutable in {
def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
(outs FPR64:$Rd), (ins VPR128:$Rn),
!strconcat(asmop, " $Rd, $Rn.2d"),
[],
NoItinerary>;
}
}
// Scalar pairwise-reduce instruction, 32-bit and 64-bit element forms.
// Inherits the _D_2D definition from NeonI_ScalarPair_D_sizes and adds
// <prefix>_S_2S, which reads one VPR64 operand (printed as "$Rn.2s") and
// writes an FPR32 result. The size field passed to NeonI_ScalarPair is
// {size, 0b0}. As with the D form, the pattern list is empty.
multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
string asmop, bit Commutable = 0>
: NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
let isCommutable = Commutable in {
def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
(outs FPR32:$Rd), (ins VPR64:$Rn),
!strconcat(asmop, " $Rd, $Rn.2s"),
[],
NoItinerary>;
}
}
// Scalar Reduce Addition Pairwise (Integer), D form only.
// The instruction definition carries no selection pattern of its own.
defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;
// Pattern to match llvm.aarch64.* intrinsic for
// Scalar Reduce Addition Pairwise (Integer)
def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
(ADDPvv_D_2D VPR128:$Rn)>;
// Scalar Reduce Addition Pairwise (Floating Point)
defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;
// Scalar Reduce Maximum Pairwise (Floating Point)
defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;
// Scalar Reduce Minimum Pairwise (Floating Point)
defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;
// Scalar Reduce maxNum Pairwise (Floating Point)
defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;
// Scalar Reduce minNum Pairwise (Floating Point)
defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;
// Selection patterns for a floating-point scalar pairwise reduction.
// opnodeS applied to a v2f32 in VPR64 yields v1f32 and selects INSTS;
// opnodeD applied to a v2f64 in VPR128 yields v1f64 and selects INSTD.
multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnodeS,
SDPatternOperator opnodeD,
Instruction INSTS,
Instruction INSTD> {
// Single-precision form: v2f32 -> v1f32.
def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))),
(INSTS VPR64:$Rn)>;
// Double-precision form: v2f64 -> v1f64.
def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))),
(INSTD VPR128:$Rn)>;
}
// Patterns to match llvm.aarch64.* intrinsic for
// Scalar Reduce Add, Max, Min, MaxNum, MinNum Pairwise (Floating Point).
// Each instantiation pairs the v2f32 intrinsic with the _S_2S instruction
// and the v2f64 ("q") intrinsic with the _D_2D instruction.
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
int_aarch64_neon_vpfaddq, FADDPvv_S_2S, FADDPvv_D_2D>;
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
int_aarch64_neon_vpmaxq, FMAXPvv_S_2S, FMAXPvv_D_2D>;
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
int_aarch64_neon_vpminq, FMINPvv_S_2S, FMINPvv_D_2D>;
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
int_aarch64_neon_vpfmaxnmq, FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
int_aarch64_neon_vpfminnmq, FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -2999,6 +3142,14 @@ def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>;
// ...and scalar bitcasts... // ...and scalar bitcasts...
// Bitcasts from a single-element vector to the scalar FP type of the same
// width: the result pattern is the source register itself, retyped.
def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>;
def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>;
def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
def : Pat<(f32 (bitconvert (v1f32 FPR32:$src))), (f32 FPR32:$src)>;
def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
// Bitcasts from a single-element integer vector to a scalar integer are
// selected to FMOVxd (64-bit) / FMOVws (32-bit).
def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>;
def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>;
def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>; def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>; def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
@ -3017,6 +3168,15 @@ def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>; def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>; def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>;
// Bitcasts from a scalar FP type to a single-element vector of the same
// width: the result pattern is the source register itself, retyped.
def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>;
def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>;
def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
def : Pat<(v1f32 (bitconvert (f32 FPR32:$src))), (v1f32 FPR32:$src)>;
def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
// Bitcasts from a scalar integer in GPR64 / GPR32 to a single-element
// integer vector are selected to FMOVdx / FMOVsw.
def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>;
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>; def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
@ -3349,8 +3509,6 @@ def UMOVwh_pattern : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
neon_uimm2_bare, UMOVwh>; neon_uimm2_bare, UMOVwh>;
def UMOVws_pattern : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare, def UMOVws_pattern : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
neon_uimm1_bare, UMOVws>; neon_uimm1_bare, UMOVws>;
def UMOVxd_pattern : Neon_UMOV_pattern<v2i64, v1i64, i64, neon_uimm1_bare,
neon_uimm0_bare, UMOVxd>;
def : Pat<(i32 (and def : Pat<(i32 (and
(i32 (vector_extract (i32 (vector_extract
@ -3389,3 +3547,40 @@ def : Pat<(i64 (zext
(UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64), (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
neon_uimm0_bare:$Imm)>; neon_uimm0_bare:$Imm)>;
// Additional copy patterns for scalar types
// Extracting element 0 of a v1i8 / v1i16 as i32: the FPR8 / FPR16 value is
// placed into a v16i8 / v8i16 register via SUBREG_TO_REG and lane 0 is read
// with UMOVwb / UMOVwh.
def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
(UMOVwb (v16i8
(SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;
def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
(UMOVwh (v8i16
(SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;
// Extracting element 0 of a v1i32 / v1i64 is selected to FMOVws / FMOVxd.
def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
(FMOVws FPR32:$Rn)>;
def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
(FMOVxd FPR64:$Rn)>;
// Extracting element 0 of a v1f64 / v1f32 reuses the source register.
def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
(f64 FPR64:$Rn)>;
def : Pat<(f32 (vector_extract (v1f32 FPR32:$Rn), (i64 0))),
(f32 FPR32:$Rn)>;
// scalar_to_vector from GPR32 to v1i8 / v1i16: insert the value into lane 0
// of an IMPLICIT_DEF v16i8 / v8i16 with INSbw / INShw, then take the
// sub_8 / sub_16 subregister as the result.
def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
(v1i8 (EXTRACT_SUBREG (v16i8
(INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
sub_8))>;
def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
(v1i16 (EXTRACT_SUBREG (v8i16
(INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
sub_16))>;
// scalar_to_vector from GPR32 / GPR64 to v1i32 / v1i64 is selected to
// FMOVsw / FMOVdx.
def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
(FMOVsw $src)>;
def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
(FMOVdx $src)>;

View File

@ -133,19 +133,19 @@ foreach Index = 0-31 in {
} }
def FPR8 : RegisterClass<"AArch64", [i8], 8, def FPR8 : RegisterClass<"AArch64", [i8, v1i8], 8,
(sequence "B%u", 0, 31)> { (sequence "B%u", 0, 31)> {
} }
def FPR16 : RegisterClass<"AArch64", [f16], 16, def FPR16 : RegisterClass<"AArch64", [f16, v1i16], 16,
(sequence "H%u", 0, 31)> { (sequence "H%u", 0, 31)> {
} }
def FPR32 : RegisterClass<"AArch64", [f32], 32, def FPR32 : RegisterClass<"AArch64", [f32, v1i32, v1f32], 32,
(sequence "S%u", 0, 31)> { (sequence "S%u", 0, 31)> {
} }
def FPR64 : RegisterClass<"AArch64", [f64, v2f32, v2i32, v4i16, v8i8, v1i64], def FPR64 : RegisterClass<"AArch64", [f64, v2f32, v2i32, v4i16, v8i8, v1i64, v1f64],
64, (sequence "D%u", 0, 31)>; 64, (sequence "D%u", 0, 31)>;
def FPR128 : RegisterClass<"AArch64", def FPR128 : RegisterClass<"AArch64",

View File

@ -118,15 +118,3 @@ define <2 x double> @sub2xdouble(<2 x double> %A, <2 x double> %B) {
ret <2 x double> %tmp3 ret <2 x double> %tmp3
} }
define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) {
;CHECK: add {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
%tmp3 = add <1 x i64> %A, %B;
ret <1 x i64> %tmp3
}
define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) {
;CHECK: sub {{d[0-31]+}}, {{d[0-31]+}}, {{d[0-31]+}}
%tmp3 = sub <1 x i64> %A, %B;
ret <1 x i64> %tmp3
}

View File

@ -146,7 +146,7 @@ define i32 @umovw2s(<2 x i32> %tmp1) {
} }
define i64 @umovx1d(<1 x i64> %tmp1) { define i64 @umovx1d(<1 x i64> %tmp1) {
;CHECK: umov {{x[0-31]+}}, {{v[0-31]+}}.d[0] ;CHECK: fmov {{x[0-31]+}}, {{d[0-31]+}}
%tmp3 = extractelement <1 x i64> %tmp1, i32 0 %tmp3 = extractelement <1 x i64> %tmp1, i32 0
ret i64 %tmp3 ret i64 %tmp3
} }

View File

@ -102,23 +102,6 @@ define <4 x i32> @test_srshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
ret <4 x i32> %tmp1 ret <4 x i32> %tmp1
} }
declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>)
define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_urshl_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: urshl d0, d0, d1
ret <1 x i64> %tmp1
}
define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_srshl_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: srshl d0, d0, d1
ret <1 x i64> %tmp1
}
declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64>, <2 x i64>)

View File

@ -102,22 +102,7 @@ define <4 x i32> @test_sqadd_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
ret <4 x i32> %tmp1 ret <4 x i32> %tmp1
} }
declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)
define <1 x i64> @test_uqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqadd_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: uqadd d0, d0, d1
ret <1 x i64> %tmp1
}
define <1 x i64> @test_sqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqadd_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sqadd d0, d0, d1
ret <1 x i64> %tmp1
}
declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>)
@ -254,21 +239,3 @@ define <2 x i64> @test_sqsub_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
; CHECK: sqsub v0.2d, v0.2d, v1.2d ; CHECK: sqsub v0.2d, v0.2d, v1.2d
ret <2 x i64> %tmp1 ret <2 x i64> %tmp1
} }
declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)
define <1 x i64> @test_uqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqsub_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: uqsub d0, d0, d1
ret <1 x i64> %tmp1
}
define <1 x i64> @test_sqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqsub_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sqsub d0, d0, d1
ret <1 x i64> %tmp1
}

View File

@ -102,23 +102,6 @@ define <4 x i32> @test_sqrshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
ret <4 x i32> %tmp1 ret <4 x i32> %tmp1
} }
declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>)
define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqrshl_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: uqrshl d0, d0, d1
ret <1 x i64> %tmp1
}
define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqrshl_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sqrshl d0, d0, d1
ret <1 x i64> %tmp1
}
declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64>, <2 x i64>)

View File

@ -102,23 +102,6 @@ define <4 x i32> @test_sqshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
ret <4 x i32> %tmp1 ret <4 x i32> %tmp1
} }
declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>)
define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqshl_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: uqshl d0, d0, d1
ret <1 x i64> %tmp1
}
define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqshl_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sqshl d0, d0, d1
ret <1 x i64> %tmp1
}
declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64>, <2 x i64>)

View File

@ -0,0 +1,50 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
; A plain IR add on <1 x i64> must select the scalar ADD on D registers.
; The register pattern d[0-9]+ accepts any register number (d0-d31).
define <1 x i64> @add1xi64(<1 x i64> %A, <1 x i64> %B) {
;CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  %tmp3 = add <1 x i64> %A, %B
  ret <1 x i64> %tmp3
}
; A plain IR sub on <1 x i64> must select the scalar SUB on D registers.
; The register pattern d[0-9]+ accepts any register number (d0-d31).
define <1 x i64> @sub1xi64(<1 x i64> %A, <1 x i64> %B) {
;CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  %tmp3 = sub <1 x i64> %A, %B
  ret <1 x i64> %tmp3
}
declare <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64>, <1 x i64>)
; llvm.aarch64.neon.vaddds on <1 x i64> must select scalar ADD on D registers.
; The register pattern d[0-9]+ accepts any register number (d0-d31).
define <1 x i64> @test_add_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_add_v1i64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vaddds(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
; llvm.aarch64.neon.vadddu on <1 x i64> must select scalar ADD on D registers.
; The register pattern d[0-9]+ accepts any register number (d0-d31).
define <1 x i64> @test_uadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uadd_v1i64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vadddu(<1 x i64> %lhs, <1 x i64> %rhs)
;CHECK: add {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
declare <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64>, <1 x i64>)
; llvm.aarch64.neon.vsubds on <1 x i64> must select scalar SUB on D registers.
; The register pattern d[0-9]+ accepts any register number (d0-d31).
define <1 x i64> @test_sub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sub_v1i64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubds(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
; llvm.aarch64.neon.vsubdu on <1 x i64> must select scalar SUB on D registers.
; The register pattern d[0-9]+ accepts any register number (d0-d31).
define <1 x i64> @test_usub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_usub_v1i64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vsubdu(<1 x i64> %lhs, <1 x i64> %rhs)
;CHECK: sub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}

View File

@ -0,0 +1,103 @@
; RUN: llc -march=aarch64 -mattr=+neon < %s | FileCheck %s
declare <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64>)
; Pairwise integer add of a <2 x i64> through llvm.aarch64.neon.vpadd;
; the expected instruction is the scalar ADDP reading the .2d arrangement.
define <1 x i64> @test_addp_v1i64(<2 x i64> %pair) {
; CHECK: test_addp_v1i64:
  %reduced = call <1 x i64> @llvm.aarch64.neon.vpadd(<2 x i64> %pair)
; CHECK: addp d0, v0.2d
  ret <1 x i64> %reduced
}
declare <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float>)
; Pairwise FP add of a <2 x float> through llvm.aarch64.neon.vpfadd;
; the expected instruction is the scalar FADDP reading the .2s arrangement.
define <1 x float> @test_faddp_v1f32(<2 x float> %pair) {
; CHECK: test_faddp_v1f32:
  %reduced = call <1 x float> @llvm.aarch64.neon.vpfadd(<2 x float> %pair)
; CHECK: faddp s0, v0.2s
  ret <1 x float> %reduced
}
declare <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double>)
; Pairwise FP add of a <2 x double> through llvm.aarch64.neon.vpfaddq;
; the expected instruction is the scalar FADDP reading the .2d arrangement.
define <1 x double> @test_faddp_v1f64(<2 x double> %pair) {
; CHECK: test_faddp_v1f64:
  %reduced = call <1 x double> @llvm.aarch64.neon.vpfaddq(<2 x double> %pair)
; CHECK: faddp d0, v0.2d
  ret <1 x double> %reduced
}
declare <1 x float> @llvm.aarch64.neon.vpmax(<2 x float>)
; Pairwise FP maximum of a <2 x float> through llvm.aarch64.neon.vpmax;
; the expected instruction is the scalar FMAXP reading the .2s arrangement.
define <1 x float> @test_fmaxp_v1f32(<2 x float> %pair) {
; CHECK: test_fmaxp_v1f32:
  %reduced = call <1 x float> @llvm.aarch64.neon.vpmax(<2 x float> %pair)
; CHECK: fmaxp s0, v0.2s
  ret <1 x float> %reduced
}
declare <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double>)
; Pairwise FP maximum of a <2 x double> through llvm.aarch64.neon.vpmaxq;
; the expected instruction is the scalar FMAXP reading the .2d arrangement.
define <1 x double> @test_fmaxp_v1f64(<2 x double> %pair) {
; CHECK: test_fmaxp_v1f64:
  %reduced = call <1 x double> @llvm.aarch64.neon.vpmaxq(<2 x double> %pair)
; CHECK: fmaxp d0, v0.2d
  ret <1 x double> %reduced
}
declare <1 x float> @llvm.aarch64.neon.vpmin(<2 x float>)
; Pairwise FP minimum of a <2 x float> through llvm.aarch64.neon.vpmin;
; the expected instruction is the scalar FMINP reading the .2s arrangement.
define <1 x float> @test_fminp_v1f32(<2 x float> %pair) {
; CHECK: test_fminp_v1f32:
  %reduced = call <1 x float> @llvm.aarch64.neon.vpmin(<2 x float> %pair)
; CHECK: fminp s0, v0.2s
  ret <1 x float> %reduced
}
declare <1 x double> @llvm.aarch64.neon.vpminq(<2 x double>)
; Pairwise FP minimum of a <2 x double> through llvm.aarch64.neon.vpminq;
; the expected instruction is the scalar FMINP reading the .2d arrangement.
define <1 x double> @test_fminp_v1f64(<2 x double> %pair) {
; CHECK: test_fminp_v1f64:
  %reduced = call <1 x double> @llvm.aarch64.neon.vpminq(<2 x double> %pair)
; CHECK: fminp d0, v0.2d
  ret <1 x double> %reduced
}
declare <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float>)
; Pairwise FP maxNum of a <2 x float> through llvm.aarch64.neon.vpfmaxnm;
; the expected instruction is the scalar FMAXNMP reading the .2s arrangement.
define <1 x float> @test_fmaxnmp_v1f32(<2 x float> %pair) {
; CHECK: test_fmaxnmp_v1f32:
  %reduced = call <1 x float> @llvm.aarch64.neon.vpfmaxnm(<2 x float> %pair)
; CHECK: fmaxnmp s0, v0.2s
  ret <1 x float> %reduced
}
declare <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double>)
; Pairwise FP maxNum of a <2 x double> through llvm.aarch64.neon.vpfmaxnmq;
; the expected instruction is the scalar FMAXNMP reading the .2d arrangement.
define <1 x double> @test_fmaxnmp_v1f64(<2 x double> %pair) {
; CHECK: test_fmaxnmp_v1f64:
  %reduced = call <1 x double> @llvm.aarch64.neon.vpfmaxnmq(<2 x double> %pair)
; CHECK: fmaxnmp d0, v0.2d
  ret <1 x double> %reduced
}
declare <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float>)
; Pairwise FP minNum of a <2 x float> through llvm.aarch64.neon.vpfminnm;
; the expected instruction is the scalar FMINNMP reading the .2s arrangement.
define <1 x float> @test_fminnmp_v1f32(<2 x float> %pair) {
; CHECK: test_fminnmp_v1f32:
  %reduced = call <1 x float> @llvm.aarch64.neon.vpfminnm(<2 x float> %pair)
; CHECK: fminnmp s0, v0.2s
  ret <1 x float> %reduced
}
declare <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double>)
; Pairwise FP minNum of a <2 x double> through llvm.aarch64.neon.vpfminnmq;
; the expected instruction is the scalar FMINNMP reading the .2d arrangement.
define <1 x double> @test_fminnmp_v1f64(<2 x double> %pair) {
; CHECK: test_fminnmp_v1f64:
  %reduced = call <1 x double> @llvm.aarch64.neon.vpfminnmq(<2 x double> %pair)
; CHECK: fminnmp d0, v0.2d
  ret <1 x double> %reduced
}

View File

@ -0,0 +1,39 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
declare <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64>, <1 x i64>)
; llvm.arm.neon.vrshiftu.v1i64 must select scalar URSHL on D registers.
; The register pattern d[0-9]+ accepts any register number (d0-d31).
define <1 x i64> @test_urshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_urshl_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
; llvm.arm.neon.vrshifts.v1i64 must select scalar SRSHL on D registers.
; The register pattern d[0-9]+ accepts any register number (d0-d31).
define <1 x i64> @test_srshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_srshl_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
declare <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64>, <1 x i64>)
; llvm.aarch64.neon.vrshldu must select scalar URSHL on D registers.
; The register pattern d[0-9]+ accepts any register number (d0-d31).
define <1 x i64> @test_urshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_urshl_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshldu(<1 x i64> %lhs, <1 x i64> %rhs)
;CHECK: urshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
; llvm.aarch64.neon.vrshlds must select scalar SRSHL on D registers.
; The register pattern d[0-9]+ accepts any register number (d0-d31).
define <1 x i64> @test_srshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_srshl_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vrshlds(<1 x i64> %lhs, <1 x i64> %rhs)
;CHECK: srshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}

View File

@ -0,0 +1,171 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)
; Unsigned saturating add of <1 x i64> through llvm.arm.neon.vqaddu.v1i64;
; the expected instruction is the scalar UQADD on d0/d1.
define <1 x i64> @test_uqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqadd_v1i64:
  %sat = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: uqadd d0, d0, d1
  ret <1 x i64> %sat
}
; Signed saturating add of <1 x i64> through llvm.arm.neon.vqadds.v1i64;
; the expected instruction is the scalar SQADD on d0/d1.
define <1 x i64> @test_sqadd_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqadd_v1i64:
  %sat = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sqadd d0, d0, d1
  ret <1 x i64> %sat
}
declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)
; Unsigned saturating subtract of <1 x i64> through llvm.arm.neon.vqsubu.v1i64;
; the expected instruction is the scalar UQSUB on d0/d1.
define <1 x i64> @test_uqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqsub_v1i64:
  %sat = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: uqsub d0, d0, d1
  ret <1 x i64> %sat
}
; Signed saturating subtract of <1 x i64> through llvm.arm.neon.vqsubs.v1i64;
; the expected instruction is the scalar SQSUB on d0/d1.
define <1 x i64> @test_sqsub_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqsub_v1i64:
  %sat = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sqsub d0, d0, d1
  ret <1 x i64> %sat
}
declare <1 x i8> @llvm.aarch64.neon.vqaddu.v1i8(<1 x i8>, <1 x i8>)
declare <1 x i8> @llvm.aarch64.neon.vqadds.v1i8(<1 x i8>, <1 x i8>)
; llvm.aarch64.neon.vqaddu.v1i8 must select scalar UQADD on B registers.
; The register pattern b[0-9]+ accepts any register number (b0-b31).
define <1 x i8> @test_uqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
; CHECK: test_uqadd_v1i8_aarch64:
  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqaddu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
;CHECK: uqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
  ret <1 x i8> %tmp1
}
; llvm.aarch64.neon.vqadds.v1i8 must select scalar SQADD on B registers.
; The register pattern b[0-9]+ accepts any register number (b0-b31).
define <1 x i8> @test_sqadd_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
; CHECK: test_sqadd_v1i8_aarch64:
  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqadds.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
;CHECK: sqadd {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
  ret <1 x i8> %tmp1
}
declare <1 x i8> @llvm.aarch64.neon.vqsubu.v1i8(<1 x i8>, <1 x i8>)
declare <1 x i8> @llvm.aarch64.neon.vqsubs.v1i8(<1 x i8>, <1 x i8>)
; llvm.aarch64.neon.vqsubu.v1i8 must select scalar UQSUB on B registers.
; The register pattern b[0-9]+ accepts any register number (b0-b31).
define <1 x i8> @test_uqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
; CHECK: test_uqsub_v1i8_aarch64:
  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqsubu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
;CHECK: uqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
  ret <1 x i8> %tmp1
}
; llvm.aarch64.neon.vqsubs.v1i8 must select scalar SQSUB on B registers.
; The register pattern b[0-9]+ accepts any register number (b0-b31).
define <1 x i8> @test_sqsub_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
; CHECK: test_sqsub_v1i8_aarch64:
  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqsubs.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
;CHECK: sqsub {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
  ret <1 x i8> %tmp1
}
declare <1 x i16> @llvm.aarch64.neon.vqaddu.v1i16(<1 x i16>, <1 x i16>)
declare <1 x i16> @llvm.aarch64.neon.vqadds.v1i16(<1 x i16>, <1 x i16>)
; llvm.aarch64.neon.vqaddu.v1i16 must select scalar UQADD on H registers.
; The register pattern h[0-9]+ accepts any register number (h0-h31).
define <1 x i16> @test_uqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
; CHECK: test_uqadd_v1i16_aarch64:
  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqaddu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
;CHECK: uqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
  ret <1 x i16> %tmp1
}
; llvm.aarch64.neon.vqadds.v1i16 must select scalar SQADD on H registers.
; The register pattern h[0-9]+ accepts any register number (h0-h31).
define <1 x i16> @test_sqadd_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
; CHECK: test_sqadd_v1i16_aarch64:
  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqadds.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
;CHECK: sqadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
  ret <1 x i16> %tmp1
}
declare <1 x i16> @llvm.aarch64.neon.vqsubu.v1i16(<1 x i16>, <1 x i16>)
declare <1 x i16> @llvm.aarch64.neon.vqsubs.v1i16(<1 x i16>, <1 x i16>)
; llvm.aarch64.neon.vqsubu.v1i16 must select scalar UQSUB on H registers.
; The register pattern h[0-9]+ accepts any register number (h0-h31).
define <1 x i16> @test_uqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
; CHECK: test_uqsub_v1i16_aarch64:
  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqsubu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
;CHECK: uqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
  ret <1 x i16> %tmp1
}
; llvm.aarch64.neon.vqsubs.v1i16 must select scalar SQSUB on H registers.
; The register pattern h[0-9]+ accepts any register number (h0-h31).
define <1 x i16> @test_sqsub_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
; CHECK: test_sqsub_v1i16_aarch64:
  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqsubs.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
;CHECK: sqsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
  ret <1 x i16> %tmp1
}
declare <1 x i32> @llvm.aarch64.neon.vqaddu.v1i32(<1 x i32>, <1 x i32>)
declare <1 x i32> @llvm.aarch64.neon.vqadds.v1i32(<1 x i32>, <1 x i32>)
; llvm.aarch64.neon.vqaddu.v1i32 must select scalar UQADD on S registers.
; The register pattern s[0-9]+ accepts any register number (s0-s31).
define <1 x i32> @test_uqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
; CHECK: test_uqadd_v1i32_aarch64:
  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqaddu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
;CHECK: uqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
  ret <1 x i32> %tmp1
}
; llvm.aarch64.neon.vqadds.v1i32 must select scalar SQADD on S registers.
; The register pattern s[0-9]+ accepts any register number (s0-s31).
define <1 x i32> @test_sqadd_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
; CHECK: test_sqadd_v1i32_aarch64:
  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqadds.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
;CHECK: sqadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
  ret <1 x i32> %tmp1
}
declare <1 x i32> @llvm.aarch64.neon.vqsubu.v1i32(<1 x i32>, <1 x i32>)
declare <1 x i32> @llvm.aarch64.neon.vqsubs.v1i32(<1 x i32>, <1 x i32>)
; llvm.aarch64.neon.vqsubu.v1i32 must select scalar UQSUB on S registers.
; The register pattern s[0-9]+ accepts any register number (s0-s31).
define <1 x i32> @test_uqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
; CHECK: test_uqsub_v1i32_aarch64:
  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqsubu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
;CHECK: uqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
  ret <1 x i32> %tmp1
}
; llvm.aarch64.neon.vqsubs.v1i32 must select scalar SQSUB on S registers.
; The register pattern s[0-9]+ accepts any register number (s0-s31).
define <1 x i32> @test_sqsub_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
; CHECK: test_sqsub_v1i32_aarch64:
  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqsubs.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
;CHECK: sqsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
  ret <1 x i32> %tmp1
}
declare <1 x i64> @llvm.aarch64.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.aarch64.neon.vqadds.v1i64(<1 x i64>, <1 x i64>)
; llvm.aarch64.neon.vqaddu.v1i64 must select scalar UQADD on D registers.
; The register pattern d[0-9]+ accepts any register number (d0-d31).
define <1 x i64> @test_uqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqadd_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
;CHECK: uqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
; llvm.aarch64.neon.vqadds.v1i64 must select scalar SQADD on D registers.
; The register pattern d[0-9]+ accepts any register number (d0-d31).
define <1 x i64> @test_sqadd_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqadd_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
;CHECK: sqadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
declare <1 x i64> @llvm.aarch64.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.aarch64.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>)
; llvm.aarch64.neon.vqsubu.v1i64 must select scalar UQSUB on D registers.
; The register pattern d[0-9]+ accepts any register number (d0-d31).
define <1 x i64> @test_uqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqsub_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
;CHECK: uqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
; llvm.aarch64.neon.vqsubs.v1i64 must select scalar SQSUB on D registers.
; The register pattern d[0-9]+ accepts any register number (d0-d31).
define <1 x i64> @test_sqsub_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqsub_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
;CHECK: sqsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}

View File

@ -0,0 +1,94 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
declare <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64>, <1 x i64>)
; Unsigned saturating rounding shift left on <1 x i64> through the
; llvm.arm.neon.vqrshiftu intrinsic; the output is expected to contain uqrshl
; on 64-bit scalar (d) registers.
define <1 x i64> @test_uqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqrshl_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
; Signed saturating rounding shift left on <1 x i64> through the
; llvm.arm.neon.vqrshifts intrinsic; the output is expected to contain sqrshl
; on 64-bit scalar (d) registers.
define <1 x i64> @test_sqrshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqrshl_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
declare <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8>, <1 x i8>)
declare <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8>, <1 x i8>)
; Unsigned saturating rounding shift left on <1 x i8> through the AArch64
; intrinsic; the output is expected to contain uqrshl on 8-bit scalar (b)
; registers.
define <1 x i8> @test_uqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
; CHECK: test_uqrshl_v1i8_aarch64:
  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: uqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
  ret <1 x i8> %tmp1
}
; Signed saturating rounding shift left on <1 x i8> through the AArch64
; intrinsic; the output is expected to contain sqrshl on 8-bit scalar (b)
; registers.
define <1 x i8> @test_sqrshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
; CHECK: test_sqrshl_v1i8_aarch64:
  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqrshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: sqrshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
  ret <1 x i8> %tmp1
}
declare <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16>, <1 x i16>)
declare <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16>, <1 x i16>)
; Unsigned saturating rounding shift left on <1 x i16> through the AArch64
; intrinsic; the output is expected to contain uqrshl on 16-bit scalar (h)
; registers.
define <1 x i16> @test_uqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
; CHECK: test_uqrshl_v1i16_aarch64:
  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: uqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
  ret <1 x i16> %tmp1
}
; Signed saturating rounding shift left on <1 x i16> through the AArch64
; intrinsic; the output is expected to contain sqrshl on 16-bit scalar (h)
; registers.
define <1 x i16> @test_sqrshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
; CHECK: test_sqrshl_v1i16_aarch64:
  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqrshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: sqrshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
  ret <1 x i16> %tmp1
}
declare <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32>, <1 x i32>)
declare <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32>, <1 x i32>)
; Unsigned saturating rounding shift left on <1 x i32> through the AArch64
; intrinsic; the output is expected to contain uqrshl on 32-bit scalar (s)
; registers.
define <1 x i32> @test_uqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
; CHECK: test_uqrshl_v1i32_aarch64:
  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: uqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
  ret <1 x i32> %tmp1
}
; Signed saturating rounding shift left on <1 x i32> through the AArch64
; intrinsic; the output is expected to contain sqrshl on 32-bit scalar (s)
; registers.
define <1 x i32> @test_sqrshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
; CHECK: test_sqrshl_v1i32_aarch64:
  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqrshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: sqrshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
  ret <1 x i32> %tmp1
}
declare <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64>, <1 x i64>)
; Unsigned saturating rounding shift left on <1 x i64> through the AArch64
; intrinsic; the output is expected to contain uqrshl on 64-bit scalar (d)
; registers.
define <1 x i64> @test_uqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqrshl_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: uqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
; Signed saturating rounding shift left on <1 x i64> through the AArch64
; intrinsic; the output is expected to contain sqrshl on 64-bit scalar (d)
; registers.
define <1 x i64> @test_sqrshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqrshl_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqrshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: sqrshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}

View File

@ -0,0 +1,88 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
declare <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64>, <1 x i64>)
; Unsigned saturating shift left on <1 x i64> through the
; llvm.arm.neon.vqshiftu intrinsic; the output is expected to contain uqshl on
; 64-bit scalar (d) registers.
define <1 x i64> @test_uqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqshl_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
; Signed saturating shift left on <1 x i64> through the
; llvm.arm.neon.vqshifts intrinsic; the output is expected to contain sqshl on
; 64-bit scalar (d) registers.
define <1 x i64> @test_sqshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqshl_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
declare <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8>, <1 x i8>)
declare <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8>, <1 x i8>)
; Unsigned saturating shift left on <1 x i8> through the AArch64 intrinsic;
; the output is expected to contain uqshl on 8-bit scalar (b) registers.
define <1 x i8> @test_uqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
; CHECK: test_uqshl_v1i8_aarch64:
  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshlu.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: uqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
  ret <1 x i8> %tmp1
}
; Signed saturating shift left on <1 x i8> through the AArch64 intrinsic;
; the output is expected to contain sqshl on 8-bit scalar (b) registers.
define <1 x i8> @test_sqshl_v1i8_aarch64(<1 x i8> %lhs, <1 x i8> %rhs) {
; CHECK: test_sqshl_v1i8_aarch64:
  %tmp1 = call <1 x i8> @llvm.aarch64.neon.vqshls.v1i8(<1 x i8> %lhs, <1 x i8> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: sqshl {{b[0-9]+}}, {{b[0-9]+}}, {{b[0-9]+}}
  ret <1 x i8> %tmp1
}
declare <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16>, <1 x i16>)
declare <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16>, <1 x i16>)
; Unsigned saturating shift left on <1 x i16> through the AArch64 intrinsic;
; the output is expected to contain uqshl on 16-bit scalar (h) registers.
define <1 x i16> @test_uqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
; CHECK: test_uqshl_v1i16_aarch64:
  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshlu.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: uqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
  ret <1 x i16> %tmp1
}
; Signed saturating shift left on <1 x i16> through the AArch64 intrinsic;
; the output is expected to contain sqshl on 16-bit scalar (h) registers.
define <1 x i16> @test_sqshl_v1i16_aarch64(<1 x i16> %lhs, <1 x i16> %rhs) {
; CHECK: test_sqshl_v1i16_aarch64:
  %tmp1 = call <1 x i16> @llvm.aarch64.neon.vqshls.v1i16(<1 x i16> %lhs, <1 x i16> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: sqshl {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
  ret <1 x i16> %tmp1
}
declare <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32>, <1 x i32>)
declare <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32>, <1 x i32>)
; Unsigned saturating shift left on <1 x i32> through the AArch64 intrinsic;
; the output is expected to contain uqshl on 32-bit scalar (s) registers.
define <1 x i32> @test_uqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
; CHECK: test_uqshl_v1i32_aarch64:
  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshlu.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: uqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
  ret <1 x i32> %tmp1
}
; Signed saturating shift left on <1 x i32> through the AArch64 intrinsic;
; the output is expected to contain sqshl on 32-bit scalar (s) registers.
define <1 x i32> @test_sqshl_v1i32_aarch64(<1 x i32> %lhs, <1 x i32> %rhs) {
; CHECK: test_sqshl_v1i32_aarch64:
  %tmp1 = call <1 x i32> @llvm.aarch64.neon.vqshls.v1i32(<1 x i32> %lhs, <1 x i32> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: sqshl {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
  ret <1 x i32> %tmp1
}
declare <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64>, <1 x i64>)
; Unsigned saturating shift left on <1 x i64> through the AArch64 intrinsic;
; the output is expected to contain uqshl on 64-bit scalar (d) registers.
define <1 x i64> @test_uqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_uqshl_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshlu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: uqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
; Signed saturating shift left on <1 x i64> through the AArch64 intrinsic;
; the output is expected to contain sqshl on 64-bit scalar (d) registers.
define <1 x i64> @test_sqshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sqshl_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vqshls.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: sqshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}

View File

@ -0,0 +1,38 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>)
; Unsigned shift left on <1 x i64> through the llvm.arm.neon.vshiftu
; intrinsic; the output is expected to contain ushl on 64-bit scalar (d)
; registers.
define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_ushl_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
; Signed shift left on <1 x i64> through the llvm.arm.neon.vshifts intrinsic;
; the output is expected to contain sshl on 64-bit scalar (d) registers.
define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sshl_v1i64:
  %tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
declare <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64>, <1 x i64>)
; Unsigned shift left on <1 x i64> through the llvm.aarch64.neon.vshldu
; intrinsic; the output is expected to contain ushl on 64-bit scalar (d)
; registers.
define <1 x i64> @test_ushl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_ushl_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshldu(<1 x i64> %lhs, <1 x i64> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: ushl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}
; Signed shift left on <1 x i64> through the llvm.aarch64.neon.vshlds
; intrinsic; the output is expected to contain sshl on 64-bit scalar (d)
; registers.
define <1 x i64> @test_sshl_v1i64_aarch64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sshl_v1i64_aarch64:
  %tmp1 = call <1 x i64> @llvm.aarch64.neon.vshlds(<1 x i64> %lhs, <1 x i64> %rhs)
; [0-9]+ accepts any register number; the allocator's choice is not pinned.
; CHECK: sshl {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
  ret <1 x i64> %tmp1
}

View File

@ -102,23 +102,6 @@ define <4 x i32> @test_sshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
ret <4 x i32> %tmp1 ret <4 x i32> %tmp1
} }
declare <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64>, <1 x i64>)
declare <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64>, <1 x i64>)
define <1 x i64> @test_ushl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_ushl_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: ushl d0, d0, d1
ret <1 x i64> %tmp1
}
define <1 x i64> @test_sshl_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
; CHECK: test_sshl_v1i64:
%tmp1 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
; CHECK: sshl d0, d0, d1
ret <1 x i64> %tmp1
}
declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64>, <2 x i64>)
declare <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64>, <2 x i64>) declare <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64>, <2 x i64>)

View File

@ -32,4 +32,3 @@
// CHECK: faddp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0x22,0x2e] // CHECK: faddp v0.2s, v1.2s, v2.2s // encoding: [0x20,0xd4,0x22,0x2e]
// CHECK: faddp v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0x22,0x6e] // CHECK: faddp v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0x22,0x6e]
// CHECK: faddp v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0x62,0x6e] // CHECK: faddp v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0x62,0x6e]

View File

@ -64,19 +64,5 @@
// CHECK: fsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0xa2,0x4e] // CHECK: fsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0xd4,0xa2,0x4e]
// CHECK: fsub v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0xe2,0x4e] // CHECK: fsub v0.2d, v1.2d, v2.2d // encoding: [0x20,0xd4,0xe2,0x4e]
//------------------------------------------------------------------------------
// Scalar Integer Add
//------------------------------------------------------------------------------
add d31, d0, d16
// CHECK: add d31, d0, d16 // encoding: [0x1f,0x84,0xf0,0x5e]
//------------------------------------------------------------------------------
// Scalar Integer Sub
//------------------------------------------------------------------------------
sub d1, d7, d8
// CHECK: sub d1, d7, d8 // encoding: [0xe1,0x84,0xe8,0x7e]

View File

@ -2747,3 +2747,105 @@
// CHECK-ERROR: rsubhn2 v0.4s, v1.2d, v2.2s // CHECK-ERROR: rsubhn2 v0.4s, v1.2d, v2.2s
// CHECK-ERROR: ^ // CHECK-ERROR: ^
//----------------------------------------------------------------------
// Scalar Reduce Add Pairwise (Integer)
//----------------------------------------------------------------------
// invalid vector types
addp s0, d1.2d
addp d0, d1.2s
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: addp s0, d1.2d
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: addp d0, d1.2s
// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Scalar Reduce Add Pairwise (Floating Point)
//----------------------------------------------------------------------
// invalid vector types
faddp s0, d1.2d
faddp d0, d1.2s
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: faddp s0, d1.2d
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: faddp d0, d1.2s
// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Scalar Reduce Maximum Pairwise (Floating Point)
//----------------------------------------------------------------------
// mismatched and invalid vector types
fmaxp s0, v1.2d
fmaxp d31, v2.2s
fmaxp h3, v2.2s
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmaxp s0, v1.2d
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmaxp d31, v2.2s
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmaxp h3, v2.2s
// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Scalar Reduce Minimum Pairwise (Floating Point)
//----------------------------------------------------------------------
// mismatched and invalid vector types
fminp s0, v1.4h
fminp d31, v2.8h
fminp b3, v2.2s
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fminp s0, v1.4h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fminp d31, v2.8h
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fminp b3, v2.2s
// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Scalar Reduce maxNum Pairwise (Floating Point)
//----------------------------------------------------------------------
// mismatched and invalid vector types
fmaxnmp s0, v1.8b
fmaxnmp d31, v2.16b
fmaxnmp v1.2s, v2.2s
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmaxnmp s0, v1.8b
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fmaxnmp d31, v2.16b
// CHECK-ERROR: ^
// CHECK-ERROR: error: too few operands for instruction
// CHECK-ERROR: fmaxnmp v1.2s, v2.2s
// CHECK-ERROR: ^
//----------------------------------------------------------------------
// Scalar Reduce minNum Pairwise (Floating Point)
//----------------------------------------------------------------------
// mismatched and invalid vector types
fminnmp s0, v1.2d
fminnmp d31, v2.4s
fminnmp v1.4s, v2.2d
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fminnmp s0, v1.2d
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fminnmp d31, v2.4s
// CHECK-ERROR: ^
// CHECK-ERROR: error: invalid operand for instruction
// CHECK-ERROR: fminnmp v1.4s, v2.2d
// CHECK-ERROR: ^

View File

@ -41,17 +41,5 @@
// CHECK: urshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x54,0xa2,0x6e] // CHECK: urshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x54,0xa2,0x6e]
// CHECK: urshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x54,0xe2,0x6e] // CHECK: urshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x54,0xe2,0x6e]
//------------------------------------------------------------------------------
// Scalar Integer Rounding Shift Lef (Signed)
//------------------------------------------------------------------------------
srshl d17, d31, d8
// CHECK: srshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x5e]
//------------------------------------------------------------------------------
// Scalar Integer Rounding Shift Lef (Unsigned)
//------------------------------------------------------------------------------
urshl d17, d31, d8
// CHECK: urshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x7e]

View File

@ -79,55 +79,4 @@
// CHECK: uqsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0x2c,0xa2,0x6e] // CHECK: uqsub v0.4s, v1.4s, v2.4s // encoding: [0x20,0x2c,0xa2,0x6e]
// CHECK: uqsub v0.2d, v1.2d, v2.2d // encoding: [0x20,0x2c,0xe2,0x6e] // CHECK: uqsub v0.2d, v1.2d, v2.2d // encoding: [0x20,0x2c,0xe2,0x6e]
//------------------------------------------------------------------------------
// Scalar Integer Saturating Add (Signed)
//------------------------------------------------------------------------------
sqadd b0, b1, b2
sqadd h10, h11, h12
sqadd s20, s21, s2
sqadd d17, d31, d8
// CHECK: sqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x5e]
// CHECK: sqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x5e]
// CHECK: sqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x5e]
// CHECK: sqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x5e]
//------------------------------------------------------------------------------
// Scalar Integer Saturating Add (Unsigned)
//------------------------------------------------------------------------------
uqadd b0, b1, b2
uqadd h10, h11, h12
uqadd s20, s21, s2
uqadd d17, d31, d8
// CHECK: uqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x7e]
// CHECK: uqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x7e]
// CHECK: uqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x7e]
// CHECK: uqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x7e]
//------------------------------------------------------------------------------
// Scalar Integer Saturating Sub (Signed)
//------------------------------------------------------------------------------
sqsub b0, b1, b2
sqsub h10, h11, h12
sqsub s20, s21, s2
sqsub d17, d31, d8
// CHECK: sqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x5e]
// CHECK: sqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x5e]
// CHECK: sqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x5e]
// CHECK: sqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x5e]
//------------------------------------------------------------------------------
// Scalar Integer Saturating Sub (Unsigned)
//------------------------------------------------------------------------------
uqsub b0, b1, b2
uqsub h10, h11, h12
uqsub s20, s21, s2
uqsub d17, d31, d8
// CHECK: uqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x7e]
// CHECK: uqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x7e]
// CHECK: uqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x7e]
// CHECK: uqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x7e]

View File

@ -41,30 +41,3 @@
// CHECK: uqrshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x5c,0xa2,0x6e] // CHECK: uqrshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x5c,0xa2,0x6e]
// CHECK: uqrshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x5c,0xe2,0x6e] // CHECK: uqrshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x5c,0xe2,0x6e]
//------------------------------------------------------------------------------
// Scalar Integer Saturating Rounding Shift Lef (Signed)
//------------------------------------------------------------------------------
sqrshl b0, b1, b2
sqrshl h10, h11, h12
sqrshl s20, s21, s2
sqrshl d17, d31, d8
// CHECK: sqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x5e]
// CHECK: sqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x5e]
// CHECK: sqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x5e]
// CHECK: sqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x5e]
//------------------------------------------------------------------------------
// Scalar Integer Saturating Rounding Shift Lef (Unsigned)
//------------------------------------------------------------------------------
uqrshl b0, b1, b2
uqrshl h10, h11, h12
uqrshl s20, s21, s2
uqrshl d17, d31, d8
// CHECK: uqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x7e]
// CHECK: uqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x7e]
// CHECK: uqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x7e]
// CHECK: uqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x7e]

View File

@ -41,29 +41,3 @@
// CHECK: uqshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x4c,0xa2,0x6e] // CHECK: uqshl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x4c,0xa2,0x6e]
// CHECK: uqshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x4c,0xe2,0x6e] // CHECK: uqshl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x4c,0xe2,0x6e]
//------------------------------------------------------------------------------
// Scalar Integer Saturating Shift Lef (Signed)
//------------------------------------------------------------------------------
sqshl b0, b1, b2
sqshl h10, h11, h12
sqshl s20, s21, s2
sqshl d17, d31, d8
// CHECK: sqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x5e]
// CHECK: sqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x5e]
// CHECK: sqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x5e]
// CHECK: sqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x5e]
//------------------------------------------------------------------------------
// Scalar Integer Saturating Shift Lef (Unsigned)
//------------------------------------------------------------------------------
uqshl b0, b1, b2
uqshl h10, h11, h12
uqshl s20, s21, s2
uqshl d17, d31, d8
// CHECK: uqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x7e]
// CHECK: uqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x7e]
// CHECK: uqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x7e]
// CHECK: uqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x7e]

View File

@ -0,0 +1,16 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
//------------------------------------------------------------------------------
// Scalar Integer Add
//------------------------------------------------------------------------------
add d31, d0, d16
// CHECK: add d31, d0, d16 // encoding: [0x1f,0x84,0xf0,0x5e]
//------------------------------------------------------------------------------
// Scalar Integer Sub
//------------------------------------------------------------------------------
sub d1, d7, d8
// CHECK: sub d1, d7, d8 // encoding: [0xe1,0x84,0xe8,0x7e]

View File

@ -0,0 +1,16 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
//----------------------------------------------------------------------
// Scalar Reduce Add Pairwise (Integer)
//----------------------------------------------------------------------
addp d0, v1.2d
// CHECK: addp d0, v1.2d // encoding: [0x20,0xb8,0xf1,0x5e]
//----------------------------------------------------------------------
// Scalar Reduce Add Pairwise (Floating Point)
//----------------------------------------------------------------------
faddp d20, v1.2d
// CHECK: faddp d20, v1.2d // encoding: [0x34,0xd8,0x70,0x7e]

View File

@ -0,0 +1,17 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
//------------------------------------------------------------------------------
// Scalar Integer Rounding Shift Left (Signed)
//------------------------------------------------------------------------------
srshl d17, d31, d8
// CHECK: srshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x5e]
//------------------------------------------------------------------------------
// Scalar Integer Rounding Shift Left (Unsigned)
//------------------------------------------------------------------------------
urshl d17, d31, d8
// CHECK: urshl d17, d31, d8 // encoding: [0xf1,0x57,0xe8,0x7e]

View File

@ -0,0 +1,54 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
//------------------------------------------------------------------------------
// Scalar Integer Saturating Add (Signed)
//------------------------------------------------------------------------------
sqadd b0, b1, b2
sqadd h10, h11, h12
sqadd s20, s21, s2
sqadd d17, d31, d8
// CHECK: sqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x5e]
// CHECK: sqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x5e]
// CHECK: sqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x5e]
// CHECK: sqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x5e]
//------------------------------------------------------------------------------
// Scalar Integer Saturating Add (Unsigned)
//------------------------------------------------------------------------------
uqadd b0, b1, b2
uqadd h10, h11, h12
uqadd s20, s21, s2
uqadd d17, d31, d8
// CHECK: uqadd b0, b1, b2 // encoding: [0x20,0x0c,0x22,0x7e]
// CHECK: uqadd h10, h11, h12 // encoding: [0x6a,0x0d,0x6c,0x7e]
// CHECK: uqadd s20, s21, s2 // encoding: [0xb4,0x0e,0xa2,0x7e]
// CHECK: uqadd d17, d31, d8 // encoding: [0xf1,0x0f,0xe8,0x7e]
//------------------------------------------------------------------------------
// Scalar Integer Saturating Sub (Signed)
//------------------------------------------------------------------------------
sqsub b0, b1, b2
sqsub h10, h11, h12
sqsub s20, s21, s2
sqsub d17, d31, d8
// CHECK: sqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x5e]
// CHECK: sqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x5e]
// CHECK: sqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x5e]
// CHECK: sqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x5e]
//------------------------------------------------------------------------------
// Scalar Integer Saturating Sub (Unsigned)
//------------------------------------------------------------------------------
uqsub b0, b1, b2
uqsub h10, h11, h12
uqsub s20, s21, s2
uqsub d17, d31, d8
// CHECK: uqsub b0, b1, b2 // encoding: [0x20,0x2c,0x22,0x7e]
// CHECK: uqsub h10, h11, h12 // encoding: [0x6a,0x2d,0x6c,0x7e]
// CHECK: uqsub s20, s21, s2 // encoding: [0xb4,0x2e,0xa2,0x7e]
// CHECK: uqsub d17, d31, d8 // encoding: [0xf1,0x2f,0xe8,0x7e]

View File

@ -0,0 +1,28 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
//------------------------------------------------------------------------------
// Scalar Integer Saturating Rounding Shift Left (Signed)
//------------------------------------------------------------------------------
sqrshl b0, b1, b2
sqrshl h10, h11, h12
sqrshl s20, s21, s2
sqrshl d17, d31, d8
// CHECK: sqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x5e]
// CHECK: sqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x5e]
// CHECK: sqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x5e]
// CHECK: sqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x5e]
//------------------------------------------------------------------------------
// Scalar Integer Saturating Rounding Shift Left (Unsigned)
//------------------------------------------------------------------------------
uqrshl b0, b1, b2
uqrshl h10, h11, h12
uqrshl s20, s21, s2
uqrshl d17, d31, d8
// CHECK: uqrshl b0, b1, b2 // encoding: [0x20,0x5c,0x22,0x7e]
// CHECK: uqrshl h10, h11, h12 // encoding: [0x6a,0x5d,0x6c,0x7e]
// CHECK: uqrshl s20, s21, s2 // encoding: [0xb4,0x5e,0xa2,0x7e]
// CHECK: uqrshl d17, d31, d8 // encoding: [0xf1,0x5f,0xe8,0x7e]

View File

@ -0,0 +1,29 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
//------------------------------------------------------------------------------
// Scalar Integer Saturating Shift Left (Signed)
//------------------------------------------------------------------------------
sqshl b0, b1, b2
sqshl h10, h11, h12
sqshl s20, s21, s2
sqshl d17, d31, d8
// CHECK: sqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x5e]
// CHECK: sqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x5e]
// CHECK: sqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x5e]
// CHECK: sqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x5e]
//------------------------------------------------------------------------------
// Scalar Integer Saturating Shift Left (Unsigned)
//------------------------------------------------------------------------------
uqshl b0, b1, b2
uqshl h10, h11, h12
uqshl s20, s21, s2
uqshl d17, d31, d8
// CHECK: uqshl b0, b1, b2 // encoding: [0x20,0x4c,0x22,0x7e]
// CHECK: uqshl h10, h11, h12 // encoding: [0x6a,0x4d,0x6c,0x7e]
// CHECK: uqshl s20, s21, s2 // encoding: [0xb4,0x4e,0xa2,0x7e]
// CHECK: uqshl d17, d31, d8 // encoding: [0xf1,0x4f,0xe8,0x7e]

View File

@ -0,0 +1,16 @@
// RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+neon -show-encoding < %s | FileCheck %s
//------------------------------------------------------------------------------
// Scalar Integer Shift Left (Signed)
//------------------------------------------------------------------------------
sshl d17, d31, d8
// CHECK: sshl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x5e]
//------------------------------------------------------------------------------
// Scalar Integer Shift Left (Unsigned)
//------------------------------------------------------------------------------
ushl d17, d31, d8
// CHECK: ushl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x7e]

View File

@ -41,20 +41,6 @@
// CHECK: ushl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x44,0xa2,0x6e] // CHECK: ushl v0.4s, v1.4s, v2.4s // encoding: [0x20,0x44,0xa2,0x6e]
// CHECK: ushl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x44,0xe2,0x6e] // CHECK: ushl v0.2d, v1.2d, v2.2d // encoding: [0x20,0x44,0xe2,0x6e]
//------------------------------------------------------------------------------
// Scalar Integer Shift Lef (Signed)
//------------------------------------------------------------------------------
sshl d17, d31, d8
// CHECK: sshl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x5e]
//------------------------------------------------------------------------------
// Scalar Integer Shift Lef (Unsigned)
//------------------------------------------------------------------------------
ushl d17, d31, d8
// CHECK: ushl d17, d31, d8 // encoding: [0xf1,0x47,0xe8,0x7e]
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
// Vector Integer Shift Left by Immediate // Vector Integer Shift Left by Immediate
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------

View File

@ -75,6 +75,7 @@ std::string llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v16i1: return "MVT::v16i1"; case MVT::v16i1: return "MVT::v16i1";
case MVT::v32i1: return "MVT::v32i1"; case MVT::v32i1: return "MVT::v32i1";
case MVT::v64i1: return "MVT::v64i1"; case MVT::v64i1: return "MVT::v64i1";
case MVT::v1i8: return "MVT::v1i8";
case MVT::v2i8: return "MVT::v2i8"; case MVT::v2i8: return "MVT::v2i8";
case MVT::v4i8: return "MVT::v4i8"; case MVT::v4i8: return "MVT::v4i8";
case MVT::v8i8: return "MVT::v8i8"; case MVT::v8i8: return "MVT::v8i8";
@ -99,10 +100,12 @@ std::string llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v16i64: return "MVT::v16i64"; case MVT::v16i64: return "MVT::v16i64";
case MVT::v2f16: return "MVT::v2f16"; case MVT::v2f16: return "MVT::v2f16";
case MVT::v8f16: return "MVT::v8f16"; case MVT::v8f16: return "MVT::v8f16";
case MVT::v1f32: return "MVT::v1f32";
case MVT::v2f32: return "MVT::v2f32"; case MVT::v2f32: return "MVT::v2f32";
case MVT::v4f32: return "MVT::v4f32"; case MVT::v4f32: return "MVT::v4f32";
case MVT::v8f32: return "MVT::v8f32"; case MVT::v8f32: return "MVT::v8f32";
case MVT::v16f32: return "MVT::v16f32"; case MVT::v16f32: return "MVT::v16f32";
case MVT::v1f64: return "MVT::v1f64";
case MVT::v2f64: return "MVT::v2f64"; case MVT::v2f64: return "MVT::v2f64";
case MVT::v4f64: return "MVT::v4f64"; case MVT::v4f64: return "MVT::v4f64";
case MVT::v8f64: return "MVT::v8f64"; case MVT::v8f64: return "MVT::v8f64";

View File

@ -260,7 +260,8 @@ enum IIT_Info {
IIT_STRUCT5 = 22, IIT_STRUCT5 = 22,
IIT_EXTEND_VEC_ARG = 23, IIT_EXTEND_VEC_ARG = 23,
IIT_TRUNC_VEC_ARG = 24, IIT_TRUNC_VEC_ARG = 24,
IIT_ANYPTR = 25 IIT_ANYPTR = 25,
IIT_V1 = 26
}; };
@ -350,6 +351,7 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
EVT VVT = VT; EVT VVT = VT;
switch (VVT.getVectorNumElements()) { switch (VVT.getVectorNumElements()) {
default: PrintFatalError("unhandled vector type width in intrinsic!"); default: PrintFatalError("unhandled vector type width in intrinsic!");
case 1: Sig.push_back(IIT_V1); break;
case 2: Sig.push_back(IIT_V2); break; case 2: Sig.push_back(IIT_V2); break;
case 4: Sig.push_back(IIT_V4); break; case 4: Sig.push_back(IIT_V4); break;
case 8: Sig.push_back(IIT_V8); break; case 8: Sig.push_back(IIT_V8); break;