From 7b4b522ec8d5b9fc947435f4eb15928aad2bce45 Mon Sep 17 00:00:00 2001
From: Tim Northover
Date: Fri, 18 Apr 2014 12:50:58 +0000
Subject: [PATCH] AArch64/ARM64: improve spotting of EXT instructions from VECTOR_SHUFFLE.

We couldn't cope if the first mask element was UNDEF before, which
isn't ideal.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206588 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM64/ARM64ISelLowering.cpp        |  12 +-
 .../CodeGen/AArch64/neon-copyPhysReg-tuple.ll |   1 +
 test/CodeGen/AArch64/neon-extract.ll          | 107 +++++++++---------
 .../ARM64/aarch64-neon-copyPhysReg-tuple.ll   |  48 ++++++++
 4 files changed, 110 insertions(+), 58 deletions(-)
 create mode 100644 test/CodeGen/ARM64/aarch64-neon-copyPhysReg-tuple.ll

diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp
index efd979b5e2e..503e44b0dc0 100644
--- a/lib/Target/ARM64/ARM64ISelLowering.cpp
+++ b/lib/Target/ARM64/ARM64ISelLowering.cpp
@@ -3945,11 +3945,13 @@ static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
   unsigned NumElts = VT.getVectorNumElements();
   ReverseEXT = false;
 
-  // Assume that the first shuffle index is not UNDEF. Fail if it is.
-  if (M[0] < 0)
-    return false;
-
-  Imm = M[0];
+  // Look for the first non-undef choice and count backwards from
+  // that. E.g. <-1, -1, 3, ...> means that an EXT must start at 3 - 2 = 1. This
+  // guarantees that at least one index is correct.
+  const int *FirstRealElt =
+      std::find_if(M.begin(), M.end(), [](int Elt) { return Elt >= 0; });
+  assert(FirstRealElt != M.end() && "Completely UNDEF shuffle? Why bother?");
+  Imm = *FirstRealElt - (FirstRealElt - M.begin());
 
   // If this is a VEXT shuffle, the immediate value is the index of the first
   // element. The other shuffle indices must be the successive elements after
diff --git a/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll b/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll
index 8b9fde3f851..9f2f876d340 100644
--- a/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll
+++ b/test/CodeGen/AArch64/neon-copyPhysReg-tuple.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+; arm64 has a separate copy due to intrinsics
 
 define <4 x i32> @copyTuple.QPair(i8* %a, i8* %b) {
 ; CHECK-LABEL: copyTuple.QPair:
diff --git a/test/CodeGen/AArch64/neon-extract.ll b/test/CodeGen/AArch64/neon-extract.ll
index cddc2267d77..f16b0365c8e 100644
--- a/test/CodeGen/AArch64/neon-extract.ll
+++ b/test/CodeGen/AArch64/neon-extract.ll
@@ -1,221 +1,222 @@
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
 
 define <8 x i8> @test_vext_s8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vext_s8:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2
+; CHECK-LABEL: test_vext_s8:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x2|2}}
 entry:
   %vext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
   ret <8 x i8> %vext
 }
 
 define <4 x i16> @test_vext_s16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vext_s16:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x6
+; CHECK-LABEL: test_vext_s16:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x6|6}}
 entry:
   %vext = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
   ret <4 x i16> %vext
 }
 
 define <2 x i32> @test_vext_s32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vext_s32:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4
+; CHECK-LABEL: test_vext_s32:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x4|4}}
 entry:
   %vext = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 2>
   ret <2 x i32> %vext
 }
 
 define <1 x i64> @test_vext_s64(<1 x i64> %a, <1 x i64> %b) {
-; CHECK: test_vext_s64:
+; CHECK-LABEL: test_vext_s64:
 entry:
   %vext = shufflevector <1 x i64> %a, <1 x i64> %b, <1 x i32> <i32 0>
   ret <1 x i64> %vext
 }
 
 define <16 x i8> @test_vextq_s8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vextq_s8:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x2
+; CHECK-LABEL: test_vextq_s8:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x2|2}}
 entry:
   %vext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
   ret <16 x i8> %vext
 }
 
 define <8 x i16> @test_vextq_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vextq_s16:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6
+; CHECK-LABEL: test_vextq_s16:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x6|6}}
 entry:
   %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
   ret <8 x i16> %vext
 }
 
 define <4 x i32> @test_vextq_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vextq_s32:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x4
+; CHECK-LABEL: test_vextq_s32:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x4|4}}
 entry:
   %vext = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
   ret <4 x i32> %vext
 }
 
 define <2 x i64> @test_vextq_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vextq_s64:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8
+; CHECK-LABEL: test_vextq_s64:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x8|8}}
 entry:
   %vext = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
   ret <2 x i64> %vext
 }
 
 define <8 x i8> @test_vext_u8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vext_u8:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2
+; CHECK-LABEL: test_vext_u8:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x2|2}}
 entry:
   %vext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
   ret <8 x i8> %vext
 }
 
 define <4 x i16> @test_vext_u16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vext_u16:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x6
+; CHECK-LABEL: test_vext_u16:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x6|6}}
 entry:
   %vext = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
   ret <4 x i16> %vext
 }
 
 define <2 x i32> @test_vext_u32(<2 x i32> %a, <2 x i32> %b) {
-; CHECK: test_vext_u32:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4
+; CHECK-LABEL: test_vext_u32:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x4|4}}
 entry:
   %vext = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 2>
   ret <2 x i32> %vext
 }
 
 define <1 x i64> @test_vext_u64(<1 x i64> %a, <1 x i64> %b) {
-; CHECK: test_vext_u64:
+; CHECK-LABEL: test_vext_u64:
 entry:
   %vext = shufflevector <1 x i64> %a, <1 x i64> %b, <1 x i32> <i32 0>
   ret <1 x i64> %vext
 }
 
 define <16 x i8> @test_vextq_u8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vextq_u8:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x2
+; CHECK-LABEL: test_vextq_u8:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x2|2}}
 entry:
   %vext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
   ret <16 x i8> %vext
 }
 
 define <8 x i16> @test_vextq_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vextq_u16:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6
+; CHECK-LABEL: test_vextq_u16:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x6|6}}
 entry:
   %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
   ret <8 x i16> %vext
 }
 
 define <4 x i32> @test_vextq_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK: test_vextq_u32:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x4
+; CHECK-LABEL: test_vextq_u32:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x4|4}}
 entry:
   %vext = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
   ret <4 x i32> %vext
 }
 
 define <2 x i64> @test_vextq_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK: test_vextq_u64:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8
+; CHECK-LABEL: test_vextq_u64:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x8|8}}
 entry:
   %vext = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
   ret <2 x i64> %vext
 }
 
 define <2 x float> @test_vext_f32(<2 x float> %a, <2 x float> %b) {
-; CHECK: test_vext_f32:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4
+; CHECK-LABEL: test_vext_f32:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x4|4}}
 entry:
   %vext = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 2>
   ret <2 x float> %vext
 }
 
 define <1 x double> @test_vext_f64(<1 x double> %a, <1 x double> %b) {
-; CHECK: test_vext_f64:
+; CHECK-LABEL: test_vext_f64:
 entry:
   %vext = shufflevector <1 x double> %a, <1 x double> %b, <1 x i32> <i32 0>
   ret <1 x double> %vext
 }
 
 define <4 x float> @test_vextq_f32(<4 x float> %a, <4 x float> %b) {
-; CHECK: test_vextq_f32:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x4
+; CHECK-LABEL: test_vextq_f32:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x4|4}}
 entry:
   %vext = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
   ret <4 x float> %vext
 }
 
 define <2 x double> @test_vextq_f64(<2 x double> %a, <2 x double> %b) {
-; CHECK: test_vextq_f64:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x8
+; CHECK-LABEL: test_vextq_f64:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x8|8}}
 entry:
   %vext = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 2>
   ret <2 x double> %vext
 }
 
 define <8 x i8> @test_vext_p8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK: test_vext_p8:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2
+; CHECK-LABEL: test_vext_p8:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x2|2}}
 entry:
   %vext = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
   ret <8 x i8> %vext
 }
 
 define <4 x i16> @test_vext_p16(<4 x i16> %a, <4 x i16> %b) {
-; CHECK: test_vext_p16:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x6
+; CHECK-LABEL: test_vext_p16:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x6|6}}
 entry:
   %vext = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
   ret <4 x i16> %vext
 }
 
 define <16 x i8> @test_vextq_p8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK: test_vextq_p8:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x2
+; CHECK-LABEL: test_vextq_p8:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x2|2}}
 entry:
   %vext = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>
   ret <16 x i8> %vext
 }
 
 define <8 x i16> @test_vextq_p16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK: test_vextq_p16:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6
+; CHECK-LABEL: test_vextq_p16:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x6|6}}
 entry:
   %vext = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
   ret <8 x i16> %vext
 }
 
 define <8 x i8> @test_undef_vext_s8(<8 x i8> %a) {
-; CHECK: test_undef_vext_s8:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x2
+; CHECK-LABEL: test_undef_vext_s8:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x2|2}}
 entry:
   %vext = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32>
   ret <8 x i8> %vext
 }
 
 define <16 x i8> @test_undef_vextq_s8(<16 x i8> %a) {
-; CHECK: test_undef_vextq_s8:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6
+; CHECK-LABEL: test_undef_vextq_s8:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x6|6}}
 entry:
   %vext = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32>
   ret <16 x i8> %vext
 }
 
 define <4 x i16> @test_undef_vext_s16(<4 x i16> %a) {
-; CHECK: test_undef_vext_s16:
-; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #0x4
+; CHECK-LABEL: test_undef_vext_s16:
+; CHECK: ext {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #{{0x4|4}}
 entry:
   %vext = shufflevector <4 x i16> %a, <4 x i16> undef, <4 x i32>
   ret <4 x i16> %vext
 }
 
 define <8 x i16> @test_undef_vextq_s16(<8 x i16> %a) {
-; CHECK: test_undef_vextq_s16:
-; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #0x6
+; CHECK-LABEL: test_undef_vextq_s16:
+; CHECK: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #{{0x6|6}}
 entry:
   %vext = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32>
   ret <8 x i16> %vext
 }
diff --git a/test/CodeGen/ARM64/aarch64-neon-copyPhysReg-tuple.ll b/test/CodeGen/ARM64/aarch64-neon-copyPhysReg-tuple.ll
new file mode 100644
index 00000000000..76e704736b7
--- /dev/null
+++ b/test/CodeGen/ARM64/aarch64-neon-copyPhysReg-tuple.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
+; arm64 has a separate copy due to intrinsics
+
+define <4 x i32> @copyTuple.QPair(i32* %a, i32* %b) {
+; CHECK-LABEL: copyTuple.QPair:
+; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+; CHECK: ld2 { {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}]
+entry:
+  %vld = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32> , <4 x i32> , i64 1, i32* %a)
+  %extract = extractvalue { <4 x i32>, <4 x i32> } %vld, 0
+  %vld1 = tail call { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> , i64 1, i32* %b)
+  %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32> } %vld1, 0
+  ret <4 x i32> %vld1.fca.0.extract
+}
+
+define <4 x i32> @copyTuple.QTriple(i32* %a, i32* %b, <4 x i32> %c) {
+; CHECK-LABEL: copyTuple.QTriple:
+; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+; CHECK: ld3 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}]
+entry:
+  %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32> , <4 x i32> %c, <4 x i32> %c, i64 1, i32* %a)
+  %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0
+  %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> , <4 x i32> %c, i64 1, i32* %b)
+  %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0
+  ret <4 x i32> %vld1.fca.0.extract
+}
+
+define <4 x i32> @copyTuple.QQuad(i32* %a, i32* %b, <4 x i32> %c) {
+; CHECK-LABEL: copyTuple.QQuad:
+; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+; CHECK: orr v{{[0-9]+}}.16b, v{{[0-9]+}}.16b, v{{[0-9]+}}.16b
+; CHECK: ld4 { {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s, {{v[0-9]+}}.s }[{{[0-9]+}}], [x{{[0-9]+|sp}}]
+entry:
+  %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32> , <4 x i32> %c, <4 x i32> %c, <4 x i32> %c, i64 1, i32* %a)
+  %extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld, 0
+  %vld1 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32> %extract, <4 x i32> , <4 x i32> %c, <4 x i32> %c, i64 1, i32* %b)
+  %vld1.fca.0.extract = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %vld1, 0
+  ret <4 x i32> %vld1.fca.0.extract
+}
+
+declare { <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*)
+declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*)
+declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.arm64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*)
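
The index computation introduced in isEXTMask above can be illustrated with a small standalone program. The sketch below is an approximation only, not the committed code: it uses std::vector<int> rather than LLVM's ArrayRef<int>, -1 stands in for an UNDEF lane, and the helper name extImmFromMask is invented for this illustration. The real isEXTMask additionally verifies that the remaining indices are the successive elements after the chosen starting point before accepting the shuffle as an EXT.

// Standalone sketch (not LLVM code): compute the candidate EXT start index
// from a shuffle mask whose leading lanes may be UNDEF (-1).
#include <cassert>
#include <cstdio>
#include <vector>

static int extImmFromMask(const std::vector<int> &Mask) {
  // Find the first lane whose source element is known, then count backwards:
  // if lane P selects element E, an EXT matching this mask must start at E - P.
  for (int P = 0, Size = static_cast<int>(Mask.size()); P != Size; ++P)
    if (Mask[P] >= 0)
      return Mask[P] - P; // e.g. <-1, -1, 3, ...> gives 3 - 2 = 1
  assert(false && "completely UNDEF shuffle");
  return -1;
}

int main() {
  std::vector<int> Mask = {-1, -1, 3, 4, 5, 6, 7, 8}; // leading lanes UNDEF
  std::printf("candidate EXT index: %d\n", extImmFromMask(Mask)); // prints 1
  return 0;
}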