; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
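; These tests cover addressing-mode selection for NEON vector loads and stores:
; scaled register and immediate offsets, unscaled (ldur/stur) accesses, and
; loads of a single element directly into a SIMD register.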
; rdar://9428579
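; t1 and t2 store a zero vector through a pointer loaded from %argtable; the
; checks expect that loaded pointer to be used directly as the store address
; (str q0 / str d0 to [x<N>]).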
%type1 = type { <16 x i8> }
%type2 = type { <8 x i8> }
%type3 = type { <4 x i16> }

define hidden fastcc void @t1(%type1** %argtable) nounwind {
entry:
; CHECK-LABEL: t1:
; CHECK: ldr x[[REG:[0-9]+]], [x0]
; CHECK: str q0, [x[[REG]]]
  %tmp1 = load %type1** %argtable, align 8
  %tmp2 = getelementptr inbounds %type1* %tmp1, i64 0, i32 0
  store <16 x i8> zeroinitializer, <16 x i8>* %tmp2, align 16
  ret void
}

define hidden fastcc void @t2(%type2** %argtable) nounwind {
entry:
; CHECK-LABEL: t2:
; CHECK: ldr x[[REG:[0-9]+]], [x0]
; CHECK: str d0, [x[[REG]]]
  %tmp1 = load %type2** %argtable, align 8
  %tmp2 = getelementptr inbounds %type2* %tmp1, i64 0, i32 0
  store <8 x i8> zeroinitializer, <8 x i8>* %tmp2, align 8
  ret void
}

; add a bunch of tests for rdar://11246289
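; The fct1_* tests below use a runtime index and expect the byte offset to be
; formed with a single lsl (#4 for 128-bit vectors, #3 for 64-bit vectors) that
; is reused for both the load from %array and the store into the matching
; global array. The fct2_* tests use constant indices 3 and 5 and expect the
; scaled immediate forms instead (e.g. [x0, #48] and [base, #80] for <2 x i64>).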
@globalArray64x2 = common global <2 x i64>* null, align 8
@globalArray32x4 = common global <4 x i32>* null, align 8
@globalArray16x8 = common global <8 x i16>* null, align 8
@globalArray8x16 = common global <16 x i8>* null, align 8
@globalArray64x1 = common global <1 x i64>* null, align 8
@globalArray32x2 = common global <2 x i32>* null, align 8
@globalArray16x4 = common global <4 x i16>* null, align 8
@globalArray8x8 = common global <8 x i8>* null, align 8
@floatglobalArray64x2 = common global <2 x double>* null, align 8
@floatglobalArray32x4 = common global <4 x float>* null, align 8
@floatglobalArray64x1 = common global <1 x double>* null, align 8
@floatglobalArray32x2 = common global <2 x float>* null, align 8

define void @fct1_64x2(<2 x i64>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_64x2:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <2 x i64>* %array, i64 %offset
  %tmp = load <2 x i64>* %arrayidx, align 16
  %tmp1 = load <2 x i64>** @globalArray64x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i64>* %tmp1, i64 %offset
  store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
  ret void
}

define void @fct2_64x2(<2 x i64>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_64x2:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <2 x i64>* %array, i64 3
  %tmp = load <2 x i64>* %arrayidx, align 16
  %tmp1 = load <2 x i64>** @globalArray64x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i64>* %tmp1, i64 5
  store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
  ret void
}

define void @fct1_32x4(<4 x i32>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_32x4:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <4 x i32>* %array, i64 %offset
  %tmp = load <4 x i32>* %arrayidx, align 16
  %tmp1 = load <4 x i32>** @globalArray32x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i32>* %tmp1, i64 %offset
  store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
  ret void
}

define void @fct2_32x4(<4 x i32>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_32x4:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <4 x i32>* %array, i64 3
  %tmp = load <4 x i32>* %arrayidx, align 16
  %tmp1 = load <4 x i32>** @globalArray32x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i32>* %tmp1, i64 5
  store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
  ret void
}

define void @fct1_16x8(<8 x i16>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_16x8:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <8 x i16>* %array, i64 %offset
  %tmp = load <8 x i16>* %arrayidx, align 16
  %tmp1 = load <8 x i16>** @globalArray16x8, align 8
  %arrayidx1 = getelementptr inbounds <8 x i16>* %tmp1, i64 %offset
  store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
  ret void
}

define void @fct2_16x8(<8 x i16>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_16x8:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <8 x i16>* %array, i64 3
  %tmp = load <8 x i16>* %arrayidx, align 16
  %tmp1 = load <8 x i16>** @globalArray16x8, align 8
  %arrayidx1 = getelementptr inbounds <8 x i16>* %tmp1, i64 5
  store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
  ret void
}

define void @fct1_8x16(<16 x i8>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_8x16:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <16 x i8>* %array, i64 %offset
  %tmp = load <16 x i8>* %arrayidx, align 16
  %tmp1 = load <16 x i8>** @globalArray8x16, align 8
  %arrayidx1 = getelementptr inbounds <16 x i8>* %tmp1, i64 %offset
  store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
  ret void
}

define void @fct2_8x16(<16 x i8>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_8x16:
; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
  %arrayidx = getelementptr inbounds <16 x i8>* %array, i64 3
  %tmp = load <16 x i8>* %arrayidx, align 16
  %tmp1 = load <16 x i8>** @globalArray8x16, align 8
  %arrayidx1 = getelementptr inbounds <16 x i8>* %tmp1, i64 5
  store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
  ret void
}

define void @fct1_64x1(<1 x i64>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_64x1:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <1 x i64>* %array, i64 %offset
  %tmp = load <1 x i64>* %arrayidx, align 8
  %tmp1 = load <1 x i64>** @globalArray64x1, align 8
  %arrayidx1 = getelementptr inbounds <1 x i64>* %tmp1, i64 %offset
  store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
  ret void
}

define void @fct2_64x1(<1 x i64>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_64x1:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
  %arrayidx = getelementptr inbounds <1 x i64>* %array, i64 3
  %tmp = load <1 x i64>* %arrayidx, align 8
  %tmp1 = load <1 x i64>** @globalArray64x1, align 8
  %arrayidx1 = getelementptr inbounds <1 x i64>* %tmp1, i64 5
  store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
  ret void
}

define void @fct1_32x2(<2 x i32>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_32x2:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <2 x i32>* %array, i64 %offset
  %tmp = load <2 x i32>* %arrayidx, align 8
  %tmp1 = load <2 x i32>** @globalArray32x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i32>* %tmp1, i64 %offset
  store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
  ret void
}

define void @fct2_32x2(<2 x i32>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_32x2:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
  %arrayidx = getelementptr inbounds <2 x i32>* %array, i64 3
  %tmp = load <2 x i32>* %arrayidx, align 8
  %tmp1 = load <2 x i32>** @globalArray32x2, align 8
  %arrayidx1 = getelementptr inbounds <2 x i32>* %tmp1, i64 5
  store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
  ret void
}

define void @fct1_16x4(<4 x i16>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_16x4:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <4 x i16>* %array, i64 %offset
  %tmp = load <4 x i16>* %arrayidx, align 8
  %tmp1 = load <4 x i16>** @globalArray16x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i16>* %tmp1, i64 %offset
  store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
  ret void
}

define void @fct2_16x4(<4 x i16>* nocapture %array) nounwind ssp {
entry:
; CHECK-LABEL: fct2_16x4:
; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
  %arrayidx = getelementptr inbounds <4 x i16>* %array, i64 3
  %tmp = load <4 x i16>* %arrayidx, align 8
  %tmp1 = load <4 x i16>** @globalArray16x4, align 8
  %arrayidx1 = getelementptr inbounds <4 x i16>* %tmp1, i64 5
  store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
  ret void
}

define void @fct1_8x8(<8 x i8>* nocapture %array, i64 %offset) nounwind ssp {
entry:
; CHECK-LABEL: fct1_8x8:
; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
; CHECK: ldr [[BASE:x[0-9]+]],
; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
  %arrayidx = getelementptr inbounds <8 x i8>* %array, i64 %offset
  %tmp = load <8 x i8>* %arrayidx, align 8
  %tmp1 = load <8 x i8>** @globalArray8x8, align 8
  %arrayidx1 = getelementptr inbounds <8 x i8>* %tmp1, i64 %offset
  store <8 x i8> %tmp, <8 x i8>* %arrayidx1, align 8
  ret void
}

; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
; registers for unscaled vector accesses
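; The offsets used below (#3 and #4) are not multiples of the 8- or 16-byte
; access size, so they cannot be encoded in the scaled-immediate ldr/str forms;
; the checks expect the unscaled ldur/stur variants instead.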
@str = global [63 x i8] c"Test case for rdar://13258794: LDUR/STUR for D and Q registers\00", align 1

define <1 x i64> @fct0() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct0:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
  ret <1 x i64> %0
}

define <2 x i32> @fct1() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct1:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
  ret <2 x i32> %0
}

define <4 x i16> @fct2() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct2:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
  ret <4 x i16> %0
}

define <8 x i8> @fct3() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct3:
; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
  ret <8 x i8> %0
}

define <2 x i64> @fct4() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct4:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
  ret <2 x i64> %0
}

define <4 x i32> @fct5() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct5:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
  ret <4 x i32> %0
}

define <8 x i16> @fct6() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct6:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
  ret <8 x i16> %0
}

define <16 x i8> @fct7() nounwind readonly ssp {
entry:
; CHECK-LABEL: fct7:
; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
  %0 = load <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
  ret <16 x i8> %0
}

define void @fct8() nounwind ssp {
entry:
; CHECK-LABEL: fct8:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
  store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8
  ret void
}

define void @fct9() nounwind ssp {
entry:
; CHECK-LABEL: fct9:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
  store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8
  ret void
}

define void @fct10() nounwind ssp {
entry:
; CHECK-LABEL: fct10:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
  store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8
  ret void
}

define void @fct11() nounwind ssp {
entry:
; CHECK-LABEL: fct11:
; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
  store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8
  ret void
}

define void @fct12() nounwind ssp {
entry:
; CHECK-LABEL: fct12:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
  store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16
  ret void
}

define void @fct13() nounwind ssp {
entry:
; CHECK-LABEL: fct13:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
  store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16
  ret void
}

define void @fct14() nounwind ssp {
entry:
; CHECK-LABEL: fct14:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
  store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16
  ret void
}

define void @fct15() nounwind ssp {
entry:
; CHECK-LABEL: fct15:
; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
  %0 = load <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
  store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16
  ret void
}

; Check the building of vector from a single loaded value.
; Part of <rdar://problem/14170854>
;
; Single loads with immediate offset.
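; fct16-fct23 load a single element at a fixed offset and insert it into lane 0
; of an undef vector. The checks expect the load to go straight into a SIMD
; register (ldr b/h/s/d with a scaled immediate offset) rather than through an
; integer register, and for the i64 cases a 64-bit d-register load is enough
; even when the result type is <2 x i64>.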
define <8 x i8> @fct16(i8* nocapture %sp0) {
; CHECK-LABEL: fct16:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i8* %sp0, i64 1
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <8 x i8> %vec, %vec
  ret <8 x i8> %vmull.i
}

define <16 x i8> @fct17(i8* nocapture %sp0) {
; CHECK-LABEL: fct17:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i8* %sp0, i64 1
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <16 x i8> %vec, %vec
  ret <16 x i8> %vmull.i
}

define <4 x i16> @fct18(i16* nocapture %sp0) {
; CHECK-LABEL: fct18:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i16* %sp0, i64 1
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <4 x i16> %vec, %vec
  ret <4 x i16> %vmull.i
}

define <8 x i16> @fct19(i16* nocapture %sp0) {
; CHECK-LABEL: fct19:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i16* %sp0, i64 1
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <8 x i16> %vec, %vec
  ret <8 x i16> %vmull.i
}

define <2 x i32> @fct20(i32* nocapture %sp0) {
; CHECK-LABEL: fct20:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <2 x i32> %vec, %vec
  ret <2 x i32> %vmull.i
}

define <4 x i32> @fct21(i32* nocapture %sp0) {
; CHECK-LABEL: fct21:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i32* %sp0, i64 1
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <4 x i32> %vec, %vec
  ret <4 x i32> %vmull.i
}

define <1 x i64> @fct22(i64* nocapture %sp0) {
; CHECK-LABEL: fct22:
; CHECK: ldr d0, [x0, #8]
entry:
  %addr = getelementptr i64* %sp0, i64 1
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
  ret <1 x i64> %vec
}

define <2 x i64> @fct23(i64* nocapture %sp0) {
; CHECK-LABEL: fct23:
; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
entry:
  %addr = getelementptr i64* %sp0, i64 1
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
  ret <2 x i64> %vec
}

;
; Single loads with register offset.
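; fct24-fct31 are the register-offset counterparts: the single-element load is
; expected to use [base, index] addressing, with the index shifted left by the
; element size (lsl #1, #2, or #3 for 16-, 32-, and 64-bit elements; no shift
; for bytes).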
define <8 x i8> @fct24(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct24:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i8* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <8 x i8> %vec, %vec
  ret <8 x i8> %vmull.i
}

define <16 x i8> @fct25(i8* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct25:
; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i8* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i8* %addr, align 1
  %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <16 x i8> %vec, %vec
  ret <16 x i8> %vmull.i
}

define <4 x i16> @fct26(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct26:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i16* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <4 x i16> %vec, %vec
  ret <4 x i16> %vmull.i
}

define <8 x i16> @fct27(i16* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct27:
; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i16* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i16* %addr, align 1
  %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <8 x i16> %vec, %vec
  ret <8 x i16> %vmull.i
}

define <2 x i32> @fct28(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct28:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i32* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <2 x i32> %vec, %vec
  ret <2 x i32> %vmull.i
}

define <4 x i32> @fct29(i32* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct29:
; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
entry:
  %addr = getelementptr i32* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i32* %addr, align 1
  %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
  %vmull.i = mul <4 x i32> %vec, %vec
  ret <4 x i32> %vmull.i
}

define <1 x i64> @fct30(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct30:
; CHECK: ldr d0, [x0, x1, lsl #3]
entry:
  %addr = getelementptr i64* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
  ret <1 x i64> %vec
}

define <2 x i64> @fct31(i64* nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct31:
; CHECK: ldr d0, [x0, x1, lsl #3]
entry:
  %addr = getelementptr i64* %sp0, i64 %offset
  %pix_sp0.0.copyload = load i64* %addr, align 1
  %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
  ret <2 x i64> %vec
}