mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-11-06 21:05:51 +00:00
98eac0a244
has settled without incident, removing the x86-specific and overly strict 'isVectorSplat' routine in favor of generic and more powerful splat detection. The primary motivation and result of this is that the x86 backend can now see through splats which contain undef elements. This is essential if we are using a widening form of legalization and I've updated a test case to also run in that mode as before this change the generated code for the test case was completely scalarized. This version of the patch much more carefully handles the undef lanes. - We aren't overly conservative about them in the shift lowering (where we will never use the splat itself). - One place where the splat would have been re-used by the existing code now explicitly constructs a new constant splat that will be safe. - The broadcast lowering is much more reasonable with undefs by doing a correct check of whether the splat is the only user of a loaded value, checking that the splat actually crosses multiple lanes before using a broadcast, and handling broadcasts of non-constant splats. As a consequence of the last bullet, the weird usage of vpshufd instead of vbroadcast is gone, and we actually can lower an AVX splat with vbroadcastss where before we emitted a really strange pattern of a vector load and a manual splat across the vector. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@212602 91177308-0d34-0410-b5e6-96231b3b80d8
101 lines
3.9 KiB
LLVM
101 lines
3.9 KiB
LLVM
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
|
|
|
|
|
|
; Splat byte element 5 of %a across all 32 lanes of a <32 x i8> vector.
; The label anchors the checks below to this function's output so they
; cannot be satisfied by instructions emitted for another function.
; CHECK-LABEL: funcA:
; CHECK: vpunpcklbw %xmm
; CHECK-NEXT: vpunpckhbw %xmm
; CHECK-NEXT: vpshufd $85
; CHECK-NEXT: vinsertf128 $1
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
entry:
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <32 x i8> %shuffle
}
|
|
|
|
; Splat 16-bit element 5 of %a across all 16 lanes of a <16 x i16> vector.
; The label anchors the checks below to this function's output so they
; cannot be satisfied by instructions emitted for another function.
; CHECK-LABEL: funcB:
; CHECK: vpunpckhwd %xmm
; CHECK-NEXT: vpshufd $85
; CHECK-NEXT: vinsertf128 $1
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
entry:
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i16> %shuffle
}
|
|
|
|
; Build a <4 x i64> by inserting the scalar %q into every lane (0..3).
; The label anchors the checks below to this function's output so they
; cannot be satisfied by instructions emitted for another function.
; CHECK-LABEL: funcC:
; CHECK: vmovq
; CHECK-NEXT: vmovlhps %xmm
; CHECK-NEXT: vinsertf128 $1
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
entry:
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}
|
|
|
|
; Build a <4 x double> by inserting the scalar %q into every lane (0..3).
; The label anchors the checks below to this function's output so they
; cannot be satisfied by instructions emitted for another function.
; CHECK-LABEL: funcD:
; CHECK: vpermilpd $0
; CHECK-NEXT: vinsertf128 $1
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
entry:
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}
|
|
|
|
; Test this turns into a broadcast:
;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
;
; The i32 loaded from the alloca is inserted into lanes 6 and 7 only; the
; remaining lanes of the <8 x i32> stay undef before the bitcast to
; <8 x float>. The label anchors the check to this function's output so it
; cannot be satisfied by a broadcast emitted for another function.
;
; CHECK-LABEL: funcE:
; CHECK: vbroadcastss
define <8 x float> @funcE() nounwind {
allocas:
  %udx495 = alloca [18 x [18 x float]], align 32
  br label %for_test505.preheader

for_test505.preheader:                            ; preds = %for_test505.preheader, %allocas
  br i1 undef, label %for_exit499, label %for_test505.preheader

for_exit499:                                      ; preds = %for_test505.preheader
  br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247

load.i1247:                                       ; preds = %for_exit499
  ; Address of element [1][1] of the array, reinterpreted as an i32 pointer.
  %ptr1227 = getelementptr [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
  %ptr.i1237 = bitcast float* %ptr1227 to i32*
  %val.i1238 = load i32* %ptr.i1237, align 4
  %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
  %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
  %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
  br label %__load_and_broadcast_32.exit1249

__load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
  %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
  ret <8 x float> %load_broadcast12281250
}
|
|
|
|
; Insert the scalar %val into lanes 6 and 7 of an otherwise-undef <8 x i32>
; and return it bitcast to <8 x float>.
; The label anchors the checks below to this function's output; funcG
; expects the same instruction pair, so unanchored checks could match there.
; CHECK-LABEL: funcF:
; CHECK: vpshufd $0
; CHECK-NEXT: vinsertf128 $1
define <8 x float> @funcF(i32 %val) nounwind {
  %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
  %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
  %tmp = bitcast <8 x i32> %ret7 to <8 x float>
  ret <8 x float> %tmp
}
|
|
|
|
; Splat element 0 of %a across all 8 lanes of a <8 x float> vector.
; The label anchors the checks below to this function's output; funcF
; expects the same instruction pair, so unanchored checks could match there.
; CHECK-LABEL: funcG:
; CHECK: vpshufd $0
; CHECK-NEXT: vinsertf128 $1
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}
|
|
|
|
; Splat element 5 of %a across all 8 lanes of a <8 x float> vector.
; Element 5 sits in the upper 128-bit half of the source, which is why the
; expected sequence starts by extracting that half.
; The label anchors the checks below to this function's output so they
; cannot be satisfied by instructions emitted for another function.
; CHECK-LABEL: funcH:
; CHECK: vextractf128 $1
; CHECK-NEXT: vpshufd
; CHECK-NEXT: vinsertf128 $1
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %shuffle
}
|
|
|