Mirror of https://github.com/c64scene-ar/llvm-6502.git (synced 2024-12-29 10:32:47 +00:00)
Commit 9e2fe46484
…in exposing the scalar value to the broadcast DAG fragment so that we can catch even reloads and fold them into the broadcast. This is somewhat magical, I'm afraid, but it seems to work. It is also what the old lowering did, and I've switched an old test to run both lowerings, demonstrating that we get the same result.

Unlike the old code, I'm not lowering f32 or f64 scalars through this path when we only have AVX1. The target patterns include pretty heinous code to re-cast those as shuffles when the scalar happens not to be spilled, because AVX1 provides no broadcast mechanism from registers whatsoever. This is terribly brittle. I'd much rather go through our generic lowering code to get this.

If needed, we can add a peephole to get even more opportunities to broadcast-from-spill-slots that are exposed post-RA, but my suspicion is this just doesn't matter that much.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218734 91177308-0d34-0410-b5e6-96231b3b80d8
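For illustration only (not part of the commit): a minimal sketch of the splat-of-a-load pattern this lowering targets, written in the same era's typed-pointer IR; the function name @splat_load is hypothetical. With -mattr=+avx2, llc should collapse the load + insertelement + shufflevector chain into a single vbroadcastss from memory, folding the load into the broadcast rather than loading the scalar and splatting it in registers.

; Hypothetical sketch: scalar load feeds lane 0, and an all-zeros shuffle
; mask splats that lane across all eight lanes of the result.
define <8 x float> @splat_load(float* %p) {
  %s = load float* %p
  %v = insertelement <8 x float> undef, float %s, i32 0
  %splat = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %splat
}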
25 lines · 773 B · LLVM
; RUN: llc < %s -march=x86 -mcpu=corei7 -mattr=+avx2 | FileCheck %s
; RUN: llc < %s -march=x86 -mcpu=corei7 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s

declare x86_fastcallcc i64 @barrier()

;CHECK-LABEL: bcast_fold:
;CHECK: vmov{{[au]}}ps %xmm{{[0-9]+}}, [[SPILLED:[^\)]+\)]]
;CHECK: barrier
;CHECK: vbroadcastss [[SPILLED]], %ymm0
;CHECK: ret
define <8 x float> @bcast_fold( float* %A) {
BB:
  %A0 = load float* %A
  %tt3 = call x86_fastcallcc i64 @barrier()
  br i1 undef, label %work, label %exit

work:
  %A1 = insertelement <8 x float> undef, float %A0, i32 0
  %A2 = shufflevector <8 x float> %A1, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %A2

exit:
  ret <8 x float> undef
}
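As a side note on the AVX1 caveat in the commit message, here is a hedged sketch (hypothetical, not part of this commit) of the case that has no broadcast instruction: AVX1's vbroadcastss accepts only a memory operand, so splatting a float that lives in a register cannot use a broadcast at all and must be lowered as a shuffle sequence instead, unless the value happens to be spilled and can be broadcast from its stack slot.

; Hypothetical sketch, assuming -mattr=+avx only (no AVX2): %x arrives in a
; register, so this splat lowers to shuffles rather than a vbroadcastss.
define <8 x float> @splat_reg(float %x) {
  %v = insertelement <8 x float> undef, float %x, i32 0
  %splat = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %splat
}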