ARM: Improve pattern for isel mul of vector by scalar.
In addition to recognizing when the multiply's second argument is coming from an explicit VDUPLANE, also look for a plain scalar f32 reference and reference it via the corresponding vector lane.

rdar://14870054

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189619 91177308-0d34-0410-b5e6-96231b3b80d8
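For context, the explicit-VDUPLANE case that the pattern already handled looks roughly like the IR below. This is a hedged sketch, not taken from the commit or from the test file; the function name and operand shapes are illustrative. The new test that follows instead feeds the multiply from a plain scalar f32 load rebuilt into a vector with a chain of insertelement instructions, which this change now maps onto the same vector-by-scalar-lane multiply.

    ; Sketch only: splat lane 1 of a <2 x float> across four lanes and multiply.
    ; The lane splat lowers to an ARM VDUPLANE node, and the existing pattern
    ; should fold the pair into a single vmul.f32 with a d-register lane operand.
    define <4 x float> @mul_by_lane(<4 x float> %v, <2 x float> %w) nounwind {
      %splat = shufflevector <2 x float> %w, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
      %mul = fmul <4 x float> %splat, %v
      ret <4 x float> %mul
    }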
@@ -623,3 +623,21 @@ entry:
 store <4 x i32> %predphi290.v.i, <4 x i32>* undef, align 4
 ret void
 }
+
+define void @foo(<4 x float> * %a, <4 x float>* nocapture %dst, float* nocapture readonly %src) nounwind {
+; Look for doing a normal scalar FP load rather than a to-all-lanes load.
+; e.g., "ldr s0, [r2]" rather than "vld1.32 {d18[], d19[]}, [r2:32]"
+; Then check that the vector multiply has folded the splat to all lanes
+; and used a vector * scalar instruction.
+; CHECK: vldr {{s[0-9]+}}, [r2]
+; CHECK: vmul.f32 q8, q8, d0[0]
+  %tmp = load float* %src, align 4
+  %tmp5 = load <4 x float>* %a, align 4
+  %tmp6 = insertelement <4 x float> undef, float %tmp, i32 0
+  %tmp7 = insertelement <4 x float> %tmp6, float %tmp, i32 1
+  %tmp8 = insertelement <4 x float> %tmp7, float %tmp, i32 2
+  %tmp9 = insertelement <4 x float> %tmp8, float %tmp, i32 3
+  %tmp10 = fmul <4 x float> %tmp9, %tmp5
+  store <4 x float> %tmp10, <4 x float>* %dst, align 4
+  ret void
+}