mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-13 04:30:23 +00:00
Using target-specific nodes for shuffle nodes makes the mask check stricter.
This breaks some cases that are not checked in the testsuite, but it also
exposes some foldings that were not done before. For example, this sequence:

  movaps (%rdi), %xmm0
  movaps (%rax), %xmm1
  movaps %xmm0, %xmm2
  movss %xmm1, %xmm2
  shufps $36, %xmm2, %xmm0

is now generated as:

  movaps (%rdi), %xmm0
  movaps %xmm0, %xmm1
  movlps (%rax), %xmm1
  shufps $36, %xmm1, %xmm0

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112753 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
543cf05b9c
commit
29c353b9c3
@ -5909,6 +5909,9 @@ def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
|
|||||||
def : Pat<(X86Movlps VR128:$src1,
|
def : Pat<(X86Movlps VR128:$src1,
|
||||||
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
|
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
|
||||||
(MOVLPSrm VR128:$src1, addr:$src2)>;
|
(MOVLPSrm VR128:$src1, addr:$src2)>;
|
||||||
|
def : Pat<(X86Movlps VR128:$src1,
|
||||||
|
(bc_v4i32 (v2i64 (load addr:$src2)))),
|
||||||
|
(MOVLPSrm VR128:$src1, addr:$src2)>;
|
||||||
|
|
||||||
// Shuffle with MOVLPD
|
// Shuffle with MOVLPD
|
||||||
def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
|
def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
|
||||||
|
14
test/CodeGen/X86/vec_shuffle-37.ll
Normal file
14
test/CodeGen/X86/vec_shuffle-37.ll
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
; RUN: llc < %s -march=x86-64 | FileCheck %s
|
||||||
|
|
||||||
|
define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp {
|
||||||
|
entry:
|
||||||
|
; CHECK: movaps (%rdi), %xmm0
|
||||||
|
; CHECK-NEXT: movaps %xmm0, %xmm1
|
||||||
|
; CHECK-NEXT: movlps (%rax), %xmm1
|
||||||
|
; CHECK-NEXT: shufps $36, %xmm1, %xmm0
|
||||||
|
%0 = load <4 x i32>* undef, align 16
|
||||||
|
%1 = load <4 x i32>* %a0, align 16
|
||||||
|
%2 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
|
||||||
|
ret <4 x i32> %2
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user