mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-15 23:31:37 +00:00
Add x86 isel logic and patterns to match movlps from clang generated IR for _mm_loadl_pi(). rdar://10134392, rdar://10050222
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144052 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0d69097779
commit
7bc389b6b0
@ -6190,6 +6190,10 @@ static bool MayFoldVectorLoad(SDValue V) {
|
||||
V = V.getOperand(0);
|
||||
if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
|
||||
V = V.getOperand(0);
|
||||
if (V.hasOneUse() && V.getOpcode() == ISD::BUILD_VECTOR &&
|
||||
V.getNumOperands() == 2 && V.getOperand(1).getOpcode() == ISD::UNDEF)
|
||||
// BUILD_VECTOR (load), undef
|
||||
V = V.getOperand(0);
|
||||
if (MayFoldLoad(V))
|
||||
return true;
|
||||
return false;
|
||||
@ -6372,15 +6376,10 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {
|
||||
// turns into:
|
||||
// (MOVLPSmr addr:$src1, VR128:$src2)
|
||||
// So, recognize this potential and also use MOVLPS or MOVLPD
|
||||
if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
|
||||
else if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
|
||||
CanFoldLoad = true;
|
||||
|
||||
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
|
||||
|
||||
// Both of them can't be memory operations though.
|
||||
if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2))
|
||||
CanFoldLoad = false;
|
||||
|
||||
if (CanFoldLoad) {
|
||||
if (HasXMMInt && NumElems == 2)
|
||||
return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
|
||||
|
@ -1035,6 +1035,9 @@ let Predicates = [HasSSE1] in {
|
||||
}
|
||||
|
||||
// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
|
||||
// Storing element 0 of a v4f32 register viewed through bc_v2i64 (i.e. an
// i64 vector_extract at index 0) to addr:$src1 is selected as MOVLPSmr.
// Unlike the shuffle-based store patterns nearby, no load is involved here.
def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)),
|
||||
(iPTR 0))), addr:$src1),
|
||||
(MOVLPSmr addr:$src1, VR128:$src2)>;
|
||||
// v4f32 form: a movlp whose first operand is loaded from addr:$src1 and whose
// result is stored back to that same addr:$src1 is selected as a single
// MOVLPSmr of $src2.
def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
|
||||
(MOVLPSmr addr:$src1, VR128:$src2)>;
|
||||
def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)),
|
||||
@ -1049,6 +1052,9 @@ let Predicates = [HasSSE1] in {
|
||||
// X86Movlps whose second operand is an f64 load placed into a v2f64 with
// scalar_to_vector and then viewed through bc_v4f32: select MOVLPSrm, using
// addr:$src2 directly as the memory operand.
def : Pat<(X86Movlps VR128:$src1,
|
||||
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
|
||||
(MOVLPSrm VR128:$src1, addr:$src2)>;
|
||||
// Integer counterpart of the f64 pattern: the second operand is an i64 load
// placed into a v2i64 with scalar_to_vector and then viewed through bc_v4f32.
// It is likewise selected as MOVLPSrm with addr:$src2 as the memory operand.
def : Pat<(X86Movlps VR128:$src1,
|
||||
(bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
|
||||
(MOVLPSrm VR128:$src1, addr:$src2)>;
|
||||
|
||||
// Store patterns
|
||||
def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
|
||||
|
@ -48,8 +48,7 @@ entry:
|
||||
; CHECK: f
|
||||
define <4 x float> @f(<4 x float> %x, double* nocapture %y) nounwind uwtable readonly ssp {
|
||||
entry:
|
||||
; CHECK: movsd (%
|
||||
; CHECK-NEXT: movsd %xmm
|
||||
; CHECK: movlps (%rdi), %xmm0
|
||||
%u110.i = load double* %y, align 1
|
||||
%tmp8.i = insertelement <2 x double> undef, double %u110.i, i32 0
|
||||
%tmp9.i = bitcast <2 x double> %tmp8.i to <4 x float>
|
||||
|
51
test/CodeGen/X86/vec_shuffle-39.ll
Normal file
51
test/CodeGen/X86/vec_shuffle-39.ll
Normal file
@ -0,0 +1,51 @@
|
||||
; RUN: llc < %s -march=x86-64 | FileCheck %s
|
||||
; rdar://10050222, rdar://10134392
|
||||
|
||||
; Test t1: load 64 bits from %p as <1 x i64> (align 1), reinterpret them as
; <2 x float>, widen to <4 x float> with undef upper lanes, and merge into %a
; with the shuffle mask <4, 5, 2, 3> (lanes 0-1 from the loaded value,
; lanes 2-3 from %a). The FileCheck directives below expect this to become a
; single movlps load from (%rdi) into %xmm0.
define <4 x float> @t1(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
|
||||
entry:
|
||||
; CHECK: t1:
|
||||
; CHECK: movlps (%rdi), %xmm0
|
||||
; CHECK: ret
|
||||
%p.val = load <1 x i64>* %p, align 1
|
||||
%0 = bitcast <1 x i64> %p.val to <2 x float>
|
||||
%shuffle.i = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
%shuffle1.i = shufflevector <4 x float> %a, <4 x float> %shuffle.i, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
|
||||
ret <4 x float> %shuffle1.i
|
||||
}
|
||||
|
||||
; Test t1a: variant of the low-half load that goes through a double. %p is
; cast to double*, the loaded double is inserted into lane 0 of an undef
; <2 x double>, bitcast to <4 x float>, and merged into %a with the shuffle
; mask <4, 5, 2, 3> (lanes 0-1 from the loaded value, lanes 2-3 from %a).
; The FileCheck directives below expect a single movlps load from (%rdi)
; into %xmm0.
define <4 x float> @t1a(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
|
||||
entry:
|
||||
; CHECK: t1a:
|
||||
; CHECK: movlps (%rdi), %xmm0
|
||||
; CHECK: ret
|
||||
%0 = bitcast <1 x i64>* %p to double*
|
||||
%1 = load double* %0
|
||||
%2 = insertelement <2 x double> undef, double %1, i32 0
|
||||
%3 = bitcast <2 x double> %2 to <4 x float>
|
||||
%4 = shufflevector <4 x float> %a, <4 x float> %3, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
|
||||
ret <4 x float> %4
|
||||
}
|
||||
|
||||
; Test t2: store the low 64 bits of %a. The vector is bitcast to <2 x i64>,
; element 0 is extracted, and the i64 is stored (align 8) through a pointer
; to the first element of %p. The FileCheck directives below expect a single
; movlps store from %xmm0 to (%rdi).
define void @t2(<1 x i64>* nocapture %p, <4 x float> %a) nounwind {
|
||||
entry:
|
||||
; CHECK: t2:
|
||||
; CHECK: movlps %xmm0, (%rdi)
|
||||
; CHECK: ret
|
||||
%cast.i = bitcast <4 x float> %a to <2 x i64>
|
||||
%extract.i = extractelement <2 x i64> %cast.i, i32 0
|
||||
%0 = getelementptr inbounds <1 x i64>* %p, i64 0, i64 0
|
||||
store i64 %extract.i, i64* %0, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test t2a: variant of the low-half store that goes through a double. %p is
; cast to double*, %a is bitcast to <2 x double>, and element 0 is stored
; through the casted pointer (no explicit alignment on the store). The
; FileCheck directives below expect a single movlps store from %xmm0 to
; (%rdi).
define void @t2a(<1 x i64>* nocapture %p, <4 x float> %a) nounwind {
|
||||
entry:
|
||||
; CHECK: t2a:
|
||||
; CHECK: movlps %xmm0, (%rdi)
|
||||
; CHECK: ret
|
||||
%0 = bitcast <1 x i64>* %p to double*
|
||||
%1 = bitcast <4 x float> %a to <2 x double>
|
||||
%2 = extractelement <2 x double> %1, i32 0
|
||||
store double %2, double* %0
|
||||
ret void
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user