Mirror of https://github.com/c64scene-ar/llvm-6502.git (synced 2025-02-16 00:33:10 +00:00)
Revert "[x86] Combine x86mmx/i64 to v2i64 conversion to use scalar_to_vector"
This reverts commits r226953 and r226974. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227248 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
fb04c23aeb
commit
00b7a940e7
@@ -24761,8 +24761,6 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
  LoadSDNode *Ld = cast<LoadSDNode>(N);
  EVT RegVT = Ld->getValueType(0);
  EVT MemVT = Ld->getMemoryVT();
  SDValue Ptr = Ld->getBasePtr();
  SDValue Chain = Ld->getChain();
  SDLoc dl(Ld);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

@@ -24801,33 +24799,6 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
    return DCI.CombineTo(N, NewVec, TF, true);
  }

  // Conversion from x86mmx/i64 to v2i64 types is often done via stack
  // store/load. Under certain conditions we can bypass the memory access and
  // combine this load to use a scalar_to_vector instead. This leads to
  // a reduction in the stack use, redundant emission of shuffles and create
  // isel matching candidates for movq2dq instructions.
  if (RegVT == MVT::v2i64 && Subtarget->hasSSE2() && Ext == ISD::EXTLOAD &&
      !Ld->isVolatile() && ISD::isNON_TRUNCStore(Chain.getNode())) {

    // If this load is directly stored, get the original source value.
    StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
    EVT SrcTy = PrevST->getValue().getValueType();
    if (PrevST->getBasePtr() != Ptr ||
        !(SrcTy == MVT::i64 || SrcTy == MVT::x86mmx))
      return SDValue();
    SDValue SrcVal = Chain.getOperand(1);

    // On 32bit systems, we can't store 64bit integers, use f64 instead.
    bool Usef64 = TLI.isTypeLegal(MVT::f64) && !Subtarget->is64Bit();
    if (Usef64)
      SrcVal = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SrcVal);
    SrcVal = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, Usef64 ? MVT::v2f64 : RegVT,
                         SrcVal);

    return DCI.CombineTo(N, Usef64 ?
      DAG.getNode(ISD::BITCAST, dl, RegVT, SrcVal) : SrcVal, Chain);
  }

  return SDValue();
}
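The comment in the block removed above explains the intent of the reverted combine: an x86mmx/i64 to v2i64 conversion is normally lowered through a stack store/load, and the combine tried to bypass that memory traffic by rebuilding the value with a SCALAR_TO_VECTOR node that isel can match to movq2dq. Below is a minimal source-level illustration of the conversion in question; it is not part of the patch, the helper name is invented, and whether a given build emits a single movq2dq or a stack round-trip depends on combines such as the one reverted here.

// Illustration only (not from the patch): the kind of source-level
// conversion the reverted combine targeted. _mm_movpi64_epi64 moves an
// __m64 (x86mmx) value into the low half of an __m128i (v2i64); the
// combine's goal was to hand isel a pattern it can select as a single
// movq2dq instead of a stack store/load pair.
#include <mmintrin.h>
#include <emmintrin.h>

__m128i widen_mmx(__m64 m) {     // hypothetical helper name
  return _mm_movpi64_epi64(m);   // ideally lowers to movq2dq
}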
@@ -1,15 +1,14 @@
; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mtriple=x86_64-pc-win32 | FileCheck %s

; CHECK-LABEL: vcast:
;CHECK-LABEL: vcast:
define <2 x i32> @vcast(<2 x float> %a, <2 x float> %b) {
; CHECK-NOT: pmovzxdq
; CHECK-NOT: pmovzxdq
; CHECK: movdqa (%{{.*}}), %[[R0:xmm[0-9]+]]
;CHECK: pmovzxdq
;CHECK: pmovzxdq
  %af = bitcast <2 x float> %a to <2 x i32>
  %bf = bitcast <2 x float> %b to <2 x i32>
; CHECK-NEXT: psubq (%{{.*}}), %[[R0]]
  %x = sub <2 x i32> %af, %bf
; CHECK: ret
;CHECK: psubq
  ret <2 x i32> %x
;CHECK: ret
}
@@ -68,13 +68,12 @@ define i64 @test4(i64 %A) {
  %2 = bitcast <2 x i32> %add to i64
  ret i64 %2
}
; FIXME: At the moment we still produce the sequence paddd+pshufd.
; FIXME: At the moment we still produce the sequence pshufd+paddd+pshufd.
; Ideally, we should fold that sequence into a single paddd. This is fixed with
; the widening legalization.
;
; CHECK-LABEL: test4
; CHECK: movd
; CHECK-NOT: pshufd
; CHECK: pshufd
; CHECK-NEXT: paddd
; CHECK-NEXT: pshufd
; CHECK: ret
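For context on the FIXME lines above: test4 bitcasts an i64 to <2 x i32>, adds a vector constant, and bitcasts the result back to i64, and with the combine reverted the expected x86 output is the pshufd+paddd+pshufd sequence rather than a single paddd. The sketch below is a rough intrinsics-level analogue, not taken from the test file; the function name and the splat constant are assumptions, since the hunk only shows the tail of the test.

// Rough analogue of what test4 computes (assumption: the vector add uses
// some splat constant; only the end of the test is visible in the hunk).
// An i64 is reinterpreted as two i32 lanes, each lane is adjusted, and the
// result is reinterpreted back to i64. Ideally this is movd/movq + paddd +
// movd/movq, without the extra pshufd shuffles the FIXME mentions.
#include <emmintrin.h>
#include <stdint.h>

int64_t test4_analogue(int64_t A) {            // hypothetical name
  __m128i v = _mm_cvtsi64_si128(A);            // i64 into the low xmm lane
  v = _mm_add_epi32(v, _mm_set1_epi32(3));     // paddd on the i32 lanes
  return _mm_cvtsi128_si64(v);                 // low 64 bits back to i64
}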
@@ -1,29 +0,0 @@
; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | FileCheck %s -check-prefix=X86-32
; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s -check-prefix=X86-64

; X86-32-LABEL: test0
; X86-64-LABEL: test0
define i32 @test0(<1 x i64>* %v4) {
  %v5 = load <1 x i64>* %v4, align 8
  %v12 = bitcast <1 x i64> %v5 to <4 x i16>
  %v13 = bitcast <4 x i16> %v12 to x86_mmx
; X86-32: pshufw $238
; X86-32-NOT: movq
; X86-32-NOT: movsd
; X86-32: movq2dq
; X86-64: pshufw $238
; X86-64-NOT: movq
; X86-64-NOT: pshufd
; X86-64: movq2dq
; X86-64-NEXT: movd
  %v14 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %v13, i8 -18)
  %v15 = bitcast x86_mmx %v14 to <4 x i16>
  %v16 = bitcast <4 x i16> %v15 to <1 x i64>
  %v17 = extractelement <1 x i64> %v16, i32 0
  %v18 = bitcast i64 %v17 to <2 x i32>
  %v19 = extractelement <2 x i32> %v18, i32 0
  %v20 = add i32 %v19, 32
  ret i32 %v20
}

declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8)
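The hunk above deletes an entire test file, presumably the one added by the reverted commits. It checked that a pshufw on an x86_mmx value followed by widening to a 128-bit vector is selected as pshufw plus movq2dq (with a movd for the final scalar extract on x86-64) instead of going through a stack store/load. Below is a hedged intrinsics-level analogue of the pattern the test exercised; the function name is invented, and the 0xEE immediate corresponds to the i8 -18 (pshufw $238) in the test.

// Illustration only (not from the tree): an intrinsics-level analogue of
// the deleted test. _mm_shuffle_pi16(m, 0xEE) matches the pshufw $238 in
// the test, and _mm_movpi64_epi64 widens the MMX result into an XMM
// register, which the test expected to be selected as movq2dq rather than
// a stack store/load.
#include <xmmintrin.h>
#include <emmintrin.h>

int test0_analogue(const __m64 *v4) {              // hypothetical name
  __m64 shuffled = _mm_shuffle_pi16(*v4, 0xEE);    // pshufw $238
  __m128i wide   = _mm_movpi64_epi64(shuffled);    // movq2dq
  return _mm_cvtsi128_si32(wide) + 32;             // low i32 lane, plus 32
}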
@@ -78,7 +78,8 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp
; CHECK-NEXT: paddd %[[R0]], %[[R1]]
; CHECK-NEXT: pextrw $4, %[[R1]], 4(%{{.*}})
; CHECK-NEXT: pshufb {{.*}}, %[[R1]]
; CHECK-NEXT: movd %[[R1]], (%{{.*}})
; CHECK-NEXT: pmovzxdq %[[R1]], %[[R0]]
; CHECK-NEXT: movd %[[R0]], (%{{.*}})
  %a = load %i16vec3* %ap, align 16
  %b = load %i16vec3* %bp, align 16
  %x = add %i16vec3 %a, %b