mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-28 06:32:09 +00:00
Enhance DAGCombine for transforming 128->256 casts into a vmovaps, rather
than a vxorps + vinsertf128 pair if the original vector came from a load. rdar://10594409 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147481 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
48a09aec60
commit
3d1161e9ae
@ -12731,6 +12731,20 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
|
||||
!isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems))
|
||||
return SDValue();
|
||||
|
||||
// If V1 is coming from a vector load then just fold to a VZEXT_LOAD.
|
||||
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(V1.getOperand(0))) {
|
||||
SDVTList Tys = DAG.getVTList(MVT::v4i64, MVT::Other);
|
||||
SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() };
|
||||
SDValue ResNode =
|
||||
DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2,
|
||||
Ld->getMemoryVT(),
|
||||
Ld->getPointerInfo(),
|
||||
Ld->getAlignment(),
|
||||
false/*isVolatile*/, true/*ReadMem*/,
|
||||
false/*WriteMem*/);
|
||||
return DAG.getNode(ISD::BITCAST, dl, VT, ResNode);
|
||||
}
|
||||
|
||||
// Emit a zeroed vector and insert the desired subvector on its
|
||||
// first half.
|
||||
SDValue Zeros = getZeroVector(VT, true /* HasXMMInt */, DAG, dl);
|
||||
|
@ -4719,6 +4719,11 @@ let Predicates = [HasAVX], AddedComplexity = 20 in {
|
||||
(VMOVZQI2PQIrm addr:$src)>;
|
||||
}
|
||||
|
||||
// With AVX available, select a v4i64 X86vzload from memory as a single
// VMOVAPSrm load whose result is placed in the sub_xmm subregister of the
// wider register via SUBREG_TO_REG.
// NOTE(review): VMOVAPSrm is presumably the alignment-requiring form of the
// move; confirm that every load folded into X86vzload here is suitably
// aligned.
let Predicates = [HasAVX] in {
|
||||
def : Pat<(v4i64 (X86vzload addr:$src)),
|
||||
(SUBREG_TO_REG (i32 0), (VMOVAPSrm addr:$src), sub_xmm)>;
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in
|
||||
// IA32 document. movq xmm1, xmm2 does clear the high bits.
|
||||
|
@ -31,4 +31,27 @@ define <8 x float> @test4(float %a) nounwind {
|
||||
ret <8 x float> %b
|
||||
; CHECK: test4:
|
||||
; CHECK: vinsertf128
|
||||
}
|
||||
|
||||
; rdar://10594409
|
||||
; test5: load a 16-byte-aligned <4 x float> and widen it to <8 x float> by
; shuffling it with a vector whose first element is 0.0. The FileCheck
; directives in the body require a vmovaps and reject vxorps and vinsertf128.
define <8 x float> @test5(float* nocapture %f) nounwind uwtable readonly ssp {
|
||||
entry:
|
||||
%0 = bitcast float* %f to <4 x float>*
|
||||
%1 = load <4 x float>* %0, align 16
|
||||
; CHECK: vmovaps
|
||||
; CHECK-NOT: vxorps
|
||||
; CHECK-NOT: vinsertf128
|
||||
; Mask indices 0-3 pick the four loaded elements; index 4 picks element 0 of
; the second operand (the 0.0 constant), so the upper half is all zeros.
%shuffle.i = shufflevector <4 x float> %1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
|
||||
ret <8 x float> %shuffle.i
|
||||
}
|
||||
|
||||
; test6: same scenario as a <2 x double> -> <4 x double> widening: a
; 16-byte-aligned load shuffled with a vector whose first element is 0.0.
; The FileCheck directives in the body require a vmovaps and reject vxorps
; and vinsertf128.
define <4 x double> @test6(double* nocapture %d) nounwind uwtable readonly ssp {
|
||||
entry:
|
||||
%0 = bitcast double* %d to <2 x double>*
|
||||
%1 = load <2 x double>* %0, align 16
|
||||
; CHECK: vmovaps
|
||||
; CHECK-NOT: vxorps
|
||||
; CHECK-NOT: vinsertf128
|
||||
; Mask indices 0-1 pick the two loaded elements; index 2 picks element 0 of
; the second operand (the 0.0 constant), so the upper half is all zeros.
%shuffle.i = shufflevector <2 x double> %1, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
|
||||
ret <4 x double> %shuffle.i
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user