mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-05 12:31:33 +00:00
[DAGCombine] Produce better code for constant splats
This solves PR22276.

Splats of constants would sometimes produce redundant shuffles, sometimes ridiculously so (see the PR for details). Fold these shuffles into BUILD_VECTORs early on instead.

Differential Revision: http://reviews.llvm.org/D7093

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226811 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
75ead67b4e
commit
0a979a09ae
@ -11490,7 +11490,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// If it is a splat, check if the argument vector is another splat or a
|
// If it is a splat, check if the argument vector is another splat or a
|
||||||
// build_vector with all scalar elements the same.
|
// build_vector.
|
||||||
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
|
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
|
||||||
SDNode *V = N0.getNode();
|
SDNode *V = N0.getNode();
|
||||||
|
|
||||||
@ -11527,6 +11527,24 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
|
|||||||
// Splat of <x, x, x, x>, return <x, x, x, x>
|
// Splat of <x, x, x, x>, return <x, x, x, x>
|
||||||
if (AllSame)
|
if (AllSame)
|
||||||
return N0;
|
return N0;
|
||||||
|
|
||||||
|
// If the splatted element is a constant, just build the vector out of
|
||||||
|
// constants directly.
|
||||||
|
const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
|
||||||
|
if (isa<ConstantSDNode>(Splatted) || isa<ConstantFPSDNode>(Splatted)) {
|
||||||
|
SmallVector<SDValue, 8> Ops;
|
||||||
|
for (unsigned i = 0; i != NumElts; ++i) {
|
||||||
|
Ops.push_back(Splatted);
|
||||||
|
}
|
||||||
|
SDValue &NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
|
||||||
|
V->getValueType(0), Ops);
|
||||||
|
|
||||||
|
// We may have jumped through bitcasts, so the type of the
|
||||||
|
// BUILD_VECTOR may not match the type of the shuffle.
|
||||||
|
if (V->getValueType(0) != VT)
|
||||||
|
NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV);
|
||||||
|
return NewBV;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1513,9 +1513,10 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
|
|||||||
return getUNDEF(VT);
|
return getUNDEF(VT);
|
||||||
|
|
||||||
// If Identity shuffle return that node.
|
// If Identity shuffle return that node.
|
||||||
bool Identity = true;
|
bool Identity = true, AllSame = true;
|
||||||
for (unsigned i = 0; i != NElts; ++i) {
|
for (unsigned i = 0; i != NElts; ++i) {
|
||||||
if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
|
if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false;
|
||||||
|
if (MaskVec[i] != MaskVec[0]) AllSame = false;
|
||||||
}
|
}
|
||||||
if (Identity && NElts)
|
if (Identity && NElts)
|
||||||
return N1;
|
return N1;
|
||||||
@ -1549,6 +1550,26 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
|
|||||||
if (C->isNullValue())
|
if (C->isNullValue())
|
||||||
return N1;
|
return N1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If the shuffle itself creates a constant splat, build the vector
|
||||||
|
// directly.
|
||||||
|
if (AllSame) {
|
||||||
|
const SDValue &Splatted = BV->getOperand(MaskVec[0]);
|
||||||
|
if (isa<ConstantSDNode>(Splatted) || isa<ConstantFPSDNode>(Splatted)) {
|
||||||
|
SmallVector<SDValue, 8> Ops;
|
||||||
|
for (unsigned i = 0; i != NElts; ++i) {
|
||||||
|
Ops.push_back(Splatted);
|
||||||
|
}
|
||||||
|
SDValue &NewBV = getNode(ISD::BUILD_VECTOR, dl,
|
||||||
|
BV->getValueType(0), Ops);
|
||||||
|
|
||||||
|
// We may have jumped through bitcasts, so the type of the
|
||||||
|
// BUILD_VECTOR may not match the type of the shuffle.
|
||||||
|
if (BV->getValueType(0) != VT)
|
||||||
|
NewBV = getNode(ISD::BITCAST, dl, VT, NewBV);
|
||||||
|
return NewBV;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
40
test/CodeGen/X86/splat-const.ll
Normal file
40
test/CodeGen/X86/splat-const.ll
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
; RUN: llc < %s -mcpu=penryn | FileCheck %s --check-prefix=SSE
|
||||||
|
; RUN: llc < %s -mcpu=sandybridge | FileCheck %s --check-prefix=AVX
|
||||||
|
; RUN: llc < %s -mcpu=haswell | FileCheck %s --check-prefix=AVX2
|
||||||
|
; This checks that lowering for creation of constant vectors is sane and
|
||||||
|
; doesn't use redundant shuffles. (fixes PR22276)
|
||||||
|
target triple = "x86_64-unknown-unknown"
|
||||||
|
|
||||||
|
define <4 x i32> @zero_vector() {
|
||||||
|
; SSE-LABEL: zero_vector:
|
||||||
|
; SSE: xorps %xmm0, %xmm0
|
||||||
|
; SSE-NEXT: retq
|
||||||
|
; AVX-LABEL: zero_vector:
|
||||||
|
; AVX: vxorps %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX-NEXT: retq
|
||||||
|
; AVX2-LABEL: zero_vector:
|
||||||
|
; AVX2: vxorps %xmm0, %xmm0, %xmm0
|
||||||
|
; AVX2-NEXT: retq
|
||||||
|
%zero = insertelement <4 x i32> undef, i32 0, i32 0
|
||||||
|
%splat = shufflevector <4 x i32> %zero, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||||
|
ret <4 x i32> %splat
|
||||||
|
}
|
||||||
|
|
||||||
|
; Note that for the "const_vector" versions, lowering that uses a shuffle
|
||||||
|
; instead of a load would be legitimate, if it's a single broadcast shuffle.
|
||||||
|
; (as opposed to the previous mess)
|
||||||
|
; However, this is not the current preferred lowering.
|
||||||
|
define <4 x i32> @const_vector() {
|
||||||
|
; SSE-LABEL: const_vector:
|
||||||
|
; SSE: movaps {{.*}}, %xmm0 # xmm0 = [42,42,42,42]
|
||||||
|
; SSE-NEXT: retq
|
||||||
|
; AVX-LABEL: const_vector:
|
||||||
|
; AVX: vmovaps {{.*}}, %xmm0 # xmm0 = [42,42,42,42]
|
||||||
|
; AVX-NEXT: retq
|
||||||
|
; AVX2-LABEL: const_vector:
|
||||||
|
; AVX2: vbroadcastss {{[^%].*}}, %xmm0
|
||||||
|
; AVX2-NEXT: retq
|
||||||
|
%const = insertelement <4 x i32> undef, i32 42, i32 0
|
||||||
|
%splat = shufflevector <4 x i32> %const, <4 x i32> undef, <4 x i32> zeroinitializer
|
||||||
|
ret <4 x i32> %splat
|
||||||
|
}
|
@ -1003,14 +1003,14 @@ define void @insertps_pr20411(i32* noalias nocapture %RET) #1 {
|
|||||||
; X32-LABEL: insertps_pr20411:
|
; X32-LABEL: insertps_pr20411:
|
||||||
; X32: ## BB#0:
|
; X32: ## BB#0:
|
||||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X32-NEXT: pshufd {{.*#+}} xmm0 = mem[3,1,2,3]
|
; X32-NEXT: movaps {{.*#+}} xmm0 = [3,3,3,3]
|
||||||
; X32-NEXT: insertps $-36, LCPI49_1+12, %xmm0
|
; X32-NEXT: insertps $-36, LCPI49_1+12, %xmm0
|
||||||
; X32-NEXT: movups %xmm0, (%eax)
|
; X32-NEXT: movups %xmm0, (%eax)
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: insertps_pr20411:
|
; X64-LABEL: insertps_pr20411:
|
||||||
; X64: ## BB#0:
|
; X64: ## BB#0:
|
||||||
; X64-NEXT: pshufd {{.*#+}} xmm0 = mem[3,1,2,3]
|
; X64-NEXT: movaps {{.*#+}} xmm0 = [3,3,3,3]
|
||||||
; X64-NEXT: insertps $-36, LCPI49_1+{{.*}}(%rip), %xmm0
|
; X64-NEXT: insertps $-36, LCPI49_1+{{.*}}(%rip), %xmm0
|
||||||
; X64-NEXT: movups %xmm0, (%rdi)
|
; X64-NEXT: movups %xmm0, (%rdi)
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
|
@ -82,8 +82,8 @@ define void @shuf5(<8 x i8>* %p) nounwind {
|
|||||||
; CHECK-LABEL: shuf5:
|
; CHECK-LABEL: shuf5:
|
||||||
; CHECK: # BB#0:
|
; CHECK: # BB#0:
|
||||||
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; CHECK-NEXT: movdqa {{.*#+}} xmm0 = <4,33,u,u,u,u,u,u>
|
; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [33,33,33,33,33,33,33,33]
|
||||||
; CHECK-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
|
; CHECK-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
|
||||||
; CHECK-NEXT: movlpd %xmm0, (%eax)
|
; CHECK-NEXT: movlpd %xmm0, (%eax)
|
||||||
; CHECK-NEXT: retl
|
; CHECK-NEXT: retl
|
||||||
%v = shufflevector <2 x i8> <i8 4, i8 33>, <2 x i8> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
%v = shufflevector <2 x i8> <i8 4, i8 33>, <2 x i8> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||||
|
Loading…
Reference in New Issue
Block a user