Fix a broadcast related regression on the vector shuffle lowering.

Summary: Test by Robert Lougher!

Reviewers: chandlerc

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D5745

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219617 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Filipe Cabecinhas 2014-10-13 16:16:16 +00:00
parent 162415e8db
commit 40251eb0b0
3 changed files with 87 additions and 3 deletions

View File

@ -7895,10 +7895,42 @@ static SDValue lowerVectorShuffleAsBroadcast(MVT VT, SDLoc DL, SDValue V,
"a sorted mask where the broadcast "
"comes from V1.");
// Check if this is a broadcast of a scalar. We special case lowering for
// scalars so that we can more effectively fold with loads.
// Go up the chain of (vector) values to try and find a scalar load that
// we can combine with the broadcast.
for (;;) {
switch (V.getOpcode()) {
case ISD::CONCAT_VECTORS: {
int OperandSize = Mask.size() / V.getNumOperands();
V = V.getOperand(BroadcastIdx / OperandSize);
BroadcastIdx %= OperandSize;
continue;
}
case ISD::INSERT_SUBVECTOR: {
SDValue VOuter = V.getOperand(0), VInner = V.getOperand(1);
auto ConstantIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
if (!ConstantIdx)
break;
int BeginIdx = (int)ConstantIdx->getZExtValue();
int EndIdx =
BeginIdx + (int)VInner.getValueType().getVectorNumElements();
if (BroadcastIdx >= BeginIdx && BroadcastIdx < EndIdx) {
BroadcastIdx -= BeginIdx;
V = VInner;
} else {
V = VOuter;
}
continue;
}
}
break;
}
// Check if this is a broadcast of a scalar. We special case lowering
// for scalars so that we can more effectively fold with loads.
if (V.getOpcode() == ISD::BUILD_VECTOR ||
(V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
(V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
V = V.getOperand(BroadcastIdx);
// If the scalar isn't a load we can't broadcast from it in AVX1, only with

View File

@ -734,3 +734,29 @@ define <4 x i64> @splat_mem_v4i64(i64* %ptr) {
%shuffle = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
ret <4 x i64> %shuffle
}
define <4 x double> @splat_mem_v4f64_2(double* %p) {
; ALL-LABEL: splat_mem_v4f64_2:
; ALL: # BB#0:
; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
; ALL-NEXT: retq
%1 = load double* %p
%2 = insertelement <2 x double> undef, double %1, i32 0
%3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> zeroinitializer
ret <4 x double> %3
}
define <4 x double> @splat_v4f64(<2 x double> %r) {
; AVX1-LABEL: splat_v4f64:
; AVX1: # BB#0:
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splat_v4f64:
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
%1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32> zeroinitializer
ret <4 x double> %1
}

View File

@ -1579,3 +1579,29 @@ define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) {
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
ret <8 x i32> %shuffle
}
define <8 x float> @splat_mem_v8f32_2(float* %p) {
; ALL-LABEL: splat_mem_v8f32_2:
; ALL: # BB#0:
; ALL-NEXT: vbroadcastss (%rdi), %ymm0
; ALL-NEXT: retq
%1 = load float* %p
%2 = insertelement <4 x float> undef, float %1, i32 0
%3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer
ret <8 x float> %3
}
define <8 x float> @splat_v8f32(<4 x float> %r) {
; AVX1-LABEL: splat_v8f32:
; AVX1: # BB#0:
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splat_v8f32:
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
; AVX2-NEXT: retq
%1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
ret <8 x float> %1
}