From bbfda9c1254bdd95d9705a5d7f82e254e689bba2 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Wed, 4 Mar 2015 07:27:39 +0000 Subject: [PATCH] [DAGCombine] Fix a bug in a BUILD_VECTOR combine When trying to convert a BUILD_VECTOR into a shuffle, we try to split a single source vector that is twice as wide as the destination vector. We cannot do this when we also need the zero vector to create a blend, because the resulting shuffle would then need three inputs. This fixes PR22774. Differential Revision: http://reviews.llvm.org/D8040 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@231219 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 +++-- test/CodeGen/X86/pr22774.ll | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/X86/pr22774.ll diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c7a9df543a0..4a074a03d9a 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11358,7 +11358,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) { // If the input vector is too large, try to split it. // We don't support having two input vectors that are too large. - if (VecIn2.getNode()) + // If the zero vector was used, we can not split the vector, + // since we'd need 3 inputs. 
+ if (UsesZeroVector || VecIn2.getNode()) return SDValue(); if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements())) @@ -11370,7 +11372,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy())); VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, DAG.getConstant(0, TLI.getVectorIdxTy())); - UsesZeroVector = false; } else return SDValue(); } diff --git a/test/CodeGen/X86/pr22774.ll b/test/CodeGen/X86/pr22774.ll new file mode 100644 index 00000000000..3ea2addaafd --- /dev/null +++ b/test/CodeGen/X86/pr22774.ll @@ -0,0 +1,20 @@ +; RUN: llc -mattr=avx %s -o - | FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +@in = global <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, align 32 +@out = global <2 x i64> zeroinitializer, align 16 + +define i32 @_Z3foov() { +entry: +; CHECK: vmovdqa in(%rip), %ymm0 +; CHECK-NEXT: vmovq %xmm0, %xmm0 +; CHECK-NEXT: vmovdqa %xmm0, out(%rip) + %0 = load <4 x i64>, <4 x i64>* @in, align 32 + %vecext = extractelement <4 x i64> %0, i32 0 + %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 + %vecinit1 = insertelement <2 x i64> %vecinit, i64 0, i32 1 + store <2 x i64> %vecinit1, <2 x i64>* @out, align 16 + ret i32 0 +}