From 1e663cf69c9d9c91e1c48ca7541be29b6a4a5475 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sat, 4 Oct 2014 02:51:03 +0000 Subject: [PATCH] [x86] Fix a bug in the VZEXT DAG combine that I just made more powerful. It turns out this combine was always somewhat flawed -- there are cases where nested VZEXT nodes *can't* be combined: if their types have a mismatch that can be observed in the result. While none of these show up in currently, once I switch to the new vector shuffle lowering a few test cases actually form such nested VZEXT nodes. I've not come up with any IR pattern that I can sensible write to exercise this, but it will be covered by tests once I flip the switch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219044 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 783abab5423..ef5592fe033 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -24596,19 +24596,39 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG, SDValue Op = N->getOperand(0); MVT OpVT = Op.getSimpleValueType(); MVT OpEltVT = OpVT.getVectorElementType(); + unsigned InputBits = OpEltVT.getSizeInBits() * VT.getVectorNumElements(); // (vzext (bitcast (vzext (x)) -> (vzext x) SDValue V = Op; while (V.getOpcode() == ISD::BITCAST) V = V.getOperand(0); - if (V != Op && V.getOpcode() == X86ISD::VZEXT) - return DAG.getNode(X86ISD::VZEXT, DL, VT, V.getOperand(0)); + if (V != Op && V.getOpcode() == X86ISD::VZEXT) { + MVT InnerVT = V.getSimpleValueType(); + MVT InnerEltVT = InnerVT.getVectorElementType(); + + // If the element sizes match exactly, we can just do one larger vzext. This + // is always an exact type match as vzext operates on integer types. + if (OpEltVT == InnerEltVT) { + assert(OpVT == InnerVT && "Types must match for vzext!"); + return DAG.getNode(X86ISD::VZEXT, DL, VT, V.getOperand(0)); + } + + // The only other way we can combine them is if only a single element of the + // inner vzext is used in the input to the outer vzext. + if (InnerEltVT.getSizeInBits() < InputBits) + return SDValue(); + + // In this case, the inner vzext is completely dead because we're going to + // only look at bits inside of the low element. Just do the outer vzext on + // a bitcast of the input to the inner. + return DAG.getNode(X86ISD::VZEXT, DL, VT, + DAG.getNode(ISD::BITCAST, DL, OpVT, V)); + } // Check if we can bypass extracting and re-inserting an element of an input // vector. Essentialy: // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x) - unsigned InputBits = OpEltVT.getSizeInBits() * VT.getVectorNumElements(); if (V.getOpcode() == ISD::SCALAR_TO_VECTOR && V.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT && V.getOperand(0).getSimpleValueType().getSizeInBits() == InputBits) {