From b4f98ea1213c866f39aa5b341ec0116f9c2335d7 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Mon, 25 Mar 2013 23:47:35 +0000 Subject: [PATCH] Enhance folding of (extract_subvec (insert_subvec V1, V2, IIdx), EIdx) - Handle the case where the result of 'insert_subvect' is bitcasted before 'extract_subvec'. This removes the redundant insertf128/extractf128 pair on unaligned 256-bit vector load/store on vectors of non 64-bit integer. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177945 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 62 +++++++++++++----------- test/CodeGen/X86/avx-load-store.ll | 13 +++++ 2 files changed, 48 insertions(+), 27 deletions(-) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b2f970b4475..44d43a1d076 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -8990,33 +8990,6 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { EVT NVT = N->getValueType(0); SDValue V = N->getOperand(0); - if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { - // Handle only simple case where vector being inserted and vector - // being extracted are of same type, and are half size of larger vectors. - EVT BigVT = V->getOperand(0).getValueType(); - EVT SmallVT = V->getOperand(1).getValueType(); - if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) - return SDValue(); - - // Only handle cases where both indexes are constants with the same type. - ConstantSDNode *ExtIdx = dyn_cast(N->getOperand(1)); - ConstantSDNode *InsIdx = dyn_cast(V->getOperand(2)); - - if (InsIdx && ExtIdx && - InsIdx->getValueType(0).getSizeInBits() <= 64 && - ExtIdx->getValueType(0).getSizeInBits() <= 64) { - // Combine: - // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) - // Into: - // indices are equal => V1 - // otherwise => (extract_subvec V1, ExtIdx) - if (InsIdx->getZExtValue() == ExtIdx->getZExtValue()) - return V->getOperand(1); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT, - V->getOperand(0), N->getOperand(1)); - } - } - if (V->getOpcode() == ISD::CONCAT_VECTORS) { // Combine: // (extract_subvec (concat V1, V2, ...), i) @@ -9032,6 +9005,41 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { return V->getOperand(Idx / NumElems); } + // Skip bitcasting + if (V->getOpcode() == ISD::BITCAST) + V = V.getOperand(0); + + if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { + DebugLoc dl = N->getDebugLoc(); + // Handle only simple case where vector being inserted and vector + // being extracted are of same type, and are half size of larger vectors. + EVT BigVT = V->getOperand(0).getValueType(); + EVT SmallVT = V->getOperand(1).getValueType(); + if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) + return SDValue(); + + // Only handle cases where both indexes are constants with the same type. + ConstantSDNode *ExtIdx = dyn_cast(N->getOperand(1)); + ConstantSDNode *InsIdx = dyn_cast(V->getOperand(2)); + + if (InsIdx && ExtIdx && + InsIdx->getValueType(0).getSizeInBits() <= 64 && + ExtIdx->getValueType(0).getSizeInBits() <= 64) { + // Combine: + // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) + // Into: + // indices are equal or bit offsets are equal => V1 + // otherwise => (extract_subvec V1, ExtIdx) + if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() == + ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits()) + return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, + DAG.getNode(ISD::BITCAST, dl, + N->getOperand(0).getValueType(), + V->getOperand(0)), N->getOperand(1)); + } + } + return SDValue(); } diff --git a/test/CodeGen/X86/avx-load-store.ll b/test/CodeGen/X86/avx-load-store.ll index 432852d47d3..0afaff830df 100644 --- a/test/CodeGen/X86/avx-load-store.ll +++ b/test/CodeGen/X86/avx-load-store.ll @@ -114,3 +114,16 @@ cif_mixed_test_any_check: ; preds = %cif_mask_mixed unreachable } +; CHECK: add8i32 +; CHECK: vmovups +; CHECK: vmovups +; CHECK-NOT: vinsertf128 +; CHECK-NOT: vextractf128 +; CHECK: vmovups +; CHECK: vmovups +define void @add8i32(<8 x i32>* %ret, <8 x i32>* %bp) nounwind { + %b = load <8 x i32>* %bp, align 1 + %x = add <8 x i32> zeroinitializer, %b + store <8 x i32> %x, <8 x i32>* %ret, align 1 + ret void +}