diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ba14a4dbb5d..0f8b499787a 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7570,13 +7570,20 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
   if (!IsLoadSrc) {
     unsigned LastConst = 0;
     unsigned LastLegalType = 0;
+    unsigned LastLegalVectorType = 0;
+    bool NonZero = false;
     for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
       SDValue StoredVal = St->getValue();
-      bool IsConst = (isa<ConstantSDNode>(StoredVal) ||
-                      isa<ConstantFPSDNode>(StoredVal));
-      if (!IsConst)
+
+      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
+        NonZero |= (C->getZExtValue() != 0);
+      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
+        NonZero |= C->getValueAPF().bitcastToAPInt().getZExtValue();
+      } else {
+        // Non constant.
         break;
+      }
 
       // Mark this index as the largest legal constant.
       LastConst = i;
@@ -7586,16 +7593,27 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
       EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
       if (TLI.isTypeLegal(StoreTy))
         LastLegalType = i+1;
+
+      // Find a legal type for the vector store.
+      EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
+      if (TLI.isTypeLegal(Ty))
+        LastLegalVectorType = i + 1;
     }
 
+    // We only use vectors if the constant is known to be zero.
+    if (NonZero)
+      LastLegalVectorType = 0;
+
     // Check if we found a legal integer type to store.
-    if (LastLegalType == 0)
+    if (LastLegalType == 0 && LastLegalVectorType == 0)
       return false;
 
-    // We add a +1 because the LastXXX variables refer to array location
-    // while NumElem holds the size.
-    unsigned NumElem = std::min(LastConsecutiveStore, LastConst) + 1;
-    NumElem = std::min(LastLegalType, NumElem);
+    bool UseVector = LastLegalVectorType > LastLegalType;
+    unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
+
+    // Make sure we have something to merge.
+    if (NumElem < 2)
+      return false;
 
     unsigned EarliestNodeUsed = 0;
     for (unsigned i=0; i < NumElem; ++i) {
@@ -7609,36 +7627,41 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
 
     // The earliest Node in the DAG.
     LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
-
-    // Make sure we have something to merge.
-    if (NumElem < 2)
-      return false;
-
     DebugLoc DL = StoreNodes[0].MemNode->getDebugLoc();
-    unsigned StoreBW = NumElem * ElementSizeBytes * 8;
-    APInt StoreInt(StoreBW, 0);
-
-    // Construct a single integer constant which is made of the smaller
-    // constant inputs.
-    bool IsLE = TLI.isLittleEndian();
-    for (unsigned i = 0; i < NumElem ; ++i) {
-      unsigned Idx = IsLE ?(NumElem - 1 - i) : i;
-      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
-      SDValue Val = St->getValue();
-      StoreInt<<=ElementSizeBytes*8;
-      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
-        StoreInt|=C->getAPIntValue().zext(StoreBW);
-      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
-        StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
-      } else {
-        assert(false && "Invalid constant element type");
+
+    SDValue StoredVal;
+    if (UseVector) {
+      // Find a legal type for the vector store.
+      EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+      assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+      StoredVal = DAG.getConstant(0, Ty);
+    } else {
+      unsigned StoreBW = NumElem * ElementSizeBytes * 8;
+      APInt StoreInt(StoreBW, 0);
+
+      // Construct a single integer constant which is made of the smaller
+      // constant inputs.
+      bool IsLE = TLI.isLittleEndian();
+      for (unsigned i = 0; i < NumElem ; ++i) {
+        unsigned Idx = IsLE ?(NumElem - 1 - i) : i;
+        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
+        SDValue Val = St->getValue();
+        StoreInt<<=ElementSizeBytes*8;
+        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
+          StoreInt|=C->getAPIntValue().zext(StoreBW);
+        } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
+          StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
+        } else {
+          assert(false && "Invalid constant element type");
+        }
       }
+
+      // Create the new Load and Store operations.
+      EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
+      StoredVal = DAG.getConstant(StoreInt, StoreTy);
     }
 
-    // Create the new Load and Store operations.
-    EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
-    SDValue WideInt = DAG.getConstant(StoreInt, StoreTy);
-    SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, WideInt,
+    SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
                                     FirstInChain->getBasePtr(),
                                     FirstInChain->getPointerInfo(),
                                     false, false,
@@ -8027,7 +8050,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
   }
 
   // Only perform this optimization before the types are legal, because we
-  // don't want to perform this optimization multiple times.
+  // don't want to perform this optimization on every DAGCombine invocation.
   if (!LegalTypes && MergeConsecutiveStores(ST))
     return SDValue(N, 0);
 
diff --git a/test/CodeGen/X86/MergeConsecutiveStores.ll b/test/CodeGen/X86/MergeConsecutiveStores.ll
index 79f8ee54a2a..64825bac971 100644
--- a/test/CodeGen/X86/MergeConsecutiveStores.ll
+++ b/test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s
+; RUN: llc -march=x86-64 -mcpu=corei7 -mattr=+avx < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
@@ -6,7 +6,6 @@ target triple = "x86_64-apple-macosx10.8.0"
 %struct.A = type { i8, i8, i8, i8, i8, i8, i8, i8 }
 %struct.B = type { i32, i32, i32, i32, i32, i32, i32, i32 }
 
-; Move all of the constants using a single vector store.
 ; CHECK: merge_const_store
 ; save 1,2,3 ... as one big integer.
 ; CHECK: movabsq $578437695752307201
@@ -41,6 +40,40 @@ define void @merge_const_store(i32 %count, %struct.A* nocapture %p) nounwind uwt
   ret void
 }
 
+; Move the constants using a single vector store.
+; CHECK: merge_const_store_vec
+; CHECK: vmovups %ymm0, (%rsi)
+; CHECK: ret
+define void @merge_const_store_vec(i32 %count, %struct.B* nocapture %p) nounwind uwtable noinline ssp {
+  %1 = icmp sgt i32 %count, 0
+  br i1 %1, label %.lr.ph, label %._crit_edge
+.lr.ph:
+  %i.02 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
+  %.01 = phi %struct.B* [ %11, %.lr.ph ], [ %p, %0 ]
+  %2 = getelementptr inbounds %struct.B* %.01, i64 0, i32 0
+  store i32 0, i32* %2, align 4
+  %3 = getelementptr inbounds %struct.B* %.01, i64 0, i32 1
+  store i32 0, i32* %3, align 4
+  %4 = getelementptr inbounds %struct.B* %.01, i64 0, i32 2
+  store i32 0, i32* %4, align 4
+  %5 = getelementptr inbounds %struct.B* %.01, i64 0, i32 3
+  store i32 0, i32* %5, align 4
+  %6 = getelementptr inbounds %struct.B* %.01, i64 0, i32 4
+  store i32 0, i32* %6, align 4
+  %7 = getelementptr inbounds %struct.B* %.01, i64 0, i32 5
+  store i32 0, i32* %7, align 4
+  %8 = getelementptr inbounds %struct.B* %.01, i64 0, i32 6
+  store i32 0, i32* %8, align 4
+  %9 = getelementptr inbounds %struct.B* %.01, i64 0, i32 7
+  store i32 0, i32* %9, align 4
+  %10 = add nsw i32 %i.02, 1
+  %11 = getelementptr inbounds %struct.B* %.01, i64 1
+  %exitcond = icmp eq i32 %10, %count
+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
+._crit_edge:
+  ret void
+}
+
 ; Move the first 4 constants as a single vector. Move the rest as scalars.
 ; CHECK: merge_nonconst_store
 ; CHECK: movl $67305985
@@ -223,7 +256,6 @@ block4: ; preds = %4, %.lr.ph
   ret void
 }
 
-
 ;CHECK: merge_loads_no_align
 ; load:
 ;CHECK: movl