mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-03 13:31:05 +00:00
merge consecutive stores of extracted vector elements (PR21711)
This is a 2nd try at the same optimization as http://reviews.llvm.org/D6698. That patch was checked in at r224611, but reverted at r225031 because it caused a failure outside of the regression tests. The cause of the crash was not recognizing consecutive stores that have mixed source values (loads and vector element extracts), so this patch adds a check to bail out if any store value is not coming from a vector element extract. This patch also refactors the shared logic of the constant source and vector extracted elements source cases into a helper function. Differential Revision: http://reviews.llvm.org/D6850 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226845 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
5196540da1
commit
05d5e213c4
@ -363,6 +363,28 @@ namespace {
|
|||||||
/// chain (aliasing node.)
|
/// chain (aliasing node.)
|
||||||
SDValue FindBetterChain(SDNode *N, SDValue Chain);
|
SDValue FindBetterChain(SDNode *N, SDValue Chain);
|
||||||
|
|
||||||
|
/// Holds a pointer to an LSBaseSDNode as well as information on where it
|
||||||
|
/// is located in a sequence of memory operations connected by a chain.
|
||||||
|
struct MemOpLink {
|
||||||
|
MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
|
||||||
|
MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
|
||||||
|
// Ptr to the mem node.
|
||||||
|
LSBaseSDNode *MemNode;
|
||||||
|
// Offset from the base ptr.
|
||||||
|
int64_t OffsetFromBase;
|
||||||
|
// What is the sequence number of this mem node.
|
||||||
|
// Lowest mem operand in the DAG starts at zero.
|
||||||
|
unsigned SequenceNum;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// This is a helper function for MergeConsecutiveStores. When the source
|
||||||
|
/// elements of the consecutive stores are all constants or all extracted
|
||||||
|
/// vector elements, try to merge them into one larger store.
|
||||||
|
/// \return True if a merged store was created.
|
||||||
|
bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
|
||||||
|
EVT MemVT, unsigned NumElem,
|
||||||
|
bool IsConstantSrc, bool UseVector);
|
||||||
|
|
||||||
/// Merge consecutive store operations into a wide store.
|
/// Merge consecutive store operations into a wide store.
|
||||||
/// This optimization uses wide integers or vectors when possible.
|
/// This optimization uses wide integers or vectors when possible.
|
||||||
/// \return True if some memory operations were changed.
|
/// \return True if some memory operations were changed.
|
||||||
@ -9706,19 +9728,116 @@ struct BaseIndexOffset {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Holds a pointer to an LSBaseSDNode as well as information on where it
|
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
|
||||||
/// is located in a sequence of memory operations connected by a chain.
|
SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
|
||||||
struct MemOpLink {
|
unsigned NumElem, bool IsConstantSrc, bool UseVector) {
|
||||||
MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
|
// Make sure we have something to merge.
|
||||||
MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
|
if (NumElem < 2)
|
||||||
// Ptr to the mem node.
|
return false;
|
||||||
LSBaseSDNode *MemNode;
|
|
||||||
// Offset from the base ptr.
|
int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
|
||||||
int64_t OffsetFromBase;
|
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
|
||||||
// What is the sequence number of this mem node.
|
unsigned EarliestNodeUsed = 0;
|
||||||
// Lowest mem operand in the DAG starts at zero.
|
|
||||||
unsigned SequenceNum;
|
for (unsigned i=0; i < NumElem; ++i) {
|
||||||
};
|
// Find a chain for the new wide-store operand. Notice that some
|
||||||
|
// of the store nodes that we found may not be selected for inclusion
|
||||||
|
// in the wide store. The chain we use needs to be the chain of the
|
||||||
|
// earliest store node which is *used* and replaced by the wide store.
|
||||||
|
if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
|
||||||
|
EarliestNodeUsed = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The earliest Node in the DAG.
|
||||||
|
LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
|
||||||
|
SDLoc DL(StoreNodes[0].MemNode);
|
||||||
|
|
||||||
|
SDValue StoredVal;
|
||||||
|
if (UseVector) {
|
||||||
|
// Find a legal type for the vector store.
|
||||||
|
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
|
||||||
|
assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
|
||||||
|
if (IsConstantSrc) {
|
||||||
|
// A vector store with a constant source implies that the constant is
|
||||||
|
// zero; we only handle merging stores of constant zeros because the zero
|
||||||
|
// can be materialized without a load.
|
||||||
|
// It may be beneficial to loosen this restriction to allow non-zero
|
||||||
|
// store merging.
|
||||||
|
StoredVal = DAG.getConstant(0, Ty);
|
||||||
|
} else {
|
||||||
|
SmallVector<SDValue, 8> Ops;
|
||||||
|
for (unsigned i = 0; i < NumElem ; ++i) {
|
||||||
|
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
|
||||||
|
SDValue Val = St->getValue();
|
||||||
|
// All of the operands of a BUILD_VECTOR must have the same type.
|
||||||
|
if (Val.getValueType() != MemVT)
|
||||||
|
return false;
|
||||||
|
Ops.push_back(Val);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build the extracted vector elements back into a vector.
|
||||||
|
StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// We should always use a vector store when merging extracted vector
|
||||||
|
// elements, so this path implies a store of constants.
|
||||||
|
assert(IsConstantSrc && "Merged vector elements should use vector store");
|
||||||
|
|
||||||
|
unsigned StoreBW = NumElem * ElementSizeBytes * 8;
|
||||||
|
APInt StoreInt(StoreBW, 0);
|
||||||
|
|
||||||
|
// Construct a single integer constant which is made of the smaller
|
||||||
|
// constant inputs.
|
||||||
|
bool IsLE = TLI.isLittleEndian();
|
||||||
|
for (unsigned i = 0; i < NumElem ; ++i) {
|
||||||
|
unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
|
||||||
|
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
|
||||||
|
SDValue Val = St->getValue();
|
||||||
|
StoreInt <<= ElementSizeBytes*8;
|
||||||
|
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
|
||||||
|
StoreInt |= C->getAPIntValue().zext(StoreBW);
|
||||||
|
} else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
|
||||||
|
StoreInt |= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
|
||||||
|
} else {
|
||||||
|
llvm_unreachable("Invalid constant element type");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create the new Load and Store operations.
|
||||||
|
EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
|
||||||
|
StoredVal = DAG.getConstant(StoreInt, StoreTy);
|
||||||
|
}
|
||||||
|
|
||||||
|
SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
|
||||||
|
FirstInChain->getBasePtr(),
|
||||||
|
FirstInChain->getPointerInfo(),
|
||||||
|
false, false,
|
||||||
|
FirstInChain->getAlignment());
|
||||||
|
|
||||||
|
// Replace the first store with the new store
|
||||||
|
CombineTo(EarliestOp, NewStore);
|
||||||
|
// Erase all other stores.
|
||||||
|
for (unsigned i = 0; i < NumElem ; ++i) {
|
||||||
|
if (StoreNodes[i].MemNode == EarliestOp)
|
||||||
|
continue;
|
||||||
|
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
|
||||||
|
// ReplaceAllUsesWith will replace all uses that existed when it was
|
||||||
|
// called, but graph optimizations may cause new ones to appear. For
|
||||||
|
// example, the case in pr14333 looks like
|
||||||
|
//
|
||||||
|
// St's chain -> St -> another store -> X
|
||||||
|
//
|
||||||
|
// And the only difference from St to the other store is the chain.
|
||||||
|
// When we change it's chain to be St's chain they become identical,
|
||||||
|
// get CSEed and the net result is that X is now a use of St.
|
||||||
|
// Since we know that St is redundant, just iterate.
|
||||||
|
while (!St->use_empty())
|
||||||
|
DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
|
||||||
|
deleteAndRecombine(St);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
|
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
|
||||||
EVT MemVT = St->getMemoryVT();
|
EVT MemVT = St->getMemoryVT();
|
||||||
@ -9731,11 +9850,14 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Perform an early exit check. Do not bother looking at stored values that
|
// Perform an early exit check. Do not bother looking at stored values that
|
||||||
// are not constants or loads.
|
// are not constants, loads, or extracted vector elements.
|
||||||
SDValue StoredVal = St->getValue();
|
SDValue StoredVal = St->getValue();
|
||||||
bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
|
bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
|
||||||
if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
|
bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
|
||||||
!IsLoadSrc)
|
isa<ConstantFPSDNode>(StoredVal);
|
||||||
|
bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
|
||||||
|
|
||||||
|
if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Only look at ends of store sequences.
|
// Only look at ends of store sequences.
|
||||||
@ -9877,7 +9999,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
|
|||||||
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
|
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
|
||||||
|
|
||||||
// Store the constants into memory as one consecutive store.
|
// Store the constants into memory as one consecutive store.
|
||||||
if (!IsLoadSrc) {
|
if (IsConstantSrc) {
|
||||||
unsigned LastLegalType = 0;
|
unsigned LastLegalType = 0;
|
||||||
unsigned LastLegalVectorType = 0;
|
unsigned LastLegalVectorType = 0;
|
||||||
bool NonZero = false;
|
bool NonZero = false;
|
||||||
@ -9926,85 +10048,33 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
|
|||||||
bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
|
bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
|
||||||
unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
|
unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
|
||||||
|
|
||||||
// Make sure we have something to merge.
|
return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
|
||||||
if (NumElem < 2)
|
true, UseVector);
|
||||||
return false;
|
}
|
||||||
|
|
||||||
unsigned EarliestNodeUsed = 0;
|
// When extracting multiple vector elements, try to store them
|
||||||
for (unsigned i=0; i < NumElem; ++i) {
|
// in one vector store rather than a sequence of scalar stores.
|
||||||
// Find a chain for the new wide-store operand. Notice that some
|
if (IsExtractVecEltSrc) {
|
||||||
// of the store nodes that we found may not be selected for inclusion
|
unsigned NumElem = 0;
|
||||||
// in the wide store. The chain we use needs to be the chain of the
|
for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
|
||||||
// earliest store node which is *used* and replaced by the wide store.
|
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
|
||||||
if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
|
SDValue StoredVal = St->getValue();
|
||||||
EarliestNodeUsed = i;
|
// This restriction could be loosened.
|
||||||
}
|
// Bail out if any stored values are not elements extracted from a vector.
|
||||||
|
// It should be possible to handle mixed sources, but load sources need
|
||||||
|
// more careful handling (see the block of code below that handles
|
||||||
|
// consecutive loads).
|
||||||
|
if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
|
||||||
|
return false;
|
||||||
|
|
||||||
// The earliest Node in the DAG.
|
|
||||||
LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
|
|
||||||
SDLoc DL(StoreNodes[0].MemNode);
|
|
||||||
|
|
||||||
SDValue StoredVal;
|
|
||||||
if (UseVector) {
|
|
||||||
// Find a legal type for the vector store.
|
// Find a legal type for the vector store.
|
||||||
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
|
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
|
||||||
assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
|
if (TLI.isTypeLegal(Ty))
|
||||||
StoredVal = DAG.getConstant(0, Ty);
|
NumElem = i + 1;
|
||||||
} else {
|
|
||||||
unsigned StoreBW = NumElem * ElementSizeBytes * 8;
|
|
||||||
APInt StoreInt(StoreBW, 0);
|
|
||||||
|
|
||||||
// Construct a single integer constant which is made of the smaller
|
|
||||||
// constant inputs.
|
|
||||||
bool IsLE = TLI.isLittleEndian();
|
|
||||||
for (unsigned i = 0; i < NumElem ; ++i) {
|
|
||||||
unsigned Idx = IsLE ?(NumElem - 1 - i) : i;
|
|
||||||
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
|
|
||||||
SDValue Val = St->getValue();
|
|
||||||
StoreInt<<=ElementSizeBytes*8;
|
|
||||||
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
|
|
||||||
StoreInt|=C->getAPIntValue().zext(StoreBW);
|
|
||||||
} else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
|
|
||||||
StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
|
|
||||||
} else {
|
|
||||||
llvm_unreachable("Invalid constant element type");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create the new Load and Store operations.
|
|
||||||
EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
|
|
||||||
StoredVal = DAG.getConstant(StoreInt, StoreTy);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
|
return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
|
||||||
FirstInChain->getBasePtr(),
|
false, true);
|
||||||
FirstInChain->getPointerInfo(),
|
|
||||||
false, false,
|
|
||||||
FirstInChain->getAlignment());
|
|
||||||
|
|
||||||
// Replace the first store with the new store
|
|
||||||
CombineTo(EarliestOp, NewStore);
|
|
||||||
// Erase all other stores.
|
|
||||||
for (unsigned i = 0; i < NumElem ; ++i) {
|
|
||||||
if (StoreNodes[i].MemNode == EarliestOp)
|
|
||||||
continue;
|
|
||||||
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
|
|
||||||
// ReplaceAllUsesWith will replace all uses that existed when it was
|
|
||||||
// called, but graph optimizations may cause new ones to appear. For
|
|
||||||
// example, the case in pr14333 looks like
|
|
||||||
//
|
|
||||||
// St's chain -> St -> another store -> X
|
|
||||||
//
|
|
||||||
// And the only difference from St to the other store is the chain.
|
|
||||||
// When we change it's chain to be St's chain they become identical,
|
|
||||||
// get CSEed and the net result is that X is now a use of St.
|
|
||||||
// Since we know that St is redundant, just iterate.
|
|
||||||
while (!St->use_empty())
|
|
||||||
DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
|
|
||||||
deleteAndRecombine(St);
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Below we handle the case of multiple consecutive stores that
|
// Below we handle the case of multiple consecutive stores that
|
||||||
|
@ -434,3 +434,62 @@ define void @loadStoreBaseIndexOffsetSextNoSex(i8* %a, i8* %b, i8* %c, i32 %n) {
|
|||||||
; <label>:14
|
; <label>:14
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; PR21711 ( http://llvm.org/bugs/show_bug.cgi?id=21711 )
|
||||||
|
define void @merge_vec_element_store(<8 x float> %v, float* %ptr) {
|
||||||
|
%vecext0 = extractelement <8 x float> %v, i32 0
|
||||||
|
%vecext1 = extractelement <8 x float> %v, i32 1
|
||||||
|
%vecext2 = extractelement <8 x float> %v, i32 2
|
||||||
|
%vecext3 = extractelement <8 x float> %v, i32 3
|
||||||
|
%vecext4 = extractelement <8 x float> %v, i32 4
|
||||||
|
%vecext5 = extractelement <8 x float> %v, i32 5
|
||||||
|
%vecext6 = extractelement <8 x float> %v, i32 6
|
||||||
|
%vecext7 = extractelement <8 x float> %v, i32 7
|
||||||
|
%arrayidx1 = getelementptr inbounds float* %ptr, i64 1
|
||||||
|
%arrayidx2 = getelementptr inbounds float* %ptr, i64 2
|
||||||
|
%arrayidx3 = getelementptr inbounds float* %ptr, i64 3
|
||||||
|
%arrayidx4 = getelementptr inbounds float* %ptr, i64 4
|
||||||
|
%arrayidx5 = getelementptr inbounds float* %ptr, i64 5
|
||||||
|
%arrayidx6 = getelementptr inbounds float* %ptr, i64 6
|
||||||
|
%arrayidx7 = getelementptr inbounds float* %ptr, i64 7
|
||||||
|
store float %vecext0, float* %ptr, align 4
|
||||||
|
store float %vecext1, float* %arrayidx1, align 4
|
||||||
|
store float %vecext2, float* %arrayidx2, align 4
|
||||||
|
store float %vecext3, float* %arrayidx3, align 4
|
||||||
|
store float %vecext4, float* %arrayidx4, align 4
|
||||||
|
store float %vecext5, float* %arrayidx5, align 4
|
||||||
|
store float %vecext6, float* %arrayidx6, align 4
|
||||||
|
store float %vecext7, float* %arrayidx7, align 4
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; CHECK-LABEL: merge_vec_element_store
|
||||||
|
; CHECK: vmovups
|
||||||
|
; CHECK-NEXT: vzeroupper
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
}
|
||||||
|
|
||||||
|
; This is a minimized test based on real code that was failing.
|
||||||
|
; We could merge stores (and loads) like this...
|
||||||
|
|
||||||
|
define void @merge_vec_element_and_scalar_load([6 x i64]* %array) {
|
||||||
|
%idx0 = getelementptr inbounds [6 x i64]* %array, i64 0, i64 0
|
||||||
|
%idx1 = getelementptr inbounds [6 x i64]* %array, i64 0, i64 1
|
||||||
|
%idx4 = getelementptr inbounds [6 x i64]* %array, i64 0, i64 4
|
||||||
|
%idx5 = getelementptr inbounds [6 x i64]* %array, i64 0, i64 5
|
||||||
|
|
||||||
|
%a0 = load i64* %idx0, align 8
|
||||||
|
store i64 %a0, i64* %idx4, align 8
|
||||||
|
|
||||||
|
%b = bitcast i64* %idx1 to <2 x i64>*
|
||||||
|
%v = load <2 x i64>* %b, align 8
|
||||||
|
%a1 = extractelement <2 x i64> %v, i32 0
|
||||||
|
store i64 %a1, i64* %idx5, align 8
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; CHECK-LABEL: merge_vec_element_and_scalar_load
|
||||||
|
; CHECK: movq (%rdi), %rax
|
||||||
|
; CHECK-NEXT: movq %rax, 32(%rdi)
|
||||||
|
; CHECK-NEXT: movq 8(%rdi), %rax
|
||||||
|
; CHECK-NEXT: movq %rax, 40(%rdi)
|
||||||
|
; CHECK-NEXT: retq
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user