Fix sdisel memcpy, memset, memmove lowering:

1. Makes it possible to lower with floating point loads and stores.
2. Avoid unaligned loads / stores unless it's fast.
3. Fix some memcpy lowering logic bug related to when to optimize a load from
   constant string into a constant.
4. Adjust x86 memcpy lowering threshold to make it more sane.
5. Fix x86 target hook so it uses vector and floating point memory ops more
   effectively.

rdar://7774704

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100090 91177308-0d34-0410-b5e6-96231b3b80d8
commit 255f20f7f7
parent 48c58bb861
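The core of the change: the getOptimalMemOpType target hook no longer receives (Size, Align, isSrcConst, isSrcStr) but (Size, DstAlign, SrcAlign), where an alignment of zero is a sentinel. Before the diff itself, here is a small, self-contained C++ model of the lowering strategy the message describes; every name in it (planMemcpy, MemOp, OpKind) is invented for illustration and none of it is LLVM API:

    #include <cstdint>
    #include <vector>

    enum class OpKind { Vec16, F64, I32, I16, I8 };

    struct MemOp {
      OpKind Kind;      // width/class of one load+store pair
      uint64_t Offset;  // byte offset it covers
    };

    std::vector<MemOp> planMemcpy(uint64_t Size, unsigned DstAlign,
                                  unsigned SrcAlign, bool UnalignedIsFast) {
      std::vector<MemOp> Plan;
      uint64_t Off = 0;
      // Alignment 0 is the commit's sentinel: DstAlign == 0 means the
      // destination can be realigned, SrcAlign == 0 means nothing is loaded.
      bool Aligned16 = (DstAlign == 0 || DstAlign >= 16) &&
                       (SrcAlign == 0 || SrcAlign >= 16);
      // Point 1: wide vector / FP ops are allowed. Point 2: unaligned wide
      // ops only when the target says they are fast.
      while (Size - Off >= 16 && (UnalignedIsFast || Aligned16)) {
        Plan.push_back({OpKind::Vec16, Off});
        Off += 16;
      }
      while (Size - Off >= 8) { Plan.push_back({OpKind::F64, Off}); Off += 8; }
      while (Size - Off >= 4) { Plan.push_back({OpKind::I32, Off}); Off += 4; }
      while (Size - Off >= 2) { Plan.push_back({OpKind::I16, Off}); Off += 2; }
      while (Size - Off >= 1) { Plan.push_back({OpKind::I8,  Off}); Off += 1; }
      return Plan;
    }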
@@ -522,7 +522,7 @@ public:
   /// counterpart (e.g. structs), otherwise it will assert.
   EVT getValueType(const Type *Ty, bool AllowUnknown = false) const {
     EVT VT = EVT::getEVT(Ty, AllowUnknown);
-    return VT == MVT:: iPTR ? PointerTy : VT;
+    return VT == MVT::iPTR ? PointerTy : VT;
   }

   /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
@@ -636,8 +636,8 @@ public:
   /// and store operations as a result of memset, memcpy, and memmove lowering.
   /// It returns EVT::Other if SelectionDAG should be responsible for
   /// determining it.
-  virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
-                                  bool isSrcConst, bool isSrcStr,
+  virtual EVT getOptimalMemOpType(uint64_t Size,
+                                  unsigned DstAlign, unsigned SrcAlign,
                                   SelectionDAG &DAG) const {
     return MVT::Other;
   }
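For orientation, a standalone sketch of what a target override of the new contract might look like, with the hook's types reduced to a plain enum; the MemVT names and the 16-byte heuristic are assumptions of this sketch, not any in-tree target's behavior:

    #include <cstdint>

    // "Other" stands in for MVT::Other, i.e. "let SelectionDAG decide".
    enum class MemVT { Other, V4I32, F64 };

    MemVT getOptimalMemOpTypeSketch(uint64_t Size, unsigned DstAlign,
                                    unsigned SrcAlign) {
      // DstAlign == 0: destination alignment is still changeable.
      // SrcAlign == 0: no load is performed (memset / constant source).
      bool Dst16 = DstAlign == 0 || DstAlign >= 16;
      bool Src16 = SrcAlign == 0 || SrcAlign >= 16;
      if (Size >= 16 && Dst16 && Src16)
        return MemVT::V4I32;   // one aligned vector op per 16 bytes
      if (Size >= 8)
        return MemVT::F64;     // floating point ops are now allowed too
      return MemVT::Other;     // defer to the generic fallback
    }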
@@ -5022,18 +5022,6 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
   SDValue Chain = LD->getChain();
   SDValue Ptr = LD->getBasePtr();

-  // Try to infer better alignment information than the load already has.
-  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
-    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
-      if (Align > LD->getAlignment())
-        return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
-                              LD->getValueType(0),
-                              Chain, Ptr, LD->getSrcValue(),
-                              LD->getSrcValueOffset(), LD->getMemoryVT(),
-                              LD->isVolatile(), LD->isNonTemporal(), Align);
-    }
-  }
-
   // If load is not volatile and there are no uses of the loaded value (and
   // the updated indexed value in case of indexed loads), change uses of the
   // chain value into uses of the chain input (i.e. delete the dead load).
@@ -5099,6 +5087,18 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
     }
   }

+  // Try to infer better alignment information than the load already has.
+  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
+    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+      if (Align > LD->getAlignment())
+        return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
+                              LD->getValueType(0),
+                              Chain, Ptr, LD->getSrcValue(),
+                              LD->getSrcValueOffset(), LD->getMemoryVT(),
+                              LD->isVolatile(), LD->isNonTemporal(), Align);
+    }
+  }
+
   if (CombinerAA) {
     // Walk up chain skipping non-aliasing memory nodes.
     SDValue BetterChain = FindBetterChain(N, Chain);
@@ -5250,17 +5250,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
   SDValue Value = ST->getValue();
   SDValue Ptr = ST->getBasePtr();

-  // Try to infer better alignment information than the store already has.
-  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
-    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
-      if (Align > ST->getAlignment())
-        return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
-                                 Ptr, ST->getSrcValue(),
-                                 ST->getSrcValueOffset(), ST->getMemoryVT(),
-                                 ST->isVolatile(), ST->isNonTemporal(), Align);
-    }
-  }
-
   // If this is a store of a bit convert, store the input value if the
   // resultant store does not need a higher alignment than the original.
   if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
@@ -5351,6 +5340,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     }
   }

+  // Try to infer better alignment information than the store already has.
+  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
+    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+      if (Align > ST->getAlignment())
+        return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
+                                 Ptr, ST->getSrcValue(),
+                                 ST->getSrcValueOffset(), ST->getMemoryVT(),
+                                 ST->isVolatile(), ST->isNonTemporal(), Align);
+    }
+  }
+
   if (CombinerAA) {
     // Walk up chain skipping non-aliasing memory nodes.
     SDValue BetterChain = FindBetterChain(N, Chain);
@@ -3132,11 +3132,17 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
   if (Str.empty()) {
     if (VT.isInteger())
       return DAG.getConstant(0, VT);
-    unsigned NumElts = VT.getVectorNumElements();
-    MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
-    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
-                       DAG.getConstant(0,
-                       EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts)));
+    else if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
+             VT.getSimpleVT().SimpleTy == MVT::f64)
+      return DAG.getConstantFP(0.0, VT);
+    else if (VT.isVector()) {
+      unsigned NumElts = VT.getVectorNumElements();
+      MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+      return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+                         DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(),
+                                                             EltVT, NumElts)));
+    } else
+      llvm_unreachable("Expected type!");
   }

   assert(!VT.isVector() && "Can't handle vector type here!");
@@ -3184,51 +3190,33 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) {
   return false;
 }

-/// MeetsMaxMemopRequirement - Determines if the number of memory ops required
-/// to replace the memset / memcpy is below the threshold. It also returns the
-/// types of the sequence of memory ops to perform memset / memcpy.
-static
-bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps,
-                              SDValue Dst, SDValue Src,
-                              unsigned Limit, uint64_t Size, unsigned &Align,
-                              std::string &Str, bool &isSrcStr,
-                              SelectionDAG &DAG,
-                              const TargetLowering &TLI) {
-  isSrcStr = isMemSrcFromString(Src, Str);
-  bool isSrcConst = isa<ConstantSDNode>(Src);
-  EVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG);
-  bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(VT);
-  if (VT != MVT::Other) {
-    const Type *Ty = VT.getTypeForEVT(*DAG.getContext());
-    unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
-    // If source is a string constant, this will require an unaligned load.
-    if (NewAlign > Align && (isSrcConst || AllowUnalign)) {
-      if (Dst.getOpcode() != ISD::FrameIndex) {
-        // Can't change destination alignment. It requires a unaligned store.
-        if (AllowUnalign)
-          VT = MVT::Other;
-      } else {
-        int FI = cast<FrameIndexSDNode>(Dst)->getIndex();
-        MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
-        if (MFI->isFixedObjectIndex(FI)) {
-          // Can't change destination alignment. It requires a unaligned store.
-          if (AllowUnalign)
-            VT = MVT::Other;
-        } else {
-          // Give the stack frame object a larger alignment if needed.
-          if (MFI->getObjectAlignment(FI) < NewAlign)
-            MFI->setObjectAlignment(FI, NewAlign);
-          Align = NewAlign;
-        }
-      }
-    }
-  }
+/// FindOptimalMemOpLowering - Determines the optimial series memory ops
+/// to replace the memset / memcpy. Return true if the number of memory ops
+/// is below the threshold. It returns the types of the sequence of
+/// memory ops to perform memset / memcpy by reference.
+static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
+                                     SDValue Dst, SDValue Src,
+                                     unsigned Limit, uint64_t Size,
+                                     unsigned DstAlign, unsigned SrcAlign,
+                                     SelectionDAG &DAG,
+                                     const TargetLowering &TLI) {
+  assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
+         "Expecting memcpy / memset source to meet alignment requirement!");
+  // If 'SrcAlign' is zero, that means the memory operation does not need load
+  // the value, i.e. memset or memcpy from constant string. Otherwise, it's
+  // the inferred alignment of the source. 'DstAlign', on the other hand, is the
+  // specified alignment of the memory operation. If it is zero, that means
+  // it's possible to change the alignment of the destination.
+  EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, DAG);
+
   if (VT == MVT::Other) {
-    if (TLI.allowsUnalignedMemoryAccesses(MVT::i64)) {
+    VT = TLI.getPointerTy();
+    const Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+    if (DstAlign >= TLI.getTargetData()->getABITypeAlignment(Ty) ||
+        TLI.allowsUnalignedMemoryAccesses(VT)) {
       VT = MVT::i64;
     } else {
-      switch (Align & 7) {
+      switch (DstAlign & 7) {
       case 0: VT = MVT::i64; break;
       case 4: VT = MVT::i32; break;
       case 2: VT = MVT::i16; break;
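The comment block above defines the hook's new calling convention: SrcAlign == 0 means the operation performs no loads (memset, or memcpy from a constant string), and DstAlign == 0 means the destination's alignment may still be raised. A standalone model of the fallback type choice when the target returns MVT::Other (simplified; the real code sizes the widest case from the pointer type):

    #include <cstdint>

    enum class VTy { I64, I32, I16, I8 };

    // DstAlign == 0 means the destination may be realigned, so the widest
    // type is safe; the switch mirrors the (DstAlign & 7) dispatch above.
    VTy pickFallbackType(unsigned DstAlign, bool UnalignedIsAllowed) {
      if (DstAlign == 0 || UnalignedIsAllowed)
        return VTy::I64;
      switch (DstAlign & 7) {
      case 0:  return VTy::I64;  // 8-byte aligned
      case 4:  return VTy::I32;
      case 2:  return VTy::I16;
      default: return VTy::I8;   // odd or 1-byte alignment
      }
    }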
@@ -3250,7 +3238,7 @@ bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps,
   unsigned VTSize = VT.getSizeInBits() / 8;
   while (VTSize > Size) {
     // For now, only use non-vector load / store's for the left-over pieces.
-    if (VT.isVector()) {
+    if (VT.isVector() || VT.isFloatingPoint()) {
       VT = MVT::i64;
       while (!TLI.isTypeLegal(VT))
         VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
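A worked example of the left-over rule: once the remaining byte count is smaller than the optimal type, vector and (now also) floating point types are abandoned and an integer type is shrunk until it fits. A self-contained sketch of that decomposition (splitCopy is invented for illustration and works in byte widths rather than MVTs):

    #include <cstdint>
    #include <vector>

    std::vector<unsigned> splitCopy(uint64_t Size, unsigned OptimalBytes) {
      std::vector<unsigned> Widths;
      unsigned W = OptimalBytes;  // e.g. 16 for v4i32
      while (Size > 0) {
        while (W > Size)
          W /= 2;                 // i64 -> i32 -> i16 -> i8
        Widths.push_back(W);
        Size -= W;
      }
      return Widths;
    }

Calling splitCopy(25, 16) yields {16, 8, 1}: one 16-byte vector op, one 8-byte integer op for the left-over, and a final byte.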
@@ -3286,15 +3274,33 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
   uint64_t Limit = -1ULL;
   if (!AlwaysInline)
     Limit = TLI.getMaxStoresPerMemcpy();
-  unsigned DstAlign = Align; // Destination alignment can change.
+  bool DstAlignCanChange = false;
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+  if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+    DstAlignCanChange = true;
+  unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+  if (Align > SrcAlign)
+    SrcAlign = Align;
   std::string Str;
-  bool CopyFromStr;
-  if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
-                                Str, CopyFromStr, DAG, TLI))
+  bool CopyFromStr = isMemSrcFromString(Src, Str);
+  bool isZeroStr = CopyFromStr && Str.empty();
+  if (!FindOptimalMemOpLowering(MemOps, Dst, Src, Limit, Size,
+                                (DstAlignCanChange ? 0 : Align),
+                                (isZeroStr ? 0 : SrcAlign), DAG, TLI))
     return SDValue();

-  bool isZeroStr = CopyFromStr && Str.empty();
+  if (DstAlignCanChange) {
+    const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+    unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+    if (NewAlign > Align) {
+      // Give the stack frame object a larger alignment if needed.
+      if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+        MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+      Align = NewAlign;
+    }
+  }
+
   SmallVector<SDValue, 8> OutChains;
   unsigned NumMemOps = MemOps.size();
   uint64_t SrcOff = 0, DstOff = 0;
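Note how this caller encodes the two sentinels when invoking FindOptimalMemOpLowering: 0 for DstAlign when the destination is a non-fixed stack object (its alignment can simply be raised afterwards, as the added block does), and 0 for SrcAlign when the source is an all-zero constant string, since that lowers like memset with no loads. Schematically (helper names are mine, not LLVM's):

    #include <algorithm>

    unsigned dstAlignArg(bool DstAlignCanChange, unsigned Align) {
      return DstAlignCanChange ? 0 : Align;  // 0: stack object realignable
    }

    unsigned srcAlignArg(bool IsZeroStr, unsigned SrcAlign) {
      return IsZeroStr ? 0 : SrcAlign;       // 0: no loads needed
    }

    // After the types are chosen, a non-fixed frame object is promoted to
    // the first type's ABI alignment, mirroring the block above.
    unsigned bumpFrameAlign(unsigned CurAlign, unsigned ABIAlignOfFirstType) {
      return std::max(CurAlign, ABIAlignOfFirstType);
    }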
@@ -3303,16 +3309,17 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
     unsigned VTSize = VT.getSizeInBits() / 8;
     SDValue Value, Store;

-    if (CopyFromStr && (isZeroStr || !VT.isVector())) {
+    if (CopyFromStr &&
+        (isZeroStr || (VT.isInteger() && !VT.isVector()))) {
       // It's unlikely a store of a vector immediate can be done in a single
       // instruction. It would require a load from a constantpool first.
-      // We also handle store a vector with all zero's.
+      // We only handle zero vectors here.
       // FIXME: Handle other cases where store of vector immediate is done in
       // a single instruction.
       Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
       Store = DAG.getStore(Chain, dl, Value,
                            getMemBasePlusOffset(Dst, DstOff, DAG),
-                           DstSV, DstSVOff + DstOff, false, false, DstAlign);
+                           DstSV, DstSVOff + DstOff, false, false, Align);
     } else {
       // The type might not be legal for the target. This should only happen
       // if the type is smaller than a legal type, as on PPC, so the right
@@ -3323,11 +3330,12 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
       assert(NVT.bitsGE(VT));
       Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
                              getMemBasePlusOffset(Src, SrcOff, DAG),
-                             SrcSV, SrcSVOff + SrcOff, VT, false, false, Align);
+                             SrcSV, SrcSVOff + SrcOff, VT, false, false,
+                             MinAlign(SrcAlign, SrcOff));
       Store = DAG.getTruncStore(Chain, dl, Value,
                                 getMemBasePlusOffset(Dst, DstOff, DAG),
                                 DstSV, DstSVOff + DstOff, VT, false, false,
-                                DstAlign);
+                                Align);
     }
     OutChains.push_back(Store);
     SrcOff += VTSize;
@@ -3339,11 +3347,11 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
 }

 static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
-                                           SDValue Chain, SDValue Dst,
-                                           SDValue Src, uint64_t Size,
-                                           unsigned Align, bool AlwaysInline,
-                                           const Value *DstSV, uint64_t DstSVOff,
-                                           const Value *SrcSV, uint64_t SrcSVOff){
+                                        SDValue Chain, SDValue Dst,
+                                        SDValue Src, uint64_t Size,
+                                        unsigned Align,bool AlwaysInline,
+                                        const Value *DstSV, uint64_t DstSVOff,
+                                        const Value *SrcSV, uint64_t SrcSVOff) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();

   // Expand memmove to a series of load and store ops if the size operand falls
@@ -3352,15 +3360,32 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
   uint64_t Limit = -1ULL;
   if (!AlwaysInline)
     Limit = TLI.getMaxStoresPerMemmove();
-  unsigned DstAlign = Align; // Destination alignment can change.
-  std::string Str;
-  bool CopyFromStr;
-  if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
-                                Str, CopyFromStr, DAG, TLI))
+  bool DstAlignCanChange = false;
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+  if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+    DstAlignCanChange = true;
+  unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+  if (Align > SrcAlign)
+    SrcAlign = Align;
+
+  if (!FindOptimalMemOpLowering(MemOps, Dst, Src, Limit, Size,
+                                (DstAlignCanChange ? 0 : Align),
+                                SrcAlign, DAG, TLI))
     return SDValue();

-  uint64_t SrcOff = 0, DstOff = 0;
+  if (DstAlignCanChange) {
+    const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+    unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+    if (NewAlign > Align) {
+      // Give the stack frame object a larger alignment if needed.
+      if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+        MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+      Align = NewAlign;
+    }
+  }
+
+  uint64_t SrcOff = 0, DstOff = 0;
   SmallVector<SDValue, 8> LoadValues;
   SmallVector<SDValue, 8> LoadChains;
   SmallVector<SDValue, 8> OutChains;
@@ -3372,7 +3397,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,

     Value = DAG.getLoad(VT, dl, Chain,
                         getMemBasePlusOffset(Src, SrcOff, DAG),
-                        SrcSV, SrcSVOff + SrcOff, false, false, Align);
+                        SrcSV, SrcSVOff + SrcOff, false, false, SrcAlign);
     LoadValues.push_back(Value);
     LoadChains.push_back(Value.getValue(1));
     SrcOff += VTSize;
@@ -3387,7 +3412,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,

     Store = DAG.getStore(Chain, dl, LoadValues[i],
                          getMemBasePlusOffset(Dst, DstOff, DAG),
-                         DstSV, DstSVOff + DstOff, false, false, DstAlign);
+                         DstSV, DstSVOff + DstOff, false, false, Align);
     OutChains.push_back(Store);
     DstOff += VTSize;
   }
@@ -3397,24 +3422,38 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
 }

 static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
-                                  SDValue Chain, SDValue Dst,
-                                  SDValue Src, uint64_t Size,
-                                  unsigned Align,
-                                  const Value *DstSV, uint64_t DstSVOff) {
+                               SDValue Chain, SDValue Dst,
+                               SDValue Src, uint64_t Size,
+                               unsigned Align,
+                               const Value *DstSV, uint64_t DstSVOff) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();

   // Expand memset to a series of load/store ops if the size operand
   // falls below a certain threshold.
   std::vector<EVT> MemOps;
-  std::string Str;
-  bool CopyFromStr;
-  if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(),
-                                Size, Align, Str, CopyFromStr, DAG, TLI))
+  bool DstAlignCanChange = false;
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+  if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+    DstAlignCanChange = true;
+  if (!FindOptimalMemOpLowering(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(),
+                                Size, (DstAlignCanChange ? 0 : Align), 0,
+                                DAG, TLI))
     return SDValue();

+  if (DstAlignCanChange) {
+    const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+    unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+    if (NewAlign > Align) {
+      // Give the stack frame object a larger alignment if needed.
+      if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+        MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+      Align = NewAlign;
+    }
+  }
+
   SmallVector<SDValue, 8> OutChains;
   uint64_t DstOff = 0;

   unsigned NumMemOps = MemOps.size();
   for (unsigned i = 0; i < NumMemOps; i++) {
     EVT VT = MemOps[i];
@@ -3445,10 +3484,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
     if (ConstantSize->isNullValue())
       return Chain;

-    SDValue Result =
-      getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
-                              ConstantSize->getZExtValue(),
-                              Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+    SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+                                             ConstantSize->getZExtValue(),Align,
+                                             false, DstSV, DstSVOff, SrcSV, SrcSVOff);
     if (Result.getNode())
       return Result;
   }
@@ -6106,8 +6144,18 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
   // If this is a GlobalAddress + cst, return the alignment.
   GlobalValue *GV;
   int64_t GVOffset = 0;
-  if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset))
-    return MinAlign(GV->getAlignment(), GVOffset);
+  if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
+    // If GV has specified alignment, then use it. Otherwise, use the preferred
+    // alignment.
+    unsigned Align = GV->getAlignment();
+    if (!Align) {
+      if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
+        const TargetData *TD = TLI.getTargetData();
+        Align = TD->getPreferredAlignment(GVar);
+      }
+    }
+    return MinAlign(Align, GVOffset);
+  }

   // If this is a direct reference to a stack slot, use information about the
   // stack slot's alignment.
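The improved global-address case falls back to the preferred alignment when the global carries no explicit one, then clamps by the constant offset. A standalone model (minAlign here mirrors llvm::MinAlign, the largest power of two dividing both operands; treat the exact formula as a sketch):

    #include <cstdint>

    uint64_t minAlign(uint64_t A, uint64_t B) {
      uint64_t C = A | B;
      return C & (~C + 1);  // lowest set bit of A|B
    }

    unsigned inferGlobalAlign(unsigned ExplicitAlign, unsigned PreferredAlign,
                              int64_t Offset) {
      unsigned Align = ExplicitAlign ? ExplicitAlign : PreferredAlign;
      return (unsigned) minAlign(Align, (uint64_t) Offset);
    }

For example, a global preferred-aligned to 16 but addressed at offset 8 yields an inferred alignment of 8, which is exactly what lets the lowering pick 8-byte ops safely.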
@@ -5539,8 +5539,8 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
   return false;
 }

-EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
-                                           bool isSrcConst, bool isSrcStr,
+EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
+                                           unsigned DstAlign, unsigned SrcAlign,
                                            SelectionDAG &DAG) const {
   if (this->PPCSubTarget.isPPC64()) {
     return MVT::i64;
@@ -347,8 +347,8 @@ namespace llvm {

     virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;

-    virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
-                                    bool isSrcConst, bool isSrcStr,
+    virtual EVT getOptimalMemOpType(uint64_t Size,
+                                    unsigned DstAlign, unsigned SrcAlign,
                                     SelectionDAG &DAG) const;

     /// getFunctionAlignment - Return the Log2 alignment of this function.
@@ -1012,7 +1012,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   // FIXME: These should be based on subtarget info. Plus, the values should
   // be smaller when we are in optimizing for size mode.
   maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
-  maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores
+  maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
   maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
   setPrefLoopAlignment(16);
   benefitFromCodePlacementOpt = true;
@@ -1074,19 +1074,27 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
 /// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
 /// determining it.
 EVT
-X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
-                                       bool isSrcConst, bool isSrcStr,
+X86TargetLowering::getOptimalMemOpType(uint64_t Size,
+                                       unsigned DstAlign, unsigned SrcAlign,
                                        SelectionDAG &DAG) const {
   // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
   // linux. This is because the stack realignment code can't handle certain
   // cases like PR2962. This should be removed when PR2962 is fixed.
   const Function *F = DAG.getMachineFunction().getFunction();
-  bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
-  if (!NoImplicitFloatOps && Subtarget->getStackAlignment() >= 16) {
-    if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
-      return MVT::v4i32;
-    if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
-      return MVT::v4f32;
+  if (!F->hasFnAttr(Attribute::NoImplicitFloat)) {
+    if (Size >= 16 &&
+        (Subtarget->isUnalignedMemAccessFast() ||
+         (DstAlign == 0 || DstAlign >= 16) &&
+         (SrcAlign == 0 || SrcAlign >= 16)) &&
+        Subtarget->getStackAlignment() >= 16) {
+      if (Subtarget->hasSSE2())
+        return MVT::v4i32;
+      if (Subtarget->hasSSE1())
+        return MVT::v4f32;
+    } else if (Size >= 8 &&
+               Subtarget->getStackAlignment() >= 8 &&
+               Subtarget->hasSSE2())
+      return MVT::f64;
   }
   if (Subtarget->is64Bit() && Size >= 8)
     return MVT::i64;
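The rewritten x86 hook, reduced to a standalone decision table for readability (assumes implicit FP is allowed and the stack alignment constraints hold; passing the SSE feature bits as plain parameters is an invention of this sketch):

    #include <cstdint>

    enum class X86MemVT { V4I32, V4F32, F64, I64, I32 };

    X86MemVT pickX86Type(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                         bool FastUnaligned, bool SSE1, bool SSE2,
                         bool Is64Bit) {
      bool Aligned16 = (DstAlign == 0 || DstAlign >= 16) &&
                       (SrcAlign == 0 || SrcAlign >= 16);
      if (Size >= 16 && (FastUnaligned || Aligned16)) {
        if (SSE2) return X86MemVT::V4I32;  // 16 bytes per op
        if (SSE1) return X86MemVT::V4F32;  // SSE1-only targets
      } else if (Size >= 8 && SSE2) {
        return X86MemVT::F64;              // movsd, 8 bytes per op
      }
      return Is64Bit && Size >= 8 ? X86MemVT::I64 : X86MemVT::I32;
    }

With Size = 16 and both alignments 8 this picks F64, matching the movsd pairs checked in test @t3 below; with 16-byte alignment it picks V4I32 (or V4F32 on SSE1), matching the movaps lines in @t2.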
@@ -423,8 +423,8 @@ namespace llvm {
     /// and store operations as a result of memset, memcpy, and memmove
     /// lowering. It returns EVT::iAny if SelectionDAG should be responsible for
     /// determining it.
-    virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
-                                    bool isSrcConst, bool isSrcStr,
+    virtual EVT getOptimalMemOpType(uint64_t Size,
+                                    unsigned DstAlign, unsigned SrcAlign,
                                     SelectionDAG &DAG) const;

     /// allowsUnalignedMemoryAccesses - Returns true if the target allows
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck %s
 ; rdar://7396984

 @str = private constant [28 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxx\00", align 1
@@ -1,10 +1,17 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | egrep {add|lea} | grep 16
+; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s

 %struct.S = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>,
                    <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>,
                    <2 x i64> }

 define i32 @main() nounwind {
 entry:
+; CHECK: main:
+; CHECK: movl $1, (%esp)
+; CHECK: leal 16(%esp), %edi
+; CHECK: movl $36, %ecx
+; CHECK: leal 160(%esp), %esi
+; CHECK: rep;movsl
   %s = alloca %struct.S ; <%struct.S*> [#uses=2]
   %tmp15 = getelementptr %struct.S* %s, i32 0, i32 0 ; <<2 x i64>*> [#uses=1]
   store <2 x i64> < i64 8589934595, i64 1 >, <2 x i64>* %tmp15, align 16
@@ -1,15 +1,105 @@
-; RUN: llc < %s -march=x86 -mattr=-sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 7
-; RUN: llc < %s -march=x86 -mattr=+sse -mtriple=i686-apple-darwin8.8.0 | grep mov | count 5
+; RUN: llc < %s -mattr=+sse2 -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=SSE2
+; RUN: llc < %s -mattr=+sse,-sse2 -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=SSE1
+; RUN: llc < %s -mattr=-sse -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=NOSSE

 %struct.ParmT = type { [25 x i8], i8, i8* }
 @.str12 = internal constant [25 x i8] c"image\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00" ; <[25 x i8]*> [#uses=1]

-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
-
-define void @t(i32 %argc, i8** %argv) nounwind {
+define void @t1(i32 %argc, i8** %argv) nounwind {
 entry:
+; SSE2: t1:
+; SSE2: movaps _.str12, %xmm0
+; SSE2: movaps %xmm0
+; SSE2: movb $0
+; SSE2: movl $0
+; SSE2: movl $0
+
+; SSE1: t1:
+; SSE1: movaps _.str12, %xmm0
+; SSE1: movaps %xmm0
+; SSE1: movb $0
+; SSE1: movl $0
+; SSE1: movl $0
+
+; NOSSE: t1:
+; NOSSE: movb $0
+; NOSSE: movl $0
+; NOSSE: movl $0
+; NOSSE: movl $0
+; NOSSE: movl $0
+; NOSSE: movl $101
+; NOSSE: movl $1734438249
   %parms.i = alloca [13 x %struct.ParmT] ; <[13 x %struct.ParmT]*> [#uses=1]
   %parms1.i = getelementptr [13 x %struct.ParmT]* %parms.i, i32 0, i32 0, i32 0, i32 0 ; <i8*> [#uses=1]
   call void @llvm.memcpy.i32( i8* %parms1.i, i8* getelementptr ([25 x i8]* @.str12, i32 0, i32 0), i32 25, i32 1 ) nounwind
   unreachable
 }
+
+;rdar://7774704
+%struct.s0 = type { [2 x double] }
+
+define void @t2(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
+entry:
+; SSE2: t2:
+; SSE2: movaps (%eax), %xmm0
+; SSE2: movaps %xmm0, (%eax)
+
+; SSE1: t2:
+; SSE1: movaps (%eax), %xmm0
+; SSE1: movaps %xmm0, (%eax)
+
+; NOSSE: t2:
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+  %tmp2 = bitcast %struct.s0* %a to i8* ; <i8*> [#uses=1]
+  %tmp3 = bitcast %struct.s0* %b to i8* ; <i8*> [#uses=1]
+  tail call void @llvm.memcpy.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16)
+  ret void
+}
+
+define void @t3(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
+entry:
+; SSE2: t3:
+; SSE2: movsd (%eax), %xmm0
+; SSE2: movsd 8(%eax), %xmm1
+; SSE2: movsd %xmm1, 8(%eax)
+; SSE2: movsd %xmm0, (%eax)
+
+; SSE1: t3:
+; SSE1: movl
+; SSE1: movl
+; SSE1: movl
+; SSE1: movl
+; SSE1: movl
+; SSE1: movl
+; SSE1: movl
+; SSE1: movl
+; SSE1: movl
+; SSE1: movl
+
+; NOSSE: t3:
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+; NOSSE: movl
+  %tmp2 = bitcast %struct.s0* %a to i8* ; <i8*> [#uses=1]
+  %tmp3 = bitcast %struct.s0* %b to i8* ; <i8*> [#uses=1]
+  tail call void @llvm.memcpy.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 8)
+  ret void
+}
+
+declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
@@ -1,47 +1,13 @@
-; RUN: llc < %s | not grep rep
-; RUN: llc < %s | grep memset
+; RUN: llc < %s | FileCheck %s

 target triple = "i386"

 declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind

-define fastcc i32 @cli_scanzip(i32 %desc) nounwind {
+define fastcc void @t() nounwind {
 entry:
-  br label %bb8.i.i.i.i
-
-bb8.i.i.i.i: ; preds = %bb8.i.i.i.i, %entry
-  icmp eq i32 0, 0 ; <i1>:0 [#uses=1]
-  br i1 %0, label %bb61.i.i.i, label %bb8.i.i.i.i
-
-bb32.i.i.i: ; preds = %bb61.i.i.i
-  ptrtoint i8* %tail.0.i.i.i to i32 ; <i32>:1 [#uses=1]
-  sub i32 0, %1 ; <i32>:2 [#uses=1]
-  icmp sgt i32 %2, 19 ; <i1>:3 [#uses=1]
-  br i1 %3, label %bb34.i.i.i, label %bb61.i.i.i
-
-bb34.i.i.i: ; preds = %bb32.i.i.i
-  load i32* null, align 4 ; <i32>:4 [#uses=1]
-  icmp eq i32 %4, 101010256 ; <i1>:5 [#uses=1]
-  br i1 %5, label %bb8.i11.i.i.i, label %bb61.i.i.i
-
-bb8.i11.i.i.i: ; preds = %bb8.i11.i.i.i, %bb34.i.i.i
-  icmp eq i32 0, 0 ; <i1>:6 [#uses=1]
-  br i1 %6, label %cli_dbgmsg.exit49.i, label %bb8.i11.i.i.i
-
-cli_dbgmsg.exit49.i: ; preds = %bb8.i11.i.i.i
-  icmp eq [32768 x i8]* null, null ; <i1>:7 [#uses=1]
-  br i1 %7, label %bb1.i28.i, label %bb8.i.i
-
-bb61.i.i.i: ; preds = %bb61.i.i.i, %bb34.i.i.i, %bb32.i.i.i, %bb8.i.i.i.i
-  %tail.0.i.i.i = getelementptr [1024 x i8]* null, i32 0, i32 0 ; <i8*> [#uses=2]
-  load i8* %tail.0.i.i.i, align 1 ; <i8>:8 [#uses=1]
-  icmp eq i8 %8, 80 ; <i1>:9 [#uses=1]
-  br i1 %9, label %bb32.i.i.i, label %bb61.i.i.i
-
-bb1.i28.i: ; preds = %cli_dbgmsg.exit49.i
-  call void @llvm.memset.i32( i8* null, i8 0, i32 88, i32 1 ) nounwind
-  unreachable
-
-bb8.i.i: ; preds = %bb8.i.i, %cli_dbgmsg.exit49.i
-  br label %bb8.i.i
+; CHECK: t:
+; CHECK: call memset
+  call void @llvm.memset.i32( i8* null, i8 0, i32 188, i32 1 ) nounwind
+  unreachable
 }
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | grep stosl
+; RUN: llc < %s -mtriple=i386-apple-darwin | grep movl | count 20
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movq | count 10

 define void @bork() nounwind {
@@ -1,4 +1,4 @@
-; RUN: llc < %s | not grep movs
+; RUN: llc < %s | grep movsd | count 8

 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=core2 -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck -check-prefix=CORE2 %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=corei7 -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck -check-prefix=COREI7 %s

 @.str1 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, SOME STRING\00", align 8
 @.str3 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, 2'ND STRING\00", align 8
@@ -11,7 +12,11 @@ entry:
 bb:
   %String2Loc9 = getelementptr inbounds [31 x i8]* %String2Loc, i64 0, i64 0
   call void @llvm.memcpy.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1)
-; CHECK: movups _.str3
+; CORE2: movsd _.str3+16
+; CORE2: movsd _.str3+8
+; CORE2: movsd _.str3
+
+; COREI7: movups _.str3
   br label %bb

 return:
@@ -20,8 +25,14 @@ return:

 declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind

-; CHECK: .align 3
-; CHECK-NEXT: _.str1:
-; CHECK-NEXT: .asciz "DHRYSTONE PROGRAM, SOME STRING"
-; CHECK: .align 3
-; CHECK-NEXT: _.str3:
+; CORE2: .align 3
+; CORE2-NEXT: _.str1:
+; CORE2-NEXT: .asciz "DHRYSTONE PROGRAM, SOME STRING"
+; CORE2: .align 3
+; CORE2-NEXT: _.str3:
+
+; COREI7: .align 3
+; COREI7-NEXT: _.str1:
+; COREI7-NEXT: .asciz "DHRYSTONE PROGRAM, SOME STRING"
+; COREI7: .align 3
+; COREI7-NEXT: _.str3: