diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index b0534ddaa5e..f040c9db38c 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -633,15 +633,19 @@ public:
   }
 
   /// getOptimalMemOpType - Returns the target specific optimal type for load
-  /// and store operations as a result of memset, memcpy, and memmove lowering.
-  /// If DstAlign is zero that means it's safe to destination alignment can
-  /// satisfy any constraint. Similarly if SrcAlign is zero it means there isn't
-  /// a need to check it against alignment requirement, probably because the
-  /// source does not need to be loaded. It returns EVT::Other if SelectionDAG
-  /// should be responsible for determining it.
+  /// and store operations as a result of memset, memcpy, and memmove
+  /// lowering. If DstAlign is zero that means the destination alignment
+  /// can satisfy any constraint. Similarly, if SrcAlign is zero it means
+  /// there is no need to check it against the alignment requirement,
+  /// probably because the source does not need to be loaded. If
+  /// 'NonScalarIntSafe' is true, it is safe to return a non-scalar-integer
+  /// type, e.g. when the source is an empty string, a constant, or a value
+  /// loaded from memory. It returns EVT::Other if SelectionDAG should be
+  /// responsible for determining it.
   virtual EVT getOptimalMemOpType(uint64_t Size,
                                   unsigned DstAlign, unsigned SrcAlign,
-                                  bool SafeToUseFP, SelectionDAG &DAG) const {
+                                  bool NonScalarIntSafe,
+                                  SelectionDAG &DAG) const {
     return MVT::Other;
   }
 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 0ba65ab7f96..39cdba74e0f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3094,6 +3094,8 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
 /// operand.
 static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
                               DebugLoc dl) {
+  assert(Value.getOpcode() != ISD::UNDEF);
+
   unsigned NumBits = VT.getScalarType().getSizeInBits();
   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
     APInt Val = APInt(NumBits, C->getZExtValue() & 255);
@@ -3197,7 +3199,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) {
 static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
                                      unsigned Limit, uint64_t Size,
                                      unsigned DstAlign, unsigned SrcAlign,
-                                     bool SafeToUseFP,
+                                     bool NonScalarIntSafe,
                                      SelectionDAG &DAG,
                                      const TargetLowering &TLI) {
   assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
@@ -3207,7 +3209,8 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
   // the inferred alignment of the source. 'DstAlign', on the other hand, is the
   // specified alignment of the memory operation. If it is zero, that means
   // it's possible to change the alignment of the destination.
-  EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, SafeToUseFP, DAG);
+  EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
+                                   NonScalarIntSafe, DAG);
 
   if (VT == MVT::Other) {
     VT = TLI.getPointerTy();
@@ -3266,10 +3269,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
                                        unsigned Align, bool AlwaysInline,
                                        const Value *DstSV, uint64_t DstSVOff,
                                        const Value *SrcSV, uint64_t SrcSVOff) {
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  // Turn a memcpy of undef into a nop.
+  if (Src.getOpcode() == ISD::UNDEF)
+    return Chain;
 
   // Expand memcpy to a series of load and store ops if the size operand falls
   // below a certain threshold.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   std::vector<EVT> MemOps;
   uint64_t Limit = -1ULL;
   if (!AlwaysInline)
@@ -3352,10 +3358,13 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
                                         unsigned Align,bool AlwaysInline,
                                         const Value *DstSV, uint64_t DstSVOff,
                                         const Value *SrcSV, uint64_t SrcSVOff) {
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  // Turn a memmove of undef into a nop.
+  if (Src.getOpcode() == ISD::UNDEF)
+    return Chain;
 
   // Expand memmove to a series of load and store ops if the size operand falls
   // below a certain threshold.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   std::vector<EVT> MemOps;
   uint64_t Limit = -1ULL;
   if (!AlwaysInline)
@@ -3426,21 +3435,24 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
                                SDValue Src, uint64_t Size,
                                unsigned Align,
                                const Value *DstSV, uint64_t DstSVOff) {
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  // Turn a memset of undef into a nop.
+  if (Src.getOpcode() == ISD::UNDEF)
+    return Chain;
 
   // Expand memset to a series of load/store ops if the size operand
   // falls below a certain threshold.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   std::vector<EVT> MemOps;
   bool DstAlignCanChange = false;
   MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
   FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
   if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
     DstAlignCanChange = true;
-  bool IsZero = isa<ConstantSDNode>(Src) &&
-    cast<ConstantSDNode>(Src)->isNullValue();
+  bool NonScalarIntSafe =
+    isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
   if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(), Size,
                                 (DstAlignCanChange ? 0 : Align), 0,
-                                IsZero, DAG, TLI))
+                                NonScalarIntSafe, DAG, TLI))
     return SDValue();
 
   if (DstAlignCanChange) {
@@ -3592,9 +3604,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
     if (ConstantSize->isNullValue())
       return Chain;
 
-    SDValue Result =
-      getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
-                      Align, DstSV, DstSVOff);
+    SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src,
+                                     ConstantSize->getZExtValue(),
+                                     Align, DstSV, DstSVOff);
     if (Result.getNode())
       return Result;
   }
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index e67666d8048..337b0d74c33 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5540,15 +5540,18 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
 }
 
 /// getOptimalMemOpType - Returns the target specific optimal type for load
-/// and store operations as a result of memset, memcpy, and memmove lowering.
-/// If DstAlign is zero that means it's safe to destination alignment can
-/// satisfy any constraint. Similarly if SrcAlign is zero it means there
-/// isn't a need to check it against alignment requirement, probably because
-/// the source does not need to be loaded. It returns EVT::Other if
-/// SelectionDAG should be responsible for determining it.
+/// and store operations as a result of memset, memcpy, and memmove
+/// lowering. If DstAlign is zero that means the destination alignment
+/// can satisfy any constraint. Similarly, if SrcAlign is zero it means
+/// there is no need to check it against the alignment requirement,
+/// probably because the source does not need to be loaded. If
+/// 'NonScalarIntSafe' is true, it is safe to return a non-scalar-integer
+/// type, e.g. when the source is an empty string, a constant, or a value
+/// loaded from memory. It returns EVT::Other if SelectionDAG should be
+/// responsible for determining it.
 EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
                                            unsigned DstAlign, unsigned SrcAlign,
-                                           bool SafeToUseFP,
+                                           bool NonScalarIntSafe,
                                            SelectionDAG &DAG) const {
   if (this->PPCSubTarget.isPPC64()) {
     return MVT::i64;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 19fefab2d3a..f816bddaf5a 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -348,15 +348,19 @@ namespace llvm {
     virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
 
     /// getOptimalMemOpType - Returns the target specific optimal type for load
-    /// and store operations as a result of memset, memcpy, and memmove lowering.
-    /// If DstAlign is zero that means it's safe to destination alignment can
-    /// satisfy any constraint. Similarly if SrcAlign is zero it means there
-    /// isn't a need to check it against alignment requirement, probably because
-    /// the source does not need to be loaded. It returns EVT::Other if
-    /// SelectionDAG should be responsible for determining it.
-    virtual EVT getOptimalMemOpType(uint64_t Size,
-                                    unsigned DstAlign, unsigned SrcAlign,
-                                    bool SafeToUseFP, SelectionDAG &DAG) const;
+    /// and store operations as a result of memset, memcpy, and memmove
+    /// lowering. If DstAlign is zero that means the destination alignment
+    /// can satisfy any constraint. Similarly, if SrcAlign is zero it means
+    /// there is no need to check it against the alignment requirement,
+    /// probably because the source does not need to be loaded. If
+    /// 'NonScalarIntSafe' is true, it is safe to return a non-scalar-integer
+    /// type, e.g. when the source is an empty string, a constant, or a value
+    /// loaded from memory. It returns EVT::Other if SelectionDAG should be
+    /// responsible for determining it.
+    virtual EVT
+    getOptimalMemOpType(uint64_t Size,
+                        unsigned DstAlign, unsigned SrcAlign,
+                        bool NonScalarIntSafe, SelectionDAG &DAG) const;
 
     /// getFunctionAlignment - Return the Log2 alignment of this function.
     virtual unsigned getFunctionAlignment(const Function *F) const;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b24d5a1707a..f46586a6b70 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1071,18 +1071,21 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
 /// If DstAlign is zero that means it's safe to destination alignment can
 /// satisfy any constraint. Similarly if SrcAlign is zero it means there
 /// isn't a need to check it against alignment requirement, probably because
-/// the source does not need to be loaded. It returns EVT::Other if
-/// SelectionDAG should be responsible for determining it.
+/// the source does not need to be loaded. If 'NonScalarIntSafe' is true, it
+/// is safe to return a non-scalar-integer type, e.g. a constant string
+/// source or a value loaded from memory. It returns EVT::Other if
+/// SelectionDAG should be responsible for determining it.
 EVT
 X86TargetLowering::getOptimalMemOpType(uint64_t Size,
                                        unsigned DstAlign, unsigned SrcAlign,
-                                       bool SafeToUseFP,
+                                       bool NonScalarIntSafe,
                                        SelectionDAG &DAG) const {
   // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
   // linux. This is because the stack realignment code can't handle certain
   // cases like PR2962. This should be removed when PR2962 is fixed.
   const Function *F = DAG.getMachineFunction().getFunction();
-  if (!F->hasFnAttr(Attribute::NoImplicitFloat)) {
+  if (NonScalarIntSafe &&
+      !F->hasFnAttr(Attribute::NoImplicitFloat)) {
     if (Size >= 16 &&
         (Subtarget->isUnalignedMemAccessFast() ||
          ((DstAlign == 0 || DstAlign >= 16) &&
@@ -1090,10 +1093,9 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
         Subtarget->getStackAlignment() >= 16) {
       if (Subtarget->hasSSE2())
         return MVT::v4i32;
-      if (SafeToUseFP && Subtarget->hasSSE1())
+      if (Subtarget->hasSSE1())
        return MVT::v4f32;
-    } else if (SafeToUseFP &&
-               Size >= 8 &&
+    } else if (Size >= 8 &&
                !Subtarget->is64Bit() &&
                Subtarget->getStackAlignment() >= 8 &&
                Subtarget->hasSSE2())
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 4549cba6f39..2c2a5fbb803 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -417,15 +417,19 @@ namespace llvm {
     virtual unsigned getByValTypeAlignment(const Type *Ty) const;
 
     /// getOptimalMemOpType - Returns the target specific optimal type for load
-    /// and store operations as a result of memset, memcpy, and memmove lowering.
-    /// If DstAlign is zero that means it's safe to destination alignment can
-    /// satisfy any constraint. Similarly if SrcAlign is zero it means there
-    /// isn't a need to check it against alignment requirement, probably because
-    /// the source does not need to be loaded. It returns EVT::Other if
-    /// SelectionDAG should be responsible for determining it.
-    virtual EVT getOptimalMemOpType(uint64_t Size,
-                                    unsigned DstAlign, unsigned SrcAlign,
-                                    bool SafeToUseFP, SelectionDAG &DAG) const;
+    /// and store operations as a result of memset, memcpy, and memmove
+    /// lowering. If DstAlign is zero that means the destination alignment
+    /// can satisfy any constraint. Similarly, if SrcAlign is zero it means
+    /// there is no need to check it against the alignment requirement,
+    /// probably because the source does not need to be loaded. If
+    /// 'NonScalarIntSafe' is true, it is safe to return a non-scalar-integer
+    /// type, e.g. when the source is an empty string, a constant, or a value
+    /// loaded from memory. It returns EVT::Other if SelectionDAG should be
+    /// responsible for determining it.
+    virtual EVT
+    getOptimalMemOpType(uint64_t Size,
+                        unsigned DstAlign, unsigned SrcAlign,
+                        bool NonScalarIntSafe, SelectionDAG &DAG) const;
 
     /// allowsUnalignedMemoryAccesses - Returns true if the target allows
     /// unaligned memory accesses. of the specified type.
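
Reviewer note (not part of the patch): the sketch below shows how an out-of-tree backend might implement the renamed hook. MyTargetLowering and the 16-byte heuristic are hypothetical, chosen only to illustrate how the NonScalarIntSafe flag is meant to gate non-scalar-integer type choices.

EVT MyTargetLowering::getOptimalMemOpType(uint64_t Size,
                                          unsigned DstAlign, unsigned SrcAlign,
                                          bool NonScalarIntSafe,
                                          SelectionDAG &DAG) const {
  // Only pick a vector type when the stored value may be materialized as
  // something other than a scalar integer (e.g. a zero memset or a memcpy
  // from a constant string) and both known alignments permit 16-byte stores.
  if (NonScalarIntSafe && Size >= 16 &&
      (DstAlign == 0 || DstAlign >= 16) &&
      (SrcAlign == 0 || SrcAlign >= 16))
    return MVT::v4i32;
  // Otherwise let SelectionDAG fall back to a scalar integer type.
  return MVT::Other;
}
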
diff --git a/test/CodeGen/X86/memset-2.ll b/test/CodeGen/X86/memset-2.ll
index 702632cde96..0e1559548e2 100644
--- a/test/CodeGen/X86/memset-2.ll
+++ b/test/CodeGen/X86/memset-2.ll
@@ -1,13 +1,11 @@
-; RUN: llc < %s | FileCheck %s
-
-target triple = "i386"
+; RUN: llc -mtriple=i386-apple-darwin < %s | FileCheck %s
 
 declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
 
 define fastcc void @t1() nounwind {
 entry:
 ; CHECK: t1:
-; CHECK: call memset
+; CHECK: call _memset
   call void @llvm.memset.i32( i8* null, i8 0, i32 188, i32 1 ) nounwind
   unreachable
 }
@@ -15,7 +13,7 @@ entry:
 define fastcc void @t2(i8 signext %c) nounwind {
 entry:
 ; CHECK: t2:
-; CHECK: call memset
+; CHECK: call _memset
   call void @llvm.memset.i32( i8* undef, i8 %c, i32 76, i32 1 ) nounwind
   unreachable
 }
diff --git a/test/CodeGen/X86/memset-3.ll b/test/CodeGen/X86/memset-3.ll
new file mode 100644
index 00000000000..9b20ad506a5
--- /dev/null
+++ b/test/CodeGen/X86/memset-3.ll
@@ -0,0 +1,12 @@
+; RUN: llc -mtriple=i386-apple-darwin < %s | not grep memset
+; PR6767
+
+define void @t() nounwind ssp {
+entry:
+  %buf = alloca [512 x i8], align 1
+  %ptr = getelementptr inbounds [512 x i8]* %buf, i32 0, i32 0
+  call void @llvm.memset.i32(i8* %ptr, i8 undef, i32 512, i32 1)
+  unreachable
+}
+
+declare void @llvm.memset.i32(i8* nocapture, i8, i32, i32) nounwind
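
Reviewer note (not part of the patch): the new assert in getMemsetValue relies on the early-outs added above to filter UNDEF fill values before the expansion helpers run. For context, the byte-splat getMemsetValue performs for a constant fill value is roughly the following standalone sketch, written with plain uint64_t instead of APInt (so NumBits is assumed to be at most 64).

#include <cstdint>

// Replicate the low byte across a NumBits-wide integer, using the same
// doubling loop getMemsetValue applies to an APInt.
static uint64_t splatMemsetByte(uint8_t Byte, unsigned NumBits) {
  uint64_t Val = Byte;
  unsigned Shift = 8;
  while (Shift < NumBits) {
    Val = (Val << Shift) | Val;  // double the number of replicated bytes
    Shift <<= 1;
  }
  return Val;                    // e.g. splatMemsetByte(0xAB, 32) == 0xABABABAB
}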