From 94107ba9ceaa199f8e5c03912511b0619c84226d Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Thu, 1 Apr 2010 18:19:11 +0000 Subject: [PATCH] - Avoid using floating point stores to implement memset unless the value is zero. - Do not try to infer GV alignment unless its type is sized. It's not possible to infer alignment if it has opaque type. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100118 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 2 +- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 24 +++++++++++++---------- lib/Target/PowerPC/PPCISelLowering.cpp | 1 + lib/Target/PowerPC/PPCISelLowering.h | 2 +- lib/Target/X86/X86ISelLowering.cpp | 6 ++++-- lib/Target/X86/X86ISelLowering.h | 2 +- test/CodeGen/X86/memset-2.ll | 12 ++++++++++-- 7 files changed, 32 insertions(+), 17 deletions(-) diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 23506c9feb9..ad6fcef765d 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -638,7 +638,7 @@ public: /// determining it. virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - SelectionDAG &DAG) const { + bool SafeToUseFP, SelectionDAG &DAG) const { return MVT::Other; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 25bf8612869..4fcfeecb49a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3195,9 +3195,9 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) { /// is below the threshold. It returns the types of the sequence of /// memory ops to perform memset / memcpy by reference. static bool FindOptimalMemOpLowering(std::vector &MemOps, - SDValue Dst, SDValue Src, unsigned Limit, uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool SafeToUseFP, SelectionDAG &DAG, const TargetLowering &TLI) { assert((SrcAlign == 0 || SrcAlign >= DstAlign) && @@ -3207,7 +3207,7 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, // the inferred alignment of the source. 'DstAlign', on the other hand, is the // specified alignment of the memory operation. If it is zero, that means // it's possible to change the alignment of the destination. - EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, DAG); + EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, SafeToUseFP, DAG); if (VT == MVT::Other) { VT = TLI.getPointerTy(); @@ -3285,9 +3285,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, std::string Str; bool CopyFromStr = isMemSrcFromString(Src, Str); bool isZeroStr = CopyFromStr && Str.empty(); - if (!FindOptimalMemOpLowering(MemOps, Dst, Src, Limit, Size, + if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), - (isZeroStr ? 0 : SrcAlign), DAG, TLI)) + (isZeroStr ? 0 : SrcAlign), true, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -3369,9 +3369,9 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, if (Align > SrcAlign) SrcAlign = Align; - if (!FindOptimalMemOpLowering(MemOps, Dst, Src, Limit, Size, + if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), - SrcAlign, DAG, TLI)) + SrcAlign, true, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -3436,9 +3436,11 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; - if (!FindOptimalMemOpLowering(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(), + bool IsZero = isa(Src) && + cast(Src)->isNullValue(); + if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(), Size, (DstAlignCanChange ? 0 : Align), 0, - DAG, TLI)) + IsZero, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -6150,8 +6152,10 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { unsigned Align = GV->getAlignment(); if (!Align) { if (GlobalVariable *GVar = dyn_cast(GV)) { - const TargetData *TD = TLI.getTargetData(); - Align = TD->getPreferredAlignment(GVar); + if (GV->getType()->getElementType()->isSized()) { + const TargetData *TD = TLI.getTargetData(); + Align = TD->getPreferredAlignment(GVar); + } } } return MinAlign(Align, GVOffset); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index d00fbff77a1..d043ec626a2 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5541,6 +5541,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool SafeToUseFP, SelectionDAG &DAG) const { if (this->PPCSubTarget.isPPC64()) { return MVT::i64; diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 2d5daefb333..bb2d84ece54 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -349,7 +349,7 @@ namespace llvm { virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - SelectionDAG &DAG) const; + bool SafeToUseFP, SelectionDAG &DAG) const; /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index bd268eca8f5..5d6e571db12 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1076,6 +1076,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const { EVT X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool SafeToUseFP, SelectionDAG &DAG) const { // FIXME: This turns off use of xmm stores for memset/memcpy on targets like // linux. This is because the stack realignment code can't handle certain @@ -1089,9 +1090,10 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, Subtarget->getStackAlignment() >= 16) { if (Subtarget->hasSSE2()) return MVT::v4i32; - if (Subtarget->hasSSE1()) + if (SafeToUseFP && Subtarget->hasSSE1()) return MVT::v4f32; - } else if (Size >= 8 && + } else if (SafeToUseFP && + Size >= 8 && Subtarget->getStackAlignment() >= 8 && Subtarget->hasSSE2()) return MVT::f64; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 569dc1fbb25..cc7e964f786 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -425,7 +425,7 @@ namespace llvm { /// determining it. virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - SelectionDAG &DAG) const; + bool SafeToUseFP, SelectionDAG &DAG) const; /// allowsUnalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses. of the specified type. diff --git a/test/CodeGen/X86/memset-2.ll b/test/CodeGen/X86/memset-2.ll index e2eba76da8a..702632cde96 100644 --- a/test/CodeGen/X86/memset-2.ll +++ b/test/CodeGen/X86/memset-2.ll @@ -4,10 +4,18 @@ target triple = "i386" declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind -define fastcc void @t() nounwind { +define fastcc void @t1() nounwind { entry: -; CHECK: t: +; CHECK: t1: ; CHECK: call memset call void @llvm.memset.i32( i8* null, i8 0, i32 188, i32 1 ) nounwind unreachable } + +define fastcc void @t2(i8 signext %c) nounwind { +entry: +; CHECK: t2: +; CHECK: call memset + call void @llvm.memset.i32( i8* undef, i8 %c, i32 76, i32 1 ) nounwind + unreachable +}