From 61f4dfe3693bf68b20748d82ac4dd9bf2f356699 Mon Sep 17 00:00:00 2001
From: Evan Cheng
Date: Wed, 12 Dec 2012 00:42:09 +0000
Subject: [PATCH] Avoid using lossy load / stores for memcpy / memset
 expansion. e.g. f64 load / store on non-SSE2 x86 targets.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169944 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Target/TargetLowering.h      |  9 ++++++
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 36 +++++++++++++----------
 lib/Target/ARM/ARMISelLowering.cpp        |  4 +++
 lib/Target/ARM/ARMISelLowering.h          |  7 +++++
 lib/Target/X86/X86ISelLowering.cpp        |  8 +++++
 lib/Target/X86/X86ISelLowering.h          |  7 +++++
 test/CodeGen/X86/memcpy-2.ll              |  4 +--
 7 files changed, 58 insertions(+), 17 deletions(-)

diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index faf0a0179a3..f3010438518 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -716,6 +716,15 @@ public:
     return MVT::Other;
   }
 
+  /// isLegalMemOpType - Returns true if it's legal to use load / store of the
+  /// specified type to expand memcpy / memset inline. This is mostly true
+  /// for legal types except for some special cases. For example, on X86
+  /// targets without SSE2 f64 load / store are done with fldl / fstpl which
+  /// also does type conversion.
+  virtual bool isLegalMemOpType(MVT VT) const {
+    return VT.isInteger();
+  }
+
   /// usesUnderscoreSetJmp - Determine if we should use _setjmp or setjmp
   /// to implement llvm.setjmp.
   bool usesUnderscoreSetJmp() const {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 4cb63ce9a66..36592e54f8f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3474,27 +3474,33 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
     unsigned VTSize = VT.getSizeInBits() / 8;
     while (VTSize > Size) {
       // For now, only use non-vector load / store's for the left-over pieces.
-      EVT NewVT;
+      EVT NewVT = VT;
       unsigned NewVTSize;
+
+      bool Found = false;
       if (VT.isVector() || VT.isFloatingPoint()) {
         NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
-        while (!TLI.isOperationLegalOrCustom(ISD::STORE, NewVT)) {
-          if (NewVT == MVT::i64 &&
-              TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64)) {
-            // i64 is usually not legal on 32-bit targets, but f64 may be.
-            NewVT = MVT::f64;
-            break;
-          }
-          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
+        if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) &&
+            TLI.isLegalMemOpType(NewVT.getSimpleVT()))
+          Found = true;
+        else if (NewVT == MVT::i64 &&
+                 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
+                 TLI.isLegalMemOpType(MVT::f64)) {
+          // i64 is usually not legal on 32-bit targets, but f64 may be.
+          NewVT = MVT::f64;
+          Found = true;
         }
-        NewVTSize = NewVT.getSizeInBits() / 8;
-      } else {
-        // This can result in a type that is not legal on the target, e.g.
-        // 1 or 2 bytes on PPC.
-        NewVT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
-        NewVTSize = VTSize >> 1;
       }
 
+      if (!Found) {
+        do {
+          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
+          if (NewVT == MVT::i8)
+            break;
+        } while (!TLI.isLegalMemOpType(NewVT.getSimpleVT()));
+      }
+      NewVTSize = NewVT.getSizeInBits() / 8;
+
       // If the new VT cannot cover all of the remaining bits, then consider
       // issuing a (or a pair of) unaligned and overlapping load / store.
       // FIXME: Only does this for 64-bit or more since we don't have proper
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 88282c7331b..de7159e474a 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -9481,6 +9481,10 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
   return MVT::Other;
 }
 
+bool ARMTargetLowering::isLegalMemOpType(MVT VT) const {
+  return VT.isInteger() || VT == MVT::f64 || VT == MVT::v2f64;
+}
+
 bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
   if (Val.getOpcode() != ISD::LOAD)
     return false;
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 5a44201ec48..3e78ae3b2d5 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -296,6 +296,13 @@ namespace llvm {
                                     bool MemcpyStrSrc,
                                     MachineFunction &MF) const;
 
+    /// isLegalMemOpType - Returns true if it's legal to use load / store of the
+    /// specified type to expand memcpy / memset inline. This is mostly true
+    /// for legal types except for some special cases. For example, on X86
+    /// targets without SSE2 f64 load / store are done with fldl / fstpl which
+    /// also does type conversion.
+    virtual bool isLegalMemOpType(MVT VT) const;
+
     using TargetLowering::isZExtFree;
     virtual bool isZExtFree(SDValue Val, EVT VT2) const;
 
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 90bee41e35d..800c2012df5 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1412,6 +1412,14 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
   return MVT::i32;
 }
 
+bool X86TargetLowering::isLegalMemOpType(MVT VT) const {
+  if (VT == MVT::f32)
+    return X86ScalarSSEf32;
+  else if (VT == MVT::f64)
+    return X86ScalarSSEf64;
+  return VT.isInteger();
+}
+
 bool
 X86TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
   if (Fast)
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index a515be23ef6..9d22da1dd90 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -506,6 +506,13 @@ namespace llvm {
                         bool IsZeroVal, bool MemcpyStrSrc,
                         MachineFunction &MF) const;
 
+    /// isLegalMemOpType - Returns true if it's legal to use load / store of the
+    /// specified type to expand memcpy / memset inline. This is mostly true
+    /// for legal types except for some special cases. For example, on X86
+    /// targets without SSE2 f64 load / store are done with fldl / fstpl which
+    /// also does type conversion.
+    virtual bool isLegalMemOpType(MVT VT) const;
+
     /// allowsUnalignedMemoryAccesses - Returns true if the target allows
     /// unaligned memory accesses. of the specified type. Returns whether it
     /// is "fast" by reference in the second argument.
diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll
index dcc8f0d268c..949d6a42932 100644
--- a/test/CodeGen/X86/memcpy-2.ll
+++ b/test/CodeGen/X86/memcpy-2.ll
@@ -17,11 +17,11 @@ entry:
 ; SSE2: movb $0, 24(%esp)
 
 ; SSE1: t1:
-; SSE1: fldl _.str+16
-; SSE1: fstpl 16(%esp)
 ; SSE1: movaps _.str, %xmm0
 ; SSE1: movaps %xmm0
 ; SSE1: movb $0, 24(%esp)
+; SSE1: movl $0, 20(%esp)
+; SSE1: movl $0, 16(%esp)
 
 ; NOSSE: t1:
 ; NOSSE: movb $0
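Note for readers outside the original commit: the hook added above defaults to allowing only integer types, so a backend that wants memcpy / memset expansion to keep using floating-point or vector loads / stores has to override it, as the ARM and X86 hunks do. A minimal sketch of such an override follows; it is not part of this commit, and MyTargetLowering and hasFastFPMemOps() are hypothetical names used only for illustration.

// Illustrative sketch only; MyTargetLowering, Subtarget and hasFastFPMemOps()
// are hypothetical and do not correspond to anything in this patch.
bool MyTargetLowering::isLegalMemOpType(MVT VT) const {
  // Integer loads / stores copy raw bits, so they are always safe to use
  // when expanding memcpy / memset inline.
  if (VT.isInteger())
    return true;
  // Allow f64 pieces only when the subtarget can load / store them without
  // an implicit format conversion (the fldl / fstpl problem on non-SSE2 x86).
  if (VT == MVT::f64)
    return Subtarget->hasFastFPMemOps();
  return false;
}

In FindOptimalMemOpLowering above, a false return simply pushes the expansion down to the next smaller type for which the hook returns true, bottoming out at i8.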