From a0f5bf306c0e66ca5fc519fd3b0cb732d37d4a5e Mon Sep 17 00:00:00 2001 From: Reid Spencer Date: Tue, 19 Jul 2005 04:52:44 +0000 Subject: [PATCH] For: memory operations -> stores This is the first incremental patch to implement this feature. It adds no functionality to LLVM but sets up the information needed from targets in order to implement the optimization correctly. Each target needs to specify the maximum number of store operations for conversion of the llvm.memset, llvm.memcpy, and llvm.memmove intrinsics into a sequence of store operations. The limit needs to be chosen at the threshold of performance for such an optimization (generally smallish). The target also needs to specify whether the target can support unaligned stores for multi-byte store operations. This helps ensure the optimization doesn't generate code that will trap on an alignment error. More patches to follow. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@22468 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 67 +++++++++++++++++++++ lib/CodeGen/SelectionDAG/TargetLowering.cpp | 2 + lib/Target/X86/X86ISelPattern.cpp | 5 ++ 3 files changed, 74 insertions(+) diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index d30d6c73747..086da4b46be 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -12,6 +12,7 @@ // // 1. Which ValueTypes are natively supported by the target. // 2. Which operations are supported for supported ValueTypes. +// 3. Cost thresholds for alternative implementations of certain operations. // // In addition it has a few other components, like information about FP // immediates. @@ -187,6 +188,31 @@ public: return NumElementsForVT[VT]; } + /// This function returns the maximum number of store operations permitted + /// to replace a call to llvm.memset. The value is set by the target at the + /// performance threshold for such a replacement. 
+ /// @brief Get maximum # of store operations permitted for llvm.memset + unsigned getMaxStoresPerMemSet() const { return maxStoresPerMemSet; } + + /// This function returns the maximum number of store operations permitted + /// to replace a call to llvm.memcpy. The value is set by the target at the + /// performance threshold for such a replacement. + /// @brief Get maximum # of store operations permitted for llvm.memcpy + unsigned getMaxStoresPerMemCpy() const { return maxStoresPerMemCpy; } + + /// This function returns the maximum number of store operations permitted + /// to replace a call to llvm.memmove. The value is set by the target at the + /// performance threshold for such a replacement. + /// @brief Get maximum # of store operations permitted for llvm.memmove + unsigned getMaxStoresPerMemMove() const { return maxStoresPerMemMove; } + + /// This function returns true if the target allows unaligned stores. This is + /// used in situations where an array copy/move/set is converted to a sequence + /// of store operations. It ensures that such replacements don't generate + /// code that causes an alignment error (trap) on the target machine. + /// @brief Determine if the target supports unaligned stores. + bool allowsUnalignedStores() const { return allowUnalignedStores; } + //===--------------------------------------------------------------------===// // TargetLowering Configuration Methods - These methods should be invoked by // the derived class constructor to configure this object for the target. @@ -365,6 +391,47 @@ private: std::vector > AvailableRegClasses; + +protected: + /// When lowering %llvm.memset this field specifies the maximum number of + /// store operations that may be substituted for the call to memset. Targets + /// must set this value based on the cost threshold for that target. 
Targets + /// should assume that the memset will be done using as many of the largest + /// store operations first, followed by smaller ones, if necessary, per + /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine + /// with 16-bit alignment would result in four 2-byte stores and one 1-byte + /// store. This only applies to setting a constant array of a constant size. + /// @brief Specify maximum number of store instructions per memset call. + unsigned maxStoresPerMemSet; + + /// When lowering %llvm.memcpy this field specifies the maximum number of + /// store operations that may be substituted for a call to memcpy. Targets + /// must set this value based on the cost threshold for that target. Targets + /// should assume that the memcpy will be done using as many of the largest + /// store operations first, followed by smaller ones, if necessary, per + /// alignment restrictions. For example, copying 7 bytes on a 32-bit machine + /// with 32-bit alignment would result in one 4-byte store, one 2-byte store + /// and one 1-byte store. This only applies to copying a constant array of + /// constant size. + /// @brief Specify maximum number of store instructions per memcpy call. + unsigned maxStoresPerMemCpy; + + /// When lowering %llvm.memmove this field specifies the maximum number of + /// store instructions that may be substituted for a call to memmove. Targets + /// must set this value based on the cost threshold for that target. Targets + /// should assume that the memmove will be done using as many of the largest + /// store operations first, followed by smaller ones, if necessary, per + /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine + /// with 8-bit alignment would result in nine 1-byte stores. This only + /// applies to copying a constant array of constant size. + /// @brief Specify maximum number of store instructions per memmove call. 
+ unsigned maxStoresPerMemMove; + + /// This field specifies whether the target machine permits unaligned stores. + /// This is used to determine the size of store operations for copying + /// small arrays and other similar tasks. + /// @brief Indicate whether the target machine permits unaligned stores. + bool allowUnalignedStores; }; } // end llvm namespace diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 7d720f149f7..46ac8e96336 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -27,6 +27,8 @@ TargetLowering::TargetLowering(TargetMachine &tm) ShiftAmountTy = SetCCResultTy = PointerTy = getValueType(TD.getIntPtrType()); ShiftAmtHandling = Undefined; memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); + maxStoresPerMemSet = maxStoresPerMemCpy = maxStoresPerMemMove = 0; + allowUnalignedStores = false; } TargetLowering::~TargetLowering() {} diff --git a/lib/Target/X86/X86ISelPattern.cpp b/lib/Target/X86/X86ISelPattern.cpp index 999f089395e..29e088ef764 100644 --- a/lib/Target/X86/X86ISelPattern.cpp +++ b/lib/Target/X86/X86ISelPattern.cpp @@ -189,6 +189,11 @@ namespace { addLegalFPImmediate(-1.0); // FLD1/FCHS } computeRegisterProperties(); + + maxStoresPerMemSet = 8; // For %llvm.memset -> sequence of stores + maxStoresPerMemCpy = 8; // For %llvm.memcpy -> sequence of stores + maxStoresPerMemMove = 8; // For %llvm.memmove -> sequence of stores + allowUnalignedStores = true; // x86 supports it! } // Return the number of bytes that a function should pop when it returns (in