mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-14 11:32:34 +00:00
Make ARM and X86 LowerMEMCPY identical by moving the isThumb check into getMaxInlineSizeThreshold
and by restructuring the X86 version. Now I just have to move this to a common place :-) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@43554 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
a24b294e74
commit
e0703c84dd
@ -1287,8 +1287,7 @@ static SDOperand LowerSRx(SDOperand Op, SelectionDAG &DAG,
|
||||
return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
|
||||
}
|
||||
|
||||
SDOperand ARMTargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG,
|
||||
const ARMSubtarget *ST) {
|
||||
SDOperand ARMTargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
|
||||
SDOperand ChainOp = Op.getOperand(0);
|
||||
SDOperand DestOp = Op.getOperand(1);
|
||||
SDOperand SourceOp = Op.getOperand(2);
|
||||
@ -1311,11 +1310,9 @@ SDOperand ARMTargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG,
|
||||
// The libc version is likely to be faster for the these cases. It can
|
||||
// use the address value and run time information about the CPU.
|
||||
// With glibc 2.6.1 on a core 2, coping an array of 100M longs was 30% faster
|
||||
// FIXME: For now, we don't lower memcpy's to loads / stores for Thumb. Change
|
||||
// this once Thumb ldmia / stmia support is added.
|
||||
unsigned Size = I->getValue();
|
||||
if (AlwaysInline ||
|
||||
(!ST->isThumb() && Size <= Subtarget->getMaxInlineSizeThreshold() &&
|
||||
(Size <= Subtarget->getMaxInlineSizeThreshold() &&
|
||||
(Align & 3) == 0))
|
||||
return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG);
|
||||
return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
|
||||
@ -1461,7 +1458,7 @@ SDOperand ARMTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
|
||||
case ISD::RETURNADDR: break;
|
||||
case ISD::FRAMEADDR: break;
|
||||
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
|
||||
case ISD::MEMCPY: return LowerMEMCPY(Op, DAG, Subtarget);
|
||||
case ISD::MEMCPY: return LowerMEMCPY(Op, DAG);
|
||||
}
|
||||
return SDOperand();
|
||||
}
|
||||
|
@ -134,8 +134,7 @@ namespace llvm {
|
||||
SDOperand LowerGLOBAL_OFFSET_TABLE(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerBR_JT(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerMEMCPY(SDOperand Op, SelectionDAG &DAG,
|
||||
const ARMSubtarget *ST);
|
||||
SDOperand LowerMEMCPY(SDOperand Op, SelectionDAG &DAG);
|
||||
SDOperand LowerMEMCPYCall(SDOperand Chain, SDOperand Dest,
|
||||
SDOperand Source, SDOperand Count,
|
||||
SelectionDAG &DAG);
|
||||
|
@ -62,7 +62,11 @@ protected:
|
||||
///
|
||||
ARMSubtarget(const Module &M, const std::string &FS, bool thumb);
|
||||
|
||||
unsigned getMaxInlineSizeThreshold() const { return 64; }
|
||||
unsigned getMaxInlineSizeThreshold() const {
|
||||
// FIXME: For now, we don't lower memcpy's to loads / stores for Thumb.
|
||||
// Change this once Thumb ldmia / stmia support is added.
|
||||
return isThumb() ? 0 : 64;
|
||||
}
|
||||
/// ParseSubtargetFeatures - Parses features string setting specified
|
||||
/// subtarget options. Definition of function is auto generated by tblgen.
|
||||
void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU);
|
||||
|
@ -4496,24 +4496,17 @@ SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
|
||||
assert(!AlwaysInline && "Cannot inline copy of unknown size");
|
||||
return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
|
||||
}
|
||||
unsigned Size = I->getValue();
|
||||
|
||||
if (AlwaysInline)
|
||||
return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG);
|
||||
|
||||
// If not DWORD aligned or if size is more than threshold, then call memcpy.
|
||||
// The libc version is likely to be faster for the following cases. It can
|
||||
// use the address value and run time information about the CPU.
|
||||
// With glibc 2.6.1 on a core 2, coping an array of 100M longs was 30% faster
|
||||
|
||||
// If not DWORD aligned, call memcpy.
|
||||
if ((Align & 3) != 0)
|
||||
return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
|
||||
|
||||
// If size is more than the threshold, call memcpy.
|
||||
if (Size > Subtarget->getMaxInlineSizeThreshold())
|
||||
return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
|
||||
|
||||
return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG);
|
||||
unsigned Size = I->getValue();
|
||||
if (AlwaysInline ||
|
||||
(Size <= Subtarget->getMaxInlineSizeThreshold() &&
|
||||
(Align & 3) == 0))
|
||||
return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG);
|
||||
return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG);
|
||||
}
|
||||
|
||||
SDOperand X86TargetLowering::LowerMEMCPYCall(SDOperand Chain,
|
||||
|
Loading…
Reference in New Issue
Block a user