From e0703c84ddeb1a1276de4e38210c1127ef5df130 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 31 Oct 2007 14:39:58 +0000 Subject: [PATCH] Make ARM and X86 LowerMEMCPY identical by moving the isThumb check into getMaxInlineSizeThreshold and by restructuring the X86 version. Now I just have to move this to a common place :-) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@43554 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 9 +++------ lib/Target/ARM/ARMISelLowering.h | 3 +-- lib/Target/ARM/ARMSubtarget.h | 6 +++++- lib/Target/X86/X86ISelLowering.cpp | 21 +++++++-------------- 4 files changed, 16 insertions(+), 23 deletions(-) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index ef1c86d22a8..b7e37660a7b 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1287,8 +1287,7 @@ static SDOperand LowerSRx(SDOperand Op, SelectionDAG &DAG, return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi); } -SDOperand ARMTargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG, - const ARMSubtarget *ST) { +SDOperand ARMTargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { SDOperand ChainOp = Op.getOperand(0); SDOperand DestOp = Op.getOperand(1); SDOperand SourceOp = Op.getOperand(2); @@ -1311,11 +1310,9 @@ SDOperand ARMTargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG, // The libc version is likely to be faster for the these cases. It can // use the address value and run time information about the CPU. // With glibc 2.6.1 on a core 2, coping an array of 100M longs was 30% faster - // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb. Change - // this once Thumb ldmia / stmia support is added. 
unsigned Size = I->getValue(); if (AlwaysInline || - (!ST->isThumb() && Size <= Subtarget->getMaxInlineSizeThreshold() && + (Size <= Subtarget->getMaxInlineSizeThreshold() && (Align & 3) == 0)) return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG); return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG); @@ -1461,7 +1458,7 @@ SDOperand ARMTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { case ISD::RETURNADDR: break; case ISD::FRAMEADDR: break; case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); - case ISD::MEMCPY: return LowerMEMCPY(Op, DAG, Subtarget); + case ISD::MEMCPY: return LowerMEMCPY(Op, DAG); } return SDOperand(); } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 47cb2a17718..41045e75696 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -134,8 +134,7 @@ namespace llvm { SDOperand LowerGLOBAL_OFFSET_TABLE(SDOperand Op, SelectionDAG &DAG); SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG); SDOperand LowerBR_JT(SDOperand Op, SelectionDAG &DAG); - SDOperand LowerMEMCPY(SDOperand Op, SelectionDAG &DAG, - const ARMSubtarget *ST); + SDOperand LowerMEMCPY(SDOperand Op, SelectionDAG &DAG); SDOperand LowerMEMCPYCall(SDOperand Chain, SDOperand Dest, SDOperand Source, SDOperand Count, SelectionDAG &DAG); diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 5b5ee39ced0..087623dee2f 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -62,7 +62,11 @@ protected: /// ARMSubtarget(const Module &M, const std::string &FS, bool thumb); - unsigned getMaxInlineSizeThreshold() const { return 64; } + unsigned getMaxInlineSizeThreshold() const { + // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb. + // Change this once Thumb ldmia / stmia support is added. + return isThumb() ? 
0 : 64; + } /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f1bf150e1f9..3180e647033 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4496,24 +4496,17 @@ SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { assert(!AlwaysInline && "Cannot inline copy of unknown size"); return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG); } - unsigned Size = I->getValue(); - - if (AlwaysInline) - return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG); + // If not DWORD aligned or if size is more than threshold, then call memcpy. // The libc version is likely to be faster for the following cases. It can // use the address value and run time information about the CPU. // With glibc 2.6.1 on a core 2, coping an array of 100M longs was 30% faster - - // If not DWORD aligned, call memcpy. - if ((Align & 3) != 0) - return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG); - - // If size is more than the threshold, call memcpy. - if (Size > Subtarget->getMaxInlineSizeThreshold()) - return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG); - - return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG); + unsigned Size = I->getValue(); + if (AlwaysInline || + (Size <= Subtarget->getMaxInlineSizeThreshold() && + (Align & 3) == 0)) + return LowerMEMCPYInline(ChainOp, DestOp, SourceOp, Size, Align, DAG); + return LowerMEMCPYCall(ChainOp, DestOp, SourceOp, CountOp, DAG); } SDOperand X86TargetLowering::LowerMEMCPYCall(SDOperand Chain,