From 6b83b5d1ae07dcd1c5987f7548ed2cf4be73b6a1 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Mon, 27 Aug 2007 10:18:20 +0000 Subject: [PATCH] call libc memcpy/memset if array size is bigger than the threshold. Copying a 100MB array (after a warmup) shows that the glibc 2.6.1 implementation on x86-64 (core 2) is 30% faster (from 0.270917s to 0.188079s) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@41479 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 8 +++---- test/CodeGen/X86/2004-02-12-Memcpy.llx | 30 ++++++++++++++------------ 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6673c5f6399..75fbd4490a4 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -3753,10 +3753,10 @@ SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) { if (Align == 0) Align = 1; ConstantSDNode *I = dyn_cast(Op.getOperand(3)); - // If not DWORD aligned, call memset if size is less than the threshold. + // If not DWORD aligned or size is more than the threshold, call memset. // It knows how to align to the right boundary first. if ((Align & 3) != 0 || - (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { + (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) { MVT::ValueType IntPtr = getPointerTy(); const Type *IntPtrTy = getTargetData()->getIntPtrType(); TargetLowering::ArgListTy Args; @@ -3909,10 +3909,10 @@ SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) { if (Align == 0) Align = 1; ConstantSDNode *I = dyn_cast(Op.getOperand(3)); - // If not DWORD aligned, call memcpy if size is less than the threshold. + // If not DWORD aligned or size is more than the threshold, call memcpy. // It knows how to align to the right boundary first. 
if ((Align & 3) != 0 || - (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) { + (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) { MVT::ValueType IntPtr = getPointerTy(); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; diff --git a/test/CodeGen/X86/2004-02-12-Memcpy.llx b/test/CodeGen/X86/2004-02-12-Memcpy.llx index 8cd9a50cbb3..56b8d3ba462 100644 --- a/test/CodeGen/X86/2004-02-12-Memcpy.llx +++ b/test/CodeGen/X86/2004-02-12-Memcpy.llx @@ -1,24 +1,26 @@ -; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 | grep movs -declare void %llvm.memcpy.i32(sbyte* %A, sbyte* %B, uint %amt, uint %align) +; RUN: llvm-as < %s | llc -march=x86 | grep movs | count 1 +; RUN: llvm-as < %s | llc -march=x86 | grep memcpy | count 2 -%A = global [1000 x int] zeroinitializer -%B = global [1000 x int] zeroinitializer +@A = global [32 x i32] zeroinitializer +@B = global [32 x i32] zeroinitializer +declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) -void %main() { +define void @main() { ; dword copy - call void %llvm.memcpy.i32(sbyte* cast (int* getelementptr ([1000 x int]* %A, long 0, long 0) to sbyte*), - sbyte* cast (int* getelementptr ([1000 x int]* %B, long 0, long 0) to sbyte*), - uint 4000, uint 4) + call void @llvm.memcpy.i32(i8* bitcast ([32 x i32]* @A to i8*), + i8* bitcast ([32 x i32]* @B to i8*), + i32 128, i32 4 ) ; word copy - call void %llvm.memcpy.i32(sbyte* cast (int* getelementptr ([1000 x int]* %A, long 0, long 0) to sbyte*), - sbyte* cast (int* getelementptr ([1000 x int]* %B, long 0, long 0) to sbyte*), - uint 4000, uint 2) + call void @llvm.memcpy.i32( i8* bitcast ([32 x i32]* @A to i8*), + i8* bitcast ([32 x i32]* @B to i8*), + i32 128, i32 2 ) ; byte copy - call void %llvm.memcpy.i32(sbyte* cast (int* getelementptr ([1000 x int]* %A, long 0, long 0) to sbyte*), - sbyte* cast (int* getelementptr ([1000 x int]* %B, long 0, long 0) to sbyte*), - uint 4000, uint 1) + call void @llvm.memcpy.i32( i8* bitcast ([32 x i32]* @A to 
i8*), + i8* bitcast ([32 x i32]* @B to i8*), + i32 128, i32 1 ) + ret void }