From d11c5d08a5f4f030d6e357378d0d46d93efd9a59 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 27 Oct 2012 14:25:51 +0000 Subject: [PATCH] LoopIdiom: Recognize memmove loops. This turns loops like for (unsigned i = 0; i != n; ++i) p[i] = p[i+1]; into memmove, which has a highly optimized implementation in most libcs. This was really easy with the new DependenceAnalysis :) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@166875 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 34 ++++++++++++++------ test/Transforms/LoopIdiom/basic.ll | 22 +++++++++++++ 2 files changed, 46 insertions(+), 10 deletions(-) diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 3050b1e0c13..495d403e549 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -16,7 +16,7 @@ // TODO List: // // Future loop memory idioms to recognize: -// memcmp, memmove, strlen, etc. +// memcmp, strlen, etc. // Future floating point idioms to recognize in -ffast-math mode: // fpowi // Future integer operation idioms to recognize: @@ -60,8 +60,9 @@ #include "llvm/Transforms/Utils/Local.h" using namespace llvm; -STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); -STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); +STATISTIC(NumMemSet, "Number of memsets formed from loop stores"); +STATISTIC(NumMemCpy, "Number of memcpys formed from loop load+stores"); +STATISTIC(NumMemMove, "Number of memmoves formed from loop load+stores"); namespace { class LoopIdiomRecognize : public LoopPass { @@ -532,6 +533,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // stores) in the loop. We ignore the direct dependency between SI and LI here // and check it later. 
DependenceAnalysis &DA = getAnalysis<DependenceAnalysis>(); + bool isMemcpySafe = true; for (Loop::block_iterator BI = CurLoop->block_begin(), BE = CurLoop->block_end(); BI != BE; ++BI) for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) @@ -552,8 +554,14 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // Now check the dependency between SI and LI. If there is no dependency we // can safely emit a memcpy. OwningPtr<Dependence> Dep(DA.depends(SI, LI, true)); - if (Dep) - return false; + if (Dep) { + // If there is a dependence but the direction is positive we can still + // safely turn this into memmove. + if (Dep->getLevels() != 1 || + Dep->getDirection(1) != Dependence::DVEntry::GT) + return false; + isMemcpySafe = false; + } // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the @@ -590,12 +598,19 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); - CallInst *NewCall = - Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, - std::min(SI->getAlignment(), LI->getAlignment())); + CallInst *NewCall; + unsigned Align = std::min(SI->getAlignment(), LI->getAlignment()); + if (isMemcpySafe) { + NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align); + ++NumMemCpy; + } else { + NewCall = Builder.CreateMemMove(StoreBasePtr, LoadBasePtr, NumBytes, Align); + ++NumMemMove; + } NewCall->setDebugLoc(SI->getDebugLoc()); - DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" + DEBUG(dbgs() << " Formed " << (isMemcpySafe ? "memcpy: " : "memmove: ") + << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); @@ -603,6 +618,5 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // Okay, the memset has been formed. 
Zap the original store and anything that // feeds into it. deleteDeadInstruction(SI, *SE, TLI); - ++NumMemCpy; return true; } diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll index 46ab7e5542b..5afc405fa6b 100644 --- a/test/Transforms/LoopIdiom/basic.ll +++ b/test/Transforms/LoopIdiom/basic.ll @@ -383,4 +383,26 @@ for.end: ; preds = %for.inc } +@p = common global [1024 x i8] zeroinitializer, align 16 +define void @test15(i32 %n) nounwind { +entry: + %cmp6 = icmp eq i32 %n, 0 + br i1 %cmp6, label %for.end, label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %indvars.iv.next = add i64 %indvars.iv, 1 + %arrayidx = getelementptr inbounds [1024 x i8]* @p, i64 0, i64 %indvars.iv.next + %0 = load i8* %arrayidx, align 1 + %arrayidx2 = getelementptr inbounds [1024 x i8]* @p, i64 0, i64 %indvars.iv + store i8 %0, i8* %arrayidx2, align 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +; CHECK: @test15 +; CHECK: call void @llvm.memmove.p0i8.p0i8.i64(i8* getelementptr inbounds ([1024 x i8]* @p, i32 0, i32 0), i8* getelementptr inbounds ([1024 x i8]* @p, i64 0, i64 1), +}