Loop idiom recognizer was replacing too many uses of popcount.

When recognizing that a loop can be replaced with ctpop, we were incorrectly replacing all uses of the loop's precondition compare with a new compare derived from the ctpop result.

The bug was exposed because we replaced a use that precedes the ctpop with the ctpop-derived value, producing a use before its definition; i.e., we changed

 %tobool.5 = icmp ne i32 %num, 0
 store i1 %tobool.5, i1* %ptr
 br i1 %tobool.5, label %for.body.lr.ph, label %for.end

to

 store i1 %1, i1* %ptr
 %0 = call i32 @llvm.ctpop.i32(i32 %num)
 %1 = icmp ne i32 %0, 0
 br i1 %1, label %for.body.lr.ph, label %for.end

Even if we inserted the ctpop so that it dominates the store here, that would still be incorrect.  The store doesn’t want the result of ctpop.

The fix is very simple, and involves replacing only the branch condition with the ctpop instead of all uses.

Reviewed by Hal Finkel.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@242068 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Pete Cooper 2015-07-13 21:25:33 +00:00
parent da9c587dad
commit 71a4b301fd
2 changed files with 38 additions and 1 deletions

View File

@ -508,7 +508,7 @@ void NclPopcountRecognize::transform(Instruction *CntInst,
ICmpInst *NewPreCond =
cast<ICmpInst>(Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
PreCond->replaceAllUsesWith(NewPreCond);
PreCondBr->setCondition(NewPreCond);
RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI);
}

View File

@ -0,0 +1,37 @@
; RUN: opt -loop-idiom -S < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios8.0.0"
; When we replace the precondition with a ctpop, we need to ensure
; that only the first branch reads the ctpop. The store prior
; to that should continue to read from the original compare.
; CHECK: %tobool.5 = icmp ne i32 %num, 0
; CHECK: store i1 %tobool.5, i1* %ptr
; CHECK: %0 = call i32 @llvm.ctpop.i32(i32 %num)
; CHECK: %1 = icmp ne i32 %0, 0
; CHECK: br i1 %1, label %for.body.lr.ph, label %for.end
; Counts loop iterations of the "clear the lowest set bit" idiom
; (x = x & (x - 1)) applied to %num, returning 0 when %num is zero.
; Before the guarding branch, the "%num != 0" precondition compare is also
; stored through %ptr, so the compare has a second user besides the branch.
define internal fastcc i32 @num_bits_set(i32 %num, i1* %ptr) #1 {
entry:
; Loop precondition: the loop is skipped entirely when %num is zero.
%tobool.5 = icmp ne i32 %num, 0
; Non-branch use of the precondition compare.
store i1 %tobool.5, i1* %ptr
br i1 %tobool.5, label %for.body.lr.ph, label %for.end
for.body.lr.ph: ; preds = %entry
br label %for.body
for.body: ; preds = %for.body.lr.ph, %for.body
; %count.07 is the running iteration count; %num.addr.06 is the remaining value.
%count.07 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
%num.addr.06 = phi i32 [ %num, %for.body.lr.ph ], [ %and, %for.body ]
; %and = x & (x - 1), which clears the lowest set bit of x.
%sub = add i32 %num.addr.06, -1
%and = and i32 %sub, %num.addr.06
%inc = add nsw i32 %count.07, 1
; Keep looping while bits remain set.
%tobool = icmp ne i32 %and, 0
br i1 %tobool, label %for.body, label %for.end
for.end: ; preds = %for.body, %entry
; 0 when the loop was skipped from %entry, otherwise the final count.
%count.0.lcssa = phi i32 [ %inc, %for.body ], [ 0, %entry ]
ret i32 %count.0.lcssa
}