mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-11-02 07:11:49 +00:00
[CodeGenPrepare] Improved logic to speculate calls to cttz/ctlz.
This patch improves the logic added at revision 224899 (see review D6728) that teaches the backend when it is profitable to speculate calls to cttz/ctlz. The original algorithm conservatively avoided speculating more than one instruction from a basic block in a control flow graph modelling an if-statement. In particular, the only allowed instruction (excluding the terminator) was a call to cttz/ctlz. However, there are cases where we could be less conservative and still be able to speculate a call to cttz/ctlz. With this patch, CodeGenPrepare now tries to speculate a cttz/ctlz if the result is zero extended/truncated in the same basic block, and the zext/trunc instruction is "free" for the target. Added new test cases to CodeGen/X86/cttz-ctlz.ll Differential Revision: http://reviews.llvm.org/D6853 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225274 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
46cb54c0fb
commit
e46783d5b7
@ -4008,15 +4008,41 @@ static bool OptimizeBranchInst(BranchInst *BrInst, const TargetLowering &TLI) {
|
|||||||
// See if ThenBB contains only one instruction (excluding the
|
// See if ThenBB contains only one instruction (excluding the
|
||||||
// terminator and DbgInfoIntrinsic calls).
|
// terminator and DbgInfoIntrinsic calls).
|
||||||
IntrinsicInst *II = nullptr;
|
IntrinsicInst *II = nullptr;
|
||||||
|
CastInst *CI = nullptr;
|
||||||
for (BasicBlock::iterator I = ThenBB->begin(),
|
for (BasicBlock::iterator I = ThenBB->begin(),
|
||||||
E = std::prev(ThenBB->end()); I != E; ++I) {
|
E = std::prev(ThenBB->end()); I != E; ++I) {
|
||||||
// Skip debug info.
|
// Skip debug info.
|
||||||
if (isa<DbgInfoIntrinsic>(I))
|
if (isa<DbgInfoIntrinsic>(I))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (II)
|
// Check if this is a zero extension or a truncate of a previously
|
||||||
// Avoid speculating more than one instruction.
|
// matched call to intrinsic cttz/ctlz.
|
||||||
return false;
|
if (II) {
|
||||||
|
// Early exit if we already found a "free" zero extend/truncate.
|
||||||
|
if (CI)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
Type *SrcTy = II->getType();
|
||||||
|
Type *DestTy = I->getType();
|
||||||
|
Value *V;
|
||||||
|
|
||||||
|
if (match(cast<Instruction>(I), m_ZExt(m_Value(V))) && V == II) {
|
||||||
|
// Speculate this zero extend only if it is "free" for the target.
|
||||||
|
if (TLI.isZExtFree(SrcTy, DestTy)) {
|
||||||
|
CI = cast<CastInst>(I);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} else if (match(cast<Instruction>(I), m_Trunc(m_Value(V))) && V == II) {
|
||||||
|
// Speculate this truncate only if it is "free" for the target.
|
||||||
|
if (TLI.isTruncateFree(SrcTy, DestTy)) {
|
||||||
|
CI = cast<CastInst>(I);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Avoid speculating more than one instruction.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// See if this is a call to intrinsic cttz/ctlz.
|
// See if this is a call to intrinsic cttz/ctlz.
|
||||||
if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::cttz>())) {
|
if (match(cast<Instruction>(I), m_Intrinsic<Intrinsic::cttz>())) {
|
||||||
@ -4041,11 +4067,14 @@ static bool OptimizeBranchInst(BranchInst *BrInst, const TargetLowering &TLI) {
|
|||||||
Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
|
Value *ThenV = PN->getIncomingValueForBlock(ThenBB);
|
||||||
Value *OrigV = PN->getIncomingValueForBlock(EntryBB);
|
Value *OrigV = PN->getIncomingValueForBlock(EntryBB);
|
||||||
|
|
||||||
if (!OrigV || ThenV != II)
|
if (!OrigV)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
if (ThenV != II && (!CI || ThenV != CI))
|
||||||
|
return false;
|
||||||
|
|
||||||
if (ConstantInt *CInt = dyn_cast<ConstantInt>(OrigV)) {
|
if (ConstantInt *CInt = dyn_cast<ConstantInt>(OrigV)) {
|
||||||
unsigned BitWidth = ThenV->getType()->getIntegerBitWidth();
|
unsigned BitWidth = II->getType()->getIntegerBitWidth();
|
||||||
|
|
||||||
// Don't try to simplify this phi node if 'ThenV' is a cttz/ctlz
|
// Don't try to simplify this phi node if 'ThenV' is a cttz/ctlz
|
||||||
// intrinsic call, but 'OrigV' is not equal to the 'size-of' in bits
|
// intrinsic call, but 'OrigV' is not equal to the 'size-of' in bits
|
||||||
@ -4070,7 +4099,7 @@ static bool OptimizeBranchInst(BranchInst *BrInst, const TargetLowering &TLI) {
|
|||||||
ConstantInt::getFalse(II->getContext()) };
|
ConstantInt::getFalse(II->getContext()) };
|
||||||
Module *M = EntryBB->getParent()->getParent();
|
Module *M = EntryBB->getParent()->getParent();
|
||||||
Value *IF = Intrinsic::getDeclaration(M, II->getIntrinsicID(), Ty);
|
Value *IF = Intrinsic::getDeclaration(M, II->getIntrinsicID(), Ty);
|
||||||
IRBuilder<> Builder(BrInst);
|
IRBuilder<> Builder(II);
|
||||||
Instruction *NewI = Builder.CreateCall(IF, Args);
|
Instruction *NewI = Builder.CreateCall(IF, Args);
|
||||||
|
|
||||||
// Replace the old call to cttz/ctlz.
|
// Replace the old call to cttz/ctlz.
|
||||||
|
@ -241,6 +241,178 @@ cond.end: ; preds = %entry, %cond.true
|
|||||||
ret i16 %cond
|
ret i16 %cond
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; The following tests verify that calls to cttz/ctlz are speculated even if
|
||||||
|
; basic block %cond.true has an extra zero extend/truncate which is "free"
|
||||||
|
; for the target.
|
||||||
|
|
||||||
|
define i64 @test1e(i32 %x) {
|
||||||
|
; ALL-LABEL: @test1e(
|
||||||
|
; LZCNT: icmp eq i32 %x, 0
|
||||||
|
; LZCNT: call i32 @llvm.cttz.i32(i32 %x, i1 true)
|
||||||
|
; BMI: call i32 @llvm.cttz.i32(i32 %x, i1 false)
|
||||||
|
; GENERIC: icmp eq i32 %x, 0
|
||||||
|
; GENERIC: call i32 @llvm.cttz.i32(i32 %x, i1 true)
|
||||||
|
entry:
|
||||||
|
%tobool = icmp eq i32 %x, 0
|
||||||
|
br i1 %tobool, label %cond.end, label %cond.true
|
||||||
|
|
||||||
|
cond.true: ; preds = %entry
|
||||||
|
%0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
|
||||||
|
%phitmp2 = zext i32 %0 to i64
|
||||||
|
br label %cond.end
|
||||||
|
|
||||||
|
cond.end: ; preds = %entry, %cond.true
|
||||||
|
%cond = phi i64 [ %phitmp2, %cond.true ], [ 32, %entry ]
|
||||||
|
ret i64 %cond
|
||||||
|
}
|
||||||
|
|
||||||
|
define i32 @test2e(i64 %x) {
|
||||||
|
; ALL-LABEL: @test2e(
|
||||||
|
; LZCNT: icmp eq i64 %x, 0
|
||||||
|
; LZCNT: call i64 @llvm.cttz.i64(i64 %x, i1 true)
|
||||||
|
; BMI: call i64 @llvm.cttz.i64(i64 %x, i1 false)
|
||||||
|
; GENERIC: icmp eq i64 %x, 0
|
||||||
|
; GENERIC: call i64 @llvm.cttz.i64(i64 %x, i1 true)
|
||||||
|
entry:
|
||||||
|
%tobool = icmp eq i64 %x, 0
|
||||||
|
br i1 %tobool, label %cond.end, label %cond.true
|
||||||
|
|
||||||
|
cond.true: ; preds = %entry
|
||||||
|
%0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
|
||||||
|
%cast = trunc i64 %0 to i32
|
||||||
|
br label %cond.end
|
||||||
|
|
||||||
|
cond.end: ; preds = %entry, %cond.true
|
||||||
|
%cond = phi i32 [ %cast, %cond.true ], [ 64, %entry ]
|
||||||
|
ret i32 %cond
|
||||||
|
}
|
||||||
|
|
||||||
|
define i64 @test3e(i32 %x) {
|
||||||
|
; ALL-LABEL: @test3e(
|
||||||
|
; BMI: icmp eq i32 %x, 0
|
||||||
|
; BMI: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
|
||||||
|
; LZCNT: call i32 @llvm.ctlz.i32(i32 %x, i1 false)
|
||||||
|
; GENERIC: icmp eq i32 %x, 0
|
||||||
|
; GENERIC: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
|
||||||
|
entry:
|
||||||
|
%tobool = icmp eq i32 %x, 0
|
||||||
|
br i1 %tobool, label %cond.end, label %cond.true
|
||||||
|
|
||||||
|
cond.true: ; preds = %entry
|
||||||
|
%0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
|
||||||
|
%phitmp2 = zext i32 %0 to i64
|
||||||
|
br label %cond.end
|
||||||
|
|
||||||
|
cond.end: ; preds = %entry, %cond.true
|
||||||
|
%cond = phi i64 [ %phitmp2, %cond.true ], [ 32, %entry ]
|
||||||
|
ret i64 %cond
|
||||||
|
}
|
||||||
|
|
||||||
|
define i32 @test4e(i64 %x) {
|
||||||
|
; ALL-LABEL: @test4e(
|
||||||
|
; BMI: icmp eq i64 %x, 0
|
||||||
|
; BMI: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
|
||||||
|
; LZCNT: call i64 @llvm.ctlz.i64(i64 %x, i1 false)
|
||||||
|
; GENERIC: icmp eq i64 %x, 0
|
||||||
|
; GENERIC: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
|
||||||
|
entry:
|
||||||
|
%tobool = icmp eq i64 %x, 0
|
||||||
|
br i1 %tobool, label %cond.end, label %cond.true
|
||||||
|
|
||||||
|
cond.true: ; preds = %entry
|
||||||
|
%0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
|
||||||
|
%cast = trunc i64 %0 to i32
|
||||||
|
br label %cond.end
|
||||||
|
|
||||||
|
cond.end: ; preds = %entry, %cond.true
|
||||||
|
%cond = phi i32 [ %cast, %cond.true ], [ 64, %entry ]
|
||||||
|
ret i32 %cond
|
||||||
|
}
|
||||||
|
|
||||||
|
define i16 @test5e(i64 %x) {
|
||||||
|
; ALL-LABEL: @test5e(
|
||||||
|
; BMI: icmp eq i64 %x, 0
|
||||||
|
; BMI: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
|
||||||
|
; LZCNT: call i64 @llvm.ctlz.i64(i64 %x, i1 false)
|
||||||
|
; GENERIC: icmp eq i64 %x, 0
|
||||||
|
; GENERIC: call i64 @llvm.ctlz.i64(i64 %x, i1 true)
|
||||||
|
entry:
|
||||||
|
%tobool = icmp eq i64 %x, 0
|
||||||
|
br i1 %tobool, label %cond.end, label %cond.true
|
||||||
|
|
||||||
|
cond.true: ; preds = %entry
|
||||||
|
%0 = tail call i64 @llvm.ctlz.i64(i64 %x, i1 true)
|
||||||
|
%cast = trunc i64 %0 to i16
|
||||||
|
br label %cond.end
|
||||||
|
|
||||||
|
cond.end: ; preds = %entry, %cond.true
|
||||||
|
%cond = phi i16 [ %cast, %cond.true ], [ 64, %entry ]
|
||||||
|
ret i16 %cond
|
||||||
|
}
|
||||||
|
|
||||||
|
define i16 @test6e(i32 %x) {
|
||||||
|
; ALL-LABEL: @test6e(
|
||||||
|
; BMI: icmp eq i32 %x, 0
|
||||||
|
; BMI: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
|
||||||
|
; LZCNT: call i32 @llvm.ctlz.i32(i32 %x, i1 false)
|
||||||
|
; GENERIC: icmp eq i32 %x, 0
|
||||||
|
; GENERIC: call i32 @llvm.ctlz.i32(i32 %x, i1 true)
|
||||||
|
entry:
|
||||||
|
%tobool = icmp eq i32 %x, 0
|
||||||
|
br i1 %tobool, label %cond.end, label %cond.true
|
||||||
|
|
||||||
|
cond.true: ; preds = %entry
|
||||||
|
%0 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
|
||||||
|
%cast = trunc i32 %0 to i16
|
||||||
|
br label %cond.end
|
||||||
|
|
||||||
|
cond.end: ; preds = %entry, %cond.true
|
||||||
|
%cond = phi i16 [ %cast, %cond.true ], [ 32, %entry ]
|
||||||
|
ret i16 %cond
|
||||||
|
}
|
||||||
|
|
||||||
|
define i16 @test7e(i64 %x) {
|
||||||
|
; ALL-LABEL: @test7e(
|
||||||
|
; LZCNT: icmp eq i64 %x, 0
|
||||||
|
; LZCNT: call i64 @llvm.cttz.i64(i64 %x, i1 true)
|
||||||
|
; BMI: call i64 @llvm.cttz.i64(i64 %x, i1 false)
|
||||||
|
; GENERIC: icmp eq i64 %x, 0
|
||||||
|
; GENERIC: call i64 @llvm.cttz.i64(i64 %x, i1 true)
|
||||||
|
entry:
|
||||||
|
%tobool = icmp eq i64 %x, 0
|
||||||
|
br i1 %tobool, label %cond.end, label %cond.true
|
||||||
|
|
||||||
|
cond.true: ; preds = %entry
|
||||||
|
%0 = tail call i64 @llvm.cttz.i64(i64 %x, i1 true)
|
||||||
|
%cast = trunc i64 %0 to i16
|
||||||
|
br label %cond.end
|
||||||
|
|
||||||
|
cond.end: ; preds = %entry, %cond.true
|
||||||
|
%cond = phi i16 [ %cast, %cond.true ], [ 64, %entry ]
|
||||||
|
ret i16 %cond
|
||||||
|
}
|
||||||
|
|
||||||
|
define i16 @test8e(i32 %x) {
|
||||||
|
; ALL-LABEL: @test8e(
|
||||||
|
; LZCNT: icmp eq i32 %x, 0
|
||||||
|
; LZCNT: call i32 @llvm.cttz.i32(i32 %x, i1 true)
|
||||||
|
; BMI: call i32 @llvm.cttz.i32(i32 %x, i1 false)
|
||||||
|
; GENERIC: icmp eq i32 %x, 0
|
||||||
|
; GENERIC: call i32 @llvm.cttz.i32(i32 %x, i1 true)
|
||||||
|
entry:
|
||||||
|
%tobool = icmp eq i32 %x, 0
|
||||||
|
br i1 %tobool, label %cond.end, label %cond.true
|
||||||
|
|
||||||
|
cond.true: ; preds = %entry
|
||||||
|
%0 = tail call i32 @llvm.cttz.i32(i32 %x, i1 true)
|
||||||
|
%cast = trunc i32 %0 to i16
|
||||||
|
br label %cond.end
|
||||||
|
|
||||||
|
cond.end: ; preds = %entry, %cond.true
|
||||||
|
%cond = phi i16 [ %cast, %cond.true ], [ 32, %entry ]
|
||||||
|
ret i16 %cond
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
declare i64 @llvm.ctlz.i64(i64, i1)
|
declare i64 @llvm.ctlz.i64(i64, i1)
|
||||||
declare i32 @llvm.ctlz.i32(i32, i1)
|
declare i32 @llvm.ctlz.i32(i32, i1)
|
||||||
|
Loading…
Reference in New Issue
Block a user