mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-05 14:34:55 +00:00
- Stop simplifycfg from duplicating "ret" instructions into unconditional
  branches. PR8575, rdar://5134905, rdar://8911460.
- Allow codegen tail duplication to duplicate small return blocks after
  register allocation is done.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@124462 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c3a20bab75
commit
40f64cb0de
@ -465,9 +465,12 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
|
||||
MaxDuplicateCount = TailDuplicateSize;
|
||||
|
||||
if (PreRegAlloc) {
|
||||
// Pre-regalloc tail duplication hurts compile time and doesn't help
|
||||
// much except for indirect branches.
|
||||
if (TailBB->empty() || !TailBB->back().getDesc().isIndirectBranch())
|
||||
if (TailBB->empty())
|
||||
return false;
|
||||
const TargetInstrDesc &TID = TailBB->back().getDesc();
|
||||
// Pre-regalloc tail duplication hurts compile time and doesn't help
|
||||
// much except for indirect branches and returns.
|
||||
if (!TID.isIndirectBranch() && !TID.isReturn())
|
||||
return false;
|
||||
// If the target has hardware branch prediction that can handle indirect
|
||||
// branches, duplicating them can often make them predictable when there
|
||||
@ -502,7 +505,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
|
||||
}
|
||||
// Heuristically, don't tail-duplicate calls if it would expand code size,
|
||||
// as it's less likely to be worth the extra cost.
|
||||
if (InstrCount > 1 && HasCall)
|
||||
if (InstrCount > 1 && (PreRegAlloc && HasCall))
|
||||
return false;
|
||||
|
||||
DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/Support/CFG.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/ConstantRange.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
@ -36,6 +37,10 @@
|
||||
#include <map>
|
||||
using namespace llvm;
|
||||
|
||||
static cl::opt<bool>
|
||||
DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false),
|
||||
cl::desc("Duplicate return instructions into unconditional branches"));
|
||||
|
||||
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
|
||||
|
||||
namespace {
|
||||
@ -2027,7 +2032,7 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI) {
|
||||
}
|
||||
|
||||
// If we found some, do the transformation!
|
||||
if (!UncondBranchPreds.empty()) {
|
||||
if (!UncondBranchPreds.empty() && DupRet) {
|
||||
while (!UncondBranchPreds.empty()) {
|
||||
BasicBlock *Pred = UncondBranchPreds.pop_back_val();
|
||||
DEBUG(dbgs() << "FOLDING: " << *BB
|
||||
|
@ -1,50 +0,0 @@
|
||||
; RUN: llc < %s -mtriple=i386-apple-darwin -o /dev/null -stats -info-output-file - | grep asm-printer | grep 29
|
||||
|
||||
%CC = type { %Register }
|
||||
%II = type { %"struct.XX::II::$_74" }
|
||||
%JITFunction = type %YYValue* (%CC*, %YYValue**)
|
||||
%YYValue = type { i32 (...)** }
|
||||
%Register = type { %"struct.XX::ByteCodeFeatures" }
|
||||
%"struct.XX::ByteCodeFeatures" = type { i32 }
|
||||
%"struct.XX::II::$_74" = type { i8* }
|
||||
@llvm.used = appending global [1 x i8*] [ i8* bitcast (%JITFunction* @loop to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
|
||||
|
||||
define %YYValue* @loop(%CC*, %YYValue**) nounwind {
|
||||
; <label>:2
|
||||
%3 = getelementptr %CC* %0, i32 -9 ; <%CC*> [#uses=1]
|
||||
%4 = bitcast %CC* %3 to %YYValue** ; <%YYValue**> [#uses=2]
|
||||
%5 = load %YYValue** %4 ; <%YYValue*> [#uses=3]
|
||||
%unique_1.i = ptrtoint %YYValue* %5 to i1 ; <i1> [#uses=1]
|
||||
br i1 %unique_1.i, label %loop, label %11
|
||||
|
||||
loop: ; preds = %6, %2
|
||||
%.1 = phi %YYValue* [ inttoptr (i32 1 to %YYValue*), %2 ], [ %intAddValue, %6 ] ; <%YYValue*> [#uses=3]
|
||||
%immediateCmp = icmp slt %YYValue* %.1, %5 ; <i1> [#uses=1]
|
||||
br i1 %immediateCmp, label %6, label %8
|
||||
|
||||
; <label>:6 ; preds = %loop
|
||||
%lhsInt = ptrtoint %YYValue* %.1 to i32 ; <i32> [#uses=1]
|
||||
%7 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %lhsInt, i32 2) ; <{ i32, i1 }> [#uses=2]
|
||||
%intAdd = extractvalue { i32, i1 } %7, 0 ; <i32> [#uses=1]
|
||||
%intAddValue = inttoptr i32 %intAdd to %YYValue* ; <%YYValue*> [#uses=1]
|
||||
%intAddOverflow = extractvalue { i32, i1 } %7, 1 ; <i1> [#uses=1]
|
||||
br i1 %intAddOverflow, label %.loopexit, label %loop
|
||||
|
||||
; <label>:8 ; preds = %loop
|
||||
ret %YYValue* inttoptr (i32 10 to %YYValue*)
|
||||
|
||||
.loopexit: ; preds = %6
|
||||
%9 = bitcast %CC* %0 to %YYValue** ; <%YYValue**> [#uses=1]
|
||||
store %YYValue* %.1, %YYValue** %9
|
||||
store %YYValue* %5, %YYValue** %4
|
||||
%10 = call fastcc %YYValue* @foobar(%II* inttoptr (i32 3431104 to %II*), %CC* %0, %YYValue** %1) ; <%YYValue*> [#uses=1]
|
||||
ret %YYValue* %10
|
||||
|
||||
; <label>:11 ; preds = %2
|
||||
%12 = call fastcc %YYValue* @foobar(%II* inttoptr (i32 3431080 to %II*), %CC* %0, %YYValue** %1) ; <%YYValue*> [#uses=1]
|
||||
ret %YYValue* %12
|
||||
}
|
||||
|
||||
declare fastcc %YYValue* @foobar(%II*, %CC*, %YYValue**) nounwind
|
||||
|
||||
declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind
|
@ -70,6 +70,7 @@ exit:
|
||||
|
||||
; Same as slightly_more_involved, but block_a is now a CFG diamond with
|
||||
; fallthrough edges which should be preserved.
|
||||
; "callq block_a_merge_func" is tail duped.
|
||||
|
||||
; CHECK: yet_more_involved:
|
||||
; CHECK: jmp .LBB2_1
|
||||
@ -78,12 +79,12 @@ exit:
|
||||
; CHECK-NEXT: callq bar99
|
||||
; CHECK-NEXT: callq get
|
||||
; CHECK-NEXT: cmpl $2999, %eax
|
||||
; CHECK-NEXT: jg .LBB2_6
|
||||
; CHECK-NEXT: callq block_a_true_func
|
||||
; CHECK-NEXT: jmp .LBB2_7
|
||||
; CHECK-NEXT: .LBB2_6:
|
||||
; CHECK-NEXT: jle .LBB2_5
|
||||
; CHECK-NEXT: callq block_a_false_func
|
||||
; CHECK-NEXT: .LBB2_7:
|
||||
; CHECK-NEXT: callq block_a_merge_func
|
||||
; CHECK-NEXT: jmp .LBB2_1
|
||||
; CHECK-NEXT: .LBB2_5:
|
||||
; CHECK-NEXT: callq block_a_true_func
|
||||
; CHECK-NEXT: callq block_a_merge_func
|
||||
; CHECK-NEXT: .LBB2_1:
|
||||
; CHECK-NEXT: callq body
|
||||
|
@ -1,14 +1,14 @@
|
||||
; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | grep {ret i32 %v1}
|
||||
; There should be no uncond branches left.
|
||||
; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | not grep {br label}
|
||||
; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | FileCheck %s
|
||||
|
||||
declare i32 @f1()
|
||||
declare i32 @f2()
|
||||
declare void @f3()
|
||||
|
||||
define i32 @test(i1 %cond, i1 %cond2, i1 %cond3) {
|
||||
; CHECK: test
|
||||
br i1 %cond, label %T1, label %F1
|
||||
|
||||
; CHECK-NOT: T1:
|
||||
T1:
|
||||
%v1 = call i32 @f1()
|
||||
br label %Merge
|
||||
@ -18,6 +18,10 @@ F1:
|
||||
br label %Merge
|
||||
|
||||
Merge:
|
||||
; CHECK: Merge:
|
||||
; CHECK: %v1 = call i32 @f1()
|
||||
; CHECK-NEXT: %D = and i1 %cond2, %cond3
|
||||
; CHECK-NEXT: br i1 %D
|
||||
%A = phi i1 [true, %T1], [false, %F1]
|
||||
%B = phi i32 [%v1, %T1], [%v2, %F1]
|
||||
%C = and i1 %A, %cond2
|
||||
|
@ -1,14 +1,14 @@
|
||||
; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | grep {ret i32 %v1}
|
||||
; There should be no uncond branches left.
|
||||
; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | not grep {br label}
|
||||
; RUN: opt < %s -jump-threading -mem2reg -instcombine -simplifycfg -S | FileCheck %s
|
||||
|
||||
declare i32 @f1()
|
||||
declare i32 @f2()
|
||||
declare void @f3()
|
||||
|
||||
define i32 @test(i1 %cond, i1 %cond2) {
|
||||
; CHECK: test
|
||||
br i1 %cond, label %T1, label %F1
|
||||
|
||||
; CHECK-NOT: T1
|
||||
T1:
|
||||
%v1 = call i32 @f1()
|
||||
br label %Merge
|
||||
@ -18,6 +18,9 @@ F1:
|
||||
br label %Merge
|
||||
|
||||
Merge:
|
||||
; CHECK: Merge:
|
||||
; CHECK: %v1 = call i32 @f1()
|
||||
; CHECK-NEXT: br i1 %cond2
|
||||
%A = phi i1 [true, %T1], [false, %F1]
|
||||
%B = phi i32 [%v1, %T1], [%v2, %F1]
|
||||
%C = and i1 %A, %cond2
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: opt < %s -jump-threading -simplifycfg -S | grep {ret i32 1}
|
||||
; RUN: opt < %s -jump-threading -S | FileCheck %s
|
||||
; rdar://6402033
|
||||
|
||||
; Test that we can thread through the block with the partially redundant load (%2).
|
||||
@ -6,12 +6,16 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
|
||||
target triple = "i386-apple-darwin7"
|
||||
|
||||
define i32 @foo(i32* %P) nounwind {
|
||||
; CHECK: foo
|
||||
entry:
|
||||
%0 = tail call i32 (...)* @f1() nounwind ; <i32> [#uses=1]
|
||||
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
|
||||
br i1 %1, label %bb1, label %bb
|
||||
|
||||
bb: ; preds = %entry
|
||||
; CHECK: bb1.thread:
|
||||
; CHECK: store
|
||||
; CHECK: br label %bb3
|
||||
store i32 42, i32* %P, align 4
|
||||
br label %bb1
|
||||
|
||||
@ -26,6 +30,9 @@ bb2: ; preds = %bb1
|
||||
ret i32 %res.0
|
||||
|
||||
bb3: ; preds = %bb1
|
||||
; CHECK: bb3:
|
||||
; CHECK: %res.01 = phi i32 [ 1, %bb1.thread ], [ 0, %bb1 ]
|
||||
; CHECK: ret i32 %res.01
|
||||
ret i32 %res.0
|
||||
}
|
||||
|
||||
|
@ -8,7 +8,6 @@
|
||||
; CHECK: i64 2, label
|
||||
; CHECK: i64 3, label
|
||||
; CHECK: i64 4, label
|
||||
; CHECK-NOT: br
|
||||
; CHECK: }
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
|
@ -25,16 +25,6 @@ define void @test3(i1 %T) {
|
||||
}
|
||||
|
||||
|
||||
define void @test4() {
|
||||
br label %return
|
||||
return:
|
||||
ret void
|
||||
; CHECK: @test4
|
||||
; CHECK-NEXT: ret void
|
||||
}
|
||||
@test4g = global i8* blockaddress(@test4, %return)
|
||||
|
||||
|
||||
; PR5795
|
||||
define void @test5(i32 %A) {
|
||||
switch i32 %A, label %return [
|
||||
|
@ -147,7 +147,7 @@ UnifiedReturnBlock: ; preds = %shortcirc_done.4, %shortcirc_next.4
|
||||
; CHECK: i32 16, label %UnifiedReturnBlock
|
||||
; CHECK: i32 17, label %UnifiedReturnBlock
|
||||
; CHECK: i32 18, label %UnifiedReturnBlock
|
||||
; CHECK: i32 19, label %switch.edge
|
||||
; CHECK: i32 19, label %UnifiedReturnBlock
|
||||
; CHECK: ]
|
||||
}
|
||||
|
||||
@ -441,3 +441,29 @@ if.end:
|
||||
; CHECK-NOT: switch
|
||||
; CHECK: ret void
|
||||
}
|
||||
|
||||
; PR8675
|
||||
; rdar://5134905
|
||||
define zeroext i1 @test16(i32 %x) nounwind {
|
||||
entry:
|
||||
; CHECK: @test16
|
||||
; CHECK: switch i32 %x, label %lor.rhs [
|
||||
; CHECK: i32 1, label %lor.end
|
||||
; CHECK: i32 2, label %lor.end
|
||||
; CHECK: i32 3, label %lor.end
|
||||
; CHECK: ]
|
||||
%cmp.i = icmp eq i32 %x, 1
|
||||
br i1 %cmp.i, label %lor.end, label %lor.lhs.false
|
||||
|
||||
lor.lhs.false:
|
||||
%cmp.i2 = icmp eq i32 %x, 2
|
||||
br i1 %cmp.i2, label %lor.end, label %lor.rhs
|
||||
|
||||
lor.rhs:
|
||||
%cmp.i1 = icmp eq i32 %x, 3
|
||||
br label %lor.end
|
||||
|
||||
lor.end:
|
||||
%0 = phi i1 [ true, %lor.lhs.false ], [ true, %entry ], [ %cmp.i1, %lor.rhs ]
|
||||
ret i1 %0
|
||||
}
|
||||
|
@ -1,5 +1,4 @@
|
||||
; RUN: opt < %s -simplifycfg -S | not grep br
|
||||
|
||||
; RUN: opt < %s -simplifycfg -S | FileCheck %s
|
||||
|
||||
%llvm.dbg.anchor.type = type { i32, i32 }
|
||||
%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
|
||||
@ -13,7 +12,16 @@
|
||||
|
||||
declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
|
||||
|
||||
define i1 @_ZN4llvm11SetCondInst7classofEPKNS_11InstructionE({ i32, i32 }* %I) {
|
||||
define i1 @t({ i32, i32 }* %I) {
|
||||
; CHECK: t
|
||||
; CHECK: switch i32 %tmp.2.i, label %shortcirc_next.4 [
|
||||
; CHECK: i32 14, label %UnifiedReturnBlock
|
||||
; CHECK: i32 15, label %UnifiedReturnBlock
|
||||
; CHECK: i32 16, label %UnifiedReturnBlock
|
||||
; CHECK: i32 17, label %UnifiedReturnBlock
|
||||
; CHECK: i32 18, label %UnifiedReturnBlock
|
||||
; CHECK: i32 19, label %UnifiedReturnBlock
|
||||
; CHECK: ]
|
||||
entry:
|
||||
%tmp.1.i = getelementptr { i32, i32 }* %I, i64 0, i32 1 ; <i32*> [#uses=1]
|
||||
%tmp.2.i = load i32* %tmp.1.i ; <i32> [#uses=6]
|
||||
|
Loading…
x
Reference in New Issue
Block a user