mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-13 20:32:21 +00:00
Re-commit r235560: Switch lowering: extract jump tables and bit tests before building binary tree (PR22262)
Third time's the charm. The previous commit was reverted as a reverse for-loop in SelectionDAGBuilder::lowerWorkItem did 'I--' on an iterator at the beginning of a vector, causing asserts when using debugging iterators. This commit fixes that. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235608 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
08aea0a553
commit
defaf830f9
File diff suppressed because it is too large
Load Diff
@ -134,26 +134,65 @@ private:
|
||||
/// SDNodes we create.
|
||||
unsigned SDNodeOrder;
|
||||
|
||||
/// Case - A struct to record the Value for a switch case, and the
|
||||
/// case's target basic block.
|
||||
struct Case {
|
||||
const ConstantInt *Low;
|
||||
const ConstantInt *High;
|
||||
MachineBasicBlock* BB;
|
||||
uint32_t ExtraWeight;
|
||||
enum CaseClusterKind {
|
||||
/// A cluster of adjacent case labels with the same destination, or just one
|
||||
/// case.
|
||||
CC_Range,
|
||||
/// A cluster of cases suitable for jump table lowering.
|
||||
CC_JumpTable,
|
||||
/// A cluster of cases suitable for bit test lowering.
|
||||
CC_BitTests
|
||||
};
|
||||
|
||||
Case() : Low(nullptr), High(nullptr), BB(nullptr), ExtraWeight(0) { }
|
||||
Case(const ConstantInt *low, const ConstantInt *high, MachineBasicBlock *bb,
|
||||
uint32_t extraweight) : Low(low), High(high), BB(bb),
|
||||
ExtraWeight(extraweight) { }
|
||||
/// A cluster of case labels.
|
||||
struct CaseCluster {
|
||||
CaseClusterKind Kind;
|
||||
const ConstantInt *Low, *High;
|
||||
union {
|
||||
MachineBasicBlock *MBB;
|
||||
unsigned JTCasesIndex;
|
||||
unsigned BTCasesIndex;
|
||||
};
|
||||
uint64_t Weight;
|
||||
|
||||
APInt size() const {
|
||||
const APInt &rHigh = High->getValue();
|
||||
const APInt &rLow = Low->getValue();
|
||||
return (rHigh - rLow + 1ULL);
|
||||
static CaseCluster range(const ConstantInt *Low, const ConstantInt *High,
|
||||
MachineBasicBlock *MBB, uint32_t Weight) {
|
||||
CaseCluster C;
|
||||
C.Kind = CC_Range;
|
||||
C.Low = Low;
|
||||
C.High = High;
|
||||
C.MBB = MBB;
|
||||
C.Weight = Weight;
|
||||
return C;
|
||||
}
|
||||
|
||||
static CaseCluster jumpTable(const ConstantInt *Low,
|
||||
const ConstantInt *High, unsigned JTCasesIndex,
|
||||
uint32_t Weight) {
|
||||
CaseCluster C;
|
||||
C.Kind = CC_JumpTable;
|
||||
C.Low = Low;
|
||||
C.High = High;
|
||||
C.JTCasesIndex = JTCasesIndex;
|
||||
C.Weight = Weight;
|
||||
return C;
|
||||
}
|
||||
|
||||
static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High,
|
||||
unsigned BTCasesIndex, uint32_t Weight) {
|
||||
CaseCluster C;
|
||||
C.Kind = CC_BitTests;
|
||||
C.Low = Low;
|
||||
C.High = High;
|
||||
C.BTCasesIndex = BTCasesIndex;
|
||||
C.Weight = Weight;
|
||||
return C;
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::vector<CaseCluster> CaseClusterVector;
|
||||
typedef CaseClusterVector::iterator CaseClusterIt;
|
||||
|
||||
struct CaseBits {
|
||||
uint64_t Mask;
|
||||
MachineBasicBlock* BB;
|
||||
@ -163,42 +202,14 @@ private:
|
||||
CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits,
|
||||
uint32_t Weight):
|
||||
Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { }
|
||||
|
||||
CaseBits() : Mask(0), BB(nullptr), Bits(0), ExtraWeight(0) {}
|
||||
};
|
||||
|
||||
typedef std::vector<Case> CaseVector;
|
||||
typedef std::vector<CaseBits> CaseBitsVector;
|
||||
typedef CaseVector::iterator CaseItr;
|
||||
typedef std::pair<CaseItr, CaseItr> CaseRange;
|
||||
|
||||
/// CaseRec - A struct with ctor used in lowering switches to a binary tree
|
||||
/// of conditional branches.
|
||||
struct CaseRec {
|
||||
CaseRec(MachineBasicBlock *bb, const ConstantInt *lt, const ConstantInt *ge,
|
||||
CaseRange r) :
|
||||
CaseBB(bb), LT(lt), GE(ge), Range(r) {}
|
||||
|
||||
/// CaseBB - The MBB in which to emit the compare and branch
|
||||
MachineBasicBlock *CaseBB;
|
||||
/// LT, GE - If nonzero, we know the current case value must be less-than or
|
||||
/// greater-than-or-equal-to these Constants.
|
||||
const ConstantInt *LT;
|
||||
const ConstantInt *GE;
|
||||
/// Range - A pair of iterators representing the range of case values to be
|
||||
/// processed at this point in the binary search tree.
|
||||
CaseRange Range;
|
||||
};
|
||||
|
||||
typedef std::vector<CaseRec> CaseRecVector;
|
||||
|
||||
struct CaseBitsCmp {
|
||||
bool operator()(const CaseBits &C1, const CaseBits &C2) {
|
||||
return C1.Bits > C2.Bits;
|
||||
}
|
||||
};
|
||||
|
||||
/// Populate Cases with the cases in SI, clustering adjacent cases with the
|
||||
/// same destination together.
|
||||
void Clusterify(CaseVector &Cases, const SwitchInst *SI);
|
||||
/// Sort Clusters and merge adjacent cases.
|
||||
void sortAndRangeify(CaseClusterVector &Clusters);
|
||||
|
||||
/// CaseBlock - This structure is used to communicate between
|
||||
/// SelectionDAGBuilder and SDISel for the code generation of additional basic
|
||||
@ -288,6 +299,58 @@ private:
|
||||
BitTestInfo Cases;
|
||||
};
|
||||
|
||||
/// Minimum jump table density, in percent.
|
||||
enum { MinJumpTableDensity = 40 };
|
||||
|
||||
/// Check whether a range of clusters is dense enough for a jump table.
|
||||
bool isDense(const CaseClusterVector &Clusters, unsigned *TotalCases,
|
||||
unsigned First, unsigned Last);
|
||||
|
||||
/// Build a jump table cluster from Clusters[First..Last]. Returns false if it
|
||||
/// decides it's not a good idea.
|
||||
bool buildJumpTable(CaseClusterVector &Clusters, unsigned First,
|
||||
unsigned Last, const SwitchInst *SI,
|
||||
MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster);
|
||||
|
||||
/// Find clusters of cases suitable for jump table lowering.
|
||||
void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI,
|
||||
MachineBasicBlock *DefaultMBB);
|
||||
|
||||
/// Check whether the range [Low,High] fits in a machine word.
|
||||
bool rangeFitsInWord(const APInt &Low, const APInt &High);
|
||||
|
||||
/// Check whether these clusters are suitable for lowering with bit tests based
|
||||
/// on the number of destinations, comparison metric, and range.
|
||||
bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
|
||||
const APInt &Low, const APInt &High);
|
||||
|
||||
/// Build a bit test cluster from Clusters[First..Last]. Returns false if it
|
||||
/// decides it's not a good idea.
|
||||
bool buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last,
|
||||
const SwitchInst *SI, CaseCluster &BTCluster);
|
||||
|
||||
/// Find clusters of cases suitable for bit test lowering.
|
||||
void findBitTestClusters(CaseClusterVector &Clusters, const SwitchInst *SI);
|
||||
|
||||
struct SwitchWorkListItem {
|
||||
MachineBasicBlock *MBB;
|
||||
CaseClusterIt FirstCluster;
|
||||
CaseClusterIt LastCluster;
|
||||
const ConstantInt *GE;
|
||||
const ConstantInt *LT;
|
||||
};
|
||||
typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList;
|
||||
|
||||
/// Emit comparison and split W into two subtrees.
|
||||
void splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W,
|
||||
Value *Cond, MachineBasicBlock *SwitchMBB);
|
||||
|
||||
/// Lower W.
|
||||
void lowerWorkItem(SwitchWorkListItem W, Value *Cond,
|
||||
MachineBasicBlock *SwitchMBB,
|
||||
MachineBasicBlock *DefaultMBB);
|
||||
|
||||
|
||||
/// A class which encapsulates all of the information needed to generate a
|
||||
/// stack protector check and signals to isel via its state being initialized
|
||||
/// that a stack protector needs to be generated.
|
||||
@ -670,29 +733,6 @@ private:
|
||||
void visitIndirectBr(const IndirectBrInst &I);
|
||||
void visitUnreachable(const UnreachableInst &I);
|
||||
|
||||
// Helpers for visitSwitch
|
||||
bool handleSmallSwitchRange(CaseRec& CR,
|
||||
CaseRecVector& WorkList,
|
||||
const Value* SV,
|
||||
MachineBasicBlock* Default,
|
||||
MachineBasicBlock *SwitchBB);
|
||||
bool handleJTSwitchCase(CaseRec& CR,
|
||||
CaseRecVector& WorkList,
|
||||
const Value* SV,
|
||||
MachineBasicBlock* Default,
|
||||
MachineBasicBlock *SwitchBB);
|
||||
bool handleBTSplitSwitchCase(CaseRec& CR,
|
||||
CaseRecVector& WorkList,
|
||||
const Value* SV,
|
||||
MachineBasicBlock *SwitchBB);
|
||||
void splitSwitchCase(CaseRec &CR, CaseItr Pivot, CaseRecVector &WorkList,
|
||||
const Value *SV, MachineBasicBlock *SwitchBB);
|
||||
bool handleBitTestsSwitchCase(CaseRec& CR,
|
||||
CaseRecVector& WorkList,
|
||||
const Value* SV,
|
||||
MachineBasicBlock* Default,
|
||||
MachineBasicBlock *SwitchBB);
|
||||
|
||||
uint32_t getEdgeWeight(const MachineBasicBlock *Src,
|
||||
const MachineBasicBlock *Dst) const;
|
||||
void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
|
||||
|
@ -1413,13 +1413,8 @@ SelectionDAGISel::FinishBasicBlock() {
|
||||
<< FuncInfo->PHINodesToUpdate[i].first
|
||||
<< ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
|
||||
|
||||
const bool MustUpdatePHINodes = SDB->SwitchCases.empty() &&
|
||||
SDB->JTCases.empty() &&
|
||||
SDB->BitTestCases.empty();
|
||||
|
||||
// Next, now that we know what the last MBB the LLVM BB expanded is, update
|
||||
// PHI nodes in successors.
|
||||
if (MustUpdatePHINodes) {
|
||||
for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
|
||||
MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
|
||||
assert(PHI->isPHI() &&
|
||||
@ -1428,7 +1423,6 @@ SelectionDAGISel::FinishBasicBlock() {
|
||||
continue;
|
||||
PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
|
||||
}
|
||||
}
|
||||
|
||||
// Handle stack protector.
|
||||
if (SDB->SPDescriptor.shouldEmitStackProtector()) {
|
||||
@ -1472,10 +1466,6 @@ SelectionDAGISel::FinishBasicBlock() {
|
||||
SDB->SPDescriptor.resetPerBBState();
|
||||
}
|
||||
|
||||
// If we updated PHI Nodes, return early.
|
||||
if (MustUpdatePHINodes)
|
||||
return;
|
||||
|
||||
for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) {
|
||||
// Lower header first, if it wasn't already lowered
|
||||
if (!SDB->BitTestCases[i].Emitted) {
|
||||
@ -1589,16 +1579,6 @@ SelectionDAGISel::FinishBasicBlock() {
|
||||
}
|
||||
SDB->JTCases.clear();
|
||||
|
||||
// If the switch block involved a branch to one of the actual successors, we
|
||||
// need to update PHI nodes in that block.
|
||||
for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
|
||||
MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first);
|
||||
assert(PHI->isPHI() &&
|
||||
"This is not a machine PHI node that we are updating!");
|
||||
if (FuncInfo->MBB->isSuccessor(PHI->getParent()))
|
||||
PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB);
|
||||
}
|
||||
|
||||
// If we generated any switch lowering information, build and codegen any
|
||||
// additional DAGs necessary.
|
||||
for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
|
||||
|
@ -151,13 +151,14 @@ protected:
|
||||
if (Changed && !ReturnMBB.hasAddressTaken()) {
|
||||
// We now might be able to merge this blr-only block into its
|
||||
// by-layout predecessor.
|
||||
if (ReturnMBB.pred_size() == 1 &&
|
||||
(*ReturnMBB.pred_begin())->isLayoutSuccessor(&ReturnMBB)) {
|
||||
// Move the blr into the preceding block.
|
||||
if (ReturnMBB.pred_size() == 1) {
|
||||
MachineBasicBlock &PrevMBB = **ReturnMBB.pred_begin();
|
||||
if (PrevMBB.isLayoutSuccessor(&ReturnMBB) && PrevMBB.canFallThrough()) {
|
||||
// Move the blr into the preceding block.
|
||||
PrevMBB.splice(PrevMBB.end(), &ReturnMBB, I);
|
||||
PrevMBB.removeSuccessor(&ReturnMBB);
|
||||
}
|
||||
}
|
||||
|
||||
if (ReturnMBB.pred_empty())
|
||||
ReturnMBB.eraseFromParent();
|
||||
|
@ -4,8 +4,8 @@
|
||||
|
||||
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
|
||||
; CHECK-LABEL: t1:
|
||||
; CHECK: cmp r2, #1
|
||||
; CHECK: cmpne r2, #7
|
||||
; CHECK: cmp r2, #7
|
||||
; CHECK: cmpne r2, #1
|
||||
switch i32 %c, label %cond_next [
|
||||
i32 1, label %cond_true
|
||||
i32 7, label %cond_true
|
||||
|
@ -194,7 +194,7 @@ lor.lhs.false459: ; preds = %if.end454
|
||||
%18 = load i32, i32* %mb_type, align 4
|
||||
switch i32 %18, label %for.inc503 [
|
||||
i32 9, label %if.then475
|
||||
i32 10, label %if.then475
|
||||
i32 11, label %if.then475
|
||||
i32 13, label %if.then475
|
||||
i32 14, label %if.then475
|
||||
]
|
||||
|
@ -17,9 +17,9 @@ entry:
|
||||
; CHECK: BB#0: derived from LLVM BB %entry
|
||||
; CHECK: Successors according to CFG: BB#2(64) BB#4(14)
|
||||
; CHECK: BB#4: derived from LLVM BB %entry
|
||||
; CHECK: Successors according to CFG: BB#1(10) BB#5(4)
|
||||
; CHECK: Successors according to CFG: BB#1(4) BB#5(10)
|
||||
; CHECK: BB#5: derived from LLVM BB %entry
|
||||
; CHECK: Successors according to CFG: BB#1(4) BB#3(7)
|
||||
; CHECK: Successors according to CFG: BB#1(10) BB#3(7)
|
||||
|
||||
sw.bb:
|
||||
br label %return
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s
|
||||
; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
@ -45,4 +45,37 @@ if.end3: ; preds = %if.then, %if.then2,
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
|
||||
@.str0 = private unnamed_addr constant [2 x i8] c"a\00"
|
||||
@.str1 = private unnamed_addr constant [2 x i8] c"b\00"
|
||||
@.str2 = private unnamed_addr constant [2 x i8] c"c\00"
|
||||
@.str3 = private unnamed_addr constant [2 x i8] c"d\00"
|
||||
@.str4 = private unnamed_addr constant [2 x i8] c"e\00"
|
||||
define i8* @dont_assert(i32 %x) {
|
||||
; LLVM would assert due to moving an early return into the jump table block and
|
||||
; removing one of its predecessors despite that block ending with an indirect
|
||||
; branch.
|
||||
entry:
|
||||
switch i32 %x, label %sw.epilog [
|
||||
i32 1, label %return
|
||||
i32 2, label %sw.bb1
|
||||
i32 3, label %sw.bb2
|
||||
i32 4, label %sw.bb3
|
||||
i32 255, label %sw.bb4
|
||||
]
|
||||
sw.bb1: br label %return
|
||||
sw.bb2: br label %return
|
||||
sw.bb3: br label %return
|
||||
sw.bb4: br label %return
|
||||
sw.epilog: br label %return
|
||||
return:
|
||||
%retval.0 = phi i8* [ null, %sw.epilog ],
|
||||
[ getelementptr inbounds ([2 x i8], [2 x i8]* @.str4, i64 0, i64 0), %sw.bb4 ],
|
||||
[ getelementptr inbounds ([2 x i8], [2 x i8]* @.str3, i64 0, i64 0), %sw.bb3 ],
|
||||
[ getelementptr inbounds ([2 x i8], [2 x i8]* @.str2, i64 0, i64 0), %sw.bb2 ],
|
||||
[ getelementptr inbounds ([2 x i8], [2 x i8]* @.str1, i64 0, i64 0), %sw.bb1 ],
|
||||
[ getelementptr inbounds ([2 x i8], [2 x i8]* @.str0, i64 0, i64 0), %entry ]
|
||||
ret i8* %retval.0
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
|
||||
; RUN: llc -mcpu=pwr7 -O0 -code-model=large <%s | FileCheck %s
|
||||
; RUN: llc -mcpu=pwr7 -code-model=medium <%s | FileCheck %s
|
||||
; RUN: llc -mcpu=pwr7 -code-model=large <%s | FileCheck %s
|
||||
|
||||
; Test correct code generation for medium and large code model
|
||||
; for loading the address of a jump table from the TOC.
|
||||
|
@ -3,6 +3,12 @@
|
||||
; RUN: llc -O0 -mcpu=pwr7 -code-model=large -filetype=obj -fast-isel=false %s -o - | \
|
||||
; RUN: llvm-readobj -r | FileCheck -check-prefix=LARGE %s
|
||||
|
||||
; Run jump table test separately since jump tables aren't generated at -O0.
|
||||
; RUN: llc -mcpu=pwr7 -code-model=medium -filetype=obj -fast-isel=false %s -o - | \
|
||||
; RUN: llvm-readobj -r | FileCheck -check-prefix=MEDIUM-JT %s
|
||||
; RUN: llc -mcpu=pwr7 -code-model=large -filetype=obj -fast-isel=false %s -o - | \
|
||||
; RUN: llvm-readobj -r | FileCheck -check-prefix=LARGE-JT %s
|
||||
|
||||
; FIXME: When asm-parse is available, could make this an assembly test.
|
||||
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
|
||||
@ -92,6 +98,46 @@ entry:
|
||||
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM4:[^ ]+]]
|
||||
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM4]]
|
||||
|
||||
@ti = common global i32 0, align 4
|
||||
|
||||
define signext i32 @test_tentative() nounwind {
|
||||
entry:
|
||||
%0 = load i32, i32* @ti, align 4
|
||||
%inc = add nsw i32 %0, 1
|
||||
store i32 %inc, i32* @ti, align 4
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
|
||||
; accessing tentatively declared variable ti.
|
||||
;
|
||||
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
|
||||
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
|
||||
;
|
||||
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
|
||||
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
|
||||
|
||||
define i8* @test_fnaddr() nounwind {
|
||||
entry:
|
||||
%func = alloca i32 (i32)*, align 8
|
||||
store i32 (i32)* @foo, i32 (i32)** %func, align 8
|
||||
%0 = load i32 (i32)*, i32 (i32)** %func, align 8
|
||||
%1 = bitcast i32 (i32)* %0 to i8*
|
||||
ret i8* %1
|
||||
}
|
||||
|
||||
declare signext i32 @foo(i32 signext)
|
||||
|
||||
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
|
||||
; accessing function address foo.
|
||||
;
|
||||
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
|
||||
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
|
||||
;
|
||||
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
|
||||
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
|
||||
|
||||
|
||||
define signext i32 @test_jump_table(i32 signext %i) nounwind {
|
||||
entry:
|
||||
%i.addr = alloca i32, align 4
|
||||
@ -139,47 +185,12 @@ sw.epilog: ; preds = %sw.bb3, %sw.default
|
||||
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
|
||||
; accessing a jump table address.
|
||||
;
|
||||
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM5:[^ ]+]]
|
||||
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM5]]
|
||||
; MEDIUM-JT: Relocations [
|
||||
; MEDIUM-JT: Section ({{.*}}) .rela.text {
|
||||
; MEDIUM-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM:[^ ]+]]
|
||||
; MEDIUM-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM]]
|
||||
;
|
||||
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM5:[^ ]+]]
|
||||
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM5]]
|
||||
|
||||
@ti = common global i32 0, align 4
|
||||
|
||||
define signext i32 @test_tentative() nounwind {
|
||||
entry:
|
||||
%0 = load i32, i32* @ti, align 4
|
||||
%inc = add nsw i32 %0, 1
|
||||
store i32 %inc, i32* @ti, align 4
|
||||
ret i32 %0
|
||||
}
|
||||
|
||||
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
|
||||
; accessing tentatively declared variable ti.
|
||||
;
|
||||
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
|
||||
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
|
||||
;
|
||||
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM6:[^ ]+]]
|
||||
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM6]]
|
||||
|
||||
define i8* @test_fnaddr() nounwind {
|
||||
entry:
|
||||
%func = alloca i32 (i32)*, align 8
|
||||
store i32 (i32)* @foo, i32 (i32)** %func, align 8
|
||||
%0 = load i32 (i32)*, i32 (i32)** %func, align 8
|
||||
%1 = bitcast i32 (i32)* %0 to i8*
|
||||
ret i8* %1
|
||||
}
|
||||
|
||||
declare signext i32 @foo(i32 signext)
|
||||
|
||||
; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
|
||||
; accessing function address foo.
|
||||
;
|
||||
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
|
||||
; MEDIUM-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
|
||||
;
|
||||
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM7:[^ ]+]]
|
||||
; LARGE-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM7]]
|
||||
; LARGE-JT: Relocations [
|
||||
; LARGE-JT: Section ({{.*}}) .rela.text {
|
||||
; LARGE-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_HA [[SYM:[^ ]+]]
|
||||
; LARGE-JT-NEXT: 0x{{[0-9,A-F]+}} R_PPC64_TOC16_LO_DS [[SYM]]
|
||||
|
@ -55,13 +55,15 @@ entry:
|
||||
]
|
||||
|
||||
bb: ; preds = %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry, %entry
|
||||
call void @_Z3bari( i32 0 )
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb, %entry
|
||||
call void @_Z3bari( i32 1 )
|
||||
br label %bb2
|
||||
|
||||
bb2: ; preds = %bb1, %entry
|
||||
call void @_Z3bari( i32 1 )
|
||||
call void @_Z3bari( i32 2 )
|
||||
br label %bb11
|
||||
|
||||
bb3: ; preds = %entry
|
||||
|
@ -140,19 +140,17 @@ sw.epilog:
|
||||
|
||||
; The balanced binary switch here would start with a comparison against 39, but
|
||||
; it is currently starting with 29 because of the density-sum heuristic.
|
||||
; CHECK: cmpl $29
|
||||
; CHECK: cmpl $39
|
||||
; CHECK: jg
|
||||
; CHECK: cmpl $10
|
||||
; CHECK: jne
|
||||
; CHECK: cmpl $49
|
||||
; CHECK: jg
|
||||
; CHECK: cmpl $30
|
||||
; CHECK: jne
|
||||
; CHECK: je
|
||||
; CHECK: cmpl $20
|
||||
; CHECK: jne
|
||||
; CHECK: cmpl $40
|
||||
; CHECK: je
|
||||
; CHECK: cmpl $50
|
||||
; CHECK: jne
|
||||
; CHECK: cmpl $40
|
||||
; CHECK: cmpl $30
|
||||
; CHECK: jne
|
||||
; CHECK: cmpl $60
|
||||
; CHECK: jne
|
||||
|
306
test/CodeGen/X86/switch.ll
Normal file
306
test/CodeGen/X86/switch.ll
Normal file
@ -0,0 +1,306 @@
|
||||
; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s
|
||||
; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -O0 | FileCheck --check-prefix=NOOPT %s
|
||||
|
||||
declare void @g(i32)
|
||||
|
||||
define void @basic(i32 %x) {
|
||||
entry:
|
||||
switch i32 %x, label %return [
|
||||
i32 3, label %bb0
|
||||
i32 1, label %bb1
|
||||
i32 4, label %bb1
|
||||
i32 5, label %bb0
|
||||
]
|
||||
bb0: tail call void @g(i32 0) br label %return
|
||||
bb1: tail call void @g(i32 1) br label %return
|
||||
return: ret void
|
||||
|
||||
; Should be lowered as straight compares in -O0 mode.
|
||||
; NOOPT-LABEL: basic
|
||||
; NOOPT: subl $3, %eax
|
||||
; NOOPT: je
|
||||
; NOOPT: subl $1, %eax
|
||||
; NOOPT: je
|
||||
; NOOPT: subl $4, %eax
|
||||
; NOOPT: je
|
||||
; NOOPT: subl $5, %eax
|
||||
; NOOPT: je
|
||||
|
||||
; Jump table otherwise.
|
||||
; CHECK-LABEL: basic
|
||||
; CHECK: decl
|
||||
; CHECK: cmpl $4
|
||||
; CHECK: ja
|
||||
; CHECK: jmpq *.LJTI
|
||||
}
|
||||
|
||||
|
||||
define void @simple_ranges(i32 %x) {
|
||||
entry:
|
||||
switch i32 %x, label %return [
|
||||
i32 0, label %bb0
|
||||
i32 1, label %bb0
|
||||
i32 2, label %bb0
|
||||
i32 3, label %bb0
|
||||
i32 100, label %bb1
|
||||
i32 101, label %bb1
|
||||
i32 102, label %bb1
|
||||
i32 103, label %bb1
|
||||
]
|
||||
bb0: tail call void @g(i32 0) br label %return
|
||||
bb1: tail call void @g(i32 1) br label %return
|
||||
return: ret void
|
||||
|
||||
; Should be lowered to two range checks.
|
||||
; CHECK-LABEL: simple_ranges
|
||||
; CHECK: leal -100
|
||||
; CHECK: cmpl $4
|
||||
; CHECK: jae
|
||||
; CHECK: cmpl $3
|
||||
; CHECK: ja
|
||||
}
|
||||
|
||||
|
||||
define void @jt_is_better(i32 %x) {
|
||||
entry:
|
||||
switch i32 %x, label %return [
|
||||
i32 0, label %bb0
|
||||
i32 2, label %bb0
|
||||
i32 4, label %bb0
|
||||
i32 1, label %bb1
|
||||
i32 3, label %bb1
|
||||
i32 5, label %bb1
|
||||
|
||||
i32 6, label %bb2
|
||||
i32 7, label %bb3
|
||||
i32 8, label %bb4
|
||||
]
|
||||
bb0: tail call void @g(i32 0) br label %return
|
||||
bb1: tail call void @g(i32 1) br label %return
|
||||
bb2: tail call void @g(i32 2) br label %return
|
||||
bb3: tail call void @g(i32 3) br label %return
|
||||
bb4: tail call void @g(i32 4) br label %return
|
||||
return: ret void
|
||||
|
||||
; Cases 0-5 could be lowered with two bit tests,
|
||||
; but with 6-8, the whole switch is suitable for a jump table.
|
||||
; CHECK-LABEL: jt_is_better
|
||||
; CHECK: cmpl $8
|
||||
; CHECK: jbe
|
||||
; CHECK: jmpq *.LJTI
|
||||
}
|
||||
|
||||
|
||||
define void @bt_is_better(i32 %x) {
|
||||
entry:
|
||||
switch i32 %x, label %return [
|
||||
i32 0, label %bb0
|
||||
i32 3, label %bb0
|
||||
i32 6, label %bb0
|
||||
i32 1, label %bb1
|
||||
i32 4, label %bb1
|
||||
i32 7, label %bb1
|
||||
i32 2, label %bb2
|
||||
i32 5, label %bb2
|
||||
i32 8, label %bb2
|
||||
|
||||
]
|
||||
bb0: tail call void @g(i32 0) br label %return
|
||||
bb1: tail call void @g(i32 1) br label %return
|
||||
bb2: tail call void @g(i32 2) br label %return
|
||||
return: ret void
|
||||
|
||||
; This could be lowered as a jump table, but bit tests is more efficient.
|
||||
; CHECK-LABEL: bt_is_better
|
||||
; 73 = 2^0 + 2^3 + 2^6
|
||||
; CHECK: movl $73
|
||||
; CHECK: btl
|
||||
; 146 = 2^1 + 2^4 + 2^7
|
||||
; CHECK: movl $146
|
||||
; CHECK: btl
|
||||
; 292 = 2^2 + 2^5 + 2^8
|
||||
; CHECK: movl $292
|
||||
; CHECK: btl
|
||||
}
|
||||
|
||||
|
||||
define void @optimal_pivot1(i32 %x) {
|
||||
entry:
|
||||
switch i32 %x, label %return [
|
||||
i32 100, label %bb0
|
||||
i32 200, label %bb1
|
||||
i32 300, label %bb0
|
||||
i32 400, label %bb1
|
||||
i32 500, label %bb0
|
||||
i32 600, label %bb1
|
||||
|
||||
]
|
||||
bb0: tail call void @g(i32 0) br label %return
|
||||
bb1: tail call void @g(i32 1) br label %return
|
||||
return: ret void
|
||||
|
||||
; Should pivot around 400 for two subtrees of equal size.
|
||||
; CHECK-LABEL: optimal_pivot1
|
||||
; CHECK-NOT: cmpl
|
||||
; CHECK: cmpl $399
|
||||
}
|
||||
|
||||
|
||||
define void @optimal_pivot2(i32 %x) {
|
||||
entry:
|
||||
switch i32 %x, label %return [
|
||||
i32 100, label %bb0 i32 101, label %bb1 i32 102, label %bb2 i32 103, label %bb3
|
||||
i32 200, label %bb0 i32 201, label %bb1 i32 202, label %bb2 i32 203, label %bb3
|
||||
i32 300, label %bb0 i32 301, label %bb1 i32 302, label %bb2 i32 303, label %bb3
|
||||
i32 400, label %bb0 i32 401, label %bb1 i32 402, label %bb2 i32 403, label %bb3
|
||||
|
||||
]
|
||||
bb0: tail call void @g(i32 0) br label %return
|
||||
bb1: tail call void @g(i32 1) br label %return
|
||||
bb2: tail call void @g(i32 2) br label %return
|
||||
bb3: tail call void @g(i32 3) br label %return
|
||||
return: ret void
|
||||
|
||||
; Should pivot around 300 for two subtrees with two jump tables each.
|
||||
; CHECK-LABEL: optimal_pivot2
|
||||
; CHECK-NOT: cmpl
|
||||
; CHECK: cmpl $299
|
||||
; CHECK: jmpq *.LJTI
|
||||
; CHECK: jmpq *.LJTI
|
||||
; CHECK: jmpq *.LJTI
|
||||
; CHECK: jmpq *.LJTI
|
||||
}
|
||||
|
||||
|
||||
define void @optimal_jump_table1(i32 %x) {
|
||||
entry:
|
||||
switch i32 %x, label %return [
|
||||
i32 0, label %bb0
|
||||
i32 5, label %bb1
|
||||
i32 6, label %bb2
|
||||
i32 12, label %bb3
|
||||
i32 13, label %bb4
|
||||
i32 15, label %bb5
|
||||
]
|
||||
bb0: tail call void @g(i32 0) br label %return
|
||||
bb1: tail call void @g(i32 1) br label %return
|
||||
bb2: tail call void @g(i32 2) br label %return
|
||||
bb3: tail call void @g(i32 3) br label %return
|
||||
bb4: tail call void @g(i32 4) br label %return
|
||||
bb5: tail call void @g(i32 5) br label %return
|
||||
return: ret void
|
||||
|
||||
; Splitting in the largest gap (between 6 and 12) would yield suboptimal result.
|
||||
; Expecting a jump table from 5 to 15.
|
||||
; CHECK-LABEL: optimal_jump_table1
|
||||
; CHECK: leal -5
|
||||
; CHECK: cmpl $10
|
||||
; CHECK: jmpq *.LJTI
|
||||
}
|
||||
|
||||
|
||||
define void @optimal_jump_table2(i32 %x) {
|
||||
entry:
|
||||
switch i32 %x, label %return [
|
||||
i32 0, label %bb0
|
||||
i32 1, label %bb1
|
||||
i32 2, label %bb2
|
||||
i32 9, label %bb3
|
||||
i32 14, label %bb4
|
||||
i32 15, label %bb5
|
||||
]
|
||||
bb0: tail call void @g(i32 0) br label %return
|
||||
bb1: tail call void @g(i32 1) br label %return
|
||||
bb2: tail call void @g(i32 2) br label %return
|
||||
bb3: tail call void @g(i32 3) br label %return
|
||||
bb4: tail call void @g(i32 4) br label %return
|
||||
bb5: tail call void @g(i32 5) br label %return
|
||||
return: ret void
|
||||
|
||||
; Partitioning the cases to the minimum number of dense sets is not good enough.
|
||||
; This can be partitioned as {0,1,2,9},{14,15} or {0,1,2},{9,14,15}. The former
|
||||
; should be preferred. Expecting a table from 0-9.
|
||||
; CHECK-LABEL: optimal_jump_table2
|
||||
; CHECK: cmpl $9
|
||||
; CHECK: jmpq *.LJTI
|
||||
}
|
||||
|
||||
|
||||
define void @optimal_jump_table3(i32 %x) {
|
||||
entry:
|
||||
switch i32 %x, label %return [
|
||||
i32 1, label %bb0
|
||||
i32 2, label %bb1
|
||||
i32 3, label %bb2
|
||||
i32 10, label %bb3
|
||||
i32 13, label %bb0
|
||||
i32 14, label %bb1
|
||||
i32 15, label %bb2
|
||||
i32 20, label %bb3
|
||||
i32 25, label %bb4
|
||||
]
|
||||
bb0: tail call void @g(i32 0) br label %return
|
||||
bb1: tail call void @g(i32 1) br label %return
|
||||
bb2: tail call void @g(i32 2) br label %return
|
||||
bb3: tail call void @g(i32 3) br label %return
|
||||
bb4: tail call void @g(i32 4) br label %return
|
||||
return: ret void
|
||||
|
||||
; Splitting to maximize left-right density sum and gap size would split this
|
||||
; between 3 and 10, and then between 20 and 25. It's better to build a table
|
||||
; from 1-20.
|
||||
; CHECK-LABEL: optimal_jump_table3
|
||||
; CHECK: leal -1
|
||||
; CHECK: cmpl $19
|
||||
; CHECK: jmpq *.LJTI
|
||||
}
|
||||
|
||||
%struct.S = type { %struct.S*, i32 }
|
||||
define void @phi_node_trouble(%struct.S* %s) {
|
||||
entry:
|
||||
br label %header
|
||||
header:
|
||||
%ptr = phi %struct.S* [ %s, %entry ], [ %next, %loop ]
|
||||
%bool = icmp eq %struct.S* %ptr, null
|
||||
br i1 %bool, label %exit, label %loop
|
||||
loop:
|
||||
%nextptr = getelementptr inbounds %struct.S, %struct.S* %ptr, i64 0, i32 0
|
||||
%next = load %struct.S*, %struct.S** %nextptr
|
||||
%xptr = getelementptr inbounds %struct.S, %struct.S* %next, i64 0, i32 1
|
||||
%x = load i32, i32* %xptr
|
||||
switch i32 %x, label %exit [
|
||||
i32 4, label %header
|
||||
i32 36, label %exit2
|
||||
i32 69, label %exit2
|
||||
i32 25, label %exit2
|
||||
]
|
||||
exit:
|
||||
ret void
|
||||
exit2:
|
||||
ret void
|
||||
|
||||
; This will be lowered to a comparison with 4 and then bit tests. Make sure
|
||||
; that the phi node in %header gets a value from the comparison block.
|
||||
; CHECK-LABEL: phi_node_trouble
|
||||
; CHECK: movq (%[[REG1:[a-z]+]]), %[[REG1]]
|
||||
; CHECK: movl 8(%[[REG1]]), %[[REG2:[a-z]+]]
|
||||
; CHECK: cmpl $4, %[[REG2]]
|
||||
}
|
||||
|
||||
|
||||
define void @default_only(i32 %x) {
|
||||
entry:
|
||||
br label %sw
|
||||
return:
|
||||
ret void
|
||||
sw:
|
||||
switch i32 %x, label %return [
|
||||
]
|
||||
|
||||
; Branch directly to the default.
|
||||
; (In optimized builds the switch is removed earlier.)
|
||||
; NOOPT-LABEL: default_only
|
||||
; NOOPT: .[[L:[A-Z0-9_]+]]:
|
||||
; NOOPT-NEXT: retq
|
||||
; NOOPT: jmp .[[L]]
|
||||
}
|
@ -1,8 +1,8 @@
|
||||
;; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 \
|
||||
;; RUN: llc -verify-machineinstrs \
|
||||
;; RUN: -mtriple=armv7-linux-gnueabi -filetype=obj %s -o - | \
|
||||
;; RUN: llvm-readobj -t | FileCheck -check-prefix=ARM %s
|
||||
|
||||
;; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort=1 \
|
||||
;; RUN: llc -verify-machineinstrs \
|
||||
;; RUN: -mtriple=thumbv7-linux-gnueabi -filetype=obj %s -o - | \
|
||||
;; RUN: llvm-readobj -t | FileCheck -check-prefix=TMB %s
|
||||
|
||||
@ -11,102 +11,25 @@
|
||||
|
||||
define void @foo(i32* %ptr) nounwind ssp {
|
||||
%tmp = load i32, i32* %ptr, align 4
|
||||
switch i32 %tmp, label %default [
|
||||
i32 11, label %bb0
|
||||
i32 10, label %bb1
|
||||
i32 8, label %bb2
|
||||
i32 4, label %bb3
|
||||
i32 2, label %bb4
|
||||
i32 6, label %bb5
|
||||
i32 9, label %bb6
|
||||
i32 15, label %bb7
|
||||
i32 1, label %bb8
|
||||
i32 3, label %bb9
|
||||
i32 5, label %bb10
|
||||
i32 30, label %bb11
|
||||
i32 31, label %bb12
|
||||
i32 13, label %bb13
|
||||
i32 14, label %bb14
|
||||
i32 20, label %bb15
|
||||
i32 19, label %bb16
|
||||
i32 17, label %bb17
|
||||
i32 18, label %bb18
|
||||
i32 21, label %bb19
|
||||
i32 22, label %bb20
|
||||
i32 16, label %bb21
|
||||
i32 24, label %bb22
|
||||
i32 25, label %bb23
|
||||
i32 26, label %bb24
|
||||
i32 27, label %bb25
|
||||
i32 28, label %bb26
|
||||
i32 23, label %bb27
|
||||
i32 12, label %bb28
|
||||
switch i32 %tmp, label %exit [
|
||||
i32 0, label %bb0
|
||||
i32 1, label %bb1
|
||||
i32 2, label %bb2
|
||||
i32 3, label %bb3
|
||||
]
|
||||
|
||||
default:
|
||||
br label %exit
|
||||
bb0:
|
||||
store i32 0, i32* %ptr, align 4
|
||||
br label %exit
|
||||
bb1:
|
||||
store i32 1, i32* %ptr, align 4
|
||||
br label %exit
|
||||
bb2:
|
||||
store i32 2, i32* %ptr, align 4
|
||||
br label %exit
|
||||
bb3:
|
||||
store i32 3, i32* %ptr, align 4
|
||||
br label %exit
|
||||
bb4:
|
||||
br label %exit
|
||||
bb5:
|
||||
br label %exit
|
||||
bb6:
|
||||
br label %exit
|
||||
bb7:
|
||||
br label %exit
|
||||
bb8:
|
||||
br label %exit
|
||||
bb9:
|
||||
br label %exit
|
||||
bb10:
|
||||
br label %exit
|
||||
bb11:
|
||||
br label %exit
|
||||
bb12:
|
||||
br label %exit
|
||||
bb13:
|
||||
br label %exit
|
||||
bb14:
|
||||
br label %exit
|
||||
bb15:
|
||||
br label %exit
|
||||
bb16:
|
||||
br label %exit
|
||||
bb17:
|
||||
br label %exit
|
||||
bb18:
|
||||
br label %exit
|
||||
bb19:
|
||||
br label %exit
|
||||
bb20:
|
||||
br label %exit
|
||||
bb21:
|
||||
br label %exit
|
||||
bb22:
|
||||
br label %exit
|
||||
bb23:
|
||||
br label %exit
|
||||
bb24:
|
||||
br label %exit
|
||||
bb25:
|
||||
br label %exit
|
||||
bb26:
|
||||
br label %exit
|
||||
bb27:
|
||||
br label %exit
|
||||
bb28:
|
||||
br label %exit
|
||||
|
||||
|
||||
exit:
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user