[FastISel][AArch64] Fix and extend the tbz/tbnz pattern matching.

The pattern matching failed to recognize all instances of "-1", because when
comparing against "-1" we didn't use an APInt of the same bitwidth.

This commit fixes this and also adds inverse versions of the condition to catch
more cases.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222722 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Juergen Ributzka 2014-11-25 04:16:15 +00:00
parent e5aa5ce5b2
commit a4ebd338c4
2 changed files with 176 additions and 21 deletions

View File

@ -2112,15 +2112,15 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
int TestBit = -1;
bool IsCmpNE;
if ((Predicate == CmpInst::ICMP_EQ) || (Predicate == CmpInst::ICMP_NE)) {
if (const auto *C = dyn_cast<Constant>(LHS))
if (C->isNullValue())
std::swap(LHS, RHS);
switch (Predicate) {
default:
return false;
case CmpInst::ICMP_EQ:
case CmpInst::ICMP_NE:
if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
std::swap(LHS, RHS);
if (!isa<Constant>(RHS))
return false;
if (!cast<Constant>(RHS)->isNullValue())
if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
return false;
if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
@ -2143,26 +2143,27 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
TestBit = 0;
IsCmpNE = Predicate == CmpInst::ICMP_NE;
} else if (Predicate == CmpInst::ICMP_SLT) {
if (!isa<Constant>(RHS))
return false;
if (!cast<Constant>(RHS)->isNullValue())
break;
case CmpInst::ICMP_SLT:
case CmpInst::ICMP_SGE:
if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
return false;
TestBit = BW - 1;
IsCmpNE = true;
} else if (Predicate == CmpInst::ICMP_SGT) {
IsCmpNE = Predicate == CmpInst::ICMP_SLT;
break;
case CmpInst::ICMP_SGT:
case CmpInst::ICMP_SLE:
if (!isa<ConstantInt>(RHS))
return false;
if (cast<ConstantInt>(RHS)->getValue() != -1)
if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
return false;
TestBit = BW - 1;
IsCmpNE = false;
} else
return false;
IsCmpNE = Predicate == CmpInst::ICMP_SLE;
break;
} // end switch
static const unsigned OpcTable[2][2][2] = {
{ {AArch64::CBZW, AArch64::CBZX },

View File

@ -1,5 +1,5 @@
; RUN: llc -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
; RUN: llc -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck %s
; RUN: llc -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK %s
; RUN: llc -fast-isel -fast-isel-abort -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs -mtriple=aarch64-apple-darwin < %s | FileCheck --check-prefix=CHECK --check-prefix=FAST %s
define i32 @icmp_eq_i8(i8 zeroext %a) {
; CHECK-LABEL: icmp_eq_i8
@ -121,6 +121,160 @@ bb2:
ret i32 0
}
; Branch on "icmp slt i8 %a, 0": expects a tbnz that tests bit 7 of w0, i.e.
; the sign bit of the i8 value. Verified only by the -fast-isel RUN line.
define i32 @icmp_slt_i8(i8 zeroext %a) {
; FAST-LABEL: icmp_slt_i8
; FAST: tbnz w0, #7, {{LBB.+_2}}
%1 = icmp slt i8 %a, 0
br i1 %1, label %bb1, label %bb2, !prof !0
bb1:
ret i32 1
bb2:
ret i32 0
}
; Branch on "icmp slt i16 %a, 0": expects a tbnz that tests bit 15 of w0, i.e.
; the sign bit of the i16 value. Verified only by the -fast-isel RUN line.
define i32 @icmp_slt_i16(i16 zeroext %a) {
; FAST-LABEL: icmp_slt_i16
; FAST: tbnz w0, #15, {{LBB.+_2}}
%1 = icmp slt i16 %a, 0
br i1 %1, label %bb1, label %bb2, !prof !0
bb1:
ret i32 1
bb2:
ret i32 0
}
; Branch on "icmp slt i32 %a, 0": expects a tbnz that tests bit 31 of w0, i.e.
; the sign bit of the i32 value. Verified by both RUN lines.
define i32 @icmp_slt_i32(i32 %a) {
; CHECK-LABEL: icmp_slt_i32
; CHECK: tbnz w0, #31, {{LBB.+_2}}
%1 = icmp slt i32 %a, 0
br i1 %1, label %bb1, label %bb2, !prof !0
bb1:
ret i32 1
bb2:
ret i32 0
}
; Branch on "icmp slt i64 %a, 0": expects a tbnz that tests bit 63 of x0, i.e.
; the sign bit of the i64 value. Verified by both RUN lines.
define i32 @icmp_slt_i64(i64 %a) {
; CHECK-LABEL: icmp_slt_i64
; CHECK: tbnz x0, #63, {{LBB.+_2}}
%1 = icmp slt i64 %a, 0
br i1 %1, label %bb1, label %bb2, !prof !0
bb1:
ret i32 1
bb2:
ret i32 0
}
; Branch on "icmp sge i8 %a, 0" (the inverse of slt 0): expects a tbz that
; tests bit 7 of w0, i.e. the sign bit of the i8 value. Verified only by the
; -fast-isel RUN line.
define i32 @icmp_sge_i8(i8 zeroext %a) {
; FAST-LABEL: icmp_sge_i8
; FAST: tbz w0, #7, {{LBB.+_2}}
%1 = icmp sge i8 %a, 0
br i1 %1, label %bb1, label %bb2, !prof !0
bb1:
ret i32 1
bb2:
ret i32 0
}
; Branch on "icmp sge i16 %a, 0" (the inverse of slt 0): expects a tbz that
; tests bit 15 of w0, i.e. the sign bit of the i16 value. Verified only by the
; -fast-isel RUN line.
define i32 @icmp_sge_i16(i16 zeroext %a) {
; FAST-LABEL: icmp_sge_i16
; FAST: tbz w0, #15, {{LBB.+_2}}
%1 = icmp sge i16 %a, 0
br i1 %1, label %bb1, label %bb2, !prof !0
bb1:
ret i32 1
bb2:
ret i32 0
}
; Branch on "icmp sle i8 %a, -1" (true exactly when %a is negative): expects a
; tbnz that tests bit 7 of w0, i.e. the sign bit of the i8 value. Verified only
; by the -fast-isel RUN line.
define i32 @icmp_sle_i8(i8 zeroext %a) {
; FAST-LABEL: icmp_sle_i8
; FAST: tbnz w0, #7, {{LBB.+_2}}
%1 = icmp sle i8 %a, -1
br i1 %1, label %bb1, label %bb2, !prof !0
bb1:
ret i32 1
bb2:
ret i32 0
}
; Branch on "icmp sle i16 %a, -1" (true exactly when %a is negative): expects a
; tbnz that tests bit 15 of w0, i.e. the sign bit of the i16 value. Verified
; only by the -fast-isel RUN line.
define i32 @icmp_sle_i16(i16 zeroext %a) {
; FAST-LABEL: icmp_sle_i16
; FAST: tbnz w0, #15, {{LBB.+_2}}
%1 = icmp sle i16 %a, -1
br i1 %1, label %bb1, label %bb2, !prof !0
bb1:
ret i32 1
bb2:
ret i32 0
}
; Branch on "icmp sle i32 %a, -1" (true exactly when %a is negative): expects a
; tbnz that tests bit 31 of w0, i.e. the sign bit of the i32 value. Verified by
; both RUN lines.
define i32 @icmp_sle_i32(i32 %a) {
; CHECK-LABEL: icmp_sle_i32
; CHECK: tbnz w0, #31, {{LBB.+_2}}
%1 = icmp sle i32 %a, -1
br i1 %1, label %bb1, label %bb2, !prof !0
bb1:
ret i32 1
bb2:
ret i32 0
}
; Branch on "icmp sle i64 %a, -1" (true exactly when %a is negative): expects a
; tbnz that tests bit 63 of x0, i.e. the sign bit of the i64 value. Verified by
; both RUN lines.
define i32 @icmp_sle_i64(i64 %a) {
; CHECK-LABEL: icmp_sle_i64
; CHECK: tbnz x0, #63, {{LBB.+_2}}
%1 = icmp sle i64 %a, -1
br i1 %1, label %bb1, label %bb2, !prof !0
bb1:
ret i32 1
bb2:
ret i32 0
}
; Branch on "icmp sgt i8 %a, -1" (true exactly when %a is non-negative):
; expects a tbz that tests bit 7 of w0, i.e. the sign bit of the i8 value.
; Verified only by the -fast-isel RUN line.
define i32 @icmp_sgt_i8(i8 zeroext %a) {
; FAST-LABEL: icmp_sgt_i8
; FAST: tbz w0, #7, {{LBB.+_2}}
%1 = icmp sgt i8 %a, -1
br i1 %1, label %bb1, label %bb2, !prof !0
bb1:
ret i32 1
bb2:
ret i32 0
}
; Branch on "icmp sgt i16 %a, -1" (true exactly when %a is non-negative):
; expects a tbz that tests bit 15 of w0, i.e. the sign bit of the i16 value.
; Verified only by the -fast-isel RUN line.
define i32 @icmp_sgt_i16(i16 zeroext %a) {
; FAST-LABEL: icmp_sgt_i16
; FAST: tbz w0, #15, {{LBB.+_2}}
%1 = icmp sgt i16 %a, -1
br i1 %1, label %bb1, label %bb2, !prof !0
bb1:
ret i32 1
bb2:
ret i32 0
}
; Branch on "icmp sgt i32 %a, -1" (true exactly when %a is non-negative):
; expects a tbz that tests bit 31 of w0, i.e. the sign bit of the i32 value.
; Verified by both RUN lines.
define i32 @icmp_sgt_i32(i32 %a) {
; CHECK-LABEL: icmp_sgt_i32
; CHECK: tbz w0, #31, {{LBB.+_2}}
%1 = icmp sgt i32 %a, -1
br i1 %1, label %bb1, label %bb2, !prof !0
bb1:
ret i32 1
bb2:
ret i32 0
}
; Branch on "icmp sgt i64 %a, -1" (true exactly when %a is non-negative):
; expects a tbz that tests bit 63 of x0, i.e. the sign bit of the i64 value.
; Verified only by the -fast-isel RUN line.
; NOTE(review): the i32 sgt and i64 sle/slt tests are verified by both RUN
; lines; confirm that restricting this one to fast-isel is intentional.
define i32 @icmp_sgt_i64(i64 %a) {
; FAST-LABEL: icmp_sgt_i64
; FAST: tbz x0, #63, {{LBB.+_2}}
%1 = icmp sgt i64 %a, -1
br i1 %1, label %bb1, label %bb2, !prof !0
bb1:
ret i32 1
bb2:
ret i32 0
}
; Test that we don't fold the 'and' instruction into the compare.
define i32 @icmp_eq_and_i32(i32 %a, i1 %c) {
; CHECK-LABEL: icmp_eq_and_i32