[FastISel][X86] Use INC/DEC when possible for {sadd|ssub}.with.overflow intrinsics.

This is a small peephole optimization to emit INC/DEC when possible.

Fixes <rdar://problem/17952308>.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@215230 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Juergen Ributzka 2014-08-08 17:21:37 +00:00
parent 6d56b950a9
commit cbda4b32c6
2 changed files with 83 additions and 31 deletions

View File

@ -2374,15 +2374,19 @@ bool X86FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
isCommutativeIntrinsic(II))
std::swap(LHS, RHS);
bool UseIncDec = false;
if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne())
UseIncDec = true;
unsigned BaseOpc, CondOpc;
switch (II->getIntrinsicID()) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::sadd_with_overflow:
BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break;
BaseOpc = UseIncDec ? X86ISD::INC : ISD::ADD; CondOpc = X86::SETOr; break;
case Intrinsic::uadd_with_overflow:
BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
case Intrinsic::ssub_with_overflow:
BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break;
BaseOpc = UseIncDec ? X86ISD::DEC : ISD::SUB; CondOpc = X86::SETOr; break;
case Intrinsic::usub_with_overflow:
BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
case Intrinsic::smul_with_overflow:
@ -2398,9 +2402,24 @@ bool X86FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
unsigned ResultReg = 0;
// Check if we have an immediate version.
if (auto const *C = dyn_cast<ConstantInt>(RHS)) {
if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
static const unsigned Opc[2][2][4] = {
{ { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
{ X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r } },
{ { X86::INC8r, X86::INC64_16r, X86::INC64_32r, X86::INC64r },
{ X86::DEC8r, X86::DEC64_16r, X86::DEC64_32r, X86::DEC64r } }
};
if (UseIncDec) {
ResultReg = createResultReg(TLI.getRegClassFor(VT));
bool Is64Bit = Subtarget->is64Bit();
bool IsDec = BaseOpc == X86ISD::DEC;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc[Is64Bit][IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
.addReg(LHSReg, getKillRegState(LHSIsKill));
} else
ResultReg = FastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
C->getZExtValue());
CI->getZExtValue());
}
unsigned RHSReg;

View File

@ -67,16 +67,48 @@ entry:
ret i1 %obit
}
; SADDO reg, imm | imm, reg
; FIXME: INC isn't supported in FastISel yet
define zeroext i1 @saddo.i64imm1(i64 %v1, i64* %res) {
; SADDO reg, 1 | INC
define zeroext i1 @saddo.inc.i8(i8 %v1, i8* %res) {
entry:
; DAG-LABEL: saddo.i64imm1
; DAG: incq %rdi
; DAG-NEXT: seto %al
; FAST-LABEL: saddo.i64imm1
; FAST: addq $1, %rdi
; FAST-NEXT: seto %al
; CHECK-LABEL: saddo.inc.i8
; CHECK: incb %dil
; CHECK-NEXT: seto %al
%t = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 %v1, i8 1)
%val = extractvalue {i8, i1} %t, 0
%obit = extractvalue {i8, i1} %t, 1
store i8 %val, i8* %res
ret i1 %obit
}
define zeroext i1 @saddo.inc.i16(i16 %v1, i16* %res) {
entry:
; CHECK-LABEL: saddo.inc.i16
; CHECK: incw %di
; CHECK-NEXT: seto %al
%t = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 %v1, i16 1)
%val = extractvalue {i16, i1} %t, 0
%obit = extractvalue {i16, i1} %t, 1
store i16 %val, i16* %res
ret i1 %obit
}
define zeroext i1 @saddo.inc.i32(i32 %v1, i32* %res) {
entry:
; CHECK-LABEL: saddo.inc.i32
; CHECK: incl %edi
; CHECK-NEXT: seto %al
%t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 1)
%val = extractvalue {i32, i1} %t, 0
%obit = extractvalue {i32, i1} %t, 1
store i32 %val, i32* %res
ret i1 %obit
}
define zeroext i1 @saddo.inc.i64(i64 %v1, i64* %res) {
entry:
; CHECK-LABEL: saddo.inc.i64
; CHECK: incq %rdi
; CHECK-NEXT: seto %al
%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 1)
%val = extractvalue {i64, i1} %t, 0
%obit = extractvalue {i64, i1} %t, 1
@ -84,17 +116,18 @@ entry:
ret i1 %obit
}
; SADDO reg, imm | imm, reg
; FIXME: DAG doesn't optimize immediates on the LHS.
define zeroext i1 @saddo.i64imm2(i64 %v1, i64* %res) {
define zeroext i1 @saddo.i64imm1(i64 %v1, i64* %res) {
entry:
; DAG-LABEL: saddo.i64imm2
; DAG-LABEL: saddo.i64imm1
; DAG: mov
; DAG-NEXT: addq
; DAG-NEXT: seto
; FAST-LABEL: saddo.i64imm2
; FAST: addq $1, %rdi
; FAST-LABEL: saddo.i64imm1
; FAST: addq $2, %rdi
; FAST-NEXT: seto %al
%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 1, i64 %v1)
%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 2, i64 %v1)
%val = extractvalue {i64, i1} %t, 0
%obit = extractvalue {i64, i1} %t, 1
store i64 %val, i64* %res
@ -102,12 +135,12 @@ entry:
}
; Check boundary conditions for large immediates.
define zeroext i1 @saddo.i64imm3(i64 %v1, i64* %res) {
define zeroext i1 @saddo.i64imm2(i64 %v1, i64* %res) {
entry:
; DAG-LABEL: saddo.i64imm3
; DAG-LABEL: saddo.i64imm2
; DAG: addq $-2147483648, %rdi
; DAG-NEXT: seto %al
; FAST-LABEL: saddo.i64imm3
; FAST-LABEL: saddo.i64imm2
; FAST: addq $-2147483648, %rdi
; FAST-NEXT: seto %al
%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 -2147483648)
@ -117,13 +150,13 @@ entry:
ret i1 %obit
}
define zeroext i1 @saddo.i64imm4(i64 %v1, i64* %res) {
define zeroext i1 @saddo.i64imm3(i64 %v1, i64* %res) {
entry:
; DAG-LABEL: saddo.i64imm4
; DAG-LABEL: saddo.i64imm3
; DAG: movabsq $-21474836489, %[[REG:[a-z]+]]
; DAG-NEXT: addq %rdi, %[[REG]]
; DAG-NEXT: seto
; FAST-LABEL: saddo.i64imm4
; FAST-LABEL: saddo.i64imm3
; FAST: movabsq $-21474836489, %[[REG:[a-z]+]]
; FAST-NEXT: addq %rdi, %[[REG]]
; FAST-NEXT: seto
@ -134,12 +167,12 @@ entry:
ret i1 %obit
}
define zeroext i1 @saddo.i64imm5(i64 %v1, i64* %res) {
define zeroext i1 @saddo.i64imm4(i64 %v1, i64* %res) {
entry:
; DAG-LABEL: saddo.i64imm5
; DAG-LABEL: saddo.i64imm4
; DAG: addq $2147483647, %rdi
; DAG-NEXT: seto
; FAST-LABEL: saddo.i64imm5
; FAST-LABEL: saddo.i64imm4
; FAST: addq $2147483647, %rdi
; FAST-NEXT: seto
%t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 2147483647)
@ -150,13 +183,13 @@ entry:
}
; TODO: FastISel shouldn't use movabsq.
define zeroext i1 @saddo.i64imm6(i64 %v1, i64* %res) {
define zeroext i1 @saddo.i64imm5(i64 %v1, i64* %res) {
entry:
; DAG-LABEL: saddo.i64imm6
; DAG-LABEL: saddo.i64imm5
; DAG: movl $2147483648, %ecx
; DAG: addq %rdi, %rcx
; DAG-NEXT: seto
; FAST-LABEL: saddo.i64imm6
; FAST-LABEL: saddo.i64imm5
; FAST: movabsq $2147483648, %[[REG:[a-z]+]]
; FAST: addq %rdi, %[[REG]]
; FAST-NEXT: seto