Cmp peephole optimization isn't always safe for signed arithmetics.

int tries = INT_MAX;    
while (tries > 0) {
      tries--;
}

The check should be:
        subs    r4, #1
        cmp     r4, #0
        bgt     LBB0_1

The subs can set the overflow V bit when r4 is INT_MAX+1 (which loop
canonicalization apparently does in this case). cmp #0 would have cleared
it while not changing the N and Z bits. Since BGT is dependent on the V
bit, i.e. (N == V) && !Z, it is not safe to eliminate the cmp #0.

rdar://9172742


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@128179 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2011-03-23 22:52:04 +00:00
parent b141099c14
commit 2c33915628
3 changed files with 88 additions and 5 deletions

View File

@ -1612,11 +1612,51 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
switch (MI->getOpcode()) {
default: break;
case ARM::ADDri:
case ARM::ANDri:
case ARM::t2ANDri:
case ARM::SUBri:
case ARM::t2ADDri:
case ARM::t2SUBri:
case ARM::t2SUBri: {
// Scan forward for the use of CPSR, if it's a conditional code requires
// checking of V bit, then this is not safe to do. If we can't find the
// CPSR use (i.e. used in another block), then it's not safe to perform
// the optimization.
bool isSafe = false;
I = CmpInstr;
E = MI->getParent()->end();
while (!isSafe && ++I != E) {
const MachineInstr &Instr = *I;
for (unsigned IO = 0, EO = Instr.getNumOperands();
!isSafe && IO != EO; ++IO) {
const MachineOperand &MO = Instr.getOperand(IO);
if (!MO.isReg() || MO.getReg() != ARM::CPSR)
continue;
if (MO.isDef()) {
isSafe = true;
break;
}
// Condition code is after the operand before CPSR.
ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
switch (CC) {
default:
isSafe = true;
break;
case ARMCC::VS:
case ARMCC::VC:
case ARMCC::GE:
case ARMCC::LT:
case ARMCC::GT:
case ARMCC::LE:
return false;
}
}
}
if (!isSafe)
return false;
// fallthrough
}
case ARM::ANDri:
case ARM::t2ANDri:
// Toggle the optional operand to CPSR.
MI->getOperand(5).setReg(ARM::CPSR);
MI->getOperand(5).setIsDef(true);

View File

@ -0,0 +1,41 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s
; subs r4, #1
; cmp r4, 0
; bgt
; cmp cannot be optimized away since it will clear the overflow bit.
; gt / ge, lt, le conditions all depend on V bit.
; rdar://9172742
define i32 @t() nounwind {
; CHECK: t:
entry:
br label %bb2
bb: ; preds = %bb2
%0 = tail call i32 @rand() nounwind
%1 = icmp eq i32 %0, 50
br i1 %1, label %bb3, label %bb1
bb1: ; preds = %bb
%tmp = tail call i32 @puts() nounwind
%indvar.next = add i32 %indvar, 1
br label %bb2
bb2: ; preds = %bb1, %entry
; CHECK: bb2
; CHECK: subs [[REG:r[0-9]+]], #1
; CHECK: cmp [[REG]], #0
; CHECK: bgt
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
%tries.0 = sub i32 2147483647, %indvar
%tmp1 = icmp sgt i32 %tries.0, 0
br i1 %tmp1, label %bb, label %bb3
bb3: ; preds = %bb2, %bb
ret i32 0
}
declare i32 @rand()
declare i32 @puts() nounwind

View File

@ -24,7 +24,8 @@ define i32 @f2(i64 %x, i64 %y) {
; CHECK: f2
; CHECK: lsr{{.*}}r2
; CHECK-NEXT: rsb r3, r2, #32
; CHECK-NEXT: subs r2, r2, #32
; CHECK-NEXT: sub r2, r2, #32
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: orr r0, r0, r1, lsl r3
; CHECK-NEXT: asrge r0, r1, r2
%a = ashr i64 %x, %y
@ -36,7 +37,8 @@ define i32 @f3(i64 %x, i64 %y) {
; CHECK: f3
; CHECK: lsr{{.*}}r2
; CHECK-NEXT: rsb r3, r2, #32
; CHECK-NEXT: subs r2, r2, #32
; CHECK-NEXT: sub r2, r2, #32
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: orr r0, r0, r1, lsl r3
; CHECK-NEXT: lsrge r0, r1, r2
%a = lshr i64 %x, %y