mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-26 21:32:10 +00:00
Don't break the IV update in TLI::SimplifySetCC().
LSR always tries to make the ICmp in the loop latch use the incremented induction variable. This allows the induction variable to be kept in a single register. When the induction variable limit is equal to the stride, SimplifySetCC() would break LSR's hard work by transforming: (icmp (add iv, stride), stride) --> (cmp iv, 0). This forced us to use lea for the IV update, preventing the simpler incl+cmp. <rdar://problem/7643606> <rdar://problem/11184260> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154119 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
036ebfd874
commit
740cd657f3
@ -2471,6 +2471,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
|
||||
}
|
||||
}
|
||||
|
||||
// If RHS is a legal immediate value for a compare instruction, we need
|
||||
// to be careful about increasing register pressure needlessly.
|
||||
bool LegalRHSImm = false;
|
||||
|
||||
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
|
||||
if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
|
||||
// Turn (X+C1) == C2 --> X == C2-C1
|
||||
@ -2505,25 +2509,33 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
|
||||
Cond);
|
||||
}
|
||||
}
|
||||
|
||||
// Could RHSC fold directly into a compare?
|
||||
if (RHSC->getValueType(0).getSizeInBits() <= 64)
|
||||
LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
|
||||
}
|
||||
|
||||
// Simplify (X+Z) == X --> Z == 0
|
||||
if (N0.getOperand(0) == N1)
|
||||
return DAG.getSetCC(dl, VT, N0.getOperand(1),
|
||||
DAG.getConstant(0, N0.getValueType()), Cond);
|
||||
if (N0.getOperand(1) == N1) {
|
||||
if (DAG.isCommutativeBinOp(N0.getOpcode()))
|
||||
return DAG.getSetCC(dl, VT, N0.getOperand(0),
|
||||
DAG.getConstant(0, N0.getValueType()), Cond);
|
||||
else if (N0.getNode()->hasOneUse()) {
|
||||
assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
|
||||
// (Z-X) == X --> Z == X<<1
|
||||
SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
|
||||
N1,
|
||||
// Don't do this if X is an immediate that can fold into a cmp
|
||||
// instruction and X+Z has other uses. It could be an induction variable
|
||||
// chain, and the transform would increase register pressure.
|
||||
if (!LegalRHSImm || N0.getNode()->hasOneUse()) {
|
||||
if (N0.getOperand(0) == N1)
|
||||
return DAG.getSetCC(dl, VT, N0.getOperand(1),
|
||||
DAG.getConstant(0, N0.getValueType()), Cond);
|
||||
if (N0.getOperand(1) == N1) {
|
||||
if (DAG.isCommutativeBinOp(N0.getOpcode()))
|
||||
return DAG.getSetCC(dl, VT, N0.getOperand(0),
|
||||
DAG.getConstant(0, N0.getValueType()), Cond);
|
||||
else if (N0.getNode()->hasOneUse()) {
|
||||
assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
|
||||
// (Z-X) == X --> Z == X<<1
|
||||
SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1,
|
||||
DAG.getConstant(1, getShiftAmountTy(N1.getValueType())));
|
||||
if (!DCI.isCalledByLegalizer())
|
||||
DCI.AddToWorklist(SH.getNode());
|
||||
return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
|
||||
if (!DCI.isCalledByLegalizer())
|
||||
DCI.AddToWorklist(SH.getNode());
|
||||
return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -3,11 +3,6 @@
|
||||
|
||||
; This now reduces to a single induction variable.
|
||||
|
||||
; TODO: It still gets a GPR shuffle at the end of the loop
|
||||
; This is because something in instruction selection has decided
|
||||
; that comparing the pre-incremented value with zero is better
|
||||
; than comparing the post-incremented value with -4.
|
||||
|
||||
@G = external global i32 ; <i32*> [#uses=2]
|
||||
@array = external global i32* ; <i32**> [#uses=1]
|
||||
|
||||
@ -20,9 +15,9 @@ entry:
|
||||
|
||||
bb: ; preds = %bb, %entry
|
||||
; CHECK: LBB0_1:
|
||||
; CHECK: cmp [[R2:r[0-9]+]], #0
|
||||
; CHECK: sub{{(.w)?}} [[REGISTER:(r[0-9]+)|(lr)]], [[R2]], #1
|
||||
; CHECK: mov [[R2]], [[REGISTER]]
|
||||
; CHECK: subs [[R2:r[0-9]+]], #1
|
||||
; CHECK: cmp.w [[R2]], #-1
|
||||
; CHECK: bne LBB0_1
|
||||
|
||||
%0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1]
|
||||
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
|
||||
|
@ -1,5 +1,6 @@
|
||||
; RUN: llc -mtriple=x86_64-darwin < %s | FileCheck %s
|
||||
|
||||
; CHECK: t:
|
||||
; CHECK: decq
|
||||
; CHECK-NEXT: movl (
|
||||
; CHECK-NEXT: jne
|
||||
@ -136,3 +137,44 @@ bb2: ; preds = %bb
|
||||
store i8 %92, i8* %93, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that DAGCombiner doesn't mess up the IV update when the exiting value
|
||||
; is equal to the stride.
|
||||
; It must not fold (cmp (add iv, 1), 1) --> (cmp iv, 0).
|
||||
|
||||
; CHECK: f:
|
||||
; CHECK: %for.body
|
||||
; CHECK: incl [[IV:%e..]]
|
||||
; CHECK: cmpl $1, [[IV]]
|
||||
; CHECK: jne
|
||||
; CHECK: ret
|
||||
|
||||
; @f: starting at index %i, loads successive i32 elements of %a, keeping the
; largest value seen so far (unsigned compare) in %b.05 and the index it was
; loaded from in %bi.06. Returns that index, or 0 when %i == 1 on entry.
define i32 @f(i32 %i, i32* nocapture %a) nounwind uwtable readonly ssp {
|
||||
entry:
|
||||
; Skip the loop entirely when the start index already equals the exit value 1.
%cmp4 = icmp eq i32 %i, 1
|
||||
br i1 %cmp4, label %for.end, label %for.body.lr.ph
|
||||
|
||||
for.body.lr.ph: ; preds = %entry
|
||||
; The induction variable is carried as i64; widen the start index once here.
%0 = sext i32 %i to i64
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.lr.ph, %for.body
|
||||
%indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
|
||||
%bi.06 = phi i32 [ 0, %for.body.lr.ph ], [ %i.addr.0.bi.0, %for.body ]
|
||||
%b.05 = phi i32 [ 0, %for.body.lr.ph ], [ %.b.0, %for.body ]
|
||||
%arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
|
||||
%1 = load i32* %arrayidx, align 4
|
||||
; Unsigned "new maximum" test; both selects below are driven by it.
%cmp1 = icmp ugt i32 %1, %b.05
|
||||
%.b.0 = select i1 %cmp1, i32 %1, i32 %b.05
|
||||
%2 = trunc i64 %indvars.iv to i32
|
||||
%i.addr.0.bi.0 = select i1 %cmp1, i32 %2, i32 %bi.06
|
||||
; IV update with stride 1.
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
; Exit test compares the incremented IV (truncated to i32) against 1, i.e. a
; limit equal to the stride -- the (cmp (add iv, 1), 1) shape this test covers.
%exitcond = icmp eq i32 %lftr.wideiv, 1
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
%bi.0.lcssa = phi i32 [ 0, %entry ], [ %i.addr.0.bi.0, %for.body ]
|
||||
ret i32 %bi.0.lcssa
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user