Improve codegen for select's:

  if (x != 0) x = 1
  if (x == 1) x = 1

Previous codegen looks like this:

  mov   r1, r0
  cmp   r1, #1
  mov   r0, #0
  moveq r0, #1

The naive lowering selects between two different values. It should recognize that the test is an equality test, so this is really a conditional move rather than a select:

  cmp   r0, #1
  movne r0, #0

rdar://9758317

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@135017 91177308-0d34-0410-b5e6-96231b3b80d8
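For reference, a source-level function of roughly this shape produces the select in question (a hypothetical reproducer of my own, mirroring the new t5/t6 tests added below; it is not part of the commit):

// Hypothetical reproducer (not from the commit). The zext(icmp eq) below is
// lowered through a select/CMOV on ARM, so it exercises the codegen described
// above: per the commit message, it was mov/cmp/mov/moveq and becomes cmp/movne.
int is_one(int x) {
  return x == 1;
}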
This commit is contained in:
parent
3641e81172
commit
e721f5c8d3
@@ -2754,7 +2754,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
     SDValue ARMcc;
     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
-    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
+    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp);
   }

   ARMCC::CondCodes CondCode, CondCode2;
@@ -6960,6 +6960,70 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
 }

+/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
+SDValue
+ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
+  SDValue Cmp = N->getOperand(4);
+  if (Cmp.getOpcode() != ARMISD::CMPZ)
+    // Only looking at EQ and NE cases.
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+  SDValue LHS = Cmp.getOperand(0);
+  SDValue RHS = Cmp.getOperand(1);
+  SDValue FalseVal = N->getOperand(0);
+  SDValue TrueVal = N->getOperand(1);
+  SDValue ARMcc = N->getOperand(2);
+  ARMCC::CondCodes CC = (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
+
+  // Simplify
+  //   mov     r1, r0
+  //   cmp     r1, x
+  //   mov     r0, y
+  //   moveq   r0, x
+  // to
+  //   cmp     r0, x
+  //   movne   r0, y
+  //
+  //   mov     r1, r0
+  //   cmp     r1, x
+  //   mov     r0, x
+  //   movne   r0, y
+  // to
+  //   cmp     r0, x
+  //   movne   r0, y
+  /// FIXME: Turn this into a target neutral optimization?
+  SDValue Res;
+  if (CC == ARMCC::NE && FalseVal == RHS) {
+    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
+                      N->getOperand(3), Cmp);
+  } else if (CC == ARMCC::EQ && TrueVal == RHS) {
+    SDValue ARMcc;
+    SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
+    Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
+                      N->getOperand(3), NewCmp);
+  }
+
+  if (Res.getNode()) {
+    APInt KnownZero, KnownOne;
+    APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
+    DAG.ComputeMaskedBits(SDValue(N,0), Mask, KnownZero, KnownOne);
+    // Capture demanded bits information that would be otherwise lost.
+    if (KnownZero == 0xfffffffe)
+      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+                        DAG.getValueType(MVT::i1));
+    else if (KnownZero == 0xffffff00)
+      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+                        DAG.getValueType(MVT::i8));
+    else if (KnownZero == 0xffff0000)
+      Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+                        DAG.getValueType(MVT::i16));
+  }
+
+  return Res;
+}
+
 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   switch (N->getOpcode()) {
@@ -6988,6 +7052,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::ZERO_EXTEND:
   case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
   case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
+  case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
   case ARMISD::VLD2DUP:
   case ARMISD::VLD3DUP:
   case ARMISD::VLD4DUP:
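Aside (my own illustration, not part of the diff): the NE arm of the combine relies on the fact that when the select's false value equals the compare RHS, it may be replaced by the LHS, since the false path is only taken when LHS == RHS. A small standalone check, with names invented here:

// Scalar model of the fold (illustrative only, not LLVM code): if cc is "ne"
// and the select's false value equals the compare RHS, then
//   select(lhs != rhs, t, rhs) == select(lhs != rhs, t, lhs)
// because the false branch is only taken when lhs == rhs.
#include <cassert>
#include <cstdint>

static uint32_t cmov_ne(uint32_t lhs, uint32_t rhs, uint32_t t) {
  return (lhs != rhs) ? t : rhs;   // original form: rhs must be materialized
}

static uint32_t cmov_ne_folded(uint32_t lhs, uint32_t rhs, uint32_t t) {
  return (lhs != rhs) ? t : lhs;   // folded form: reuses lhs, no extra mov
}

int main() {
  for (uint32_t lhs = 0; lhs < 16; ++lhs)
    for (uint32_t rhs = 0; rhs < 16; ++rhs)
      assert(cmov_ne(lhs, rhs, 42u) == cmov_ne_folded(lhs, rhs, 42u));
  return 0;
}

The EQ arm is the mirror image: the condition is re-emitted as SETNE so the same reuse of LHS applies, with the two select arms exchanged.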
@@ -244,6 +244,7 @@ namespace llvm {
       EmitInstrWithCustomInserter(MachineInstr *MI,
                                   MachineBasicBlock *MBB) const;

+    SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
     virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;

     bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
@@ -76,3 +76,39 @@ entry:
   %1 = select i1 %0, i32 4283826005, i32 %x
   ret i32 %1
 }
+
+; rdar://9758317
+define i32 @t5(i32 %a) nounwind {
+entry:
+; ARM: t5:
+; ARM-NOT: mov
+; ARM: cmp r0, #1
+; ARM-NOT: mov
+; ARM: movne r0, #0
+
+; THUMB2: t5:
+; THUMB2-NOT: mov
+; THUMB2: cmp r0, #1
+; THUMB2: it ne
+; THUMB2: movne r0, #0
+  %cmp = icmp eq i32 %a, 1
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
+define i32 @t6(i32 %a) nounwind {
+entry:
+; ARM: t6:
+; ARM-NOT: mov
+; ARM: cmp r0, #0
+; ARM: movne r0, #1
+
+; THUMB2: t6:
+; THUMB2-NOT: mov
+; THUMB2: cmp r0, #0
+; THUMB2: it ne
+; THUMB2: movne r0, #1
+  %tobool = icmp ne i32 %a, 0
+  %lnot.ext = zext i1 %tobool to i32
+  ret i32 %lnot.ext
+}
@@ -3,15 +3,6 @@
 ; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
 ; test as 'mov.w r0, #0'. So far, that requires physreg joining.

-; 0x000000bb = 187
-define i1 @f1(i32 %a) {
-    %tmp = xor i32 %a, 187
-    %tmp1 = icmp ne i32 %tmp, 0
-    ret i1 %tmp1
-}
-; CHECK: f1:
-; CHECK: teq.w r0, #187
-
 ; 0x000000bb = 187
 define i1 @f2(i32 %a) {
     %tmp = xor i32 %a, 187
@@ -30,24 +21,6 @@ define i1 @f3(i32 %a) {
 ; CHECK: f3:
 ; CHECK: teq.w r0, #11141290

-; 0x00aa00aa = 11141290
-define i1 @f4(i32 %a) {
-    %tmp = xor i32 %a, 11141290
-    %tmp1 = icmp ne i32 0, %tmp
-    ret i1 %tmp1
-}
-; CHECK: f4:
-; CHECK: teq.w r0, #11141290
-
-; 0xcc00cc00 = 3422604288
-define i1 @f5(i32 %a) {
-    %tmp = xor i32 %a, 3422604288
-    %tmp1 = icmp ne i32 %tmp, 0
-    ret i1 %tmp1
-}
-; CHECK: f5:
-; CHECK: teq.w r0, #-872363008
-
 ; 0xcc00cc00 = 3422604288
 define i1 @f6(i32 %a) {
     %tmp = xor i32 %a, 3422604288
@@ -72,17 +45,6 @@ define i1 @f8(i32 %a) {
     %tmp1 = icmp ne i32 0, %tmp
     ret i1 %tmp1
 }
-; CHECK: f8:
-; CHECK: teq.w r0, #-572662307
-
-; 0x00110000 = 1114112
-define i1 @f9(i32 %a) {
-    %tmp = xor i32 %a, 1114112
-    %tmp1 = icmp ne i32 %tmp, 0
-    ret i1 %tmp1
-}
-; CHECK: f9:
-; CHECK: teq.w r0, #1114112

 ; 0x00110000 = 1114112
 define i1 @f10(i32 %a) {
@@ -3,14 +3,6 @@
 ; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
 ; tst as 'mov.w r0, #0'. So far, that requires physreg joining.

-define i1 @f1(i32 %a, i32 %b) {
-; CHECK: f1
-; CHECK: teq.w r0, r1
-    %tmp = xor i32 %a, %b
-    %tmp1 = icmp ne i32 %tmp, 0
-    ret i1 %tmp1
-}
-
 define i1 @f2(i32 %a, i32 %b) {
 ; CHECK: f2
 ; CHECK: teq.w r0, r1
@@ -19,14 +11,6 @@ define i1 @f2(i32 %a, i32 %b) {
     ret i1 %tmp1
 }

-define i1 @f3(i32 %a, i32 %b) {
-; CHECK: f3
-; CHECK: teq.w r0, r1
-    %tmp = xor i32 %a, %b
-    %tmp1 = icmp ne i32 0, %tmp
-    ret i1 %tmp1
-}
-
 define i1 @f4(i32 %a, i32 %b) {
 ; CHECK: f4
 ; CHECK: teq.w r0, r1
@@ -3,15 +3,6 @@
 ; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
 ; tst as 'mov.w r0, #0'. So far, that requires physreg joining.

-; 0x000000bb = 187
-define i1 @f1(i32 %a) {
-    %tmp = and i32 %a, 187
-    %tmp1 = icmp ne i32 %tmp, 0
-    ret i1 %tmp1
-}
-; CHECK: f1:
-; CHECK: tst.w r0, #187
-
 ; 0x000000bb = 187
 define i1 @f2(i32 %a) {
     %tmp = and i32 %a, 187
@@ -30,24 +21,6 @@ define i1 @f3(i32 %a) {
 ; CHECK: f3:
 ; CHECK: tst.w r0, #11141290

-; 0x00aa00aa = 11141290
-define i1 @f4(i32 %a) {
-    %tmp = and i32 %a, 11141290
-    %tmp1 = icmp ne i32 0, %tmp
-    ret i1 %tmp1
-}
-; CHECK: f4:
-; CHECK: tst.w r0, #11141290
-
-; 0xcc00cc00 = 3422604288
-define i1 @f5(i32 %a) {
-    %tmp = and i32 %a, 3422604288
-    %tmp1 = icmp ne i32 %tmp, 0
-    ret i1 %tmp1
-}
-; CHECK: f5:
-; CHECK: tst.w r0, #-872363008
-
 ; 0xcc00cc00 = 3422604288
 define i1 @f6(i32 %a) {
     %tmp = and i32 %a, 3422604288
@@ -66,24 +39,6 @@ define i1 @f7(i32 %a) {
 ; CHECK: f7:
 ; CHECK: tst.w r0, #-572662307

-; 0xdddddddd = 3722304989
-define i1 @f8(i32 %a) {
-    %tmp = and i32 %a, 3722304989
-    %tmp1 = icmp ne i32 0, %tmp
-    ret i1 %tmp1
-}
-; CHECK: f8:
-; CHECK: tst.w r0, #-572662307
-
-; 0x00110000 = 1114112
-define i1 @f9(i32 %a) {
-    %tmp = and i32 %a, 1114112
-    %tmp1 = icmp ne i32 %tmp, 0
-    ret i1 %tmp1
-}
-; CHECK: f9:
-; CHECK: tst.w r0, #1114112
-
 ; 0x00110000 = 1114112
 define i1 @f10(i32 %a) {
     %tmp = and i32 %a, 1114112
@@ -3,14 +3,6 @@
 ; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
 ; tst as 'mov.w r0, #0'. So far, that requires physreg joining.

-define i1 @f1(i32 %a, i32 %b) {
-; CHECK: f1:
-; CHECK: tst r0, r1
-    %tmp = and i32 %a, %b
-    %tmp1 = icmp ne i32 %tmp, 0
-    ret i1 %tmp1
-}
-
 define i1 @f2(i32 %a, i32 %b) {
 ; CHECK: f2:
 ; CHECK: tst r0, r1
@@ -19,14 +11,6 @@ define i1 @f2(i32 %a, i32 %b) {
     ret i1 %tmp1
 }

-define i1 @f3(i32 %a, i32 %b) {
-; CHECK: f3:
-; CHECK: tst r0, r1
-    %tmp = and i32 %a, %b
-    %tmp1 = icmp ne i32 0, %tmp
-    ret i1 %tmp1
-}
-
 define i1 @f4(i32 %a, i32 %b) {
 ; CHECK: f4:
 ; CHECK: tst r0, r1