mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-14 11:32:34 +00:00
AArch64/ARM64: make use of ANDS and BICS instructions for comparisons.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@206888 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c499ecd1d1
commit
8b36f98fd5
@ -580,6 +580,10 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
|
||||
case ARM64::ANDXrr:
|
||||
case ARM64::BICWrr:
|
||||
case ARM64::BICXrr:
|
||||
case ARM64::ANDSWrr:
|
||||
case ARM64::ANDSXrr:
|
||||
case ARM64::BICSWrr:
|
||||
case ARM64::BICSXrr:
|
||||
case ARM64::EONWrr:
|
||||
case ARM64::EONXrr:
|
||||
case ARM64::EORWrr:
|
||||
@ -604,6 +608,10 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
|
||||
case ARM64::ANDXrr: Opcode = ARM64::ANDXrs; break;
|
||||
case ARM64::BICWrr: Opcode = ARM64::BICWrs; break;
|
||||
case ARM64::BICXrr: Opcode = ARM64::BICXrs; break;
|
||||
case ARM64::ANDSWrr: Opcode = ARM64::ANDSWrs; break;
|
||||
case ARM64::ANDSXrr: Opcode = ARM64::ANDSXrs; break;
|
||||
case ARM64::BICSWrr: Opcode = ARM64::BICSWrs; break;
|
||||
case ARM64::BICSXrr: Opcode = ARM64::BICSXrs; break;
|
||||
case ARM64::EONWrr: Opcode = ARM64::EONWrs; break;
|
||||
case ARM64::EONXrr: Opcode = ARM64::EONXrs; break;
|
||||
case ARM64::EORWrr: Opcode = ARM64::EORWrs; break;
|
||||
|
@ -918,23 +918,32 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
|
||||
// SUBS means that it's possible to get CSE with subtract operations.
|
||||
// A later phase can perform the optimization of setting the destination
|
||||
// register to WZR/XZR if it ends up being unused.
|
||||
|
||||
// We'd like to combine a (CMP op1, (sub 0, op2)) into a CMN instruction on the
|
||||
// grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags can be
|
||||
// set differently by this operation. It comes down to whether "SInt(~op2)+1
|
||||
// == SInt(~op2+1)" (and the same for UInt). If they are then everything is
|
||||
// fine. If not then the optimization is wrong. Thus general comparisons are
|
||||
// only valid if op2 != 0.
|
||||
|
||||
// So, finally, the only LLVM-native comparisons that don't mention C and V
|
||||
// are SETEQ and SETNE. They're the only ones we can safely use CMN for in the
|
||||
// absence of information about op2.
|
||||
unsigned Opcode = ARM64ISD::SUBS;
|
||||
|
||||
if (RHS.getOpcode() == ISD::SUB && isa<ConstantSDNode>(RHS.getOperand(0)) &&
|
||||
cast<ConstantSDNode>(RHS.getOperand(0))->getZExtValue() == 0 &&
|
||||
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
|
||||
// We'd like to combine a (CMP op1, (sub 0, op2)) into a CMN instruction on
|
||||
// the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags
|
||||
// can be set differently by this operation. It comes down to whether
|
||||
// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
|
||||
// everything is fine. If not then the optimization is wrong. Thus general
|
||||
// comparisons are only valid if op2 != 0.
|
||||
|
||||
// So, finally, the only LLVM-native comparisons that don't mention C and V
|
||||
// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
|
||||
// the absence of information about op2.
|
||||
Opcode = ARM64ISD::ADDS;
|
||||
RHS = RHS.getOperand(1);
|
||||
} else if (LHS.getOpcode() == ISD::AND && isa<ConstantSDNode>(RHS) &&
|
||||
cast<ConstantSDNode>(RHS)->getZExtValue() == 0 &&
|
||||
!isUnsignedIntSetCC(CC)) {
|
||||
// Similarly, (CMP (and X, Y), 0) can be implemented with a TST
|
||||
// (a.k.a. ANDS) except that the flags are only guaranteed to work for one
|
||||
// of the signed comparisons.
|
||||
Opcode = ARM64ISD::ANDS;
|
||||
RHS = LHS.getOperand(1);
|
||||
LHS = LHS.getOperand(0);
|
||||
}
|
||||
|
||||
return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS)
|
||||
|
@ -1798,12 +1798,18 @@ multiclass LogicalReg<bits<2> opc, bit N, string mnemonic,
|
||||
}
|
||||
|
||||
// Split from LogicalReg to allow setting CPSR Defs
|
||||
multiclass LogicalRegS<bits<2> opc, bit N, string mnemonic> {
|
||||
multiclass LogicalRegS<bits<2> opc, bit N, string mnemonic,
|
||||
SDPatternOperator OpNode = null_frag> {
|
||||
let Defs = [CPSR], mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
|
||||
def Wrs : BaseLogicalSReg<opc, N, GPR32, logical_shifted_reg32, mnemonic, []>{
|
||||
def Wrr : BaseLogicalRegPseudo<GPR32, OpNode>;
|
||||
def Xrr : BaseLogicalRegPseudo<GPR64, OpNode>;
|
||||
|
||||
def Wrs : BaseLogicalSReg<opc, N, GPR32, logical_shifted_reg32, mnemonic,
|
||||
[(set GPR32:$Rd, (OpNode GPR32:$Rn, logical_shifted_reg32:$Rm))]> {
|
||||
let Inst{31} = 0;
|
||||
}
|
||||
def Xrs : BaseLogicalSReg<opc, N, GPR64, logical_shifted_reg64, mnemonic, []>{
|
||||
def Xrs : BaseLogicalSReg<opc, N, GPR64, logical_shifted_reg64, mnemonic,
|
||||
[(set GPR64:$Rd, (OpNode GPR64:$Rn, logical_shifted_reg64:$Rm))]> {
|
||||
let Inst{31} = 1;
|
||||
}
|
||||
} // Defs = [CPSR]
|
||||
|
@ -125,7 +125,8 @@ def ARM64sbc : SDNode<"ARM64ISD::SBC", SDTBinaryArithWithFlagsIn>;
|
||||
def ARM64add_flag : SDNode<"ARM64ISD::ADDS", SDTBinaryArithWithFlagsOut,
|
||||
[SDNPCommutative]>;
|
||||
def ARM64sub_flag : SDNode<"ARM64ISD::SUBS", SDTBinaryArithWithFlagsOut>;
|
||||
def ARM64and_flag : SDNode<"ARM64ISD::ANDS", SDTBinaryArithWithFlagsOut>;
|
||||
def ARM64and_flag : SDNode<"ARM64ISD::ANDS", SDTBinaryArithWithFlagsOut,
|
||||
[SDNPCommutative]>;
|
||||
def ARM64adc_flag : SDNode<"ARM64ISD::ADCS", SDTBinaryArithWithFlagsInOut>;
|
||||
def ARM64sbc_flag : SDNode<"ARM64ISD::SBCS", SDTBinaryArithWithFlagsInOut>;
|
||||
|
||||
@ -619,8 +620,9 @@ def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
|
||||
|
||||
|
||||
// (register)
|
||||
defm ANDS : LogicalRegS<0b11, 0, "ands">;
|
||||
defm BICS : LogicalRegS<0b11, 1, "bics">;
|
||||
defm ANDS : LogicalRegS<0b11, 0, "ands", ARM64and_flag>;
|
||||
defm BICS : LogicalRegS<0b11, 1, "bics",
|
||||
BinOpFrag<(ARM64and_flag node:$LHS, (not node:$RHS))>>;
|
||||
defm AND : LogicalReg<0b00, 0, "and", and>;
|
||||
defm BIC : LogicalReg<0b00, 1, "bic",
|
||||
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
|
||||
|
@ -1,6 +1,5 @@
|
||||
; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64
|
||||
; RUN: llc -mtriple=arm64-none-linux-gnu -mcpu=cyclone -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM64
|
||||
|
||||
; arm64 has a separate copy of this test.
|
||||
@lhs = global fp128 zeroinitializer
|
||||
@rhs = global fp128 zeroinitializer
|
||||
|
||||
@ -206,8 +205,9 @@ define void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) {
|
||||
|
||||
%val = select i1 %cond, fp128 %lhs, fp128 %rhs
|
||||
store fp128 %val, fp128* @lhs
|
||||
; CHECK: cmp {{w[0-9]+}}, #0
|
||||
; CHECK-AARCH64: cmp {{w[0-9]+}}, #0
|
||||
; CHECK-AARCH64: str q1, [sp]
|
||||
; CHECK-ARM64: tst {{w[0-9]+}}, #0x1
|
||||
; CHECK-NEXT: b.eq [[IFFALSE:.LBB[0-9]+_[0-9]+]]
|
||||
; CHECK-NEXT: BB#
|
||||
; CHECK-AARCH64-NEXT: str q0, [sp]
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs < %s -mtriple=arm64-none-linux-gnu | FileCheck %s
|
||||
|
||||
@var1_32 = global i32 0
|
||||
@var2_32 = global i32 0
|
||||
@ -6,7 +7,7 @@
|
||||
@var1_64 = global i64 0
|
||||
@var2_64 = global i64 0
|
||||
|
||||
define void @logical_32bit() {
|
||||
define void @logical_32bit() minsize {
|
||||
; CHECK-LABEL: logical_32bit:
|
||||
%val1 = load i32* @var1_32
|
||||
%val2 = load i32* @var2_32
|
||||
@ -96,7 +97,7 @@ define void @logical_32bit() {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @logical_64bit() {
|
||||
define void @logical_64bit() minsize {
|
||||
; CHECK-LABEL: logical_64bit:
|
||||
%val1 = load i64* @var1_64
|
||||
%val2 = load i64* @var2_64
|
||||
|
@ -202,8 +202,7 @@ define void @test_select(i1 %cond, fp128 %lhs, fp128 %rhs) {
|
||||
|
||||
%val = select i1 %cond, fp128 %lhs, fp128 %rhs
|
||||
store fp128 %val, fp128* @lhs, align 16
|
||||
; CHECK: and [[BIT:w[0-9]+]], w0, #0x1
|
||||
; CHECK: cmp [[BIT]], #0
|
||||
; CHECK: tst w0, #0x1
|
||||
; CHECK-NEXT: b.eq [[IFFALSE:.LBB[0-9]+_[0-9]+]]
|
||||
; CHECK-NEXT: BB#
|
||||
; CHECK-NEXT: orr v[[VAL:[0-9]+]].16b, v0.16b, v0.16b
|
||||
|
Loading…
Reference in New Issue
Block a user