mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-14 11:32:34 +00:00
Peephole optimization for ABS on ARM.
Patch by Ana Pazos! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141365 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
3f329cb781
commit
244455e6d6
@ -47,6 +47,11 @@ CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
|
||||
cl::desc("Check fp vmla / vmls hazard at isel time"),
|
||||
cl::init(true));
|
||||
|
||||
static cl::opt<bool>
|
||||
DisableARMIntABS("disable-arm-int-abs", cl::Hidden,
|
||||
cl::desc("Enable / disable ARM integer abs transform"),
|
||||
cl::init(false));
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
|
||||
/// instructions for SelectionDAG operations.
|
||||
@ -252,6 +257,9 @@ private:
|
||||
ARMCC::CondCodes CCVal, SDValue CCR,
|
||||
SDValue InFlag);
|
||||
|
||||
// Select special operations if node forms integer ABS pattern
|
||||
SDNode *SelectABSOp(SDNode *N);
|
||||
|
||||
SDNode *SelectConcatVector(SDNode *N);
|
||||
|
||||
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
|
||||
@ -2295,6 +2303,53 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
|
||||
return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
|
||||
}
|
||||
|
||||
/// Target-specific DAG combining for ISD::XOR.
|
||||
/// Target-independent combining lowers SELECT_CC nodes of the form
|
||||
/// select_cc setg[ge] X, 0, X, -X
|
||||
/// select_cc setgt X, -1, X, -X
|
||||
/// select_cc setl[te] X, 0, -X, X
|
||||
/// select_cc setlt X, 1, -X, X
|
||||
/// which represent Integer ABS into:
|
||||
/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
|
||||
/// ARM instruction selection detects the latter and matches it to
|
||||
/// ARM::ABS or ARM::t2ABS machine node.
|
||||
SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
|
||||
SDValue XORSrc0 = N->getOperand(0);
|
||||
SDValue XORSrc1 = N->getOperand(1);
|
||||
DebugLoc DL = N->getDebugLoc();
|
||||
EVT VT = N->getValueType(0);
|
||||
|
||||
if (DisableARMIntABS)
|
||||
return NULL;
|
||||
|
||||
if (XORSrc0.getOpcode() != ISD::ADD ||
|
||||
XORSrc1.getOpcode() != ISD::SRA)
|
||||
return NULL;
|
||||
|
||||
SDValue ADDSrc0 = XORSrc0.getOperand(0);
|
||||
SDValue ADDSrc1 = XORSrc0.getOperand(1);
|
||||
SDValue SRASrc0 = XORSrc1.getOperand(0);
|
||||
SDValue SRASrc1 = XORSrc1.getOperand(1);
|
||||
ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
|
||||
EVT XType = SRASrc0.getValueType();
|
||||
unsigned Size = XType.getSizeInBits() - 1;
|
||||
|
||||
if (ADDSrc1 == XORSrc1 &&
|
||||
ADDSrc0 == SRASrc0 &&
|
||||
XType.isInteger() &&
|
||||
SRAConstant != NULL &&
|
||||
Size == SRAConstant->getZExtValue()) {
|
||||
|
||||
unsigned Opcode = ARM::ABS;
|
||||
if (Subtarget->isThumb2())
|
||||
Opcode = ARM::t2ABS;
|
||||
|
||||
return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
|
||||
// The only time a CONCAT_VECTORS operation can have legal types is when
|
||||
// two 64-bit vectors are concatenated to a 128-bit vector.
|
||||
@ -2331,6 +2386,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
|
||||
switch (N->getOpcode()) {
|
||||
default: break;
|
||||
case ISD::XOR: {
|
||||
// Select special operations if XOR node forms integer ABS pattern
|
||||
SDNode *ResNode = SelectABSOp(N);
|
||||
if (ResNode)
|
||||
return ResNode;
|
||||
// Other cases are autogenerated.
|
||||
break;
|
||||
}
|
||||
case ISD::Constant: {
|
||||
unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
|
||||
bool UseCP = true;
|
||||
|
@ -6100,6 +6100,86 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
|
||||
MI->eraseFromParent(); // The pseudo instruction is gone now.
|
||||
return BB;
|
||||
}
|
||||
|
||||
case ARM::ABS:
|
||||
case ARM::t2ABS: {
|
||||
// To insert an ABS instruction, we have to insert the
|
||||
// diamond control-flow pattern. The incoming instruction knows the
|
||||
// source vreg to test against 0, the destination vreg to set,
|
||||
// the condition code register to branch on, the
|
||||
// true/false values to select between, and a branch opcode to use.
|
||||
// It transforms
|
||||
// V1 = ABS V0
|
||||
// into
|
||||
// V2 = MOVS V0
|
||||
// BCC (branch to SinkBB if V0 >= 0)
|
||||
// RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
|
||||
// SinkBB: V1 = PHI(V2, V3)
|
||||
const BasicBlock *LLVM_BB = BB->getBasicBlock();
|
||||
MachineFunction::iterator BBI = BB;
|
||||
++BBI;
|
||||
MachineFunction *Fn = BB->getParent();
|
||||
MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
|
||||
MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
|
||||
Fn->insert(BBI, RSBBB);
|
||||
Fn->insert(BBI, SinkBB);
|
||||
|
||||
unsigned int ABSSrcReg = MI->getOperand(1).getReg();
|
||||
unsigned int ABSDstReg = MI->getOperand(0).getReg();
|
||||
bool isThumb2 = Subtarget->isThumb2();
|
||||
MachineRegisterInfo &MRI = Fn->getRegInfo();
|
||||
// In Thumb mode S must not be specified if source register is the SP or
|
||||
// PC and if destination register is the SP, so restrict register class
|
||||
unsigned NewMovDstReg = MRI.createVirtualRegister(
|
||||
isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
|
||||
unsigned NewRsbDstReg = MRI.createVirtualRegister(
|
||||
isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
|
||||
|
||||
// Transfer the remainder of BB and its successor edges to SinkBB.
|
||||
SinkBB->splice(SinkBB->begin(), BB,
|
||||
llvm::next(MachineBasicBlock::iterator(MI)),
|
||||
BB->end());
|
||||
SinkBB->transferSuccessorsAndUpdatePHIs(BB);
|
||||
|
||||
BB->addSuccessor(RSBBB);
|
||||
BB->addSuccessor(SinkBB);
|
||||
|
||||
// fall through to SinkBB
|
||||
RSBBB->addSuccessor(SinkBB);
|
||||
|
||||
// insert a movs at the end of BB
|
||||
BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr),
|
||||
NewMovDstReg)
|
||||
.addReg(ABSSrcReg, RegState::Kill)
|
||||
.addImm((unsigned)ARMCC::AL).addReg(0)
|
||||
.addReg(ARM::CPSR, RegState::Define);
|
||||
|
||||
// insert a bcc with opposite CC to ARMCC::MI at the end of BB
|
||||
BuildMI(BB, dl,
|
||||
TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
|
||||
.addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
|
||||
|
||||
// insert rsbri in RSBBB
|
||||
// Note: BCC and rsbri will be converted into predicated rsbmi
|
||||
// by if-conversion pass
|
||||
BuildMI(*RSBBB, RSBBB->begin(), dl,
|
||||
TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
|
||||
.addReg(NewMovDstReg, RegState::Kill)
|
||||
.addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
|
||||
|
||||
// insert PHI in SinkBB,
|
||||
// reuse ABSDstReg to not change uses of ABS instruction
|
||||
BuildMI(*SinkBB, SinkBB->begin(), dl,
|
||||
TII->get(ARM::PHI), ABSDstReg)
|
||||
.addReg(NewRsbDstReg).addMBB(RSBBB)
|
||||
.addReg(NewMovDstReg).addMBB(BB);
|
||||
|
||||
// remove ABS instruction
|
||||
MI->eraseFromParent();
|
||||
|
||||
// return last added BB
|
||||
return SinkBB;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2848,6 +2848,9 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
|
||||
let Inst{15-12} = Rd;
|
||||
}
|
||||
|
||||
def : ARMInstAlias<"movs${p} $Rd, $Rm",
|
||||
(MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>;
|
||||
|
||||
// A version for the smaller set of tail call registers.
|
||||
let neverHasSideEffects = 1 in
|
||||
def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
|
||||
@ -4025,6 +4028,14 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
|
||||
let Inst{3-0} = opt;
|
||||
}
|
||||
|
||||
// Pseudo instruction that combines movs + predicated rsbmi
|
||||
// to implement integer ABS
|
||||
let usesCustomInserter = 1, Defs = [CPSR] in {
|
||||
def ABS : ARMPseudoInst<
|
||||
(outs GPR:$dst), (ins GPR:$src),
|
||||
8, NoItinerary, []>;
|
||||
}
|
||||
|
||||
let usesCustomInserter = 1 in {
|
||||
let Defs = [CPSR] in {
|
||||
def ATOMIC_LOAD_ADD_I8 : PseudoInst<
|
||||
|
@ -3433,6 +3433,14 @@ def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
|
||||
[(set rGPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
|
||||
imm:$cp))]>,
|
||||
Requires<[IsThumb2]>;
|
||||
|
||||
// Pseudo instruction that combines movs + predicated rsbmi
|
||||
// to implement integer ABS
|
||||
let usesCustomInserter = 1, Defs = [CPSR] in {
|
||||
def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
|
||||
NoItinerary, []>, Requires<[IsThumb2]>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Coprocessor load/store -- for disassembly only
|
||||
//
|
||||
|
@ -1,8 +1,8 @@
|
||||
; RUN: llc < %s -march=arm -mattr=+v4t | FileCheck %s
|
||||
|
||||
;; Integer absolute value, should produce something as good as: ARM:
|
||||
;; add r3, r0, r0, asr #31
|
||||
;; eor r0, r3, r0, asr #31
|
||||
;; movs r0, r0
|
||||
;; rsbmi r0, r0, #0
|
||||
;; bx lr
|
||||
|
||||
define i32 @test(i32 %a) {
|
||||
@ -10,7 +10,7 @@ define i32 @test(i32 %a) {
|
||||
%b = icmp sgt i32 %a, -1
|
||||
%abs = select i1 %b, i32 %a, i32 %tmp1neg
|
||||
ret i32 %abs
|
||||
; CHECK: add r1, r0, r0, asr #31
|
||||
; CHECK: eor r0, r1, r0, asr #31
|
||||
; CHECK: movs r0, r0
|
||||
; CHECK: rsbmi r0, r0, #0
|
||||
; CHECK: bx lr
|
||||
}
|
||||
|
@ -3,9 +3,9 @@
|
||||
|
||||
;; Integer absolute value, should produce something as good as:
|
||||
;; Thumb:
|
||||
;; asr r2, r0, #31
|
||||
;; add r0, r0, r2
|
||||
;; eor r0, r2
|
||||
;; movs r0, r0
|
||||
;; bpl
|
||||
;; rsb r0, r0, #0 (with optimization, bpl + rsb is if-converted into rsbmi)
|
||||
;; bx lr
|
||||
|
||||
define i32 @test(i32 %a) {
|
||||
@ -13,5 +13,10 @@ define i32 @test(i32 %a) {
|
||||
%b = icmp sgt i32 %a, -1
|
||||
%abs = select i1 %b, i32 %a, i32 %tmp1neg
|
||||
ret i32 %abs
|
||||
; CHECK: movs r0, r0
|
||||
; CHECK: bpl
|
||||
; CHECK: rsb r0, r0, #0
|
||||
; CHECK: bx lr
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user