Peephole optimization for ABS on ARM.

Patch by Ana Pazos!


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141365 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Anton Korobeynikov 2011-10-07 16:15:08 +00:00
parent 3f329cb781
commit 244455e6d6
6 changed files with 174 additions and 7 deletions

View File

@ -47,6 +47,11 @@ CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
cl::desc("Check fp vmla / vmls hazard at isel time"),
cl::init(true));
// Hidden command-line switch (-disable-arm-int-abs). It defaults to false,
// i.e. the integer ABS transform is enabled; passing the flag makes
// SelectABSOp() return without matching anything.
static cl::opt<bool>
DisableARMIntABS("disable-arm-int-abs", cl::Hidden,
cl::desc("Enable / disable ARM integer abs transform"),
cl::init(false));
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
@ -252,6 +257,9 @@ private:
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
// Select special operations if node forms integer ABS pattern.
// Returns the node selected as ARM::ABS / ARM::t2ABS, or NULL when N does
// not match the pattern (the caller then falls back to normal selection).
SDNode *SelectABSOp(SDNode *N);
SDNode *SelectConcatVector(SDNode *N);
SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
@ -2295,6 +2303,53 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
}
/// SelectABSOp - Try to select an ISD::XOR node as an integer ABS pseudo.
/// Target-independent combining lowers SELECT_CC nodes of the form
///   select_cc setg[ge] X,  0,  X, -X
///   select_cc setgt    X, -1,  X, -X
///   select_cc setl[te] X,  0, -X,  X
///   select_cc setlt    X,  1, -X,  X
/// which represent Integer ABS into:
///   Y = sra (X, size(X)-1); xor (add (X, Y), Y)
/// ARM instruction selection detects the latter and matches it to
/// ARM::ABS or ARM::t2ABS machine node.
///
/// Only the exact operand order shown above is recognized: the XOR's first
/// operand must be the ADD and its second the SRA, and the ADD's operands
/// must be (X, Y) in that order.
///
/// \param N  the XOR node being selected.
/// \returns the result of CurDAG->SelectNodeTo() when the pattern matches,
///          or NULL when the transform is disabled or N is not the ABS idiom.
SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
  // Honor -disable-arm-int-abs before doing any matching work.
  if (DisableARMIntABS)
    return NULL;

  SDValue XORSrc0 = N->getOperand(0);
  SDValue XORSrc1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // The root must be xor (add ...), (sra ...).
  if (XORSrc0.getOpcode() != ISD::ADD ||
      XORSrc1.getOpcode() != ISD::SRA)
    return NULL;

  SDValue ADDSrc0 = XORSrc0.getOperand(0);
  SDValue ADDSrc1 = XORSrc0.getOperand(1);
  SDValue SRASrc0 = XORSrc1.getOperand(0);
  SDValue SRASrc1 = XORSrc1.getOperand(1);
  ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
  EVT XType = SRASrc0.getValueType();
  // Shift amount that replicates the sign bit across the whole value.
  unsigned Size = XType.getSizeInBits() - 1;

  // Require: the ADD's second operand is the very same SRA node that feeds
  // the XOR, both the ADD and the SRA operate on the same X, and the SRA
  // shifts by the constant (bit width - 1).
  if (ADDSrc1 == XORSrc1 &&
      ADDSrc0 == SRASrc0 &&
      XType.isInteger() &&
      SRAConstant != NULL &&
      Size == SRAConstant->getZExtValue()) {

    // NOTE(review): any subtarget that is not Thumb2 -- including Thumb
    // without Thumb2 -- takes the ARM::ABS path here; confirm that the
    // ARM::ABS expansion is valid for such targets.
    unsigned Opcode = ARM::ABS;
    if (Subtarget->isThumb2())
      Opcode = ARM::t2ABS;

    return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
  }

  return NULL;
}
SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
// The only time a CONCAT_VECTORS operation can have legal types is when
// two 64-bit vectors are concatenated to a 128-bit vector.
@ -2331,6 +2386,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
case ISD::XOR: {
  // An XOR can be the root of the integer ABS idiom; give the dedicated
  // matcher the first chance at it.
  if (SDNode *ABSNode = SelectABSOp(N))
    return ABSNode;
  // Not an ABS pattern: leave the XOR to the autogenerated matcher.
  break;
}
case ISD::Constant: {
unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
bool UseCP = true;

View File

@ -6100,6 +6100,86 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
case ARM::ABS:
case ARM::t2ABS: {
  // Expand the integer ABS pseudo into a diamond control-flow pattern.
  // The incoming instruction carries only the destination vreg (operand 0)
  // and the source vreg (operand 1).
  // It transforms
  //     V1 = ABS V0
  // into
  //     V2 = MOVS V0
  //     BCC                      (branch to SinkBB if V0 >= 0)
  //     RSBBB: V3 = RSBri V2, 0  (compute ABS if V2 < 0)
  //     SinkBB: V1 = PHI(V2, V3)
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator BBI = BB;
  ++BBI;
  MachineFunction *Fn = BB->getParent();
  MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
  // Place both new blocks directly after BB, RSBBB first.
  Fn->insert(BBI, RSBBB);
  Fn->insert(BBI, SinkBB);

  unsigned int ABSSrcReg = MI->getOperand(1).getReg();
  unsigned int ABSDstReg = MI->getOperand(0).getReg();
  // Remember whether the pseudo itself killed its source, so the kill flag
  // is only carried over to the MOV when it was really present. Marking the
  // source killed unconditionally would be wrong if it has later uses.
  bool ABSSrcKill = MI->getOperand(1).isKill();
  bool isThumb2 = Subtarget->isThumb2();
  MachineRegisterInfo &MRI = Fn->getRegInfo();
  // In Thumb mode S must not be specified if source register is the SP or
  // PC and if destination register is the SP, so restrict register class
  unsigned NewMovDstReg = MRI.createVirtualRegister(
    isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);
  unsigned NewRsbDstReg = MRI.createVirtualRegister(
    isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass);

  // Transfer the remainder of BB and its successor edges to SinkBB.
  SinkBB->splice(SinkBB->begin(), BB,
                 llvm::next(MachineBasicBlock::iterator(MI)),
                 BB->end());
  SinkBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(RSBBB);
  BB->addSuccessor(SinkBB);

  // RSBBB falls through to SinkBB
  RSBBB->addSuccessor(SinkBB);

  // insert a movs at the end of BB (the trailing CPSR def makes the move
  // set the flags)
  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr),
    NewMovDstReg)
    .addReg(ABSSrcReg, ABSSrcKill ? RegState::Kill : 0)
    .addImm((unsigned)ARMCC::AL).addReg(0)
    .addReg(ARM::CPSR, RegState::Define);

  // insert a bcc with opposite CC to ARMCC::MI at the end of BB
  BuildMI(BB, dl,
    TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
    .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);

  // insert rsbri in RSBBB
  // Note: BCC and rsbri will be converted into predicated rsbmi
  // by if-conversion pass
  BuildMI(*RSBBB, RSBBB->begin(), dl,
    TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
    .addReg(NewMovDstReg, RegState::Kill)
    .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);

  // insert PHI in SinkBB,
  // reuse ABSDstReg to not change uses of ABS instruction
  BuildMI(*SinkBB, SinkBB->begin(), dl,
    TII->get(ARM::PHI), ABSDstReg)
    .addReg(NewRsbDstReg).addMBB(RSBBB)
    .addReg(NewMovDstReg).addMBB(BB);

  // remove ABS instruction
  MI->eraseFromParent();

  // return last added BB
  return SinkBB;
}
}
}

View File

@ -2848,6 +2848,9 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
let Inst{15-12} = Rd;
}
// Assembler alias: accept the "movs" mnemonic and map it onto MOVr with
// CPSR supplied as the final operand.
def : ARMInstAlias<"movs${p} $Rd, $Rm",
(MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>;
// A version for the smaller set of tail call registers.
let neverHasSideEffects = 1 in
def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm,
@ -4025,6 +4028,14 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
let Inst{3-0} = opt;
}
// Pseudo instruction that combines movs + predicated rsbmi
// to implement integer ABS.
// It has no selection pattern (empty pattern list), is marked
// usesCustomInserter, and is declared as defining CPSR.
// NOTE(review): the size argument 8 presumably covers the two 4-byte
// instructions of the expansion -- confirm.
let usesCustomInserter = 1, Defs = [CPSR] in {
def ABS : ARMPseudoInst<
(outs GPR:$dst), (ins GPR:$src),
8, NoItinerary, []>;
}
let usesCustomInserter = 1 in {
let Defs = [CPSR] in {
def ATOMIC_LOAD_ADD_I8 : PseudoInst<

View File

@ -3433,6 +3433,14 @@ def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
[(set rGPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb2]>;
// Pseudo instruction that combines movs + predicated rsbmi
// to implement integer ABS.
// Thumb2-only variant (Requires<[IsThumb2]>) operating on rGPR registers.
// It has no selection pattern (empty pattern list), is marked
// usesCustomInserter, and is declared as defining CPSR.
let usesCustomInserter = 1, Defs = [CPSR] in {
def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
NoItinerary, []>, Requires<[IsThumb2]>;
}
//===----------------------------------------------------------------------===//
// Coprocessor load/store -- for disassembly only
//

View File

@ -1,8 +1,8 @@
; RUN: llc < %s -march=arm -mattr=+v4t | FileCheck %s
;; Integer absolute value, should produce something as good as: ARM:
;; add r3, r0, r0, asr #31
;; eor r0, r3, r0, asr #31
;; movs r0, r0
;; rsbmi r0, r0, #0
;; bx lr
define i32 @test(i32 %a) {
@ -10,7 +10,7 @@ define i32 @test(i32 %a) {
%b = icmp sgt i32 %a, -1
%abs = select i1 %b, i32 %a, i32 %tmp1neg
ret i32 %abs
; CHECK: add r1, r0, r0, asr #31
; CHECK: eor r0, r1, r0, asr #31
; CHECK: movs r0, r0
; CHECK: rsbmi r0, r0, #0
; CHECK: bx lr
}

View File

@ -3,9 +3,9 @@
;; Integer absolute value, should produce something as good as:
;; Thumb:
;; asr r2, r0, #31
;; add r0, r0, r2
;; eor r0, r2
;; movs r0, r0
;; bpl
;; rsb r0, r0, #0 (with optimization, bpl + rsb is if-converted into rsbmi)
;; bx lr
define i32 @test(i32 %a) {
@ -13,5 +13,10 @@ define i32 @test(i32 %a) {
%b = icmp sgt i32 %a, -1
%abs = select i1 %b, i32 %a, i32 %tmp1neg
ret i32 %abs
; CHECK: movs r0, r0
; CHECK: bpl
; CHECK: rsb r0, r0, #0
; CHECK: bx lr
}