From ef2c86f8760f717882821987664bf5e7604ffe20 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 10 Oct 2011 22:59:55 +0000 Subject: [PATCH] Reapply r141365 now that PR11107 is fixed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141591 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelDAGToDAG.cpp | 66 ++++++++++++++++++++++++ lib/Target/ARM/ARMISelLowering.cpp | 80 ++++++++++++++++++++++++++++++ lib/Target/ARM/ARMInstrInfo.td | 11 ++++ lib/Target/ARM/ARMInstrThumb2.td | 8 +++ test/CodeGen/ARM/iabs.ll | 8 +-- test/CodeGen/Thumb/iabs.ll | 11 ++-- 6 files changed, 177 insertions(+), 7 deletions(-) diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index ce877b0d04e..5ee009c04c5 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -47,6 +47,11 @@ CheckVMLxHazard("check-vmlx-hazard", cl::Hidden, cl::desc("Check fp vmla / vmls hazard at isel time"), cl::init(true)); +static cl::opt +DisableARMIntABS("disable-arm-int-abs", cl::Hidden, + cl::desc("Enable / disable ARM integer abs transform"), + cl::init(false)); + //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine /// instructions for SelectionDAG operations. @@ -252,6 +257,9 @@ private: ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag); + // Select special operations if node forms integer ABS pattern + SDNode *SelectABSOp(SDNode *N); + SDNode *SelectConcatVector(SDNode *N); SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); @@ -2295,6 +2303,56 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); } +/// Target-specific DAG combining for ISD::XOR. +/// Target-independent combining lowers SELECT_CC nodes of the form +/// select_cc setg[ge] X, 0, X, -X +/// select_cc setgt X, -1, X, -X +/// select_cc setl[te] X, 0, -X, X +/// select_cc setlt X, 1, -X, X +/// which represent Integer ABS into: +/// Y = sra (X, size(X)-1); xor (add (X, Y), Y) +/// ARM instruction selection detects the latter and matches it to +/// ARM::ABS or ARM::t2ABS machine node. +SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){ + SDValue XORSrc0 = N->getOperand(0); + SDValue XORSrc1 = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + if (DisableARMIntABS) + return NULL; + + if (Subtarget->isThumb1Only()) + return NULL; + + if (XORSrc0.getOpcode() != ISD::ADD || + XORSrc1.getOpcode() != ISD::SRA) + return NULL; + + SDValue ADDSrc0 = XORSrc0.getOperand(0); + SDValue ADDSrc1 = XORSrc0.getOperand(1); + SDValue SRASrc0 = XORSrc1.getOperand(0); + SDValue SRASrc1 = XORSrc1.getOperand(1); + ConstantSDNode *SRAConstant = dyn_cast(SRASrc1); + EVT XType = SRASrc0.getValueType(); + unsigned Size = XType.getSizeInBits() - 1; + + if (ADDSrc1 == XORSrc1 && + ADDSrc0 == SRASrc0 && + XType.isInteger() && + SRAConstant != NULL && + Size == SRAConstant->getZExtValue()) { + + unsigned Opcode = ARM::ABS; + if (Subtarget->isThumb2()) + Opcode = ARM::t2ABS; + + return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); + } + + return NULL; +} + SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { // The only time a CONCAT_VECTORS operation can have legal types is when // two 64-bit vectors are concatenated to a 128-bit vector. @@ -2331,6 +2389,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { default: break; + case ISD::XOR: { + // Select special operations if XOR node forms integer ABS pattern + SDNode *ResNode = SelectABSOp(N); + if (ResNode) + return ResNode; + // Other cases are autogenerated. + break; + } case ISD::Constant: { unsigned Val = cast(N)->getZExtValue(); bool UseCP = true; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index a50bf09b3d9..0313f99908c 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -6181,6 +6181,86 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } + + case ARM::ABS: + case ARM::t2ABS: { + // To insert an ABS instruction, we have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // source vreg to test against 0, the destination vreg to set, + // the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + // It transforms + // V1 = ABS V0 + // into + // V2 = MOVS V0 + // BCC (branch to SinkBB if V0 >= 0) + // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0) + // SinkBB: V1 = PHI(V2, V3) + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator BBI = BB; + ++BBI; + MachineFunction *Fn = BB->getParent(); + MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB); + Fn->insert(BBI, RSBBB); + Fn->insert(BBI, SinkBB); + + unsigned int ABSSrcReg = MI->getOperand(1).getReg(); + unsigned int ABSDstReg = MI->getOperand(0).getReg(); + bool isThumb2 = Subtarget->isThumb2(); + MachineRegisterInfo &MRI = Fn->getRegInfo(); + // In Thumb mode S must not be specified if source register is the SP or + // PC and if destination register is the SP, so restrict register class + unsigned NewMovDstReg = MRI.createVirtualRegister( + isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass); + unsigned NewRsbDstReg = MRI.createVirtualRegister( + isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + SinkBB->splice(SinkBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + SinkBB->transferSuccessorsAndUpdatePHIs(BB); + + BB->addSuccessor(RSBBB); + BB->addSuccessor(SinkBB); + + // fall through to SinkMBB + RSBBB->addSuccessor(SinkBB); + + // insert a movs at the end of BB + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVr : ARM::MOVr), + NewMovDstReg) + .addReg(ABSSrcReg, RegState::Kill) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addReg(ARM::CPSR, RegState::Define); + + // insert a bcc with opposite CC to ARMCC::MI at the end of BB + BuildMI(BB, dl, + TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB) + .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR); + + // insert rsbri in RSBBB + // Note: BCC and rsbri will be converted into predicated rsbmi + // by if-conversion pass + BuildMI(*RSBBB, RSBBB->begin(), dl, + TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) + .addReg(NewMovDstReg, RegState::Kill) + .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + + // insert PHI in SinkBB, + // reuse ABSDstReg to not change uses of ABS instruction + BuildMI(*SinkBB, SinkBB->begin(), dl, + TII->get(ARM::PHI), ABSDstReg) + .addReg(NewRsbDstReg).addMBB(RSBBB) + .addReg(NewMovDstReg).addMBB(BB); + + // remove ABS instruction + MI->eraseFromParent(); + + // return last added BB + return SinkBB; + } } } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 31c1fbb404b..7ac8ed944e3 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -2848,6 +2848,9 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, let Inst{15-12} = Rd; } +def : ARMInstAlias<"movs${p} $Rd, $Rm", + (MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>; + // A version for the smaller set of tail call registers. let neverHasSideEffects = 1 in def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm, @@ -4025,6 +4028,14 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, let Inst{3-0} = opt; } +// Pseudo isntruction that combines movs + predicated rsbmi +// to implement integer ABS +let usesCustomInserter = 1, Defs = [CPSR] in { +def ABS : ARMPseudoInst< + (outs GPR:$dst), (ins GPR:$src), + 8, NoItinerary, []>; +} + let usesCustomInserter = 1 in { let Defs = [CPSR] in { def ATOMIC_LOAD_ADD_I8 : PseudoInst< diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 9be4d4689e7..7f24f81c57e 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3433,6 +3433,14 @@ def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp), [(set rGPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), imm:$cp))]>, Requires<[IsThumb2]>; + +// Pseudo isntruction that combines movs + predicated rsbmi +// to implement integer ABS +let usesCustomInserter = 1, Defs = [CPSR] in { +def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src), + NoItinerary, []>, Requires<[IsThumb2]>; +} + //===----------------------------------------------------------------------===// // Coprocessor load/store -- for disassembly only // diff --git a/test/CodeGen/ARM/iabs.ll b/test/CodeGen/ARM/iabs.ll index c01c041cfe8..89e309d1606 100644 --- a/test/CodeGen/ARM/iabs.ll +++ b/test/CodeGen/ARM/iabs.ll @@ -1,8 +1,8 @@ ; RUN: llc < %s -march=arm -mattr=+v4t | FileCheck %s ;; Integer absolute value, should produce something as good as: ARM: -;; add r3, r0, r0, asr #31 -;; eor r0, r3, r0, asr #31 +;; movs r0, r0 +;; rsbmi r0, r0, #0 ;; bx lr define i32 @test(i32 %a) { @@ -10,7 +10,7 @@ define i32 @test(i32 %a) { %b = icmp sgt i32 %a, -1 %abs = select i1 %b, i32 %a, i32 %tmp1neg ret i32 %abs -; CHECK: add r1, r0, r0, asr #31 -; CHECK: eor r0, r1, r0, asr #31 +; CHECK: movs r0, r0 +; CHECK: rsbmi r0, r0, #0 ; CHECK: bx lr } diff --git a/test/CodeGen/Thumb/iabs.ll b/test/CodeGen/Thumb/iabs.ll index d7cdcd8149a..d03b5b2e3be 100644 --- a/test/CodeGen/Thumb/iabs.ll +++ b/test/CodeGen/Thumb/iabs.ll @@ -3,9 +3,9 @@ ;; Integer absolute value, should produce something as good as: ;; Thumb: -;; asr r2, r0, #31 -;; add r0, r0, r2 -;; eor r0, r2 +;; movs r0, r0 +;; bpl +;; rsb r0, r0, #0 (with opitmization, bpl + rsb is if-converted into rsbmi) ;; bx lr define i32 @test(i32 %a) { @@ -13,5 +13,10 @@ define i32 @test(i32 %a) { %b = icmp sgt i32 %a, -1 %abs = select i1 %b, i32 %a, i32 %tmp1neg ret i32 %abs +; CHECK: movs r0, r0 +; CHECK: bpl +; CHECK: rsb r0, r0, #0 +; CHECK: bx lr } +