From 098c6a547fe540b3bbace4c3d4713f400c67b8a9 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 5 Sep 2012 23:58:02 +0000 Subject: [PATCH] Use predication instead of pseudo-opcodes when folding into MOVCC. Now that it is possible to dynamically tie MachineInstr operands, predicated instructions are possible in SSA form: %vreg3 = SUBri %vreg1, -2147483647, pred:14, pred:%noreg, %opt:%noreg %vreg4 = MOVCCr %vreg3, %vreg1, %pred:12, pred:%CPSR Becomes a predicated SUBri with a tied imp-use: SUBri %vreg1, -2147483647, pred:13, pred:%CPSR, opt:%noreg, %vreg1 This means that any instruction that is safe to move can be folded into a MOVCC, and the *CC pseudo-instructions are no longer needed. The test case changes reflect that Thumb2SizeReduce recognizes the predicated instructions. It didn't understand the pseudos. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163274 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseInstrInfo.cpp | 87 ++++++++-------------- test/CodeGen/ARM/select.ll | 2 +- test/CodeGen/ARM/select_xform.ll | 14 ++-- test/CodeGen/Thumb2/thumb2-select_xform.ll | 4 +- 4 files changed, 41 insertions(+), 66 deletions(-) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index f8e554fcbfd..10c41ee6c49 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1580,16 +1580,20 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { } /// Identify instructions that can be folded into a MOVCC instruction, and -/// return the corresponding opcode for the predicated pseudo-instruction. -static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI, - const MachineRegisterInfo &MRI) { +/// return the defining instruction. +static MachineInstr *canFoldIntoMOVCC(unsigned Reg, + const MachineRegisterInfo &MRI, + const TargetInstrInfo *TII) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) return 0; if (!MRI.hasOneNonDBGUse(Reg)) return 0; - MI = MRI.getVRegDef(Reg); + MachineInstr *MI = MRI.getVRegDef(Reg); if (!MI) return 0; + // MI is folded into the MOVCC by predicating it. + if (!MI->isPredicable()) + return 0; // Check if MI has any non-dead defs or physreg uses. This also detects // predicated instructions which will be reading CPSR. for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) { @@ -1599,55 +1603,18 @@ static unsigned canFoldIntoMOVCC(unsigned Reg, MachineInstr *&MI, return 0; if (!MO.isReg()) continue; + // MI can't have any tied operands, that would conflict with predication. + if (MO.isTied()) + return 0; if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) return 0; if (MO.isDef() && !MO.isDead()) return 0; } - switch (MI->getOpcode()) { - default: return 0; - case ARM::ANDri: return ARM::ANDCCri; - case ARM::ANDrr: return ARM::ANDCCrr; - case ARM::ANDrsi: return ARM::ANDCCrsi; - case ARM::ANDrsr: return ARM::ANDCCrsr; - case ARM::t2ANDri: return ARM::t2ANDCCri; - case ARM::t2ANDrr: return ARM::t2ANDCCrr; - case ARM::t2ANDrs: return ARM::t2ANDCCrs; - case ARM::EORri: return ARM::EORCCri; - case ARM::EORrr: return ARM::EORCCrr; - case ARM::EORrsi: return ARM::EORCCrsi; - case ARM::EORrsr: return ARM::EORCCrsr; - case ARM::t2EORri: return ARM::t2EORCCri; - case ARM::t2EORrr: return ARM::t2EORCCrr; - case ARM::t2EORrs: return ARM::t2EORCCrs; - case ARM::ORRri: return ARM::ORRCCri; - case ARM::ORRrr: return ARM::ORRCCrr; - case ARM::ORRrsi: return ARM::ORRCCrsi; - case ARM::ORRrsr: return ARM::ORRCCrsr; - case ARM::t2ORRri: return ARM::t2ORRCCri; - case ARM::t2ORRrr: return ARM::t2ORRCCrr; - case ARM::t2ORRrs: return ARM::t2ORRCCrs; - - // ARM ADD/SUB - case ARM::ADDri: return ARM::ADDCCri; - case ARM::ADDrr: return ARM::ADDCCrr; - case ARM::ADDrsi: return ARM::ADDCCrsi; - case ARM::ADDrsr: return ARM::ADDCCrsr; - case ARM::SUBri: return ARM::SUBCCri; - case ARM::SUBrr: return ARM::SUBCCrr; - case ARM::SUBrsi: return ARM::SUBCCrsi; - case ARM::SUBrsr: return ARM::SUBCCrsr; - - // Thumb2 ADD/SUB - case ARM::t2ADDri: return ARM::t2ADDCCri; - case ARM::t2ADDri12: return ARM::t2ADDCCri12; - case ARM::t2ADDrr: return ARM::t2ADDCCrr; - case ARM::t2ADDrs: return ARM::t2ADDCCrs; - case ARM::t2SUBri: return ARM::t2SUBCCri; - case ARM::t2SUBri12: return ARM::t2SUBCCri12; - case ARM::t2SUBrr: return ARM::t2SUBCCrr; - case ARM::t2SUBrs: return ARM::t2SUBCCrs; - } + bool DontMoveAcrossStores = true; + if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores)) + return 0; + return MI; } bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI, @@ -1676,19 +1643,18 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && "Unknown select instruction"); const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); - MachineInstr *DefMI = 0; - unsigned Opc = canFoldIntoMOVCC(MI->getOperand(2).getReg(), DefMI, MRI); - bool Invert = !Opc; - if (!Opc) - Opc = canFoldIntoMOVCC(MI->getOperand(1).getReg(), DefMI, MRI); - if (!Opc) + MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this); + bool Invert = !DefMI; + if (!DefMI) + DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this); + if (!DefMI) return 0; // Create a new predicated version of DefMI. // Rfalse is the first use. MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - get(Opc), MI->getOperand(0).getReg()) - .addOperand(MI->getOperand(Invert ? 2 : 1)); + DefMI->getDesc(), + MI->getOperand(0).getReg()); // Copy all the DefMI operands, excluding its (null) predicate. const MCInstrDesc &DefDesc = DefMI->getDesc(); @@ -1707,6 +1673,15 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, if (NewMI->hasOptionalDef()) AddDefaultCC(NewMI); + // The output register value when the predicate is false is an implicit + // register operand tied to the first def. + // The tie makes the register allocator ensure the FalseReg is allocated the + // same register as operand 0. + MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); + FalseReg.setImplicit(); + NewMI->addOperand(FalseReg); + NewMI->tieOperands(0, NewMI->getNumOperands() - 1); + // The caller will erase MI, but not DefMI. DefMI->eraseFromParent(); return NewMI; diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll index 55755666289..62708ed53d0 100644 --- a/test/CodeGen/ARM/select.ll +++ b/test/CodeGen/ARM/select.ll @@ -80,7 +80,7 @@ define double @f7(double %a, double %b) { ; CHECK-NEON: adr [[R2:r[0-9]+]], LCPI7_0 ; CHECK-NEON-NEXT: cmp r0, [[R3]] ; CHECK-NEON-NEXT: it eq -; CHECK-NEON-NEXT: addeq.w {{r.*}}, [[R2]] +; CHECK-NEON-NEXT: addeq{{.*}} [[R2]], #4 ; CHECK-NEON-NEXT: ldr ; CHECK-NEON: bx diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll index cfc0e701200..75078089126 100644 --- a/test/CodeGen/ARM/select_xform.ll +++ b/test/CodeGen/ARM/select_xform.ll @@ -9,7 +9,7 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { ; T2: t1: ; T2: mvn r0, #-2147483648 -; T2: addle.w r1, r1 +; T2: addle r1, r0 ; T2: mov r0, r1 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 2147483647 @@ -23,7 +23,7 @@ define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; ARM: mov r0, r1 ; T2: t2: -; T2: suble.w r1, r1, #10 +; T2: suble r1, #10 ; T2: mov r0, r1 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 10 @@ -37,7 +37,7 @@ define i32 @t3(i32 %a, i32 %b, i32 %x, i32 %y) nounwind { ; ARM: mov r0, r3 ; T2: t3: -; T2: andge.w r3, r3, r2 +; T2: andge r3, r2 ; T2: mov r0, r3 %cond = icmp slt i32 %a, %b %z = select i1 %cond, i32 -1, i32 %x @@ -51,7 +51,7 @@ define i32 @t4(i32 %a, i32 %b, i32 %x, i32 %y) nounwind { ; ARM: mov r0, r3 ; T2: t4: -; T2: orrge.w r3, r3, r2 +; T2: orrge r3, r2 ; T2: mov r0, r3 %cond = icmp slt i32 %a, %b %z = select i1 %cond, i32 0, i32 %x @@ -81,7 +81,7 @@ define i32 @t6(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; T2: t6: ; T2-NOT: movge -; T2: eorlt.w r3, r3, r2 +; T2: eorlt r3, r2 %cond = icmp slt i32 %a, %b %tmp1 = select i1 %cond, i32 %c, i32 0 %tmp2 = xor i32 %tmp1, %d @@ -200,7 +200,7 @@ entry: ; T2: t13 ; T2: cmp r1, #10 -; T2: addgt.w r0, r0, #1 +; T2: addgt r0, #1 %cmp = icmp sgt i32 %a, 10 %conv = zext i1 %cmp to i32 %add = add i32 %conv, %c @@ -216,7 +216,7 @@ entry: ; T2: t14 ; T2: cmp r1, #10 -; T2: subgt.w r0, r0, #1 +; T2: subgt r0, #1 %cmp = icmp sgt i32 %a, 10 %conv = sext i1 %cmp to i32 %add = add i32 %conv, %c diff --git a/test/CodeGen/Thumb2/thumb2-select_xform.ll b/test/CodeGen/Thumb2/thumb2-select_xform.ll index ead198f2162..ed4d26d746c 100644 --- a/test/CodeGen/Thumb2/thumb2-select_xform.ll +++ b/test/CodeGen/Thumb2/thumb2-select_xform.ll @@ -5,7 +5,7 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK: mvn r0, #-2147483648 ; CHECK: cmp r2, #10 ; CHECK: it le -; CHECK: addle.w r1, r1, r0 +; CHECK: addle r1, r0 ; CHECK: mov r0, r1 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 2147483647 @@ -30,7 +30,7 @@ define i32 @t3(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { ; CHECK: t3 ; CHECK: cmp r2, #10 ; CHECK: it le -; CHECK: suble.w r1, r1, #10 +; CHECK: suble r1, #10 ; CHECK: mov r0, r1 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 10