diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index bd629f16e25..f7a029bd611 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -199,6 +199,7 @@ class Instruction { bit isBranch = 0; // Is this instruction a branch instruction? bit isIndirectBranch = 0; // Is this instruction an indirect branch? bit isCompare = 0; // Is this instruction a comparison instruction? + bit isMoveImm = 0; // Is this instruction a move immediate instruction? bit isBarrier = 0; // Can control flow fall through this instruction? bit isCall = 0; // Is this instruction a call instruction? bit canFoldAsLoad = 0; // Can this be folded as a simple memory operand? diff --git a/include/llvm/Target/TargetInstrDesc.h b/include/llvm/Target/TargetInstrDesc.h index a127aed8f6d..11b7a5722b9 100644 --- a/include/llvm/Target/TargetInstrDesc.h +++ b/include/llvm/Target/TargetInstrDesc.h @@ -103,13 +103,14 @@ namespace TID { Terminator, Branch, IndirectBranch, - Predicable, - NotDuplicable, Compare, + MoveImm, DelaySlot, FoldableAsLoad, MayLoad, MayStore, + Predicable, + NotDuplicable, UnmodeledSideEffects, Commutable, ConvertibleTo3Addr, @@ -352,6 +353,12 @@ public: return Flags & (1 << TID::Compare); } + /// isMoveImmediate - Return true if this instruction is a move immediate + /// (including conditional moves) instruction. + bool isMoveImmediate() const { + return Flags & (1 << TID::MoveImm); + } + /// isNotDuplicable - Return true if this instruction cannot be safely /// duplicated. For example, if the instruction has a unique labels attached /// to it, duplicating it would cause multiple definition errors. 
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 2bb01f483a1..09400b3b837 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -608,6 +608,13 @@ public: const MachineRegisterInfo *MRI) const { return false; } + + /// FoldImmediate - 'Reg' is known to be defined by a move immediate + /// instruction, try to fold the immediate into the use instruction. + virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, + unsigned Reg, MachineRegisterInfo *MRI) const { + return false; + } /// getNumMicroOps - Return the number of u-operations the given machine /// instruction will be decoded to on the target cpu. diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index d19b319884e..e6e912199ff 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -365,13 +365,15 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createDeadMachineInstructionElimPass()); printAndVerify(PM, "After codegen DCE pass"); - PM.add(createPeepholeOptimizerPass()); if (!DisableMachineLICM) PM.add(createMachineLICMPass()); PM.add(createMachineCSEPass()); if (!DisableMachineSink) PM.add(createMachineSinkingPass()); printAndVerify(PM, "After Machine LICM, CSE and Sinking passes"); + + PM.add(createPeepholeOptimizerPass()); + printAndVerify(PM, "After codegen peephole optimization pass"); } // Pre-ra tail duplication. 
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 75f453ad71d..07c2b80dcb0 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -41,7 +41,9 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" using namespace llvm; @@ -56,6 +58,7 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumEliminated, "Number of compares eliminated"); +STATISTIC(NumImmFold, "Number of move immediate folded"); namespace { class PeepholeOptimizer : public MachineFunctionPass { @@ -85,6 +88,12 @@ namespace { bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet<MachineInstr*, 8> &LocalMIs); + bool isMoveImmediate(MachineInstr *MI, + SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr*> &ImmDefMIs); + bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, + SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr*> &ImmDefMIs); }; } @@ -257,6 +266,49 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, return false; } +bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, + SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { + const TargetInstrDesc &TID = MI->getDesc(); + if (!TID.isMoveImmediate()) + return false; + if (TID.getNumDefs() != 1) + return false; + unsigned Reg = MI->getOperand(0).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + ImmDefMIs.insert(std::make_pair(Reg, MI)); + ImmDefRegs.insert(Reg); + return true; + } + + return false; +} + +/// FoldImmediate - Try folding register operands that are defined by move +/// immediate instructions, i.e. a trivial constant folding optimization, if +/// and only if the def and use are in the same BB. 
+bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, + SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (ImmDefRegs.count(Reg) == 0) + continue; + DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg); + assert(II != ImmDefMIs.end()); + if (TII->FoldImmediate(MI, II->second, Reg, MRI)) { + ++NumImmFold; + return true; + } + } + return false; +} + bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (DisablePeephole) return false; @@ -269,9 +321,15 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; SmallPtrSet<MachineInstr*, 8> LocalMIs; + SmallSet<unsigned, 4> ImmDefRegs; + DenseMap<unsigned, MachineInstr*> ImmDefMIs; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock *MBB = &*I; + + bool SeenMoveImm = false; LocalMIs.clear(); + ImmDefRegs.clear(); + ImmDefMIs.clear(); for (MachineBasicBlock::iterator MII = I->begin(), MIE = I->end(); MII != MIE; ) { @@ -283,8 +341,12 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (MI->getDesc().isCompare()) { Changed |= OptimizeCmpInstr(MI, MBB); + } else if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { + SeenMoveImm = true; } else { Changed |= OptimizeExtInstr(MI, MBB, LocalMIs); + if (SeenMoveImm) + Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); } } } diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index c743628df4e..f61dfb76292 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1556,6 +1556,103 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, return false; } +bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, + MachineInstr *DefMI, unsigned Reg, + 
MachineRegisterInfo *MRI) const { + // Fold large immediates into add, sub, or, xor. + unsigned DefOpc = DefMI->getOpcode(); + if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm) + return false; + if (!DefMI->getOperand(1).isImm()) + // Could be t2MOVi32imm + return false; + + if (!MRI->hasOneNonDBGUse(Reg)) + return false; + + unsigned UseOpc = UseMI->getOpcode(); + unsigned NewUseOpc; + uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm(); + uint32_t SOImmValV1, SOImmValV2; + bool Commute = false; + switch (UseOpc) { + default: return false; + case ARM::SUBrr: + case ARM::ADDrr: + case ARM::ORRrr: + case ARM::EORrr: + case ARM::t2SUBrr: + case ARM::t2ADDrr: + case ARM::t2ORRrr: + case ARM::t2EORrr: { + Commute = UseMI->getOperand(2).getReg() != Reg; + switch (UseOpc) { + default: break; + case ARM::SUBrr: { + if (Commute) + return false; + ImmVal = -ImmVal; + NewUseOpc = ARM::SUBri; + // Fallthrough + } + case ARM::ADDrr: + case ARM::ORRrr: + case ARM::EORrr: { + if (!ARM_AM::isSOImmTwoPartVal(ImmVal)) + return false; + SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal); + SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal); + switch (UseOpc) { + default: break; + case ARM::ADDrr: NewUseOpc = ARM::ADDri; break; + case ARM::ORRrr: NewUseOpc = ARM::ORRri; break; + case ARM::EORrr: NewUseOpc = ARM::EORri; break; + } + break; + } + case ARM::t2SUBrr: { + if (Commute) + return false; + ImmVal = -ImmVal; + NewUseOpc = ARM::t2SUBri; + // Fallthrough + } + case ARM::t2ADDrr: + case ARM::t2ORRrr: + case ARM::t2EORrr: { + if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal)) + return false; + SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal); + SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal); + switch (UseOpc) { + default: break; + case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break; + case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break; + case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break; + } + break; + } + } + } + } + + 
unsigned OpIdx = Commute ? 2 : 1; + unsigned Reg1 = UseMI->getOperand(OpIdx).getReg(); + bool isKill = UseMI->getOperand(OpIdx).isKill(); + unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg)); + AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(), + *UseMI, UseMI->getDebugLoc(), + get(NewUseOpc), NewReg) + .addReg(Reg1, getKillRegState(isKill)) + .addImm(SOImmValV1))); + UseMI->setDesc(get(NewUseOpc)); + UseMI->getOperand(1).setReg(NewReg); + UseMI->getOperand(1).setIsKill(); + UseMI->getOperand(2).ChangeToImmediate(SOImmValV2); + DefMI->eraseFromParent(); + return true; +} + unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, const MachineInstr *MI) const { diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index c929fce9e7b..cbcc428b712 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -346,6 +346,11 @@ public: int CmpMask, int CmpValue, const MachineRegisterInfo *MRI) const; + /// FoldImmediate - 'Reg' is known to be defined by a move immediate + /// instruction, try to fold the immediate into the use instruction. + virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, + unsigned Reg, MachineRegisterInfo *MRI) const; + virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData, const MachineInstr *MI) const; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index b2a0a614e9c..3696fef1181 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -269,6 +269,16 @@ def sube_live_carry : PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS), [{return N->hasAnyUseOfValue(1);}]>; +// An 'and' node with a single use. +def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{ + return N->hasOneUse(); +}]>; + +// An 'xor' node with a single use. 
+def xor_su : PatFrag<(ops node:$lhs, node:$rhs), (xor node:$lhs, node:$rhs), [{ + return N->hasOneUse(); +}]>; + //===----------------------------------------------------------------------===// // Operand Definitions. // @@ -1941,7 +1951,7 @@ def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src), let Inst{25} = 0; } -let isReMaterializable = 1, isAsCheapAsAMove = 1 in +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi, "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP { bits<4> Rd; @@ -1952,7 +1962,7 @@ def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi, let Inst{11-0} = imm; } -let isReMaterializable = 1, isAsCheapAsAMove = 1 in +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins i32imm:$imm), DPFrm, IIC_iMOVi, "movw", "\t$Rd, $imm", @@ -2510,7 +2520,7 @@ def MVNs : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg:$shift), DPSoRegFrm, let Inst{15-12} = Rd; let Inst{11-0} = shift; } -let isReMaterializable = 1, isAsCheapAsAMove = 1 in +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMVNi, "mvn", "\t$Rd, $imm", [(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP { @@ -2972,10 +2982,10 @@ defm CMP : AI1_cmp_irs<0b1010, "cmp", // Note that TST/TEQ don't set all the same flags that CMP does! 
defm TST : AI1_cmp_irs<0b1000, "tst", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr, - BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>, 1>; + BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1>; defm TEQ : AI1_cmp_irs<0b1001, "teq", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr, - BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>, 1>; + BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>; defm CMPz : AI1_cmp_irs<0b1010, "cmp", IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr, @@ -3038,6 +3048,7 @@ def MOVCCs : AI1<0b1101, (outs GPR:$Rd), let Inst{11-0} = shift; } +let isMoveImm = 1 in def MOVCCi16 : AI1<0b1000, (outs GPR:$Rd), (ins GPR:$false, i32imm:$imm), DPFrm, IIC_iMOVi, "movw", "\t$Rd, $imm", @@ -3053,6 +3064,7 @@ def MOVCCi16 : AI1<0b1000, (outs GPR:$Rd), (ins GPR:$false, i32imm:$imm), let Inst{11-0} = imm{11-0}; } +let isMoveImm = 1 in def MOVCCi : AI1<0b1101, (outs GPR:$Rd), (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi, "mov", "\t$Rd, $imm", @@ -3068,10 +3080,12 @@ def MOVCCi : AI1<0b1101, (outs GPR:$Rd), } // Two instruction predicate mov immediate. +let isMoveImm = 1 in def MOVCCi32imm : PseudoInst<(outs GPR:$Rd), (ins GPR:$false, i32imm:$src, pred:$p), IIC_iCMOVix2, "", []>, RegConstraint<"$false = $Rd">; +let isMoveImm = 1 in def MVNCCi : AI1<0b1111, (outs GPR:$Rd), (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi, "mvn", "\t$Rd, $imm", @@ -3337,27 +3351,11 @@ def Int_eh_sjlj_dispatchsetup : // Large immediate handling. -// FIXME: Folding immediates into these logical operations aren't necessary -// good ideas. If it's in a loop machine licm could have hoisted the immediate -// computation out of the loop. 
-def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS), - (ORRri (ORRri GPR:$LHS, (so_imm2part_1 imm:$RHS)), - (so_imm2part_2 imm:$RHS))>; -def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS), - (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)), - (so_imm2part_2 imm:$RHS))>; -def : ARMPat<(add GPR:$LHS, so_imm2part:$RHS), - (ADDri (ADDri GPR:$LHS, (so_imm2part_1 imm:$RHS)), - (so_imm2part_2 imm:$RHS))>; -def : ARMPat<(add GPR:$LHS, so_neg_imm2part:$RHS), - (SUBri (SUBri GPR:$LHS, (so_neg_imm2part_1 imm:$RHS)), - (so_neg_imm2part_2 imm:$RHS))>; - // 32-bit immediate using two piece so_imms or movw + movt. // This is a single pseudo instruction, the benefit is that it can be remat'd // as a single unit instead of having to handle reg inputs. // FIXME: Remove this when we can do generalized remat. -let isReMaterializable = 1 in +let isReMaterializable = 1, isMoveImm = 1 in def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2, "", [(set GPR:$dst, (arm_i32imm:$src))]>, Requires<[IsARM]>; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index d7a22a48424..253b541c192 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -711,6 +711,7 @@ def tLSRrr : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMOVsr, T1DataProcessing<0b0011>; // move register +let isMoveImm = 1 in def tMOVi8 : T1sI<(outs tGPR:$dst), (ins i32imm:$src), IIC_iMOVi, "mov", "\t$dst, $src", [(set tGPR:$dst, imm0_255:$src)]>, @@ -844,7 +845,7 @@ def tSXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr, let isCompare = 1, isCommutable = 1, Defs = [CPSR] in def tTST : T1pI<(outs), (ins tGPR:$lhs, tGPR:$rhs), IIC_iTSTr, "tst", "\t$lhs, $rhs", - [(ARMcmpZ (and tGPR:$lhs, tGPR:$rhs), 0)]>, + [(ARMcmpZ (and_su tGPR:$lhs, tGPR:$rhs), 0)]>, T1DataProcessing<0b1000>; // zero-extend byte @@ -877,6 +878,7 @@ def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr, "mov", "\t$dst, $rhs", []>, T1Special<{1,0,?,?}>; +let 
isMoveImm = 1 in def tMOVCCi : T1pIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMOVi, "mov", "\t$dst, $rhs", []>, T1General<{1,0,0,?,?}>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index fd7fb441492..ace133339e3 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1591,7 +1591,8 @@ def t2MOVr : T2sTwoReg<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr, } // AddedComplexity to ensure isel tries t2MOVi before t2MOVi16. -let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = 1 in +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1, + AddedComplexity = 1 in def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi, "mov", ".w\t$Rd, $imm", [(set rGPR:$Rd, t2_so_imm:$imm)]> { @@ -1603,7 +1604,7 @@ def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi, let Inst{15} = 0; } -let isReMaterializable = 1, isAsCheapAsAMove = 1 in +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins i32imm:$imm), IIC_iMOVi, "movw", "\t$Rd, $imm", [(set rGPR:$Rd, imm0_65535:$imm)]> { @@ -2519,10 +2520,10 @@ def : T2Pat<(ARMcmpZ GPR:$src, t2_so_imm_neg:$imm), defm t2TST : T2I_cmp_irs<0b0000, "tst", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi, - BinOpFrag<(ARMcmpZ (and node:$LHS, node:$RHS), 0)>>; + BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>>; defm t2TEQ : T2I_cmp_irs<0b0100, "teq", IIC_iTSTi, IIC_iTSTr, IIC_iTSTsi, - BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>>; + BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>; // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use @@ -2541,6 +2542,7 @@ def t2MOVCCr : T2I<(outs rGPR:$dst), (ins rGPR:$false, rGPR:$true), IIC_iCMOVr, let Inst{7-4} = 0b0000; } +let isMoveImm = 1 in def t2MOVCCi : T2I<(outs rGPR:$dst), (ins rGPR:$false, t2_so_imm:$true), IIC_iCMOVi, "mov", ".w\t$dst, $true", [/*(set rGPR:$dst,(ARMcmov 
rGPR:$false,t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>, @@ -2553,6 +2555,7 @@ def t2MOVCCi : T2I<(outs rGPR:$dst), (ins rGPR:$false, t2_so_imm:$true), let Inst{15} = 0; } +let isMoveImm = 1 in def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, i32imm:$imm), IIC_iCMOVi, "movw", "\t$Rd, $imm", []>, @@ -2573,10 +2576,12 @@ def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, i32imm:$imm), let Inst{7-0} = imm{7-0}; } +let isMoveImm = 1 in def t2MOVCCi32imm : PseudoInst<(outs rGPR:$dst), (ins rGPR:$false, i32imm:$src, pred:$p), IIC_iCMOVix2, "", []>, RegConstraint<"$false = $dst">; +let isMoveImm = 1 in def t2MVNCCi : T2I<(outs rGPR:$dst), (ins rGPR:$false, t2_so_imm:$true), IIC_iCMOVi, "mvn", ".w\t$dst, $true", [/*(set rGPR:$dst,(ARMcmov rGPR:$false,t2_so_imm_not:$true, @@ -3014,24 +3019,10 @@ def t2RFEIA : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base", // Non-Instruction Patterns // -// Two piece so_imms. -def : T2Pat<(or rGPR:$LHS, t2_so_imm2part:$RHS), - (t2ORRri (t2ORRri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), - (t2_so_imm2part_2 imm:$RHS))>; -def : T2Pat<(xor rGPR:$LHS, t2_so_imm2part:$RHS), - (t2EORri (t2EORri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), - (t2_so_imm2part_2 imm:$RHS))>; -def : T2Pat<(add rGPR:$LHS, t2_so_imm2part:$RHS), - (t2ADDri (t2ADDri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), - (t2_so_imm2part_2 imm:$RHS))>; -def : T2Pat<(add rGPR:$LHS, t2_so_neg_imm2part:$RHS), - (t2SUBri (t2SUBri rGPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)), - (t2_so_neg_imm2part_2 imm:$RHS))>; - // 32-bit immediate using movw + movt. // This is a single pseudo instruction to make it re-materializable. // FIXME: Remove this when we can do generalized remat. 
-let isReMaterializable = 1 in +let isReMaterializable = 1, isMoveImm = 1 in def t2MOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVix2, "", [(set rGPR:$dst, (i32 imm:$src))]>, Requires<[IsThumb, HasV6T2]>; diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll index 461150f06d7..50c638b7393 100644 --- a/test/CodeGen/ARM/arm-and-tst-peephole.ll +++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -27,9 +27,8 @@ tailrecurse: ; preds = %sw.bb, %entry ; ARM-NEXT: beq ; THUMB: movs r5, #3 -; THUMB-NEXT: mov r6, r4 -; THUMB-NEXT: ands r6, r5 -; THUMB-NEXT: tst r4, r5 +; THUMB-NEXT: ands r5, r4 +; THUMB-NEXT: cmp r5, #0 ; THUMB-NEXT: beq ; T2: ands r12, r12, #3 diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll index 21bc5fa8279..5dabfc3a82a 100644 --- a/test/CodeGen/ARM/select_xform.ll +++ b/test/CodeGen/ARM/select_xform.ll @@ -8,7 +8,8 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { ; ARM: movgt r0, r1 ; T2: t1: -; T2: sub.w r0, r1, #-2147483648 +; T2: mvn r0, #-2147483648 +; T2: add r0, r1 ; T2: movgt r0, r1 %tmp1 = icmp sgt i32 %c, 10 %tmp2 = select i1 %tmp1, i32 0, i32 2147483647 diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll index 14d04a4d8f8..37a15ff7b52 100644 --- a/test/CodeGen/Thumb2/machine-licm.ll +++ b/test/CodeGen/Thumb2/machine-licm.ll @@ -77,10 +77,49 @@ bb2: } ; CHECK-NOT: LCPI1_0: -; CHECK: .subsections_via_symbols declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone + +; rdar://8241368 +; isel should not fold immediate into eor's which would have prevented LICM. 
+define zeroext i16 @t3(i8 zeroext %data, i16 zeroext %crc) nounwind readnone { +; CHECK: t3: +bb.nph: +; CHECK: bb.nph +; CHECK: movw {{(r[0-9])|(lr)}}, #32768 +; CHECK: movs {{(r[0-9])|(lr)}}, #8 +; CHECK: movw [[REGISTER:(r[0-9])|(lr)]], #16386 +; CHECK: movw {{(r[0-9])|(lr)}}, #65534 +; CHECK: movt {{(r[0-9])|(lr)}}, #65535 + br label %bb + +bb: ; preds = %bb, %bb.nph +; CHECK: bb +; CHECK: eor.w {{(r[0-9])|(lr)}}, {{(r[0-9])|(lr)}}, [[REGISTER]] +; CHECK: eor.w +; CHECK-NOT: eor +; CHECK: and + %data_addr.013 = phi i8 [ %data, %bb.nph ], [ %8, %bb ] ; [#uses=2] + %crc_addr.112 = phi i16 [ %crc, %bb.nph ], [ %crc_addr.2, %bb ] ; [#uses=3] + %i.011 = phi i8 [ 0, %bb.nph ], [ %7, %bb ] ; [#uses=1] + %0 = trunc i16 %crc_addr.112 to i8 ; [#uses=1] + %1 = xor i8 %data_addr.013, %0 ; [#uses=1] + %2 = and i8 %1, 1 ; [#uses=1] + %3 = icmp eq i8 %2, 0 ; [#uses=2] + %4 = xor i16 %crc_addr.112, 16386 ; [#uses=1] + %crc_addr.0 = select i1 %3, i16 %crc_addr.112, i16 %4 ; [#uses=1] + %5 = lshr i16 %crc_addr.0, 1 ; [#uses=2] + %6 = or i16 %5, -32768 ; [#uses=1] + %crc_addr.2 = select i1 %3, i16 %5, i16 %6 ; [#uses=2] + %7 = add i8 %i.011, 1 ; [#uses=2] + %8 = lshr i8 %data_addr.013, 1 ; [#uses=1] + %exitcond = icmp eq i8 %7, 8 ; [#uses=1] + br i1 %exitcond, label %bb8, label %bb + +bb8: ; preds = %bb + ret i16 %crc_addr.2 +} diff --git a/test/CodeGen/Thumb2/thumb2-mov.ll b/test/CodeGen/Thumb2/thumb2-mov.ll index 1dc3614993b..adb6dde2c78 100644 --- a/test/CodeGen/Thumb2/thumb2-mov.ll +++ b/test/CodeGen/Thumb2/thumb2-mov.ll @@ -53,7 +53,7 @@ define i32 @t2_const_var2_2_ok_1(i32 %lhs) { define i32 @t2_const_var2_2_ok_2(i32 %lhs) { ;CHECK: t2_const_var2_2_ok_2: -;CHECK: add.w r0, r0, #-1426063360 +;CHECK: add.w r0, r0, #2868903936 ;CHECK: add.w r0, r0, #47616 %ret = add i32 %lhs, 2868951552 ; 0xab00ba00 ret i32 %ret @@ -61,7 +61,7 @@ define i32 @t2_const_var2_2_ok_2(i32 %lhs) { define i32 @t2_const_var2_2_ok_3(i32 %lhs) { ;CHECK: t2_const_var2_2_ok_3: -;CHECK: add.w r0, r0, 
#-1426019584 +;CHECK: add.w r0, r0, #2868947712 ;CHECK: adds r0, #16 %ret = add i32 %lhs, 2868947728 ; 0xab00ab10 ret i32 %ret @@ -69,7 +69,7 @@ define i32 @t2_const_var2_2_ok_3(i32 %lhs) { define i32 @t2_const_var2_2_ok_4(i32 %lhs) { ;CHECK: t2_const_var2_2_ok_4: -;CHECK: add.w r0, r0, #-1426019584 +;CHECK: add.w r0, r0, #2868947712 ;CHECK: add.w r0, r0, #1048592 %ret = add i32 %lhs, 2869996304 ; 0xab10ab10 ret i32 %ret diff --git a/test/CodeGen/Thumb2/thumb2-select_xform.ll b/test/CodeGen/Thumb2/thumb2-select_xform.ll index 56cb1f6fb40..ceefabbbfa2 100644 --- a/test/CodeGen/Thumb2/thumb2-select_xform.ll +++ b/test/CodeGen/Thumb2/thumb2-select_xform.ll @@ -2,8 +2,8 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK: t1 -; CHECK: sub.w r0, r1, #-2147483648 -; CHECK: subs r0, #1 +; CHECK: mvn r0, #-2147483648 +; CHECK: add r0, r1 ; CHECK: cmp r2, #10 ; CHECK: it gt ; CHECK: movgt r0, r1 diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp index 575852b1ad7..a28b1d58d7e 100644 --- a/utils/TableGen/CodeGenInstruction.cpp +++ b/utils/TableGen/CodeGenInstruction.cpp @@ -286,6 +286,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R) : TheDef(R), Operands(R) { isBranch = R->getValueAsBit("isBranch"); isIndirectBranch = R->getValueAsBit("isIndirectBranch"); isCompare = R->getValueAsBit("isCompare"); + isMoveImm = R->getValueAsBit("isMoveImm"); isBarrier = R->getValueAsBit("isBarrier"); isCall = R->getValueAsBit("isCall"); canFoldAsLoad = R->getValueAsBit("canFoldAsLoad"); diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h index 9cd23e6fa82..d58bfb12968 100644 --- a/utils/TableGen/CodeGenInstruction.h +++ b/utils/TableGen/CodeGenInstruction.h @@ -213,6 +213,7 @@ namespace llvm { bool isBranch; bool isIndirectBranch; bool isCompare; + bool isMoveImm; bool isBarrier; bool isCall; bool canFoldAsLoad; diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp index 
e04ab6c4ef9..2b684bede3e 100644 --- a/utils/TableGen/InstrInfoEmitter.cpp +++ b/utils/TableGen/InstrInfoEmitter.cpp @@ -271,6 +271,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num, if (Inst.isBranch) OS << "|(1<