diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 0223febe967..278de0269b6 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -295,6 +295,10 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
 
   printAndVerify(PM, "After Instruction Selection",
                  /* allowDoubleDefs= */ true);
+  // Optimize PHIs before DCE: removing dead PHI cycles may make more
+  // instructions dead.
+  if (OptLevel != CodeGenOpt::None)
+    PM.add(createOptimizePHIsPass());
 
   // Delete dead machine instructions regardless of optimization level.
   PM.add(createDeadMachineInstructionElimPass());
@@ -303,7 +307,6 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
 
   if (OptLevel != CodeGenOpt::None) {
     PM.add(createOptimizeExtsPass());
-    PM.add(createOptimizePHIsPass());
     if (!DisableMachineLICM)
       PM.add(createMachineLICMPass());
     if (!DisableMachineSink)
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index 5b3fa6a68ab..2717d4d5cef 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -19,11 +19,12 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Function.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(NumPHICycles, "Number of PHI cycles replaced");
+STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles");
 
 namespace {
   class OptimizePHIs : public MachineFunctionPass {
@@ -42,9 +43,13 @@ namespace {
     }
 
   private:
-    bool IsSingleValuePHICycle(const MachineInstr *MI, unsigned &SingleValReg,
-                               SmallSet<unsigned, 16> &RegsInCycle);
-    bool ReplacePHICycles(MachineBasicBlock &MBB);
+    typedef SmallPtrSet<MachineInstr*, 16> InstrSet;
+    typedef SmallPtrSetIterator<MachineInstr*> InstrSetIterator;
+
+    bool IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg,
+                               InstrSet &PHIsInCycle);
+    bool IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle);
+    bool OptimizeBB(MachineBasicBlock &MBB);
   };
 }
 
@@ -58,12 +63,13 @@ bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
   MRI = &Fn.getRegInfo();
   TII = Fn.getTarget().getInstrInfo();
 
-  // Find PHI cycles that can be replaced by a single value. InstCombine
-  // does this, but DAG legalization may introduce new opportunities, e.g.,
-  // when i64 values are split up for 32-bit targets.
+  // Find dead PHI cycles and PHI cycles that can be replaced by a single
+  // value. InstCombine does these optimizations, but DAG legalization may
+  // introduce new opportunities, e.g., when i64 values are split up for
+  // 32-bit targets.
   bool Changed = false;
   for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
-    Changed |= ReplacePHICycles(*I);
+    Changed |= OptimizeBB(*I);
 
   return Changed;
 }
@@ -71,20 +77,20 @@ bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
 /// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands
 /// are copies of SingleValReg, possibly via copies through other PHIs. If
 /// SingleValReg is zero on entry, it is set to the register with the single
-/// non-copy value. RegsInCycle is a set used to keep track of the PHIs that
+/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that
 /// have been scanned.
-bool OptimizePHIs::IsSingleValuePHICycle(const MachineInstr *MI,
+bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
                                          unsigned &SingleValReg,
-                                         SmallSet<unsigned, 16> &RegsInCycle) {
+                                         InstrSet &PHIsInCycle) {
   assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction");
   unsigned DstReg = MI->getOperand(0).getReg();
 
-  // See if we already saw this register.
-  if (!RegsInCycle.insert(DstReg))
+  // See if we already saw this PHI.
+  if (!PHIsInCycle.insert(MI))
     return true;
 
   // Don't scan crazily complex things.
-  if (RegsInCycle.size() == 16)
+  if (PHIsInCycle.size() == 16)
     return false;
 
   // Scan the PHI operands.
@@ -92,7 +98,7 @@ bool OptimizePHIs::IsSingleValuePHICycle(const MachineInstr *MI,
     unsigned SrcReg = MI->getOperand(i).getReg();
     if (SrcReg == DstReg)
       continue;
-    const MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
+    MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
 
     // Skip over register-to-register moves.
     unsigned MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx;
@@ -105,7 +111,7 @@ bool OptimizePHIs::IsSingleValuePHICycle(const MachineInstr *MI,
       return false;
 
     if (SrcMI->isPHI()) {
-      if (!IsSingleValuePHICycle(SrcMI, SingleValReg, RegsInCycle))
+      if (!IsSingleValuePHICycle(SrcMI, SingleValReg, PHIsInCycle))
         return false;
     } else {
       // Fail if there is more than one non-phi/non-move register.
@@ -117,9 +123,35 @@ bool OptimizePHIs::IsSingleValuePHICycle(const MachineInstr *MI,
   return true;
 }
 
-/// ReplacePHICycles - Find PHI cycles that can be replaced by a single
-/// value and remove them.
-bool OptimizePHIs::ReplacePHICycles(MachineBasicBlock &MBB) {
+/// IsDeadPHICycle - Check if the register defined by a PHI is only used by
+/// other PHIs in a cycle.
+bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) {
+  assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction");
+  unsigned DstReg = MI->getOperand(0).getReg();
+  assert(TargetRegisterInfo::isVirtualRegister(DstReg) &&
+         "PHI destination is not a virtual register");
+
+  // See if we already saw this PHI.
+  if (!PHIsInCycle.insert(MI))
+    return true;
+
+  // Don't scan crazily complex things.
+  if (PHIsInCycle.size() == 16)
+    return false;
+
+  for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DstReg),
+         E = MRI->use_end(); I != E; ++I) {
+    MachineInstr *UseMI = &*I;
+    if (!UseMI->isPHI() || !IsDeadPHICycle(UseMI, PHIsInCycle))
+      return false;
+  }
+
+  return true;
+}
+
+/// OptimizeBB - Remove dead PHI cycles and PHI cycles that can be replaced by
+/// a single value.
+bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
   bool Changed = false;
   for (MachineBasicBlock::iterator
          MII = MBB.begin(), E = MBB.end(); MII != E; ) {
@@ -127,14 +159,32 @@ bool OptimizePHIs::ReplacePHICycles(MachineBasicBlock &MBB) {
     if (!MI->isPHI())
       break;
 
+    // Check for single-value PHI cycles.
     unsigned SingleValReg = 0;
-    SmallSet<unsigned, 16> RegsInCycle;
-    if (IsSingleValuePHICycle(MI, SingleValReg, RegsInCycle) &&
+    InstrSet PHIsInCycle;
+    if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) &&
         SingleValReg != 0) {
       MRI->replaceRegWith(MI->getOperand(0).getReg(), SingleValReg);
       MI->eraseFromParent();
       ++NumPHICycles;
       Changed = true;
+      continue;
+    }
+
+    // Check for dead PHI cycles.
+    PHIsInCycle.clear();
+    if (IsDeadPHICycle(MI, PHIsInCycle)) {
+      for (InstrSetIterator PI = PHIsInCycle.begin(), PE = PHIsInCycle.end();
+           PI != PE; ++PI) {
+        MachineInstr *PhiMI = *PI;
+        // Step past a PHI that is about to be erased, but never dereference
+        // MII once it has reached the end of the block.
+        if (MII != E && &*MII == PhiMI)
+          ++MII;
+        PhiMI->eraseFromParent();
+      }
+      ++NumDeadPHICycles;
+      Changed = true;
     }
   }
   return Changed;
diff --git a/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll b/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll
index 997257885e6..0f23ee75683 100644
--- a/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll
+++ b/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll
@@ -29,6 +29,44 @@ return: ; preds = %bb, %entry
   ret i32 undef
 }
 
+define arm_apcscc i32 @test_dead_cycle(i32 %n) nounwind {
+; CHECK: test_dead_cycle:
+; CHECK: blx
+; CHECK-NOT: mov
+; CHECK: blx
+entry:
+  %0 = icmp eq i32 %n, 1 ; <i1> [#uses=1]
+  br i1 %0, label %return, label %bb.nph
+
+bb.nph: ; preds = %entry
+  %tmp = add i32 %n, -1 ; <i32> [#uses=2]
+  br label %bb
+
+bb: ; preds = %bb.nph, %bb2
+  %indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb2 ] ; <i32> [#uses=2]
+  %u.17 = phi i64 [ undef, %bb.nph ], [ %u.0, %bb2 ] ; <i64> [#uses=2]
+  %tmp9 = sub i32 %tmp, %indvar ; <i32> [#uses=1]
+  %1 = icmp sgt i32 %tmp9, 1 ; <i1> [#uses=1]
+  br i1 %1, label %bb1, label %bb2
+
+bb1: ; preds = %bb
+  %2 = tail call arm_apcscc i32 @f() nounwind ; <i32> [#uses=1]
+  %tmp6 = zext i32 %2 to i64 ; <i64> [#uses=1]
+  %mask = and i64 %u.17, -4294967296 ; <i64> [#uses=1]
+  %ins = or i64 %tmp6, %mask ; <i64> [#uses=1]
+  tail call arm_apcscc void @g(i64 %ins) nounwind
+  br label %bb2
+
+bb2: ; preds = %bb1, %bb
+  %u.0 = phi i64 [ %ins, %bb1 ], [ %u.17, %bb ] ; <i64> [#uses=2]
+  %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
+  %exitcond = icmp eq i32 %indvar.next, %tmp ; <i1> [#uses=1]
+  br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb2, %entry
+  ret i32 undef
+}
+
 declare arm_apcscc i32 @f()
 
 declare arm_apcscc void @g(i64)
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
index 79351637618..ff178b42fb3 100644
--- a/test/CodeGen/Thumb2/thumb2-spill-q.ll
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -12,8 +12,8 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
 define arm_apcscc void @aaa(%quuz* %this, i8* %block) {
 ; CHECK: aaa:
 ; CHECK: bic r4, r4, #15
-; CHECK: vst1.64 {{.*}}[r{{.*}}, :128]
-; CHECK: vld1.64 {{.*}}[r{{.*}}, :128]
+; CHECK: vst1.64 {{.*}}[{{.*}}, :128]
+; CHECK: vld1.64 {{.*}}[{{.*}}, :128]
 entry:
   %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
   store float 6.300000e+01, float* undef, align 4
diff --git a/test/CodeGen/X86/pre-split8.ll b/test/CodeGen/X86/pre-split8.ll
index ea4b9496b3c..0684bd036ce 100644
--- a/test/CodeGen/X86/pre-split8.ll
+++ b/test/CodeGen/X86/pre-split8.ll
@@ -20,7 +20,7 @@ bb: ; preds = %bb9.i, %entry
 
 bb9.i: ; preds = %bb
   %2 = fsub double %.rle4, %0 ; <double> [#uses=0]
-  %3 = tail call double @asin(double 0.000000e+00) nounwind readonly ; <double> [#uses=0]
+  %3 = tail call double @asin(double %.rle4) nounwind readonly ; <double> [#uses=0]
   %4 = fmul double 0.000000e+00, %0 ; <double> [#uses=1]
   %5 = tail call double @tan(double 0.000000e+00) nounwind readonly ; <double> [#uses=0]
   %6 = fmul double %4, 0.000000e+00 ; <double> [#uses=1]
diff --git a/test/CodeGen/X86/pre-split9.ll b/test/CodeGen/X86/pre-split9.ll
index c27d925d43e..86dda33533f 100644
--- a/test/CodeGen/X86/pre-split9.ll
+++ b/test/CodeGen/X86/pre-split9.ll
@@ -22,7 +22,7 @@ bb: ; preds = %bb9.i, %entry
 
 bb9.i: ; preds = %bb
   %2 = fsub double %.rle4, %0 ; <double> [#uses=0]
-  %3 = tail call double @asin(double 0.000000e+00) nounwind readonly ; <double> [#uses=0]
+  %3 = tail call double @asin(double %.rle4) nounwind readonly ; <double> [#uses=0]
   %4 = tail call double @sin(double 0.000000e+00) nounwind readonly ; <double> [#uses=1]
   %5 = fmul double %4, %0 ; <double> [#uses=1]
   %6 = tail call double @tan(double 0.000000e+00) nounwind readonly ; <double> [#uses=0]