diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 335ceae2dbf..1406748e317 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -36,16 +36,17 @@
 #include <algorithm>
 using namespace llvm;
 
-STATISTIC(NumSpills, "Number of register spills");
-STATISTIC(NumPSpills,"Number of physical register spills");
-STATISTIC(NumReMats, "Number of re-materialization");
-STATISTIC(NumDRM   , "Number of re-materializable defs elided");
-STATISTIC(NumStores, "Number of stores added");
-STATISTIC(NumLoads , "Number of loads added");
-STATISTIC(NumReused, "Number of values reused");
-STATISTIC(NumDSE   , "Number of dead stores elided");
-STATISTIC(NumDCE   , "Number of copies elided");
-STATISTIC(NumDSS   , "Number of dead spill slots removed");
+STATISTIC(NumSpills  , "Number of register spills");
+STATISTIC(NumPSpills , "Number of physical register spills");
+STATISTIC(NumReMats  , "Number of re-materializations");
+STATISTIC(NumDRM     , "Number of re-materializable defs elided");
+STATISTIC(NumStores  , "Number of stores added");
+STATISTIC(NumLoads   , "Number of loads added");
+STATISTIC(NumReused  , "Number of values reused");
+STATISTIC(NumDSE     , "Number of dead stores elided");
+STATISTIC(NumDCE     , "Number of copies elided");
+STATISTIC(NumDSS     , "Number of dead spill slots removed");
+STATISTIC(NumCommutes, "Number of instructions commuted");
 
 namespace {
   enum SpillerName { simple, local };
@@ -356,6 +357,13 @@
                               AvailableSpills &Spills, BitVector &RegKills,
                               std::vector<MachineOperand*> &KillOps,
                               VirtRegMap &VRM);
+    bool CommuteToFoldReload(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator &MII,
+                             unsigned VirtReg, unsigned SrcReg, int SS,
+                             BitVector &RegKills,
+                             std::vector<MachineOperand*> &KillOps,
+                             const TargetRegisterInfo *TRI,
+                             VirtRegMap &VRM);
     void SpillRegToStackSlot(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator &MII,
                              int Idx, unsigned PhysReg, int StackSlot,
@@ -874,12 +882,12 @@
 /// This enables unfolding optimization for a subsequent instruction which will
 /// also eliminate the newly introduced store instruction.
 bool LocalSpiller::PrepForUnfoldOpti(MachineBasicBlock &MBB,
-                                    MachineBasicBlock::iterator &MII,
+                                     MachineBasicBlock::iterator &MII,
                                      std::vector<MachineInstr*> &MaybeDeadStores,
-                                    AvailableSpills &Spills,
-                                    BitVector &RegKills,
-                                    std::vector<MachineOperand*> &KillOps,
-                                    VirtRegMap &VRM) {
+                                     AvailableSpills &Spills,
+                                     BitVector &RegKills,
+                                     std::vector<MachineOperand*> &KillOps,
+                                     VirtRegMap &VRM) {
   MachineFunction &MF = *MBB.getParent();
   MachineInstr &MI = *MII;
   unsigned UnfoldedOpc = 0;
@@ -971,6 +979,97 @@ bool LocalSpiller::PrepForUnfoldOpti(MachineBasicBlock &MBB,
   return false;
 }
 
+/// CommuteToFoldReload -
+/// Look for
+/// r1 = load fi#1
+/// r1 = op r1, r2
+/// store r1, fi#1
+///
+/// If op is commutable and r2 is killed, then we can xform these to
+/// r2 = op r2, fi#1
+/// store r2, fi#1
+bool LocalSpiller::CommuteToFoldReload(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator &MII,
+                                       unsigned VirtReg, unsigned SrcReg, int SS,
+                                       BitVector &RegKills,
+                                       std::vector<MachineOperand*> &KillOps,
+                                       const TargetRegisterInfo *TRI,
+                                       VirtRegMap &VRM) {
+  if (MII == MBB.begin() || !MII->killsRegister(SrcReg))
+    return false;
+
+  MachineFunction &MF = *MBB.getParent();
+  MachineInstr &MI = *MII;
+  MachineBasicBlock::iterator DefMII = prior(MII);
+  MachineInstr *DefMI = DefMII;
+  const TargetInstrDesc &TID = DefMI->getDesc();
+  unsigned NewDstIdx;
+  if (DefMII != MBB.begin() &&
+      TID.isCommutable() &&
+      TII->CommuteChangesDestination(DefMI, NewDstIdx)) {
+    MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+    unsigned NewReg = NewDstMO.getReg();
+    if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg))
+      return false;
+    MachineInstr *ReloadMI = prior(DefMII);
+    int FrameIdx;
+    unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx);
+    if (DestReg != SrcReg || FrameIdx != SS)
+      return false;
+    int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false);
+    if (UseIdx == -1)
+      return false;
+    int DefIdx = TID.getOperandConstraint(UseIdx, TOI::TIED_TO);
+    if (DefIdx == -1)
+      return false;
+    assert(DefMI->getOperand(DefIdx).isRegister() &&
+           DefMI->getOperand(DefIdx).getReg() == SrcReg);
+
+    // Now commute def instruction.
+    MachineInstr *CommutedMI = TII->commuteInstruction(DefMI);
+    if (!CommutedMI)
+      return false;
+    SmallVector<unsigned, 1> Ops;
+    Ops.push_back(NewDstIdx);
+    MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, CommutedMI, Ops, SS);
+    if (!FoldedMI) {
+      if (CommutedMI == DefMI)
+        TII->commuteInstruction(CommutedMI);
+      else
+        MBB.erase(CommutedMI);
+      return false;
+    }
+
+    VRM.addSpillSlotUse(SS, FoldedMI);
+    VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
+    // Insert new def MI and spill MI.
+    const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg);
+    TII->storeRegToStackSlot(MBB, MI, NewReg, true, SS, RC);
+    MII = prior(MII);
+    MachineInstr *StoreMI = MII;
+    VRM.addSpillSlotUse(SS, StoreMI);
+    VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+    MII = MBB.insert(MII, FoldedMI);  // Update MII to backtrack.
+
+    // Delete all 3 old instructions.
+    InvalidateKills(MI, RegKills, KillOps);
+    VRM.RemoveMachineInstrFromMaps(&MI);
+    MBB.erase(&MI);
+    if (CommutedMI != DefMI)
+      MBB.erase(CommutedMI);
+    InvalidateKills(*DefMI, RegKills, KillOps);
+    VRM.RemoveMachineInstrFromMaps(DefMI);
+    MBB.erase(DefMI);
+    InvalidateKills(*ReloadMI, RegKills, KillOps);
+    VRM.RemoveMachineInstrFromMaps(ReloadMI);
+    MBB.erase(ReloadMI);
+    ++NumCommutes;
+    return true;
+  }
+
+  return false;
+}
+
 /// findSuperReg - Find the SubReg's super-register of given register class
 /// where its SubIdx sub-register is SubReg.
 static unsigned findSuperReg(const TargetRegisterClass *RC, unsigned SubReg,
@@ -1587,15 +1686,23 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
       if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) {
         assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
                "Src hasn't been allocated yet?");
+
+        if (CommuteToFoldReload(MBB, MII, VirtReg, SrcReg, StackSlot,
+                                RegKills, KillOps, TRI, VRM)) {
+          NextMII = next(MII);
+          BackTracked = true;
+          goto ProcessNextInst;
+        }
+
         // Okay, this is certainly a store of SrcReg to [StackSlot].  Mark
         // this as a potentially dead store in case there is a subsequent
         // store into the stack slot without a read from it.
         MaybeDeadStores[StackSlot] = &MI;
 
         // If the stack slot value was previously available in some other
-        // register, change it now.  Otherwise, make the register available,
-        // in PhysReg.
-        Spills.addAvailable(StackSlot, &MI, SrcReg, false/*don't clobber*/);
+        // register, change it now.  Otherwise, make the register
+        // available in PhysReg.
+        Spills.addAvailable(StackSlot, &MI, SrcReg, false/*!clobber*/);
       }
     }
   }
diff --git a/test/CodeGen/X86/2008-06-13-SpillerCommuting.ll b/test/CodeGen/X86/2008-06-13-SpillerCommuting.ll
new file mode 100644
index 00000000000..5183e5b0c05
--- /dev/null
+++ b/test/CodeGen/X86/2008-06-13-SpillerCommuting.ll
@@ -0,0 +1,42 @@
+; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -relocation-model=pic -stats |& grep {spiller - Number of instructions commuted}
+
+	%struct.CABAC_context_element = type { i8, i8 }
+	%struct.MB_Info_CABAC = type { i8, i8, [2 x i8], i8, i8, i8, i16, i16, [4 x i8], [8 x %struct.MotionVector] }
+	%struct.MotionVector = type { i16, i16 }
+	%struct.RBSP2 = type { i32, i32, i16, i8, i16, i16, <1 x i64>, i32, i32, i32*, i32*, i32*, i32*, i32, i32, i32, i32, i32, i8, i16, i8, %struct.MB_Info_CABAC*, %struct.MB_Info_CABAC*, [2 x %struct.MB_Info_CABAC], [12 x i8], [460 x %struct.CABAC_context_element], [10 x i8], [10 x i8], [10 x i16], [4 x [120 x i32]], [15 x [36 x i8]], [6 x [8 x i8]], i16* }
+	%struct.Slice_Info = type { i32, i8, %struct.seq_parameter_set_rbsp_t*, %struct.seq_parameter_set_rbsp_t, i32, i16*, i8, i8, i8, i8, i16, i32 }
+	%struct.seq_parameter_set_rbsp_t = type { i32, i32, i32 }
+@_ZL21CABAC_CTX_state_table = external constant [64 x i16]		; <[64 x i16]*> [#uses=1]
+@_ZL15rLPS_table_64x4 = external constant [64 x [4 x i8]]		; <[64 x [4 x i8]]*> [#uses=1]
+
+define i32 @_ZN5RBSP220residual_block_cabacEP10Slice_InfoP13MB_Info_CABACS3_hjhhbPtPs(%struct.RBSP2* %this, %struct.Slice_Info* %slice, %struct.MB_Info_CABAC* %up, %struct.MB_Info_CABAC* %left, i8 zeroext %maxNumCoeff, i32 %blk_i, i8 zeroext %iCbCr, i8 zeroext %ctxBlockCat, i8 zeroext %intra_flag, i16* %mask, i16* %res) nounwind {
+entry:
+	%tmp43.i1590 = getelementptr %struct.RBSP2* %this, i32 0, i32 0		; <i32*> [#uses=1]
+	br label %bb803
+
+bb803:		; preds = %_ZN5RBSP211decode_1bitEP21CABAC_context_element.exit1581, %entry
+	%numCoeff.11749 = phi i32 [ 0, %entry ], [ %numCoeff.11749.tmp868, %_ZN5RBSP211decode_1bitEP21CABAC_context_element.exit1581 ]		; <i32> [#uses=1]
+	%tmp28.i1503 = load i8* null, align 1		; <i8> [#uses=1]
+	%tmp30.i1504 = getelementptr %struct.RBSP2* %this, i32 0, i32 25, i32 0, i32 0		; <i8*> [#uses=2]
+	%tmp31.i1505 = load i8* %tmp30.i1504, align 1		; <i8> [#uses=1]
+	%tmp3233.i1506 = zext i8 %tmp31.i1505 to i32		; <i32> [#uses=2]
+	%tmp35.i1507 = getelementptr [64 x i16]* @_ZL21CABAC_CTX_state_table, i32 0, i32 %tmp3233.i1506		; <i16*> [#uses=1]
+	%tmp36.i1508 = load i16* %tmp35.i1507, align 2		; <i16> [#uses=1]
+	%tmp363738.i1509 = zext i16 %tmp36.i1508 to i32		; <i32> [#uses=1]
+	%tmp51.i1514 = getelementptr [64 x [4 x i8]]* @_ZL15rLPS_table_64x4, i32 0, i32 %tmp3233.i1506, i32 0		; <i8*> [#uses=1]
+	%tmp52.i1515 = load i8* %tmp51.i1514, align 1		; <i8> [#uses=1]
+	%tmp5758.i1516 = zext i8 %tmp52.i1515 to i32		; <i32> [#uses=1]
+	%tmp60.i1517 = sub i32 0, %tmp5758.i1516		; <i32> [#uses=1]
+	store i32 %tmp60.i1517, i32* %tmp43.i1590, align 16
+	br i1 false, label %_ZN5RBSP211decode_1bitEP21CABAC_context_element.exit1581, label %bb.i1537
+
+bb.i1537:		; preds = %bb803
+	unreachable
+
+_ZN5RBSP211decode_1bitEP21CABAC_context_element.exit1581:		; preds = %bb803
+	%tmp328329.i1580 = trunc i32 %tmp363738.i1509 to i8		; <i8> [#uses=1]
+	store i8 %tmp328329.i1580, i8* %tmp30.i1504, align 1
+	%toBool865 = icmp eq i8 %tmp28.i1503, 0		; <i1> [#uses=1]
+	%numCoeff.11749.tmp868 = select i1 %toBool865, i32 %numCoeff.11749, i32 0		; <i32> [#uses=1]
+	br label %bb803
+}
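
For readers skimming the patch, a minimal sketch of the rewrite CommuteToFoldReload performs, written as i386 AT&T assembly with hypothetical registers and a hypothetical stack slot (%eax holds r1, %ecx holds r2, and 4(%esp) stands in for fi#1):

    # Before: reload, two-address op, spill.
    movl 4(%esp), %eax        # r1 = load fi#1
    addl %ecx, %eax           # r1 = op r1, r2   (last use of r2)
    movl %eax, 4(%esp)        # store r1, fi#1

    # After commuting the op and folding the reload into it:
    addl 4(%esp), %ecx        # r2 = op r2, fi#1
    movl %ecx, 4(%esp)        # store r2, fi#1

Three instructions become two and the reload's destination register is freed, which is what the NumCommutes statistic counts and the grep in the new test checks for.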