//===-- SIFoldOperands.cpp - Fold operands --------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
/// \brief Fold immediates and register copies into the uses of registers
/// defined by MOV and COPY instructions.
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-fold-operands"
using namespace llvm;

namespace {

class SIFoldOperands : public MachineFunctionPass {
public:
  static char ID;

public:
  SIFoldOperands() : MachineFunctionPass(ID) {
    initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  const char *getPassName() const override {
    return "SI Fold Operands";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,
                      "SI Fold Operands", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,
                    "SI Fold Operands", false, false)

char SIFoldOperands::ID = 0;

char &llvm::SIFoldOperandsID = SIFoldOperands::ID;

FunctionPass *llvm::createSIFoldOperandsPass() {
  return new SIFoldOperands();
}

// Only fold the source of simple full-register moves and copies.
static bool isSafeToFold(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::COPY:
    return true;
  default:
    return false;
  }
}

// Replace the register operand OpNo of MI with New, which may be an
// immediate, a floating-point immediate, or another virtual register.
static bool updateOperand(MachineInstr *MI, unsigned OpNo,
                          const MachineOperand &New,
                          const TargetRegisterInfo &TRI) {
  MachineOperand &Old = MI->getOperand(OpNo);
  assert(Old.isReg());

  if (New.isImm()) {
    Old.ChangeToImmediate(New.getImm());
    return true;
  }

  if (New.isFPImm()) {
    Old.ChangeToFPImmediate(New.getFPImm());
    return true;
  }

  if (New.isReg()) {
    if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
        TargetRegisterInfo::isVirtualRegister(New.getReg())) {
      Old.substVirtReg(New.getReg(), New.getSubReg(), TRI);
      return true;
    }
  }

  // FIXME: Handle physical registers.
  return false;
}
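
// The driver below works in two steps: for every foldable def it first
// collects all uses where the substitution is legal, then performs the
// rewrites afterwards, so the use list is not mutated while it is being
// iterated.
//
// Illustrative example of the rewrite (hypothetical MIR, not taken from a
// test; assumes src0 of V_ADD_I32_e32 accepts an inline immediate):
//
//   %vreg0 = V_MOV_B32_e32 1
//   %vreg1 = V_ADD_I32_e32 %vreg0, %vreg2
// =>
//   %vreg1 = V_ADD_I32_e32 1, %vreg2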
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI.getOpcode()))
        continue;

      MachineOperand &OpToFold = MI.getOperand(1);

      // FIXME: Fold operands with subregs.
      if (OpToFold.isReg() &&
          (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
           OpToFold.getSubReg()))
        continue;

      std::vector<std::pair<MachineInstr *, unsigned>> FoldList;
      for (MachineRegisterInfo::use_iterator
             Use = MRI.use_begin(MI.getOperand(0).getReg()),
             E = MRI.use_end();
           Use != E; ++Use) {

        MachineInstr *UseMI = Use->getParent();
        const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());

        // FIXME: Fold operands with subregs.
        if (UseOp.isReg() && UseOp.getSubReg())
          continue;

        // In order to fold immediates into copies, we need to change the
        // copy to a MOV.
        if ((OpToFold.isImm() || OpToFold.isFPImm()) &&
            UseMI->getOpcode() == AMDGPU::COPY) {
          const TargetRegisterClass *TRC =
              MRI.getRegClass(UseMI->getOperand(0).getReg());

          if (TRC->getSize() == 4) {
            if (TRI.isSGPRClass(TRC))
              UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
            else
              UseMI->setDesc(TII->get(AMDGPU::V_MOV_B32_e32));
          } else if (TRC->getSize() == 8 && TRI.isSGPRClass(TRC)) {
            UseMI->setDesc(TII->get(AMDGPU::S_MOV_B64));
          } else {
            continue;
          }
        }

        const MCInstrDesc &UseDesc = UseMI->getDesc();

        // Don't fold into target-independent nodes. Target-independent
        // opcodes don't have defined register classes.
        if (UseDesc.isVariadic() ||
            UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
          continue;

        // Normal substitution.
        if (TII->isOperandLegal(UseMI, Use.getOperandNo(), &OpToFold)) {
          FoldList.push_back(std::make_pair(UseMI, Use.getOperandNo()));
          continue;
        }

        // FIXME: We could commute the instruction to create more
        // opportunities for folding. This will only be useful if we have
        // 32-bit instructions.

        // FIXME: We could try to change the instruction from 64-bit to
        // 32-bit to enable more folding opportunities. The shrink operands
        // pass already does this.
      }

      for (std::pair<MachineInstr *, unsigned> Fold : FoldList) {
        if (updateOperand(Fold.first, Fold.second, OpToFold, TRI)) {
          // Clear kill flags; the folded register may still be live at
          // other uses.
          if (OpToFold.isReg())
            OpToFold.setIsKill(false);
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo "
                       << Fold.second << " of " << *Fold.first << '\n');
        }
      }
    }
  }
  return false;
}
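
// Usage sketch (an assumption for illustration, not part of this file): the
// AMDGPU target is expected to schedule this pass from its pass
// configuration via the factory defined above, e.g.
//
//   addPass(createSIFoldOperandsPass());
//
// somewhere in the machine-SSA portion of the codegen pipeline, before
// register allocation.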