//===-- SIFoldOperands.cpp - Fold operands --- ----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
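/// \brief Fold immediates and simple register operands produced by mov and
/// copy instructions directly into the instructions that use them.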
//===----------------------------------------------------------------------===//
//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "si-fold-operands"
using namespace llvm;

namespace {

class SIFoldOperands : public MachineFunctionPass {
public:
  static char ID;

public:
  SIFoldOperands() : MachineFunctionPass(ID) {
    initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  const char *getPassName() const override {
    return "SI Fold Operands";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

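// A single pending fold: a use (UseMI, UseOpNo) together with what to place
// there. OpToFold == nullptr means an immediate fold, with the value held in
// ImmToFold; otherwise OpToFold points at the register operand to substitute.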
struct FoldCandidate {
  MachineInstr *UseMI;
  unsigned UseOpNo;
  MachineOperand *OpToFold;
  uint64_t ImmToFold;

  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
                UseMI(MI), UseOpNo(OpNo) {

    if (FoldOp->isImm()) {
      OpToFold = nullptr;
      ImmToFold = FoldOp->getImm();
    } else {
      assert(FoldOp->isReg());
      OpToFold = FoldOp;
    }
  }

  bool isImm() const {
    return !OpToFold;
  }
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(SIFoldOperands, DEBUG_TYPE,
                      "SI Fold Operands", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(SIFoldOperands, DEBUG_TYPE,
                    "SI Fold Operands", false, false)

char SIFoldOperands::ID = 0;

char &llvm::SIFoldOperandsID = SIFoldOperands::ID;

FunctionPass *llvm::createSIFoldOperandsPass() {
  return new SIFoldOperands();
}

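// Returns true for the simple mov/copy opcodes whose single source operand can
// safely be propagated into the instructions that use their result.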
static bool isSafeToFold(unsigned Opcode) {
  switch(Opcode) {
  case AMDGPU::V_MOV_B32_e32:
  case AMDGPU::V_MOV_B32_e64:
  case AMDGPU::V_MOV_B64_PSEUDO:
  case AMDGPU::S_MOV_B32:
  case AMDGPU::S_MOV_B64:
  case AMDGPU::COPY:
    return true;
  default:
    return false;
  }
}

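// Rewrite the use operand recorded in Fold: an immediate replaces the register
// operand outright, and a virtual register source is substituted in (composing
// sub-registers). Physical-register sources are not handled yet.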
static bool updateOperand(FoldCandidate &Fold,
                          const TargetRegisterInfo &TRI) {
  MachineInstr *MI = Fold.UseMI;
  MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
  assert(Old.isReg());

  if (Fold.isImm()) {
    Old.ChangeToImmediate(Fold.ImmToFold);
    return true;
  }

  MachineOperand *New = Fold.OpToFold;
  if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
      TargetRegisterInfo::isVirtualRegister(New->getReg())) {
    Old.substVirtReg(New->getReg(), New->getSubReg(), TRI);
    return true;
  }

  // FIXME: Handle physical registers.

  return false;
}

static bool isUseMIInFoldList(const std::vector<FoldCandidate> &FoldList,
                              const MachineInstr *MI) {
  for (auto Candidate : FoldList) {
    if (Candidate.UseMI == MI)
      return true;
  }
  return false;
}

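// Try to record a fold of OpToFold into operand OpNo of MI. If the operand is
// not legal there as-is, first try rewriting v_mac_f32_e64 to v_mad_f32 (when
// folding into src2), and otherwise try commuting MI so the fold becomes
// legal.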
static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
                             MachineInstr *MI, unsigned OpNo,
                             MachineOperand *OpToFold,
                             const SIInstrInfo *TII) {
  if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {

    // Special case for v_mac_f32_e64 if we are trying to fold into src2
    unsigned Opc = MI->getOpcode();
    if (Opc == AMDGPU::V_MAC_F32_e64 &&
        (int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
      // Check if changing this to a v_mad_f32 instruction will allow us to
      // fold the operand.
      MI->setDesc(TII->get(AMDGPU::V_MAD_F32));
      bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
      if (FoldAsMAD) {
        MI->untieRegOperand(OpNo);
        return true;
      }
      MI->setDesc(TII->get(Opc));
    }

    // If we are already folding into another operand of MI, then
    // we can't commute the instruction, otherwise we risk making the
    // other fold illegal.
    if (isUseMIInFoldList(FoldList, MI))
      return false;

    // Operand is not legal, so try to commute the instruction to
    // see if this makes it possible to fold.
    unsigned CommuteIdx0;
    unsigned CommuteIdx1;
    bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);

    if (CanCommute) {
      if (CommuteIdx0 == OpNo)
        OpNo = CommuteIdx1;
      else if (CommuteIdx1 == OpNo)
        OpNo = CommuteIdx0;
    }

    if (!CanCommute || !TII->commuteInstruction(MI))
      return false;

    if (!TII->isOperandLegal(MI, OpNo, OpToFold))
      return false;
  }

  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
  return true;
}

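// Walk every block and, for each foldable mov/copy, collect the uses of its
// result that can legally take the source operand directly, then rewrite those
// uses. A sketch of the basic immediate case, in illustrative (not verbatim)
// machine IR:
//
//   %vreg1 = V_MOV_B32_e32 1
//   %vreg2 = V_ADD_I32_e32 %vreg1, %vreg0
// becomes
//   %vreg2 = V_ADD_I32_e32 1, %vreg0
//
// The mov itself is not deleted by this pass.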
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIInstrInfo *TII =
      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const SIRegisterInfo &TRI = TII->getRegisterInfo();

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                  BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (!isSafeToFold(MI.getOpcode()))
        continue;

      unsigned OpSize = TII->getOpSize(MI, 1);
      MachineOperand &OpToFold = MI.getOperand(1);
      bool FoldingImm = OpToFold.isImm();

      // FIXME: We could also be folding things like FrameIndexes and
      // TargetIndexes.
      if (!FoldingImm && !OpToFold.isReg())
        continue;

      // Folding immediates with more than one use will increase program size.
      // FIXME: This will also reduce register usage, which may be better
      // in some cases.  A better heuristic is needed.
      if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
          !MRI.hasOneUse(MI.getOperand(0).getReg()))
        continue;

      // FIXME: Fold operands with subregs.
      if (OpToFold.isReg() &&
          (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
           OpToFold.getSubReg()))
        continue;

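      // Gather every legal fold into FoldList first; the candidates are
      // applied together below, once all uses have been examined.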
      std::vector<FoldCandidate> FoldList;
      for (MachineRegisterInfo::use_iterator
           Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
           Use != E; ++Use) {

        MachineInstr *UseMI = Use->getParent();
        const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo());

        // FIXME: Fold operands with subregs.
        if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
            UseOp.isImplicit())) {
          continue;
        }

        APInt Imm;

        if (FoldingImm) {
          unsigned UseReg = UseOp.getReg();
          const TargetRegisterClass *UseRC
            = TargetRegisterInfo::isVirtualRegister(UseReg) ?
            MRI.getRegClass(UseReg) :
            TRI.getPhysRegClass(UseReg);

          Imm = APInt(64, OpToFold.getImm());

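          // For example (illustrative values): folding 0x0000000100000002
          // through sub0 uses the low half, 0x00000002, and through sub1 the
          // high half, 0x00000001.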
          // Split 64-bit constants into 32-bits for folding.
          if (UseOp.getSubReg()) {
            if (UseRC->getSize() != 8)
              continue;

            if (UseOp.getSubReg() == AMDGPU::sub0) {
              Imm = Imm.getLoBits(32);
            } else {
              assert(UseOp.getSubReg() == AMDGPU::sub1);
              Imm = Imm.getHiBits(32);
            }
          }

          // In order to fold immediates into copies, we need to change the
          // copy to a MOV.
          if (UseMI->getOpcode() == AMDGPU::COPY) {
            unsigned DestReg = UseMI->getOperand(0).getReg();
            const TargetRegisterClass *DestRC
              = TargetRegisterInfo::isVirtualRegister(DestReg) ?
              MRI.getRegClass(DestReg) :
              TRI.getPhysRegClass(DestReg);

            unsigned MovOp = TII->getMovOpcode(DestRC);
            if (MovOp == AMDGPU::COPY)
              continue;

            UseMI->setDesc(TII->get(MovOp));
          }
        }

        const MCInstrDesc &UseDesc = UseMI->getDesc();

        // Don't fold into target independent nodes.  Target independent opcodes
        // don't have defined register classes.
        if (UseDesc.isVariadic() ||
            UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
          continue;

        if (FoldingImm) {
          MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
          tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII);
          continue;
        }

        tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII);

        // FIXME: We could try to change the instruction from 64-bit to 32-bit
        // to enable more folding opportunities.  The shrink operands pass
        // already does this.
      }

      for (FoldCandidate &Fold : FoldList) {
        if (updateOperand(Fold, TRI)) {
          // Clear kill flags.
          if (!Fold.isImm()) {
            assert(Fold.OpToFold && Fold.OpToFold->isReg());
            Fold.OpToFold->setIsKill(false);
          }
          DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
                Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
        }
      }
    }
  }
  return false;
}