diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index fc395ee4919..c6be2e46999 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -1736,7 +1736,7 @@ void SIInstrInfo::reserveIndirectRegisters(BitVector &Reserved,
     Reserved.set(AMDGPU::VReg_512RegClass.getRegister(Index));
 }
 
-const MachineOperand *SIInstrInfo::getNamedOperand(const MachineInstr& MI,
+MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
                                                    unsigned OperandName) const {
   int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OperandName);
   if (Idx == -1)
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index 48d6ca5fcf3..e3888cf2c31 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -187,8 +187,7 @@ public:
 
   /// \brief Returns the operand named \p Op. If \p MI does not have an
   /// operand named \c Op, this function returns nullptr.
-  const MachineOperand *getNamedOperand(const MachineInstr& MI,
-                                        unsigned OperandName) const;
+  MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;
 };
 
 namespace AMDGPU {
diff --git a/lib/Target/R600/SIShrinkInstructions.cpp b/lib/Target/R600/SIShrinkInstructions.cpp
index 745c4b65644..529248b1557 100644
--- a/lib/Target/R600/SIShrinkInstructions.cpp
+++ b/lib/Target/R600/SIShrinkInstructions.cpp
@@ -15,6 +15,7 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Function.h"
 #include "llvm/Support/Debug.h"
@@ -24,6 +25,8 @@
 
 STATISTIC(NumInstructionsShrunk,
           "Number of 64-bit instruction reduced to 32-bit.");
+STATISTIC(NumLiteralConstantsFolded,
+          "Number of literal constants folded into 32-bit instructions.");
 
 namespace llvm {
   void initializeSIShrinkInstructionsPass(PassRegistry&);
@@ -109,6 +112,70 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
   return !Clamp || Clamp->getImm() == 0;
 }
 
+/// \brief Fold a 32-bit literal constant into src0 of \p MI when src0 is a
+/// VGPR whose unique def is a move-immediate; the mov is erased once unused.
+/// \p MI must be a VOP1, VOP2, or VOPC instruction.  Nothing is folded unless
+/// \p MRI is still in SSA form.  If src0 cannot be folded and \p TryToCommute
+/// is set, \p MI is commuted and folding is attempted once more.
+static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
+                           MachineRegisterInfo &MRI, bool TryToCommute = true) {
+  if (!MRI.isSSA())
+    return;
+
+  assert(TII->isVOP1(MI.getOpcode()) || TII->isVOP2(MI.getOpcode()) ||
+         TII->isVOPC(MI.getOpcode()));
+
+  const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  MachineOperand *Src0 = TII->getNamedOperand(MI, AMDGPU::OpName::src0);
+
+  // Only one literal constant is allowed per instruction, so if src0 is a
+  // literal constant then we can't do any folding.
+  if (Src0->isImm() && TII->isLiteralConstant(*Src0))
+    return;
+
+  // Literal constants and SGPRs can only be used in Src0, so if Src0 is an
+  // SGPR, we cannot commute the instruction, so we can't fold any literal
+  // constants.
+  if (Src0->isReg() && !isVGPR(Src0, TRI, MRI))
+    return;
+
+  // Try to fold Src0
+  if (Src0->isReg()) {
+    unsigned Reg = Src0->getReg();
+    MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
+    if (Def && Def->isMoveImmediate()) {
+      MachineOperand &MovSrc = Def->getOperand(1);
+      bool ConstantFolded = false;
+
+      if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
+        Src0->ChangeToImmediate(MovSrc.getImm());
+        ConstantFolded = true;
+      } else if (MovSrc.isFPImm()) {
+        // Only single-precision values are folded; the operand receives the
+        // raw bit pattern of the float.
+        const APFloat &APF = MovSrc.getFPImm()->getValueAPF();
+        if (&APF.getSemantics() == &APFloat::IEEEsingle) {
+          MRI.removeRegOperandFromUseList(Src0);
+          Src0->ChangeToImmediate(APF.bitcastToAPInt().getZExtValue());
+          ConstantFolded = true;
+        }
+      }
+      if (ConstantFolded) {
+        // Other instructions may still read Reg, so the mov can only be
+        // deleted once it has no uses left.
+        if (MRI.use_empty(Reg))
+          Def->eraseFromParent();
+        ++NumLiteralConstantsFolded;
+        return;
+      }
+    }
+  }
+
+  // We have failed to fold src0, so commute the instruction and try again.
+  if (TryToCommute && MI.isCommutable() && TII->commuteInstruction(&MI))
+    foldImmediates(MI, TII, MRI, false);
+}
+
 bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
@@ -167,27 +234,28 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
       }
 
       // We can shrink this instruction
-      DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << "\n";);
+      DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << '\n';);
 
-      MachineInstrBuilder MIB =
+      MachineInstrBuilder Inst32 =
           BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));
 
       // dst
-      MIB.addOperand(MI.getOperand(0));
+      Inst32.addOperand(MI.getOperand(0));
 
-      MIB.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
+      Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
 
       const MachineOperand *Src1 =
           TII->getNamedOperand(MI, AMDGPU::OpName::src1);
       if (Src1)
-        MIB.addOperand(*Src1);
+        Inst32.addOperand(*Src1);
 
-      for (const MachineOperand &MO : MI.implicit_operands())
-        MIB.addOperand(MO);
-
-      DEBUG(dbgs() << "e32 MI = "; MI.dump(); dbgs() << "\n";);
       ++NumInstructionsShrunk;
       MI.eraseFromParent();
+
+      foldImmediates(*Inst32, TII, MRI);
+      DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
+
+
     }
   }
   return false;
diff --git a/test/CodeGen/R600/vop-shrink.ll b/test/CodeGen/R600/vop-shrink.ll
index 54e588d8084..f8bc2b4f78f 100644
--- a/test/CodeGen/R600/vop-shrink.ll
+++ b/test/CodeGen/R600/vop-shrink.ll
@@ -34,6 +34,20 @@ endif: ; preds = %else, %if
   ret void
 }
 
+; Test that we fold an immediate that was illegal for a 64-bit op into the
+; 32-bit op when we shrink it.
+
+; FUNC-LABEL: @add_fold
+; SI: V_ADD_F32_e32 v{{[0-9]+}}, 0x44800000
+define void @add_fold(float addrspace(1)* %out) {
+entry:
+  %tmp = call i32 @llvm.r600.read.tidig.x()
+  %tmp1 = uitofp i32 %tmp to float
+  %tmp2 = fadd float %tmp1, 1.024000e+03
+  store float %tmp2, float addrspace(1)* %out
+  ret void
+}
+
 ; Function Attrs: nounwind readnone
 declare i32 @llvm.r600.read.tidig.x() #0
 