From 07cf3c32e3d621afd3c3541db76f467a75abf86b Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 12 May 2015 18:59:17 +0000 Subject: [PATCH] R600/SI: Fix bug in VGPR spilling AMDGPU::SI_SPILL_V96_RESTORE was missing from a switch statement, which caused the srsrc and soffset register to not be set correctly. This commit replaces the switch statement with a SITargetInfo query to make sure all spill instructions are covered. Differential Revision: http://reviews.llvm.org/D9582 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237164 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIDefines.h | 3 +- lib/Target/R600/SIInstrFormats.td | 2 + lib/Target/R600/SIInstrInfo.h | 4 + lib/Target/R600/SIInstructions.td | 4 +- lib/Target/R600/SIPrepareScratchRegs.cpp | 116 ++++++++++------------- 5 files changed, 61 insertions(+), 68 deletions(-) diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h index b54014072bf..4727d971ab7 100644 --- a/lib/Target/R600/SIDefines.h +++ b/lib/Target/R600/SIDefines.h @@ -36,7 +36,8 @@ enum { DS = 1 << 17, MIMG = 1 << 18, FLAT = 1 << 19, - WQM = 1 << 20 + WQM = 1 << 20, + VGPRSpill = 1 << 21 }; } diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 5a505cfebfe..3dddd246cec 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -39,6 +39,7 @@ class InstSI pattern> : field bits<1> MIMG = 0; field bits<1> FLAT = 0; field bits<1> WQM = 0; + field bits<1> VGPRSpill = 0; // These need to be kept in sync with the enum in SIInstrFlags. let TSFlags{0} = VM_CNT; @@ -66,6 +67,7 @@ class InstSI pattern> : let TSFlags{18} = MIMG; let TSFlags{19} = FLAT; let TSFlags{20} = WQM; + let TSFlags{21} = VGPRSpill; // Most instructions require adjustments after selection to satisfy // operand requirements. diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index f952c0f0853..64b5120841c 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -216,6 +216,10 @@ public: return get(Opcode).TSFlags & SIInstrFlags::WQM; } + bool isVGPRSpill(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill; + } + bool isInlineConstant(const APInt &Imm) const; bool isInlineConstant(const MachineOperand &MO, unsigned OpSize) const; bool isLiteralConstant(const MachineOperand &MO, unsigned OpSize) const; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 4c4e1aa76f4..b43c802d034 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -2057,7 +2057,7 @@ defm SI_SPILL_S256 : SI_SPILL_SGPR ; defm SI_SPILL_S512 : SI_SPILL_SGPR ; multiclass SI_SPILL_VGPR { - let UseNamedOperandTable = 1 in { + let UseNamedOperandTable = 1, VGPRSpill = 1 in { def _SAVE : InstSI < (outs), (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc, @@ -2070,7 +2070,7 @@ multiclass SI_SPILL_VGPR { (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), "", [] >; - } // End UseNamedOperandTable = 1 + } // End UseNamedOperandTable = 1, VGPRSpill = 1 } defm SI_SPILL_V32 : SI_SPILL_VGPR ; diff --git a/lib/Target/R600/SIPrepareScratchRegs.cpp b/lib/Target/R600/SIPrepareScratchRegs.cpp index 0a57a5bc201..0a7f684552f 100644 --- a/lib/Target/R600/SIPrepareScratchRegs.cpp +++ b/lib/Target/R600/SIPrepareScratchRegs.cpp @@ -128,80 +128,66 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { MachineInstr &MI = *I; RS.forward(I); DebugLoc DL = MI.getDebugLoc(); - switch(MI.getOpcode()) { - default: break; - case AMDGPU::SI_SPILL_V512_SAVE: - case AMDGPU::SI_SPILL_V256_SAVE: - case AMDGPU::SI_SPILL_V128_SAVE: - case AMDGPU::SI_SPILL_V96_SAVE: - case AMDGPU::SI_SPILL_V64_SAVE: - case AMDGPU::SI_SPILL_V32_SAVE: - case AMDGPU::SI_SPILL_V32_RESTORE: - case AMDGPU::SI_SPILL_V64_RESTORE: - case AMDGPU::SI_SPILL_V128_RESTORE: - case AMDGPU::SI_SPILL_V256_RESTORE: - case AMDGPU::SI_SPILL_V512_RESTORE: + if (!TII->isVGPRSpill(MI.getOpcode())) + continue; - // Scratch resource - unsigned ScratchRsrcReg = - RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0); + // Scratch resource + unsigned ScratchRsrcReg = + RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0); - uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE | - 0xffffffff; // Size + uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE | + 0xffffffff; // Size - unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); - unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); - unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); - unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); + unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); + unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); + unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); + unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); - BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0) - .addExternalSymbol("SCRATCH_RSRC_DWORD0") - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0) + .addExternalSymbol("SCRATCH_RSRC_DWORD0") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); - BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1) - .addExternalSymbol("SCRATCH_RSRC_DWORD1") - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1) + .addExternalSymbol("SCRATCH_RSRC_DWORD1") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); - BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2) - .addImm(Rsrc & 0xffffffff) - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2) + .addImm(Rsrc & 0xffffffff) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); - BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3) - .addImm(Rsrc >> 32) - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3) + .addImm(Rsrc >> 32) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); - // Scratch Offset - if (ScratchOffsetReg == AMDGPU::NoRegister) { - ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); - BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE), - ScratchOffsetReg) - .addFrameIndex(ScratchOffsetFI) - .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) - .addReg(AMDGPU::SGPR0, RegState::Undef); - } else if (!MBB.isLiveIn(ScratchOffsetReg)) { - MBB.addLiveIn(ScratchOffsetReg); - } - - if (ScratchRsrcReg == AMDGPU::NoRegister || - ScratchOffsetReg == AMDGPU::NoRegister) { - LLVMContext &Ctx = MF.getFunction()->getContext(); - Ctx.emitError("ran out of SGPRs for spilling VGPRs"); - ScratchRsrcReg = AMDGPU::SGPR0; - ScratchOffsetReg = AMDGPU::SGPR0; - } - MI.getOperand(2).setReg(ScratchRsrcReg); - MI.getOperand(2).setIsKill(true); - MI.getOperand(2).setIsUndef(false); - MI.getOperand(3).setReg(ScratchOffsetReg); - MI.getOperand(3).setIsUndef(false); - MI.getOperand(3).setIsKill(false); - MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true)); - MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true)); - MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true)); - MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true)); - - break; + // Scratch Offset + if (ScratchOffsetReg == AMDGPU::NoRegister) { + ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); + BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE), + ScratchOffsetReg) + .addFrameIndex(ScratchOffsetFI) + .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) + .addReg(AMDGPU::SGPR0, RegState::Undef); + } else if (!MBB.isLiveIn(ScratchOffsetReg)) { + MBB.addLiveIn(ScratchOffsetReg); } + + if (ScratchRsrcReg == AMDGPU::NoRegister || + ScratchOffsetReg == AMDGPU::NoRegister) { + LLVMContext &Ctx = MF.getFunction()->getContext(); + Ctx.emitError("ran out of SGPRs for spilling VGPRs"); + ScratchRsrcReg = AMDGPU::SGPR0; + ScratchOffsetReg = AMDGPU::SGPR0; + } + MI.getOperand(2).setReg(ScratchRsrcReg); + MI.getOperand(2).setIsKill(true); + MI.getOperand(2).setIsUndef(false); + MI.getOperand(3).setReg(ScratchOffsetReg); + MI.getOperand(3).setIsUndef(false); + MI.getOperand(3).setIsKill(false); + MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true)); + MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true)); + MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true)); + MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true)); } } return true;