R600/SI: Fix bug in VGPR spilling

AMDGPU::SI_SPILL_V96_RESTORE was missing from a switch statement, which
caused the srsrc and soffset register to not be set correctly.

This commit replaces the switch statement with a SITargetInfo query
to make sure all spill instructions are covered.

Differential Revision: http://reviews.llvm.org/D9582

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@237164 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tom Stellard 2015-05-12 18:59:17 +00:00
parent eaba2dd2f1
commit 07cf3c32e3
5 changed files with 61 additions and 68 deletions

View File

@ -36,7 +36,8 @@ enum {
DS = 1 << 17, DS = 1 << 17,
MIMG = 1 << 18, MIMG = 1 << 18,
FLAT = 1 << 19, FLAT = 1 << 19,
WQM = 1 << 20 WQM = 1 << 20,
VGPRSpill = 1 << 21
}; };
} }

View File

@ -39,6 +39,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
field bits<1> MIMG = 0; field bits<1> MIMG = 0;
field bits<1> FLAT = 0; field bits<1> FLAT = 0;
field bits<1> WQM = 0; field bits<1> WQM = 0;
field bits<1> VGPRSpill = 0;
// These need to be kept in sync with the enum in SIInstrFlags. // These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = VM_CNT; let TSFlags{0} = VM_CNT;
@ -66,6 +67,7 @@ class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
let TSFlags{18} = MIMG; let TSFlags{18} = MIMG;
let TSFlags{19} = FLAT; let TSFlags{19} = FLAT;
let TSFlags{20} = WQM; let TSFlags{20} = WQM;
let TSFlags{21} = VGPRSpill;
// Most instructions require adjustments after selection to satisfy // Most instructions require adjustments after selection to satisfy
// operand requirements. // operand requirements.

View File

@ -216,6 +216,10 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::WQM; return get(Opcode).TSFlags & SIInstrFlags::WQM;
} }
bool isVGPRSpill(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
}
bool isInlineConstant(const APInt &Imm) const; bool isInlineConstant(const APInt &Imm) const;
bool isInlineConstant(const MachineOperand &MO, unsigned OpSize) const; bool isInlineConstant(const MachineOperand &MO, unsigned OpSize) const;
bool isLiteralConstant(const MachineOperand &MO, unsigned OpSize) const; bool isLiteralConstant(const MachineOperand &MO, unsigned OpSize) const;

View File

@ -2057,7 +2057,7 @@ defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>; defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> { multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
let UseNamedOperandTable = 1 in { let UseNamedOperandTable = 1, VGPRSpill = 1 in {
def _SAVE : InstSI < def _SAVE : InstSI <
(outs), (outs),
(ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc, (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
@ -2070,7 +2070,7 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
(ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
"", [] "", []
>; >;
} // End UseNamedOperandTable = 1 } // End UseNamedOperandTable = 1, VGPRSpill = 1
} }
defm SI_SPILL_V32 : SI_SPILL_VGPR <VGPR_32>; defm SI_SPILL_V32 : SI_SPILL_VGPR <VGPR_32>;

View File

@ -128,80 +128,66 @@ bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *I; MachineInstr &MI = *I;
RS.forward(I); RS.forward(I);
DebugLoc DL = MI.getDebugLoc(); DebugLoc DL = MI.getDebugLoc();
switch(MI.getOpcode()) { if (!TII->isVGPRSpill(MI.getOpcode()))
default: break; continue;
case AMDGPU::SI_SPILL_V512_SAVE:
case AMDGPU::SI_SPILL_V256_SAVE:
case AMDGPU::SI_SPILL_V128_SAVE:
case AMDGPU::SI_SPILL_V96_SAVE:
case AMDGPU::SI_SPILL_V64_SAVE:
case AMDGPU::SI_SPILL_V32_SAVE:
case AMDGPU::SI_SPILL_V32_RESTORE:
case AMDGPU::SI_SPILL_V64_RESTORE:
case AMDGPU::SI_SPILL_V128_RESTORE:
case AMDGPU::SI_SPILL_V256_RESTORE:
case AMDGPU::SI_SPILL_V512_RESTORE:
// Scratch resource // Scratch resource
unsigned ScratchRsrcReg = unsigned ScratchRsrcReg =
RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0); RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE | uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
0xffffffff; // Size 0xffffffff; // Size
unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0) BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
.addExternalSymbol("SCRATCH_RSRC_DWORD0") .addExternalSymbol("SCRATCH_RSRC_DWORD0")
.addReg(ScratchRsrcReg, RegState::ImplicitDefine); .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1) BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
.addExternalSymbol("SCRATCH_RSRC_DWORD1") .addExternalSymbol("SCRATCH_RSRC_DWORD1")
.addReg(ScratchRsrcReg, RegState::ImplicitDefine); .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2) BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
.addImm(Rsrc & 0xffffffff) .addImm(Rsrc & 0xffffffff)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine); .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3) BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
.addImm(Rsrc >> 32) .addImm(Rsrc >> 32)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine); .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
// Scratch Offset // Scratch Offset
if (ScratchOffsetReg == AMDGPU::NoRegister) { if (ScratchOffsetReg == AMDGPU::NoRegister) {
ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE), BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
ScratchOffsetReg) ScratchOffsetReg)
.addFrameIndex(ScratchOffsetFI) .addFrameIndex(ScratchOffsetFI)
.addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
.addReg(AMDGPU::SGPR0, RegState::Undef); .addReg(AMDGPU::SGPR0, RegState::Undef);
} else if (!MBB.isLiveIn(ScratchOffsetReg)) { } else if (!MBB.isLiveIn(ScratchOffsetReg)) {
MBB.addLiveIn(ScratchOffsetReg); MBB.addLiveIn(ScratchOffsetReg);
}
if (ScratchRsrcReg == AMDGPU::NoRegister ||
ScratchOffsetReg == AMDGPU::NoRegister) {
LLVMContext &Ctx = MF.getFunction()->getContext();
Ctx.emitError("ran out of SGPRs for spilling VGPRs");
ScratchRsrcReg = AMDGPU::SGPR0;
ScratchOffsetReg = AMDGPU::SGPR0;
}
MI.getOperand(2).setReg(ScratchRsrcReg);
MI.getOperand(2).setIsKill(true);
MI.getOperand(2).setIsUndef(false);
MI.getOperand(3).setReg(ScratchOffsetReg);
MI.getOperand(3).setIsUndef(false);
MI.getOperand(3).setIsKill(false);
MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));
break;
} }
if (ScratchRsrcReg == AMDGPU::NoRegister ||
ScratchOffsetReg == AMDGPU::NoRegister) {
LLVMContext &Ctx = MF.getFunction()->getContext();
Ctx.emitError("ran out of SGPRs for spilling VGPRs");
ScratchRsrcReg = AMDGPU::SGPR0;
ScratchOffsetReg = AMDGPU::SGPR0;
}
MI.getOperand(2).setReg(ScratchRsrcReg);
MI.getOperand(2).setIsKill(true);
MI.getOperand(2).setIsUndef(false);
MI.getOperand(3).setReg(ScratchOffsetReg);
MI.getOperand(3).setIsUndef(false);
MI.getOperand(3).setIsKill(false);
MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));
} }
} }
return true; return true;