From 87234703e884cde21164aa93758ff1fb9cd95245 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 5 Dec 2013 05:15:35 +0000 Subject: [PATCH] R600/SI: Add comments for number of used registers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@196467 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUAsmPrinter.cpp | 50 ++++++++++++++++---- lib/Target/R600/AMDGPUAsmPrinter.h | 20 ++++++-- test/CodeGen/R600/register-count-comments.ll | 20 ++++++++ 3 files changed, 76 insertions(+), 14 deletions(-) create mode 100644 test/CodeGen/R600/register-count-comments.ll diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp index 67bdba28787..160e6f730e1 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -46,8 +46,7 @@ extern "C" void LLVMInitializeR600AsmPrinter() { } AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) -{ + : AsmPrinter(TM, Streamer) { DisasmEnabled = TM.getSubtarget().dumpCode() && ! Streamer.hasRawTextSupport(); } @@ -56,6 +55,7 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) /// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle. bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { SetupMachineFunction(MF); + if (OutStreamer.hasRawTextSupport()) { OutStreamer.EmitRawText("@" + MF.getName() + ":"); } @@ -65,9 +65,12 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { ELF::SHT_PROGBITS, 0, SectionKind::getReadOnly()); OutStreamer.SwitchSection(ConfigSection); + const AMDGPUSubtarget &STM = TM.getSubtarget(); + SIProgramInfo KernelInfo; if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) { - EmitProgramInfoSI(MF); + findNumUsedRegistersSI(MF, KernelInfo.NumSGPR, KernelInfo.NumVGPR); + EmitProgramInfoSI(MF, KernelInfo); } else { EmitProgramInfoR600(MF); } @@ -79,6 +82,19 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); EmitFunctionBody(); + if (isVerbose() && OutStreamer.hasRawTextSupport()) { + const MCSectionELF *CommentSection + = Context.getELFSection(".AMDGPU.csdata", + ELF::SHT_PROGBITS, 0, + SectionKind::getReadOnly()); + OutStreamer.SwitchSection(CommentSection); + + OutStreamer.EmitRawText( + Twine("; Kernel info:\n") + + "; NumSgprs: " + Twine(KernelInfo.NumSGPR) + "\n" + + "; NumVgprs: " + Twine(KernelInfo.NumVGPR) + "\n"); + } + if (STM.dumpCode()) { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) MF.dump(); @@ -166,8 +182,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) { } } -void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) { - const AMDGPUSubtarget &STM = TM.getSubtarget(); +void AMDGPUAsmPrinter::findNumUsedRegistersSI(MachineFunction &MF, + unsigned &NumSGPR, + unsigned &NumVGPR) const { unsigned MaxSGPR = 0; unsigned MaxVGPR = 0; bool VCCUsed = false; @@ -252,10 +269,24 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) { } } } - if (VCCUsed) { + + if (VCCUsed) MaxSGPR += 2; - } - SIMachineFunctionInfo * MFI = MF.getInfo(); + + NumSGPR = MaxSGPR; + NumVGPR = MaxVGPR; +} + +void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &Out, + MachineFunction &MF) const { + findNumUsedRegistersSI(MF, Out.NumSGPR, Out.NumVGPR); +} + +void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF, + const SIProgramInfo &KernelInfo) { + const AMDGPUSubtarget &STM = TM.getSubtarget(); + + SIMachineFunctionInfo *MFI = MF.getInfo(); unsigned RsrcReg; switch (MFI->ShaderType) { default: // Fall through @@ -266,7 +297,8 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF) { } OutStreamer.EmitIntValue(RsrcReg, 4); - OutStreamer.EmitIntValue(S_00B028_VGPRS(MaxVGPR / 4) | S_00B028_SGPRS(MaxSGPR / 8), 4); + OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) | + S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4); unsigned LDSAlignShift; if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) { diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h index 05dc9bb672d..3031edded54 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.h +++ b/lib/Target/R600/AMDGPUAsmPrinter.h @@ -22,6 +22,21 @@ namespace llvm { class AMDGPUAsmPrinter : public AsmPrinter { +private: + struct SIProgramInfo { + unsigned NumSGPR; + unsigned NumVGPR; + }; + + void getSIProgramInfo(SIProgramInfo &Out, MachineFunction &MF) const; + void findNumUsedRegistersSI(MachineFunction &MF, + unsigned &NumSGPR, + unsigned &NumVGPR) const; + + /// \brief Emit register usage information so that the GPU driver + /// can correctly setup the GPU state. + void EmitProgramInfoR600(MachineFunction &MF); + void EmitProgramInfoSI(MachineFunction &MF, const SIProgramInfo &KernelInfo); public: explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer); @@ -32,11 +47,6 @@ public: return "AMDGPU Assembly Printer"; } - /// \brief Emit register usage information so that the GPU driver - /// can correctly setup the GPU state. - void EmitProgramInfoR600(MachineFunction &MF); - void EmitProgramInfoSI(MachineFunction &MF); - /// Implemented in AMDGPUMCInstLower.cpp virtual void EmitInstruction(const MachineInstr *MI); diff --git a/test/CodeGen/R600/register-count-comments.ll b/test/CodeGen/R600/register-count-comments.ll new file mode 100644 index 00000000000..a64b2804bde --- /dev/null +++ b/test/CodeGen/R600/register-count-comments.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s + +declare i32 @llvm.SI.tid() nounwind readnone + +; SI-LABEL: @foo: +; SI: .section .AMDGPU.csdata +; SI: ; Kernel info: +; SI: ; NumSgprs: {{[0-9]+}} +; SI: ; NumVgprs: {{[0-9]+}} +define void @foo(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %abase, i32 addrspace(1)* %bbase) nounwind { + %tid = call i32 @llvm.SI.tid() nounwind readnone + %aptr = getelementptr i32 addrspace(1)* %abase, i32 %tid + %bptr = getelementptr i32 addrspace(1)* %bbase, i32 %tid + %outptr = getelementptr i32 addrspace(1)* %out, i32 %tid + %a = load i32 addrspace(1)* %aptr, align 4 + %b = load i32 addrspace(1)* %bptr, align 4 + %result = add i32 %a, %b + store i32 %result, i32 addrspace(1)* %outptr, align 4 + ret void +}