AMDGPU/SI: Update amd_kernel_code_t definition and add assembler support

Reviewers: arsenm

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D10772

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@240839 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tom Stellard 2015-06-26 21:58:31 +00:00
parent 4077bd9b3b
commit 4a888086a4
11 changed files with 753 additions and 203 deletions

View File

@ -118,3 +118,60 @@ assembler.
ISA version, *vendor*, and *arch* will all be stored in a single entry of the
.note section.
.amd_kernel_code_t
^^^^^^^^^^^^^^^^^^
This directive marks the beginning of a list of key / value pairs that are used
to specify the amd_kernel_code_t object that will be emitted by the assembler.
The list must be terminated by the *.end_amd_kernel_code_t* directive. For
any amd_kernel_code_t values that are unspecified a default value will be
used. The default value for all keys is 0, with the following exceptions:
- *kernel_code_version_major* defaults to 1.
- *machine_kind* defaults to 1.
- *machine_version_major*, *machine_version_minor*, and
*machine_version_stepping* are derived from the value of the -mcpu option
that is passed to the assembler.
- *kernel_code_entry_byte_offset* defaults to 256.
- *wavefront_size* defaults to 6.
- *kernarg_segment_alignment*, *group_segment_alignment*, and
*private_segment_alignment* default to 4. Note that alignments are specified
as a power of two, so a value of **n** means an alignment of 2^ **n**.
The *.amd_kernel_code_t* directive must be placed immediately after the
function label and before any instructions.
For a full list of amd_kernel_code_t keys, see the examples in
test/CodeGen/AMDGPU/hsa.s. For an explanation of the meanings of the different
keys, see the comments in lib/Target/AMDGPU/AMDKernelCodeT.h.
Here is an example of a minimal amd_kernel_code_t specification:
.. code-block:: nasm
.hsa_code_object_version 1,0
.hsa_code_object_isa
.text
hello_world:
.amd_kernel_code_t
enable_sgpr_kernarg_segment_ptr = 1
is_ptr64 = 1
compute_pgm_rsrc1_vgprs = 0
compute_pgm_rsrc1_sgprs = 0
compute_pgm_rsrc2_user_sgpr = 2
kernarg_segment_byte_size = 8
wavefront_sgpr_count = 2
workitem_vgpr_count = 3
.end_amd_kernel_code_t
s_load_dwordx2 s[0:1], s[0:1] 0x0
v_mov_b32 v0, 3.14159
s_waitcnt lgkmcnt(0)
v_mov_b32 v1, s0
v_mov_b32 v2, s1
flat_store_dword v0, v[1:2]
s_endpgm

View File

@ -475,125 +475,28 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
}
// Fills in an amd_kernel_code_t header for the kernel in MF using the
// register/scratch/LDS statistics in KernelInfo, then emits it.
//
// NOTE(review): this listing appears to be a flattened diff in which removed
// and added lines are interleaved (see the duplicated parameter line, the two
// assignments to header.code_properties and the two emission paths at the
// end) -- confirm the final shape of the function against the actual tree.
void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
const SIProgramInfo &KernelInfo) const {
const SIProgramInfo &KernelInfo) const {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const AMDGPUSubtarget &STM = MF.getSubtarget<AMDGPUSubtarget>();
amd_kernel_code_t header;
memset(&header, 0, sizeof(header));
header.amd_code_version_major = AMD_CODE_VERSION_MAJOR;
header.amd_code_version_minor = AMD_CODE_VERSION_MINOR;
header.struct_byte_size = sizeof(amd_kernel_code_t);
header.target_chip = STM.getAmdKernelCodeChipID();
header.kernel_code_entry_byte_offset = (1ULL << MF.getAlignment());
AMDGPU::initDefaultAMDKernelCodeT(header, STM.getFeatureBits());
// Pack both registers into one 64-bit field: COMPUTE_PGM_RSRC1 in the low
// 32 bits, COMPUTE_PGM_RSRC2 in the high 32 bits.
header.compute_pgm_resource_registers =
KernelInfo.ComputePGMRSrc1 |
(KernelInfo.ComputePGMRSrc2 << 32);
header.code_properties =
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
AMD_CODE_PROPERTY_IS_PTR64;
// Code Properties:
header.code_properties = AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR |
AMD_CODE_PROPERTY_IS_PTR64;
if (KernelInfo.FlatUsed)
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
if (KernelInfo.ScratchBlocks)
header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
header.workgroup_group_segment_byte_size = KernelInfo.LDSSize;
// MFI->ABIArgOffset is the number of bytes for the kernel arguments
// plus 36. 36 is the number of bytes reserved at the beginning of the
// input buffer to store work-group size information.
// FIXME: We should be adding the size of the implicit arguments
// to this value.
header.kernarg_segment_byte_size = MFI->ABIArgOffset;
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
header.workitem_vgpr_count = KernelInfo.NumVGPR;
// FIXME: What values do I put for these alignments
header.kernarg_segment_alignment = 0;
header.group_segment_alignment = 0;
header.private_segment_alignment = 0;
header.code_type = 1; // HSA_EXT_CODE_KERNEL
header.wavefront_size = STM.getWavefrontSize();
// Emit a version string into a dedicated ".hsa.version" section, then switch
// back to the text section.
MCSectionELF *VersionSection =
OutContext.getELFSection(".hsa.version", ELF::SHT_PROGBITS, 0);
OutStreamer->SwitchSection(VersionSection);
OutStreamer->EmitBytes(Twine("HSA Code Unit:" +
Twine(header.hsail_version_major) + "." +
Twine(header.hsail_version_minor) + ":" +
"AMD:" +
Twine(header.amd_code_version_major) + "." +
Twine(header.amd_code_version_minor) + ":" +
"GFX8.1:0").str());
OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
// In verbose mode, echo the header fields as raw comments.
if (isVerbose()) {
OutStreamer->emitRawComment("amd_code_version_major = " +
Twine(header.amd_code_version_major), false);
OutStreamer->emitRawComment("amd_code_version_minor = " +
Twine(header.amd_code_version_minor), false);
OutStreamer->emitRawComment("struct_byte_size = " +
Twine(header.struct_byte_size), false);
OutStreamer->emitRawComment("target_chip = " +
Twine(header.target_chip), false);
OutStreamer->emitRawComment(" compute_pgm_rsrc1: " +
Twine::utohexstr(KernelInfo.ComputePGMRSrc1),
false);
OutStreamer->emitRawComment(" compute_pgm_rsrc2: " +
Twine::utohexstr(KernelInfo.ComputePGMRSrc2),
false);
// NOTE(review): the label says "private_segment_buffer" but the bit tested
// is ..._PRIVATE_SEGMENT_SIZE -- confirm which one is intended.
OutStreamer->emitRawComment("enable_sgpr_private_segment_buffer = " +
Twine((bool)(header.code_properties &
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE)), false);
OutStreamer->emitRawComment("enable_sgpr_kernarg_segment_ptr = " +
Twine((bool)(header.code_properties &
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR)), false);
OutStreamer->emitRawComment("private_element_size = 2 ", false);
OutStreamer->emitRawComment("is_ptr64 = " +
Twine((bool)(header.code_properties & AMD_CODE_PROPERTY_IS_PTR64)), false);
OutStreamer->emitRawComment("workitem_private_segment_byte_size = " +
Twine(header.workitem_private_segment_byte_size),
false);
OutStreamer->emitRawComment("workgroup_group_segment_byte_size = " +
Twine(header.workgroup_group_segment_byte_size),
false);
OutStreamer->emitRawComment("gds_segment_byte_size = " +
Twine(header.gds_segment_byte_size), false);
OutStreamer->emitRawComment("kernarg_segment_byte_size = " +
Twine(header.kernarg_segment_byte_size), false);
OutStreamer->emitRawComment("wavefront_sgpr_count = " +
Twine(header.wavefront_sgpr_count), false);
OutStreamer->emitRawComment("workitem_vgpr_count = " +
Twine(header.workitem_vgpr_count), false);
OutStreamer->emitRawComment("code_type = " + Twine(header.code_type), false);
OutStreamer->emitRawComment("wavefront_size = " +
Twine((int)header.wavefront_size), false);
OutStreamer->emitRawComment("optimization_level = " +
Twine(header.optimization_level), false);
OutStreamer->emitRawComment("hsail_profile = " +
Twine(header.hsail_profile), false);
OutStreamer->emitRawComment("hsail_machine_model = " +
Twine(header.hsail_machine_model), false);
OutStreamer->emitRawComment("hsail_version_major = " +
Twine(header.hsail_version_major), false);
OutStreamer->emitRawComment("hsail_version_minor = " +
Twine(header.hsail_version_minor), false);
}
// NOTE(review): the header is written both as raw bytes here and through the
// target streamer below; presumably only one of the two paths belongs to the
// final code -- confirm.
OutStreamer->EmitBytes(StringRef((char*)&header, sizeof(header)));
AMDGPUTargetStreamer *TS =
static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
TS->EmitAMDKernelCodeT(header);
}
bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,

View File

@ -12,9 +12,12 @@
#ifndef AMDKERNELCODET_H
#define AMDKERNELCODET_H
#include "llvm/MC/SubtargetFeature.h"
#include <cstddef>
#include <cstdint>
#include "llvm/Support/Debug.h"
//---------------------------------------------------------------------------//
// AMD Kernel Code, and its dependencies //
//---------------------------------------------------------------------------//
@ -142,7 +145,7 @@ enum amd_code_property_mask_t {
/// the GPU flat scratch (SH_STATIC_MEM_CONFIG.ELEMENT_SIZE). This
/// is generally DWORD.
///
/// Use values from the amd_element_byte_size_t enum.
/// Use values from the amd_element_byte_size_t enum.
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT = 11,
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH = 2,
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE = ((1 << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_WIDTH) - 1) << AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT,
@ -171,7 +174,11 @@ enum amd_code_property_mask_t {
/// Indicate if code generated has support for debugging.
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT = 15,
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH = 1,
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT,
AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT = 15,
AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH = 1,
AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED = ((1 << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_WIDTH) - 1) << AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT
};
/// @brief The hsa_ext_control_directives_t specifies the values for the HSAIL
@ -369,7 +376,7 @@ typedef struct hsa_ext_control_directives_s {
/// Scratch Wave Offset must be added by the kernel code and moved to
/// SGPRn-4 for use as the FLAT SCRATCH BASE in flat memory instructions.
///
/// The second SGPR is 32 bit byte size of a single work-items scratch
/// The second SGPR is 32 bit byte size of a single work-item's scratch
/// memory usage. This is directly loaded from the dispatch packet Private
/// Segment Byte Size and rounded up to a multiple of DWORD.
///
@ -385,7 +392,7 @@ typedef struct hsa_ext_control_directives_s {
///
/// Private Segment Size (enable_sgpr_private_segment_size):
/// Number of User SGPR registers: 1. The 32 bit byte size of a single
/// work-items scratch memory allocation. This is the value from the dispatch
/// work-item's scratch memory allocation. This is the value from the dispatch
/// packet. Private Segment Byte Size rounded up by CP to a multiple of DWORD.
///
/// \todo [Does CP need to round this to >4 byte alignment?]
@ -433,7 +440,7 @@ typedef struct hsa_ext_control_directives_s {
/// present
///
/// Work-Group Info (enable_sgpr_workgroup_info):
/// Number of System SGPR registers: 1. {first_wave, 14b0000,
/// Number of System SGPR registers: 1. {first_wave, 14'b0000,
/// ordered_append_term[10:0], threadgroup_size_in_waves[5:0]}
///
/// Private Segment Wave Byte Offset
@ -499,25 +506,14 @@ typedef struct hsa_ext_control_directives_s {
/// Alternatively scalar loads can be used if the kernarg offset is uniform, as
/// the kernarg segment is constant for the duration of the kernel execution.
///
typedef struct amd_kernel_code_s {
/// The AMD major version of the Code Object. Must be the value
/// AMD_CODE_VERSION_MAJOR.
amd_code_version32_t amd_code_version_major;
/// The AMD minor version of the Code Object. Minor versions must be
/// backward compatible. Must be the value
/// AMD_CODE_VERSION_MINOR.
amd_code_version32_t amd_code_version_minor;
/// The byte size of this struct. Must be set to
/// sizeof(amd_kernel_code_t). Used for backward
/// compatibility.
uint32_t struct_byte_size;
/// The target chip instruction set for which code has been
/// generated. Values are from the E_SC_INSTRUCTION_SET enumeration
/// in sc/Interface/SCCommon.h.
uint32_t target_chip;
uint32_t amd_kernel_code_version_major;
uint32_t amd_kernel_code_version_minor;
uint16_t amd_machine_kind;
uint16_t amd_machine_version_major;
uint16_t amd_machine_version_minor;
uint16_t amd_machine_version_stepping;
/// Byte offset (possibly negative) from start of amd_kernel_code_t
/// object to kernel's entry point instruction. The actual code for
@ -535,10 +531,6 @@ typedef struct amd_kernel_code_s {
/// and size. The offset is from the start (possibly negative) of
/// amd_kernel_code_t object. Set both to 0 if no prefetch
/// information is available.
///
/// \todo ttye 11/15/2013 Is the prefetch definition we want? Did
/// not make the size a uint64_t as prefetching more than 4GiB seems
/// excessive.
int64_t kernel_code_prefetch_byte_offset;
uint64_t kernel_code_prefetch_byte_size;
@ -553,11 +545,11 @@ typedef struct amd_kernel_code_s {
/// Shader program settings for CS. Contains COMPUTE_PGM_RSRC1 and
/// COMPUTE_PGM_RSRC2 registers.
amd_compute_pgm_resource_register64_t compute_pgm_resource_registers;
uint64_t compute_pgm_resource_registers;
/// Code properties. See amd_code_property_mask_t for a full list of
/// properties.
amd_code_property32_t code_properties;
uint32_t code_properties;
/// The amount of memory required for the combined private, spill
/// and arg segments for a work-item in bytes. If
@ -629,76 +621,21 @@ typedef struct amd_kernel_code_s {
/// The maximum byte alignment of variables used by the kernel in
/// the specified memory segment. Expressed as a power of two. Must
/// be at least HSA_POWERTWO_16.
hsa_powertwo8_t kernarg_segment_alignment;
hsa_powertwo8_t group_segment_alignment;
hsa_powertwo8_t private_segment_alignment;
uint8_t reserved3;
/// Type of code object.
hsa_ext_code_kind32_t code_type;
/// Reserved for code properties if any are defined in the future.
/// There are currently no code properties so this field must be 0.
uint32_t reserved4;
uint8_t kernarg_segment_alignment;
uint8_t group_segment_alignment;
uint8_t private_segment_alignment;
/// Wavefront size expressed as a power of two. Must be a power of 2
/// in range 1..64 inclusive. Used to support runtime query that
/// obtains wavefront size, which may be used by application to
/// allocate dynamic group memory and set the dispatch work-group
/// size.
hsa_powertwo8_t wavefront_size;
/// The optimization level specified when the kernel was
/// finalized.
uint8_t optimization_level;
/// The HSAIL profile defines which features are used. This
/// information is from the HSAIL version directive. If this
/// amd_kernel_code_t is not generated from an HSAIL compilation
/// unit then must be 0.
hsa_ext_brig_profile8_t hsail_profile;
/// The HSAIL machine model gives the address sizes used by the
/// code. This information is from the HSAIL version directive. If
/// not generated from an HSAIL compilation unit then must still
/// indicate for what machine mode the code is generated.
hsa_ext_brig_machine_model8_t hsail_machine_model;
/// The HSAIL major version. This information is from the HSAIL
/// version directive. If this amd_kernel_code_t is not
/// generated from an HSAIL compilation unit then must be 0.
uint32_t hsail_version_major;
/// The HSAIL minor version. This information is from the HSAIL
/// version directive. If this amd_kernel_code_t is not
/// generated from an HSAIL compilation unit then must be 0.
uint32_t hsail_version_minor;
/// Reserved for HSAIL target options if any are defined in the
/// future. There are currently no target options so this field
/// must be 0.
uint16_t reserved5;
/// Reserved. Must be 0.
uint16_t reserved6;
/// The values should be the actual values used by the finalizer
/// in generating the code. This may be the union of values
/// specified as finalizer arguments and explicit HSAIL control
/// directives. If the finalizer chooses to ignore a control
/// directive, and not generate constrained code, then the control
/// directive should not be marked as enabled even though it was
/// present in the HSAIL or finalizer argument. The values are
/// intended to reflect the constraints that the code actually
/// requires to correctly execute, not the values that were
/// actually specified at finalize time.
hsa_ext_control_directives_t control_directive;
/// The code can immediately follow the amd_kernel_code_t, or can
/// come after subsequent amd_kernel_code_t structs when there are
/// multiple kernels in the compilation unit.
uint8_t wavefront_size;
int32_t call_convention;
uint8_t reserved3[12];
uint64_t runtime_loader_kernel_symbol;
uint64_t control_directives[16];
} amd_kernel_code_t;
#endif // AMDKERNELCODET_H

View File

@ -10,6 +10,7 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
@ -320,6 +321,8 @@ private:
bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
bool ParseDirectiveHSACodeObjectVersion();
bool ParseDirectiveHSACodeObjectISA();
bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
bool ParseDirectiveAMDKernelCodeT();
public:
AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser,
@ -683,6 +686,202 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
return false;
}
/// Parse the "= <integer>" part of one key/value line of an
/// .amd_kernel_code_t block and store the value in \p Header.
///
/// \param ID     Key name; the caller has already consumed its token.
/// \param Header Kernel code object being built; updated in place.
/// \returns false on success. On a missing '=', a non-integer value or an
///          unrecognized key, returns the result of TokError (callers treat a
///          true result as failure).
///
/// Plain fields are overwritten. The compute_pgm_rsrc1_* / compute_pgm_rsrc2_*
/// keys and the code_properties flags are bit-fields that are OR'ed into the
/// current value, so they can add bits to a default but never clear them.
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               amd_kernel_code_t &Header) {
  if (getLexer().isNot(AsmToken::Equal))
    return TokError("expected '='");
  Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return TokError("amd_kernel_code_t values must be integers");

  uint64_t Value = getLexer().getTok().getIntVal();
  Lex();

  // OR Value into one code_properties bit-field. The result is masked with the
  // field's own mask so an oversized value (e.g. "is_ptr64 = 3") cannot spill
  // into the neighbouring property bits; this mirrors the S_00B8xx_* macros,
  // which mask the rsrc fields to their width.
  auto SetCodeProperty = [&Header, Value](unsigned Shift, uint32_t Mask) {
    Header.code_properties |= static_cast<uint32_t>(Value << Shift) & Mask;
  };

  if (ID == "kernel_code_version_major")
    Header.amd_kernel_code_version_major = Value;
  else if (ID == "kernel_code_version_minor")
    Header.amd_kernel_code_version_minor = Value;
  else if (ID == "machine_kind")
    Header.amd_machine_kind = Value;
  else if (ID == "machine_version_major")
    Header.amd_machine_version_major = Value;
  else if (ID == "machine_version_minor")
    Header.amd_machine_version_minor = Value;
  else if (ID == "machine_version_stepping")
    Header.amd_machine_version_stepping = Value;
  else if (ID == "kernel_code_entry_byte_offset")
    Header.kernel_code_entry_byte_offset = Value;
  else if (ID == "kernel_code_prefetch_byte_size")
    Header.kernel_code_prefetch_byte_size = Value;
  else if (ID == "max_scratch_backing_memory_byte_size")
    Header.max_scratch_backing_memory_byte_size = Value;
  // COMPUTE_PGM_RSRC1 fields live in the low 32 bits.
  else if (ID == "compute_pgm_rsrc1_vgprs")
    Header.compute_pgm_resource_registers |= S_00B848_VGPRS(Value);
  else if (ID == "compute_pgm_rsrc1_sgprs")
    Header.compute_pgm_resource_registers |= S_00B848_SGPRS(Value);
  else if (ID == "compute_pgm_rsrc1_priority")
    Header.compute_pgm_resource_registers |= S_00B848_PRIORITY(Value);
  else if (ID == "compute_pgm_rsrc1_float_mode")
    Header.compute_pgm_resource_registers |= S_00B848_FLOAT_MODE(Value);
  else if (ID == "compute_pgm_rsrc1_priv")
    Header.compute_pgm_resource_registers |= S_00B848_PRIV(Value);
  else if (ID == "compute_pgm_rsrc1_dx10_clamp")
    Header.compute_pgm_resource_registers |= S_00B848_DX10_CLAMP(Value);
  else if (ID == "compute_pgm_rsrc1_debug_mode")
    Header.compute_pgm_resource_registers |= S_00B848_DEBUG_MODE(Value);
  else if (ID == "compute_pgm_rsrc1_ieee_mode")
    Header.compute_pgm_resource_registers |= S_00B848_IEEE_MODE(Value);
  // COMPUTE_PGM_RSRC2 fields live in the high 32 bits.
  else if (ID == "compute_pgm_rsrc2_scratch_en")
    Header.compute_pgm_resource_registers |= (S_00B84C_SCRATCH_EN(Value) << 32);
  else if (ID == "compute_pgm_rsrc2_user_sgpr")
    Header.compute_pgm_resource_registers |= (S_00B84C_USER_SGPR(Value) << 32);
  else if (ID == "compute_pgm_rsrc2_tgid_x_en")
    Header.compute_pgm_resource_registers |= (S_00B84C_TGID_X_EN(Value) << 32);
  else if (ID == "compute_pgm_rsrc2_tgid_y_en")
    Header.compute_pgm_resource_registers |= (S_00B84C_TGID_Y_EN(Value) << 32);
  else if (ID == "compute_pgm_rsrc2_tgid_z_en")
    Header.compute_pgm_resource_registers |= (S_00B84C_TGID_Z_EN(Value) << 32);
  else if (ID == "compute_pgm_rsrc2_tg_size_en")
    Header.compute_pgm_resource_registers |= (S_00B84C_TG_SIZE_EN(Value) << 32);
  else if (ID == "compute_pgm_rsrc2_tidig_comp_cnt")
    Header.compute_pgm_resource_registers |=
        (S_00B84C_TIDIG_COMP_CNT(Value) << 32);
  else if (ID == "compute_pgm_rsrc2_excp_en_msb")
    Header.compute_pgm_resource_registers |=
        (S_00B84C_EXCP_EN_MSB(Value) << 32);
  else if (ID == "compute_pgm_rsrc2_lds_size")
    Header.compute_pgm_resource_registers |= (S_00B84C_LDS_SIZE(Value) << 32);
  else if (ID == "compute_pgm_rsrc2_excp_en")
    Header.compute_pgm_resource_registers |= (S_00B84C_EXCP_EN(Value) << 32);
  // Replaces both registers at once, discarding any bits set so far.
  else if (ID == "compute_pgm_resource_registers")
    Header.compute_pgm_resource_registers = Value;
  else if (ID == "enable_sgpr_private_segment_buffer")
    SetCodeProperty(AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER_SHIFT,
                    AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
  else if (ID == "enable_sgpr_dispatch_ptr")
    SetCodeProperty(AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR_SHIFT,
                    AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
  else if (ID == "enable_sgpr_queue_ptr")
    SetCodeProperty(AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR_SHIFT,
                    AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
  else if (ID == "enable_sgpr_kernarg_segment_ptr")
    SetCodeProperty(AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR_SHIFT,
                    AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
  else if (ID == "enable_sgpr_dispatch_id")
    SetCodeProperty(AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID_SHIFT,
                    AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
  else if (ID == "enable_sgpr_flat_scratch_init")
    SetCodeProperty(AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT_SHIFT,
                    AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
  else if (ID == "enable_sgpr_private_segment_size")
    SetCodeProperty(AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE_SHIFT,
                    AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
  else if (ID == "enable_sgpr_grid_workgroup_count_x")
    SetCodeProperty(AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X_SHIFT,
                    AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X);
  else if (ID == "enable_sgpr_grid_workgroup_count_y")
    SetCodeProperty(AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y_SHIFT,
                    AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y);
  else if (ID == "enable_sgpr_grid_workgroup_count_z")
    SetCodeProperty(AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT,
                    AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z);
  else if (ID == "enable_ordered_append_gds")
    SetCodeProperty(AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS_SHIFT,
                    AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS);
  else if (ID == "private_element_size")
    SetCodeProperty(AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT,
                    AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE);
  else if (ID == "is_ptr64")
    SetCodeProperty(AMD_CODE_PROPERTY_IS_PTR64_SHIFT,
                    AMD_CODE_PROPERTY_IS_PTR64);
  else if (ID == "is_dynamic_callstack")
    SetCodeProperty(AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT,
                    AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK);
  // NOTE(review): in the amd_code_property_mask_t enum as shown in this
  // change, IS_DEBUG_SUPPORTED_SHIFT and IS_XNACK_SUPPORTED_SHIFT are both 15,
  // so the next two keys set the same bit -- confirm the intended XNACK shift.
  else if (ID == "is_debug_enabled")
    SetCodeProperty(AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED_SHIFT,
                    AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED);
  else if (ID == "is_xnack_enabled")
    SetCodeProperty(AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED_SHIFT,
                    AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED);
  else if (ID == "workitem_private_segment_byte_size")
    Header.workitem_private_segment_byte_size = Value;
  else if (ID == "workgroup_group_segment_byte_size")
    Header.workgroup_group_segment_byte_size = Value;
  else if (ID == "gds_segment_byte_size")
    Header.gds_segment_byte_size = Value;
  else if (ID == "kernarg_segment_byte_size")
    Header.kernarg_segment_byte_size = Value;
  else if (ID == "workgroup_fbarrier_count")
    Header.workgroup_fbarrier_count = Value;
  else if (ID == "wavefront_sgpr_count")
    Header.wavefront_sgpr_count = Value;
  else if (ID == "workitem_vgpr_count")
    Header.workitem_vgpr_count = Value;
  else if (ID == "reserved_vgpr_first")
    Header.reserved_vgpr_first = Value;
  else if (ID == "reserved_vgpr_count")
    Header.reserved_vgpr_count = Value;
  else if (ID == "reserved_sgpr_first")
    Header.reserved_sgpr_first = Value;
  else if (ID == "reserved_sgpr_count")
    Header.reserved_sgpr_count = Value;
  else if (ID == "debug_wavefront_private_segment_offset_sgpr")
    Header.debug_wavefront_private_segment_offset_sgpr = Value;
  else if (ID == "debug_private_segment_buffer_sgpr")
    Header.debug_private_segment_buffer_sgpr = Value;
  else if (ID == "kernarg_segment_alignment")
    Header.kernarg_segment_alignment = Value;
  else if (ID == "group_segment_alignment")
    Header.group_segment_alignment = Value;
  else if (ID == "private_segment_alignment")
    Header.private_segment_alignment = Value;
  else if (ID == "wavefront_size")
    Header.wavefront_size = Value;
  else if (ID == "call_convention")
    Header.call_convention = Value;
  else if (ID == "runtime_loader_kernel_symbol")
    Header.runtime_loader_kernel_symbol = Value;
  else
    return TokError("amd_kernel_code_t value not recognized.");

  return false;
}
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
amd_kernel_code_t Header;
AMDGPU::initDefaultAMDKernelCodeT(Header, STI.getFeatureBits());
while (true) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("amd_kernel_code_t values must begin on a new line");
// Lex EndOfStatement. This is in a while loop, because lexing a comment
// will set the current token to EndOfStatement.
while(getLexer().is(AsmToken::EndOfStatement))
Lex();
if (getLexer().isNot(AsmToken::Identifier))
return TokError("expected value identifier or .end_amd_kernel_code_t");
StringRef ID = getLexer().getTok().getIdentifier();
Lex();
if (ID == ".end_amd_kernel_code_t")
break;
if (ParseAMDKernelCodeTValue(ID, Header))
return true;
}
getTargetStreamer().EmitAMDKernelCodeT(Header);
return false;
}
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
@ -692,6 +891,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".hsa_code_object_isa")
return ParseDirectiveHSACodeObjectISA();
if (IDVal == ".amd_kernel_code_t")
return ParseDirectiveAMDKernelCodeT();
return true;
}

View File

@ -12,9 +12,11 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/FormattedStream.h"
@ -51,6 +53,173 @@ AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
}
void
AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
uint64_t ComputePgmRsrc2 = (Header.compute_pgm_resource_registers >> 32);
bool EnableSGPRPrivateSegmentBuffer = (Header.code_properties &
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
bool EnableSGPRDispatchPtr = (Header.code_properties &
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
bool EnableSGPRQueuePtr = (Header.code_properties &
AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
bool EnableSGPRKernargSegmentPtr = (Header.code_properties &
AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
bool EnableSGPRDispatchID = (Header.code_properties &
AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
bool EnableSGPRFlatScratchInit = (Header.code_properties &
AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
bool EnableSGPRPrivateSegmentSize = (Header.code_properties &
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
bool EnableSGPRGridWorkgroupCountX = (Header.code_properties &
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X);
bool EnableSGPRGridWorkgroupCountY = (Header.code_properties &
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y);
bool EnableSGPRGridWorkgroupCountZ = (Header.code_properties &
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z);
bool EnableOrderedAppendGDS = (Header.code_properties &
AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS);
uint32_t PrivateElementSize = (Header.code_properties &
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE) >>
AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT;
bool IsPtr64 = (Header.code_properties & AMD_CODE_PROPERTY_IS_PTR64);
bool IsDynamicCallstack = (Header.code_properties &
AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK);
bool IsDebugEnabled = (Header.code_properties &
AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED);
bool IsXNackEnabled = (Header.code_properties &
AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED);
OS << "\t.amd_kernel_code_t\n" <<
"\t\tkernel_code_version_major = " <<
Header.amd_kernel_code_version_major << '\n' <<
"\t\tkernel_code_version_minor = " <<
Header.amd_kernel_code_version_minor << '\n' <<
"\t\tmachine_kind = " <<
Header.amd_machine_kind << '\n' <<
"\t\tmachine_version_major = " <<
Header.amd_machine_version_major << '\n' <<
"\t\tmachine_version_minor = " <<
Header.amd_machine_version_minor << '\n' <<
"\t\tmachine_version_stepping = " <<
Header.amd_machine_version_stepping << '\n' <<
"\t\tkernel_code_entry_byte_offset = " <<
Header.kernel_code_entry_byte_offset << '\n' <<
"\t\tkernel_code_prefetch_byte_size = " <<
Header.kernel_code_prefetch_byte_size << '\n' <<
"\t\tmax_scratch_backing_memory_byte_size = " <<
Header.max_scratch_backing_memory_byte_size << '\n' <<
"\t\tcompute_pgm_rsrc1_vgprs = " <<
G_00B848_VGPRS(Header.compute_pgm_resource_registers) << '\n' <<
"\t\tcompute_pgm_rsrc1_sgprs = " <<
G_00B848_SGPRS(Header.compute_pgm_resource_registers) << '\n' <<
"\t\tcompute_pgm_rsrc1_priority = " <<
G_00B848_PRIORITY(Header.compute_pgm_resource_registers) << '\n' <<
"\t\tcompute_pgm_rsrc1_float_mode = " <<
G_00B848_FLOAT_MODE(Header.compute_pgm_resource_registers) << '\n' <<
"\t\tcompute_pgm_rsrc1_priv = " <<
G_00B848_PRIV(Header.compute_pgm_resource_registers) << '\n' <<
"\t\tcompute_pgm_rsrc1_dx10_clamp = " <<
G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) << '\n' <<
"\t\tcompute_pgm_rsrc1_debug_mode = " <<
G_00B848_DEBUG_MODE(Header.compute_pgm_resource_registers) << '\n' <<
"\t\tcompute_pgm_rsrc1_ieee_mode = " <<
G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) << '\n' <<
"\t\tcompute_pgm_rsrc2_scratch_en = " <<
G_00B84C_SCRATCH_EN(ComputePgmRsrc2) << '\n' <<
"\t\tcompute_pgm_rsrc2_user_sgpr = " <<
G_00B84C_USER_SGPR(ComputePgmRsrc2) << '\n' <<
"\t\tcompute_pgm_rsrc2_tgid_x_en = " <<
G_00B84C_TGID_X_EN(ComputePgmRsrc2) << '\n' <<
"\t\tcompute_pgm_rsrc2_tgid_y_en = " <<
G_00B84C_TGID_Y_EN(ComputePgmRsrc2) << '\n' <<
"\t\tcompute_pgm_rsrc2_tgid_z_en = " <<
G_00B84C_TGID_Z_EN(ComputePgmRsrc2) << '\n' <<
"\t\tcompute_pgm_rsrc2_tg_size_en = " <<
G_00B84C_TG_SIZE_EN(ComputePgmRsrc2) << '\n' <<
"\t\tcompute_pgm_rsrc2_tidig_comp_cnt = " <<
G_00B84C_TIDIG_COMP_CNT(ComputePgmRsrc2) << '\n' <<
"\t\tcompute_pgm_rsrc2_excp_en_msb = " <<
G_00B84C_EXCP_EN_MSB(ComputePgmRsrc2) << '\n' <<
"\t\tcompute_pgm_rsrc2_lds_size = " <<
G_00B84C_LDS_SIZE(ComputePgmRsrc2) << '\n' <<
"\t\tcompute_pgm_rsrc2_excp_en = " <<
G_00B84C_EXCP_EN(ComputePgmRsrc2) << '\n' <<
"\t\tenable_sgpr_private_segment_buffer = " <<
EnableSGPRPrivateSegmentBuffer << '\n' <<
"\t\tenable_sgpr_dispatch_ptr = " <<
EnableSGPRDispatchPtr << '\n' <<
"\t\tenable_sgpr_queue_ptr = " <<
EnableSGPRQueuePtr << '\n' <<
"\t\tenable_sgpr_kernarg_segment_ptr = " <<
EnableSGPRKernargSegmentPtr << '\n' <<
"\t\tenable_sgpr_dispatch_id = " <<
EnableSGPRDispatchID << '\n' <<
"\t\tenable_sgpr_flat_scratch_init = " <<
EnableSGPRFlatScratchInit << '\n' <<
"\t\tenable_sgpr_private_segment_size = " <<
EnableSGPRPrivateSegmentSize << '\n' <<
"\t\tenable_sgpr_grid_workgroup_count_x = " <<
EnableSGPRGridWorkgroupCountX << '\n' <<
"\t\tenable_sgpr_grid_workgroup_count_y = " <<
EnableSGPRGridWorkgroupCountY << '\n' <<
"\t\tenable_sgpr_grid_workgroup_count_z = " <<
EnableSGPRGridWorkgroupCountZ << '\n' <<
"\t\tenable_ordered_append_gds = " <<
EnableOrderedAppendGDS << '\n' <<
"\t\tprivate_element_size = " <<
PrivateElementSize << '\n' <<
"\t\tis_ptr64 = " <<
IsPtr64 << '\n' <<
"\t\tis_dynamic_callstack = " <<
IsDynamicCallstack << '\n' <<
"\t\tis_debug_enabled = " <<
IsDebugEnabled << '\n' <<
"\t\tis_xnack_enabled = " <<
IsXNackEnabled << '\n' <<
"\t\tworkitem_private_segment_byte_size = " <<
Header.workitem_private_segment_byte_size << '\n' <<
"\t\tworkgroup_group_segment_byte_size = " <<
Header.workgroup_group_segment_byte_size << '\n' <<
"\t\tgds_segment_byte_size = " <<
Header.gds_segment_byte_size << '\n' <<
"\t\tkernarg_segment_byte_size = " <<
Header.kernarg_segment_byte_size << '\n' <<
"\t\tworkgroup_fbarrier_count = " <<
Header.workgroup_fbarrier_count << '\n' <<
"\t\twavefront_sgpr_count = " <<
Header.wavefront_sgpr_count << '\n' <<
"\t\tworkitem_vgpr_count = " <<
Header.workitem_vgpr_count << '\n' <<
"\t\treserved_vgpr_first = " <<
Header.reserved_vgpr_first << '\n' <<
"\t\treserved_vgpr_count = " <<
Header.reserved_vgpr_count << '\n' <<
"\t\treserved_sgpr_first = " <<
Header.reserved_sgpr_first << '\n' <<
"\t\treserved_sgpr_count = " <<
Header.reserved_sgpr_count << '\n' <<
"\t\tdebug_wavefront_private_segment_offset_sgpr = " <<
Header.debug_wavefront_private_segment_offset_sgpr << '\n' <<
"\t\tdebug_private_segment_buffer_sgpr = " <<
Header.debug_private_segment_buffer_sgpr << '\n' <<
"\t\tkernarg_segment_alignment = " <<
(uint32_t)Header.kernarg_segment_alignment << '\n' <<
"\t\tgroup_segment_alignment = " <<
(uint32_t)Header.group_segment_alignment << '\n' <<
"\t\tprivate_segment_alignment = " <<
(uint32_t)Header.private_segment_alignment << '\n' <<
"\t\twavefront_size = " <<
(uint32_t)Header.wavefront_size << '\n' <<
"\t\tcall_convention = " <<
Header.call_convention << '\n' <<
"\t\truntime_loader_kernel_symbol = " <<
Header.runtime_loader_kernel_symbol << '\n' <<
// TODO: control_directives
"\t.end_amd_kernel_code_t\n";
}
//===----------------------------------------------------------------------===//
// AMDGPUTargetELFStreamer
//===----------------------------------------------------------------------===//
@ -116,3 +285,13 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
OS.EmitValueToAlignment(4);
OS.PopSection();
}
void
AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
MCStreamer &OS = getStreamer();
OS.PushSection();
OS.SwitchSection(OS.getContext().getObjectFileInfo()->getTextSection());
OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
OS.PopSection();
}

View File

@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
#include "AMDKernelCodeT.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
namespace llvm {
@ -25,6 +25,8 @@ public:
uint32_t Stepping,
StringRef VendorName,
StringRef ArchName) = 0;
/// Emit the amd_kernel_code_t object described by \p Header. Pure virtual:
/// each concrete target streamer provides its own output form.
virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) = 0;
};
class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer {
@ -37,6 +39,8 @@ public:
void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor,
uint32_t Stepping, StringRef VendorName,
StringRef ArchName) override;
/// Assembly-streamer override of AMDGPUTargetStreamer::EmitAMDKernelCodeT.
// NOTE(review): presumably prints the header as an .amd_kernel_code_t /
// .end_amd_kernel_code_t key = value listing -- confirm against the
// definition.
void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
};
class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
@ -65,6 +69,9 @@ public:
void EmitDirectiveHSACodeObjectISA(uint32_t Major, uint32_t Minor,
uint32_t Stepping, StringRef VendorName,
StringRef ArchName) override;
/// ELF-streamer override of AMDGPUTargetStreamer::EmitAMDKernelCodeT: the
/// definition writes the bytes of \p Header into the text section and then
/// restores the previously active section.
void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
};
}

View File

@ -100,16 +100,41 @@ namespace SIOutMods {
#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
#define S_00B028_VGPRS(x) (((x) & 0x3F) << 0)
#define S_00B028_SGPRS(x) (((x) & 0x0F) << 6)
// COMPUTE_PGM_RSRC2 register and its bit fields.
//
// For every field FOO there are three macros:
//   S_00B84C_FOO(x) - mask x to the field width and shift it into position.
//   G_00B84C_FOO(x) - extract the field from a full register value x.
//   C_00B84C_FOO    - clear mask: every bit set except the bits of the field,
//                     i.e. the complement of S_00B84C_FOO(all-ones).
#define R_00B84C_COMPUTE_PGM_RSRC2 0x00B84C
#define S_00B84C_SCRATCH_EN(x) (((x) & 0x1) << 0)
#define G_00B84C_SCRATCH_EN(x) (((x) >> 0) & 0x1)
#define C_00B84C_SCRATCH_EN 0xFFFFFFFE
#define S_00B84C_USER_SGPR(x) (((x) & 0x1F) << 1)
#define G_00B84C_USER_SGPR(x) (((x) >> 1) & 0x1F)
#define C_00B84C_USER_SGPR 0xFFFFFFC1
#define S_00B84C_TGID_X_EN(x) (((x) & 0x1) << 7)
#define G_00B84C_TGID_X_EN(x) (((x) >> 7) & 0x1)
#define C_00B84C_TGID_X_EN 0xFFFFFF7F
#define S_00B84C_TGID_Y_EN(x) (((x) & 0x1) << 8)
#define G_00B84C_TGID_Y_EN(x) (((x) >> 8) & 0x1)
#define C_00B84C_TGID_Y_EN 0xFFFFFEFF
#define S_00B84C_TGID_Z_EN(x) (((x) & 0x1) << 9)
#define G_00B84C_TGID_Z_EN(x) (((x) >> 9) & 0x1)
#define C_00B84C_TGID_Z_EN 0xFFFFFDFF
#define S_00B84C_TG_SIZE_EN(x) (((x) & 0x1) << 10)
#define G_00B84C_TG_SIZE_EN(x) (((x) >> 10) & 0x1)
#define C_00B84C_TG_SIZE_EN 0xFFFFFBFF
#define S_00B84C_TIDIG_COMP_CNT(x) (((x) & 0x03) << 11)
#define G_00B84C_TIDIG_COMP_CNT(x) (((x) >> 11) & 0x03)
#define C_00B84C_TIDIG_COMP_CNT 0xFFFFE7FF
/* CIK */
#define S_00B84C_EXCP_EN_MSB(x) (((x) & 0x03) << 13)
#define G_00B84C_EXCP_EN_MSB(x) (((x) >> 13) & 0x03)
#define C_00B84C_EXCP_EN_MSB 0xFFFF9FFF
/* */
#define S_00B84C_LDS_SIZE(x) (((x) & 0x1FF) << 15)
#define G_00B84C_LDS_SIZE(x) (((x) >> 15) & 0x1FF)
#define C_00B84C_LDS_SIZE 0xFF007FFF
#define S_00B84C_EXCP_EN(x) (((x) & 0x7F) << 24)
#define G_00B84C_EXCP_EN(x) (((x) >> 24) & 0x7F)
// 7-bit field at bits 24..30: ~(0x7F << 24). This macro previously expanded to
// nothing, which made it unusable in any expression.
#define C_00B84C_EXCP_EN 0x80FFFFFF
#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC

View File

@ -33,5 +33,28 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) {
return {0, 0, 0};
}
/// Populate \p Header with the default amd_kernel_code_t values.
///
/// The whole object is zeroed first, so any field not assigned below defaults
/// to 0. The machine version triple comes from getIsaVersion(\p Features).
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  const IsaVersion Version = getIsaVersion(Features);

  // Clear every byte of the object before filling in the non-zero defaults.
  memset(&Header, 0, sizeof(Header));

  // Kernel code object format version 1.0.
  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 0;

  // Machine identification; kind 1 is AMD_MACHINE_KIND_AMDGPU.
  Header.amd_machine_kind = 1;
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;

  // The entry offset defaults to the size of the header object itself.
  Header.kernel_code_entry_byte_offset = sizeof(Header);

  // Stored as a power of two: 2^6 = 64 threads per wavefront.
  Header.wavefront_size = 6;

  // Segment alignments are likewise stored as exponents (alignment = 2^n);
  // the minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}
} // End namespace AMDGPU
} // End namespace llvm

View File

@ -25,6 +25,8 @@ struct IsaVersion {
};
IsaVersion getIsaVersion(const FeatureBitset &Features);
/// Reset \p Header to the default amd_kernel_code_t contents: all fields zero
/// except the code version, machine kind, machine version (derived from
/// \p Features via getIsaVersion), entry byte offset, wavefront size and the
/// three segment alignments.
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const FeatureBitset &Features);
} // end namespace AMDGPU
} // end namespace llvm

View File

@ -16,10 +16,8 @@
; HSA: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA: {{^}}simple:
; HSA: .section .hsa.version
; HSA-NEXT: .ascii "HSA Code Unit:0.0:AMD:0.1:GFX8.1:0"
; Test that the amd_kernel_code_t object is emitted
; HSA: .asciz
; HSA: .amd_kernel_code_t
; HSA: .end_amd_kernel_code_t
; HSA: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[0:1], 0x0
; Make sure we are setting the ATC bit:
; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000

View File

@ -14,3 +14,220 @@
.hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
// ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
.text
amd_kernel_code_t_test_all:
; Test all amd_kernel_code_t members with non-default values.
.amd_kernel_code_t
kernel_code_version_major = 100
kernel_code_version_minor = 100
machine_kind = 0
machine_version_major = 5
machine_version_minor = 5
machine_version_stepping = 5
kernel_code_entry_byte_offset = 512
kernel_code_prefetch_byte_size = 1
max_scratch_backing_memory_byte_size = 1
compute_pgm_rsrc1_vgprs = 1
compute_pgm_rsrc1_sgprs = 1
compute_pgm_rsrc1_priority = 1
compute_pgm_rsrc1_float_mode = 1
compute_pgm_rsrc1_priv = 1
compute_pgm_rsrc1_dx10_clamp = 1
compute_pgm_rsrc1_debug_mode = 1
compute_pgm_rsrc1_ieee_mode = 1
compute_pgm_rsrc2_scratch_en = 1
compute_pgm_rsrc2_user_sgpr = 1
compute_pgm_rsrc2_tgid_x_en = 1
compute_pgm_rsrc2_tgid_y_en = 1
compute_pgm_rsrc2_tgid_z_en = 1
compute_pgm_rsrc2_tg_size_en = 1
compute_pgm_rsrc2_tidig_comp_cnt = 1
compute_pgm_rsrc2_excp_en_msb = 1
compute_pgm_rsrc2_lds_size = 1
compute_pgm_rsrc2_excp_en = 1
enable_sgpr_private_segment_buffer = 1
enable_sgpr_dispatch_ptr = 1
enable_sgpr_queue_ptr = 1
enable_sgpr_kernarg_segment_ptr = 1
enable_sgpr_dispatch_id = 1
enable_sgpr_flat_scratch_init = 1
enable_sgpr_private_segment_size = 1
enable_sgpr_grid_workgroup_count_x = 1
enable_sgpr_grid_workgroup_count_y = 1
enable_sgpr_grid_workgroup_count_z = 1
enable_ordered_append_gds = 1
private_element_size = 1
is_ptr64 = 1
is_dynamic_callstack = 1
is_debug_enabled = 1
is_xnack_enabled = 1
workitem_private_segment_byte_size = 1
workgroup_group_segment_byte_size = 1
gds_segment_byte_size = 1
kernarg_segment_byte_size = 1
workgroup_fbarrier_count = 1
wavefront_sgpr_count = 1
workitem_vgpr_count = 1
reserved_vgpr_first = 1
reserved_vgpr_count = 1
reserved_sgpr_first = 1
reserved_sgpr_count = 1
debug_wavefront_private_segment_offset_sgpr = 1
debug_private_segment_buffer_sgpr = 1
kernarg_segment_alignment = 5
group_segment_alignment = 5
private_segment_alignment = 5
wavefront_size = 5
call_convention = 1
runtime_loader_kernel_symbol = 1
.end_amd_kernel_code_t
// ASM-LABEL: {{^}}amd_kernel_code_t_test_all:
// ASM: .amd_kernel_code_t
// ASM: kernel_code_version_major = 100
// ASM: kernel_code_version_minor = 100
// ASM: machine_kind = 0
// ASM: machine_version_major = 5
// ASM: machine_version_minor = 5
// ASM: machine_version_stepping = 5
// ASM: kernel_code_entry_byte_offset = 512
// ASM: kernel_code_prefetch_byte_size = 1
// ASM: max_scratch_backing_memory_byte_size = 1
// ASM: compute_pgm_rsrc1_vgprs = 1
// ASM: compute_pgm_rsrc1_sgprs = 1
// ASM: compute_pgm_rsrc1_priority = 1
// ASM: compute_pgm_rsrc1_float_mode = 1
// ASM: compute_pgm_rsrc1_priv = 1
// ASM: compute_pgm_rsrc1_dx10_clamp = 1
// ASM: compute_pgm_rsrc1_debug_mode = 1
// ASM: compute_pgm_rsrc1_ieee_mode = 1
// ASM: compute_pgm_rsrc2_scratch_en = 1
// ASM: compute_pgm_rsrc2_user_sgpr = 1
// ASM: compute_pgm_rsrc2_tgid_x_en = 1
// ASM: compute_pgm_rsrc2_tgid_y_en = 1
// ASM: compute_pgm_rsrc2_tgid_z_en = 1
// ASM: compute_pgm_rsrc2_tg_size_en = 1
// ASM: compute_pgm_rsrc2_tidig_comp_cnt = 1
// ASM: compute_pgm_rsrc2_excp_en_msb = 1
// ASM: compute_pgm_rsrc2_lds_size = 1
// ASM: compute_pgm_rsrc2_excp_en = 1
// ASM: enable_sgpr_private_segment_buffer = 1
// ASM: enable_sgpr_dispatch_ptr = 1
// ASM: enable_sgpr_queue_ptr = 1
// ASM: enable_sgpr_kernarg_segment_ptr = 1
// ASM: enable_sgpr_dispatch_id = 1
// ASM: enable_sgpr_flat_scratch_init = 1
// ASM: enable_sgpr_private_segment_size = 1
// ASM: enable_sgpr_grid_workgroup_count_x = 1
// ASM: enable_sgpr_grid_workgroup_count_y = 1
// ASM: enable_sgpr_grid_workgroup_count_z = 1
// ASM: enable_ordered_append_gds = 1
// ASM: private_element_size = 1
// ASM: is_ptr64 = 1
// ASM: is_dynamic_callstack = 1
// ASM: is_debug_enabled = 1
// ASM: is_xnack_enabled = 1
// ASM: workitem_private_segment_byte_size = 1
// ASM: workgroup_group_segment_byte_size = 1
// ASM: gds_segment_byte_size = 1
// ASM: kernarg_segment_byte_size = 1
// ASM: workgroup_fbarrier_count = 1
// ASM: wavefront_sgpr_count = 1
// ASM: workitem_vgpr_count = 1
// ASM: reserved_vgpr_first = 1
// ASM: reserved_vgpr_count = 1
// ASM: reserved_sgpr_first = 1
// ASM: reserved_sgpr_count = 1
// ASM: debug_wavefront_private_segment_offset_sgpr = 1
// ASM: debug_private_segment_buffer_sgpr = 1
// ASM: kernarg_segment_alignment = 5
// ASM: group_segment_alignment = 5
// ASM: private_segment_alignment = 5
// ASM: wavefront_size = 5
// ASM: call_convention = 1
// ASM: runtime_loader_kernel_symbol = 1
// ASM: .end_amd_kernel_code_t
amd_kernel_code_t_minimal:
.amd_kernel_code_t
enable_sgpr_kernarg_segment_ptr = 1
is_ptr64 = 1
compute_pgm_rsrc1_vgprs = 1
compute_pgm_rsrc1_sgprs = 1
compute_pgm_rsrc2_user_sgpr = 2
kernarg_segment_byte_size = 16
wavefront_sgpr_count = 8
// wavefront_sgpr_count = 7
; wavefront_sgpr_count = 7
// Make sure a blank line won't break anything:
// Make sure a line with whitespace won't break anything:
workitem_vgpr_count = 16
.end_amd_kernel_code_t
// ASM-LABEL: {{^}}amd_kernel_code_t_minimal:
// ASM: .amd_kernel_code_t
// ASM: kernel_code_version_major = 1
// ASM: kernel_code_version_minor = 0
// ASM: machine_kind = 1
// ASM: machine_version_major = 7
// ASM: machine_version_minor = 0
// ASM: machine_version_stepping = 0
// ASM: kernel_code_entry_byte_offset = 256
// ASM: kernel_code_prefetch_byte_size = 0
// ASM: max_scratch_backing_memory_byte_size = 0
// ASM: compute_pgm_rsrc1_vgprs = 1
// ASM: compute_pgm_rsrc1_sgprs = 1
// ASM: compute_pgm_rsrc1_priority = 0
// ASM: compute_pgm_rsrc1_float_mode = 0
// ASM: compute_pgm_rsrc1_priv = 0
// ASM: compute_pgm_rsrc1_dx10_clamp = 0
// ASM: compute_pgm_rsrc1_debug_mode = 0
// ASM: compute_pgm_rsrc1_ieee_mode = 0
// ASM: compute_pgm_rsrc2_scratch_en = 0
// ASM: compute_pgm_rsrc2_user_sgpr = 2
// ASM: compute_pgm_rsrc2_tgid_x_en = 0
// ASM: compute_pgm_rsrc2_tgid_y_en = 0
// ASM: compute_pgm_rsrc2_tgid_z_en = 0
// ASM: compute_pgm_rsrc2_tg_size_en = 0
// ASM: compute_pgm_rsrc2_tidig_comp_cnt = 0
// ASM: compute_pgm_rsrc2_excp_en_msb = 0
// ASM: compute_pgm_rsrc2_lds_size = 0
// ASM: compute_pgm_rsrc2_excp_en = 0
// ASM: enable_sgpr_private_segment_buffer = 0
// ASM: enable_sgpr_dispatch_ptr = 0
// ASM: enable_sgpr_queue_ptr = 0
// ASM: enable_sgpr_kernarg_segment_ptr = 1
// ASM: enable_sgpr_dispatch_id = 0
// ASM: enable_sgpr_flat_scratch_init = 0
// ASM: enable_sgpr_private_segment_size = 0
// ASM: enable_sgpr_grid_workgroup_count_x = 0
// ASM: enable_sgpr_grid_workgroup_count_y = 0
// ASM: enable_sgpr_grid_workgroup_count_z = 0
// ASM: enable_ordered_append_gds = 0
// ASM: private_element_size = 0
// ASM: is_ptr64 = 1
// ASM: is_dynamic_callstack = 0
// ASM: is_debug_enabled = 0
// ASM: is_xnack_enabled = 0
// ASM: workitem_private_segment_byte_size = 0
// ASM: workgroup_group_segment_byte_size = 0
// ASM: gds_segment_byte_size = 0
// ASM: kernarg_segment_byte_size = 16
// ASM: workgroup_fbarrier_count = 0
// ASM: wavefront_sgpr_count = 8
// ASM: workitem_vgpr_count = 16
// ASM: reserved_vgpr_first = 0
// ASM: reserved_vgpr_count = 0
// ASM: reserved_sgpr_first = 0
// ASM: reserved_sgpr_count = 0
// ASM: debug_wavefront_private_segment_offset_sgpr = 0
// ASM: debug_private_segment_buffer_sgpr = 0
// ASM: kernarg_segment_alignment = 4
// ASM: group_segment_alignment = 4
// ASM: private_segment_alignment = 4
// ASM: wavefront_size = 6
// ASM: call_convention = 0
// ASM: runtime_loader_kernel_symbol = 0
// ASM: .end_amd_kernel_code_t