mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-28 04:33:05 +00:00
R600/SI: Add intrinsic for BUFFER_LOAD_DWORD* instructions
Reviewed-by: Tom Stellard <thomas.stellard@amd.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200196 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b3bfe7f18c
commit
7018cd5af7
@ -425,26 +425,48 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU
|
|||||||
|
|
||||||
multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> {
|
multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> {
|
||||||
|
|
||||||
let glc = 0, lds = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */,
|
let lds = 0, mayLoad = 1 in {
|
||||||
mayLoad = 1 in {
|
|
||||||
|
|
||||||
let offen = 1, idxen = 0, addr64 = 0, offset = 0 in {
|
let addr64 = 0 in {
|
||||||
def _OFFEN : MUBUF <op, (outs regClass:$vdata),
|
|
||||||
(ins SReg_128:$srsrc, VReg_32:$vaddr),
|
|
||||||
asm#" $vdata, $srsrc + $vaddr", []>;
|
|
||||||
}
|
|
||||||
|
|
||||||
let offen = 0, idxen = 1, addr64 = 0 in {
|
let offen = 0, idxen = 0 in {
|
||||||
def _IDXEN : MUBUF <op, (outs regClass:$vdata),
|
def _OFFSET : MUBUF <op, (outs regClass:$vdata),
|
||||||
(ins SReg_128:$srsrc, VReg_32:$vaddr, i16imm:$offset),
|
(ins SReg_128:$srsrc, VReg_32:$vaddr,
|
||||||
asm#" $vdata, $srsrc[$vaddr] + $offset", []>;
|
i16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
|
||||||
}
|
i1imm:$slc, i1imm:$tfe),
|
||||||
|
asm#" $vdata, $srsrc + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
|
||||||
|
}
|
||||||
|
|
||||||
let offen = 0, idxen = 0, addr64 = 1 in {
|
let offen = 1, idxen = 0, offset = 0 in {
|
||||||
def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
|
def _OFFEN : MUBUF <op, (outs regClass:$vdata),
|
||||||
(ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset),
|
(ins SReg_128:$srsrc, VReg_32:$vaddr,
|
||||||
asm#" $vdata, $srsrc + $vaddr + $offset", []>;
|
SSrc_32:$soffset, i1imm:$glc, i1imm:$slc,
|
||||||
}
|
i1imm:$tfe),
|
||||||
|
asm#" $vdata, $srsrc + $vaddr + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Index-enabled variant: the enclosing let fixes idxen = 1 and offen = 0 for
// the definition below. Operands are the resource $srsrc, a 32-bit $vaddr, a
// 16-bit immediate $offset, $soffset, and the glc/slc/tfe bits as explicit
// i1 immediates. The asm string prints $vaddr as an index into $srsrc and
// adds $offset and $soffset. No selection pattern is attached here ([]).
let offen = 0, idxen = 1 in {
|
||||||
|
def _IDXEN : MUBUF <op, (outs regClass:$vdata),
|
||||||
|
(ins SReg_128:$srsrc, VReg_32:$vaddr,
|
||||||
|
i16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
|
||||||
|
i1imm:$slc, i1imm:$tfe),
|
||||||
|
asm#" $vdata, $srsrc[$vaddr] + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Variant with both offen = 1 and idxen = 1. $vaddr is a 64-bit register
// operand (VReg_64); the asm string prints its first element as the index
// into $srsrc and its second element as the added offset, followed by
// $soffset. No selection pattern is attached here ([]).
// NOTE(review): unlike the _OFFEN variant (which is defined under
// `offset = 0`), this definition neither takes an $offset operand nor sets
// the `offset` field in its let - confirm that leaving it unset is intended.
let offen = 1, idxen = 1 in {
|
||||||
|
def _BOTHEN : MUBUF <op, (outs regClass:$vdata),
|
||||||
|
(ins SReg_128:$srsrc, VReg_64:$vaddr,
|
||||||
|
SSrc_32:$soffset, i1imm:$glc,
|
||||||
|
i1imm:$slc, i1imm:$tfe),
|
||||||
|
asm#" $vdata, $srsrc[$vaddr[0]] + $vaddr[1] + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// addr64 = 1 variant. It keeps only three input operands ($srsrc, a 64-bit
// $vaddr and a 16-bit immediate $offset); glc, slc and tfe are fixed to 0 and
// soffset to 128 (annotated ZERO below) by this let instead of being exposed
// as operands. offen and idxen are both 0. No selection pattern is attached
// here ([]).
let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in {
|
||||||
|
def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
|
||||||
|
(ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset),
|
||||||
|
asm#" $vdata, $srsrc + $vaddr + $offset", []>;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1428,7 +1428,7 @@ def : Pat <
|
|||||||
/* int_SI_vs_load_input */
|
/* int_SI_vs_load_input */
|
||||||
def : Pat<
|
def : Pat<
|
||||||
(SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
|
(SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
|
||||||
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset)
|
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
/* int_SI_export */
|
/* int_SI_export */
|
||||||
@ -1834,7 +1834,7 @@ def : Pat <
|
|||||||
// 3. Offset in an 32Bit VGPR
|
// 3. Offset in an 32Bit VGPR
|
||||||
def : Pat <
|
def : Pat <
|
||||||
(SIload_constant i128:$sbase, i32:$voff),
|
(SIload_constant i128:$sbase, i32:$voff),
|
||||||
(BUFFER_LOAD_DWORD_OFFEN $sbase, $voff)
|
(BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
// The multiplication scales from [0,1] to the unsigned integer range
|
// The multiplication scales from [0,1] to the unsigned integer range
|
||||||
@ -1995,6 +1995,50 @@ defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, global_store>;
|
|||||||
defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>;
|
defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>;
|
||||||
defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>;
|
defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>;
|
||||||
|
|
||||||
|
// BUFFER_LOAD_DWORD*, addr64=0
// Selection patterns mapping int_SI_buffer_load_dword onto four MUBUF
// instruction records, chosen by the constant (offen, idxen) operand pair of
// the intrinsic call.
//   vt      - value type of the loaded result.
//   offset  - instruction used for (offen, idxen) = (0, 0).
//   offen   - instruction used for (offen, idxen) = (1, 0).
//   idxen   - instruction used for (offen, idxen) = (0, 1).
//   bothen  - instruction used for (offen, idxen) = (1, 1).
// glc/slc/tfe are matched as immediates and forwarded through as_i1imm
// (defined outside this block).
|
||||||
|
multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen,
|
||||||
|
MUBUF bothen> {
|
||||||
|
|
||||||
|
// (offen, idxen) = (0, 0): the immediate offset is forwarded via as_i16imm.
def : Pat <
|
||||||
|
(vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
|
||||||
|
imm:$offset, 0, 0, imm:$glc, imm:$slc,
|
||||||
|
imm:$tfe)),
|
||||||
|
(offset $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
|
||||||
|
(as_i1imm $slc), (as_i1imm $tfe))
|
||||||
|
>;
|
||||||
|
|
||||||
|
// (offen, idxen) = (1, 0): the immediate-offset operand is matched as an
// unnamed `imm` and is not passed to the instruction.
// NOTE(review): a non-zero inst_offset therefore appears to be dropped for
// this form - confirm that callers always pass 0 here.
def : Pat <
|
||||||
|
(vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
|
||||||
|
imm, 1, 0, imm:$glc, imm:$slc,
|
||||||
|
imm:$tfe)),
|
||||||
|
(offen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
|
||||||
|
(as_i1imm $tfe))
|
||||||
|
>;
|
||||||
|
|
||||||
|
// (offen, idxen) = (0, 1): same operand forwarding as the (0, 0) form, but
// selecting the idxen instruction.
def : Pat <
|
||||||
|
(vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
|
||||||
|
imm:$offset, 0, 1, imm:$glc, imm:$slc,
|
||||||
|
imm:$tfe)),
|
||||||
|
(idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
|
||||||
|
(as_i1imm $slc), (as_i1imm $tfe))
|
||||||
|
>;
|
||||||
|
|
||||||
|
// (offen, idxen) = (1, 1): the only form that matches $vaddr as v2i32. As in
// the (1, 0) form, the immediate-offset operand is matched as an unnamed
// `imm` and not forwarded.
def : Pat <
|
||||||
|
(vt (int_SI_buffer_load_dword i128:$rsrc, v2i32:$vaddr, i32:$soffset,
|
||||||
|
imm, 1, 1, imm:$glc, imm:$slc,
|
||||||
|
imm:$tfe)),
|
||||||
|
(bothen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
|
||||||
|
(as_i1imm $tfe))
|
||||||
|
>;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Instantiate the MUBUF_Load_Dword patterns once per result type:
// i32 -> BUFFER_LOAD_DWORD_*, v2i32 -> BUFFER_LOAD_DWORDX2_*,
// v4i32 -> BUFFER_LOAD_DWORDX4_*. Each instantiation passes the _OFFSET,
// _OFFEN, _IDXEN and _BOTHEN records of that family, in that order.
defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
|
||||||
|
BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
|
||||||
|
defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
|
||||||
|
BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
|
||||||
|
defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
|
||||||
|
BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// MTBUF Patterns
|
// MTBUF Patterns
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -38,6 +38,20 @@ let TargetPrefix = "SI", isTarget = 1 in {
|
|||||||
llvm_i32_ty], // tfe(imm)
|
llvm_i32_ty], // tfe(imm)
|
||||||
[]>;
|
[]>;
|
||||||
|
|
||||||
|
// Fully-flexible BUFFER_LOAD_DWORD_* except for the ADDR64 bit, which is not exposed
// One overloaded result (vdata) and nine operands, in the order annotated
// below: rsrc, vaddr, soffset, inst_offset, offen, idxen, glc, slc, tfe.
// The only attribute attached is IntrReadArgMem.
// NOTE(review): rsrc is declared llvm_anyint_ty here, while the selection
// patterns in this change match it as i128 and the accompanying test passes
// a <16 x i8> value - confirm the intended operand type.
|
||||||
|
def int_SI_buffer_load_dword : Intrinsic <
|
||||||
|
[llvm_anyint_ty], // vdata(VGPR), overloaded for types i32, v2i32, v4i32
|
||||||
|
[llvm_anyint_ty, // rsrc(SGPR)
|
||||||
|
llvm_anyint_ty, // vaddr(VGPR)
|
||||||
|
llvm_i32_ty, // soffset(SGPR)
|
||||||
|
llvm_i32_ty, // inst_offset(imm)
|
||||||
|
llvm_i32_ty, // offen(imm)
|
||||||
|
llvm_i32_ty, // idxen(imm)
|
||||||
|
llvm_i32_ty, // glc(imm)
|
||||||
|
llvm_i32_ty, // slc(imm)
|
||||||
|
llvm_i32_ty], // tfe(imm)
|
||||||
|
[IntrReadArgMem]>;
|
||||||
|
|
||||||
def int_SI_sendmsg : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
def int_SI_sendmsg : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
|
||||||
class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
|
class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||||
|
40
test/CodeGen/R600/llvm.SI.load.dword.ll
Normal file
40
test/CodeGen/R600/llvm.SI.load.dword.ll
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
;RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck %s
|
||||||
|
|
||||||
|
; Example of a simple geometry shader loading vertex attributes from the
|
||||||
|
; ESGS ring buffer
|
||||||
|
|
||||||
|
; CHECK-LABEL: @main
|
||||||
|
; CHECK: BUFFER_LOAD_DWORD
|
||||||
|
; CHECK: BUFFER_LOAD_DWORD
|
||||||
|
; CHECK: BUFFER_LOAD_DWORD
|
||||||
|
; CHECK: BUFFER_LOAD_DWORD
|
||||||
|
|
||||||
|
; Issues four llvm.SI.buffer.load.dword calls against the same resource
; descriptor, one per (offen, idxen) combination, and exports the four results
; as floats. Operand order of each call, per the intrinsic definition added in
; this change: rsrc, vaddr, soffset, inst_offset, offen, idxen, glc, slc, tfe.
; Every call passes soffset = 0, inst_offset = 0, glc = 1, slc = 1, tfe = 0.
define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [2 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32, i32, i32, i32) #0 {
|
||||||
|
main_body:
|
||||||
|
; %11 = element 1 of the [2 x <16 x i8>] array argument %3; used as rsrc below.
%10 = getelementptr [2 x <16 x i8>] addrspace(2)* %3, i64 0, i32 1
|
||||||
|
%11 = load <16 x i8> addrspace(2)* %10, !tbaa !0
|
||||||
|
; %12 = argument %6 shifted left by 2; used as the i32 vaddr in two calls.
%12 = shl i32 %6, 2
|
||||||
|
; offen = 0, idxen = 0, vaddr = 0.
%13 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0)
|
||||||
|
%14 = bitcast i32 %13 to float
|
||||||
|
; offen = 1, idxen = 0, vaddr = %12.
%15 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0)
|
||||||
|
%16 = bitcast i32 %15 to float
|
||||||
|
; offen = 0, idxen = 1, vaddr = %12.
%17 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %11, i32 %12, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0)
|
||||||
|
%18 = bitcast i32 %17 to float
|
||||||
|
; offen = 1, idxen = 1, with a <2 x i32> vaddr of zeros (v2i32 overload).
%19 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8> %11, <2 x i32> <i32 0, i32 0>, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 0)
|
||||||
|
%20 = bitcast i32 %19 to float
|
||||||
|
; Export the four loaded values so none of the loads is dead.
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %14, float %16, float %18, float %20)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Function Attrs: nounwind readonly
|
||||||
|
declare i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32) #1
|
||||||
|
|
||||||
|
; Function Attrs: nounwind readonly
|
||||||
|
declare i32 @llvm.SI.buffer.load.dword.i32.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32) #1
|
||||||
|
|
||||||
|
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
|
||||||
|
|
||||||
|
attributes #0 = { "ShaderType"="1" }
|
||||||
|
attributes #1 = { nounwind readonly }
|
||||||
|
|
||||||
|
!0 = metadata !{metadata !"const", null, i32 1}
|
Loading…
Reference in New Issue
Block a user