R600/SI: Update MUBUF assembly string to match AMD proprietary compiler

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@214866 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tom Stellard 2014-08-05 14:48:12 +00:00
parent 9a7e35aecc
commit 94dfb8818d
8 changed files with 117 additions and 41 deletions

View File

@ -40,6 +40,50 @@ void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff);
}
/// Print the " offen" modifier when the immediate operand at \p OpNo is
/// non-zero; print nothing otherwise.
void AMDGPUInstPrinter::printOffen(const MCInst *MI, unsigned OpNo,
                                   raw_ostream &O) {
  const bool IsSet = MI->getOperand(OpNo).getImm() != 0;
  if (!IsSet)
    return;
  O << " offen";
}
/// Print the " idxen" modifier when the immediate operand at \p OpNo is
/// non-zero; print nothing otherwise.
void AMDGPUInstPrinter::printIdxen(const MCInst *MI, unsigned OpNo,
                                   raw_ostream &O) {
  const bool IsSet = MI->getOperand(OpNo).getImm() != 0;
  if (!IsSet)
    return;
  O << " idxen";
}
/// Print the " addr64" modifier when the immediate operand at \p OpNo is
/// non-zero; print nothing otherwise.
void AMDGPUInstPrinter::printAddr64(const MCInst *MI, unsigned OpNo,
                                    raw_ostream &O) {
  const bool IsSet = MI->getOperand(OpNo).getImm() != 0;
  if (!IsSet)
    return;
  O << " addr64";
}
/// Print " offset:" followed by the operand at \p OpNo formatted through
/// printU16ImmOperand. A zero immediate produces no output at all.
void AMDGPUInstPrinter::printMBUFOffset(const MCInst *MI, unsigned OpNo,
                                        raw_ostream &O) {
  // A zero offset is left out of the assembly string entirely.
  if (MI->getOperand(OpNo).getImm() == 0)
    return;

  O << " offset:";
  printU16ImmOperand(MI, OpNo, O);
}
/// Print the " glc" modifier when the immediate operand at \p OpNo is
/// non-zero; print nothing otherwise.
void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
                                 raw_ostream &O) {
  const bool IsSet = MI->getOperand(OpNo).getImm() != 0;
  if (!IsSet)
    return;
  O << " glc";
}
/// Print the " slc" modifier when the immediate operand at \p OpNo is
/// non-zero; print nothing otherwise.
void AMDGPUInstPrinter::printSLC(const MCInst *MI, unsigned OpNo,
                                 raw_ostream &O) {
  const bool IsSet = MI->getOperand(OpNo).getImm() != 0;
  if (!IsSet)
    return;
  O << " slc";
}
/// Print the " tfe" modifier when the immediate operand at \p OpNo is
/// non-zero; print nothing otherwise.
void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo,
                                 raw_ostream &O) {
  const bool IsSet = MI->getOperand(OpNo).getImm() != 0;
  if (!IsSet)
    return;
  O << " tfe";
}
void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
switch (reg) {
case AMDGPU::VCC:

View File

@ -35,6 +35,13 @@ private:
void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU32ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
// MUBUF modifier printers. Each flag printer below writes its keyword
// (with a leading space) only when the immediate operand at OpNo is non-zero,
// and writes nothing otherwise.
void printOffen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printIdxen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printAddr64(const MCInst *MI, unsigned OpNo, raw_ostream &O);
// Writes " offset:" followed by the operand as printed by printU16ImmOperand;
// writes nothing when the immediate is zero.
void printMBUFOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printGLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printTFE(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printRegOperand(unsigned RegNo, raw_ostream &O);
void printImmediate(uint32_t Imm, raw_ostream &O);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);

View File

@ -161,6 +161,32 @@ def sopp_brtarget : Operand<OtherVT> {
include "SIInstrFormats.td"
// Immediate operands used by the MUBUF instruction definitions. Each operand
// names the AMDGPUInstPrinter method that renders it in the assembly string.
let OperandType = "OPERAND_IMMEDIATE" in {

def offen       : Operand<i1>  { let PrintMethod = "printOffen"; }
def idxen       : Operand<i1>  { let PrintMethod = "printIdxen"; }
def addr64      : Operand<i1>  { let PrintMethod = "printAddr64"; }
def mbuf_offset : Operand<i16> { let PrintMethod = "printMBUFOffset"; }
def glc         : Operand<i1>  { let PrintMethod = "printGLC"; }
def slc         : Operand<i1>  { let PrintMethod = "printSLC"; }
def tfe         : Operand<i1>  { let PrintMethod = "printTFE"; }

} // End OperandType = "OPERAND_IMMEDIATE"
//===----------------------------------------------------------------------===//
// Complex patterns
//===----------------------------------------------------------------------===//
@ -899,42 +925,41 @@ multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass,
let offen = 0, idxen = 0, vaddr = 0 in {
def _OFFSET : MUBUF <op, (outs regClass:$vdata),
(ins SReg_128:$srsrc,
u16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
i1imm:$slc, i1imm:$tfe),
asm#" $vdata, $srsrc + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
mbuf_offset:$offset, SSrc_32:$soffset, glc:$glc,
slc:$slc, tfe:$tfe),
asm#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
}
let offen = 1, idxen = 0 in {
def _OFFEN : MUBUF <op, (outs regClass:$vdata),
(ins SReg_128:$srsrc, VReg_32:$vaddr,
SSrc_32:$soffset, u16imm:$offset, i1imm:$glc, i1imm:$slc,
i1imm:$tfe),
asm#" $vdata, $srsrc + $vaddr + $soffset + $offset, glc=$glc, slc=$slc, tfe=$tfe", []>;
SSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc,
tfe:$tfe),
asm#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
}
let offen = 0, idxen = 1 in {
def _IDXEN : MUBUF <op, (outs regClass:$vdata),
(ins SReg_128:$srsrc, VReg_32:$vaddr,
u16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
i1imm:$slc, i1imm:$tfe),
asm#" $vdata, $srsrc[$vaddr] + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
mbuf_offset:$offset, SSrc_32:$soffset, glc:$glc,
slc:$slc, tfe:$tfe),
asm#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>;
}
let offen = 1, idxen = 1 in {
def _BOTHEN : MUBUF <op, (outs regClass:$vdata),
(ins SReg_128:$srsrc, VReg_64:$vaddr,
SSrc_32:$soffset, i1imm:$glc,
i1imm:$slc, i1imm:$tfe),
asm#" $vdata, $srsrc[$vaddr[0]] + $vaddr[1] + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
SSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe),
asm#" $vdata, $vaddr, $srsrc, $soffset, idxen offen"#"$glc"#"$slc"#"$tfe", []>;
}
}
let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in {
def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
(ins SReg_128:$srsrc, VReg_64:$vaddr, u16imm:$offset),
asm#" $vdata, $srsrc + $vaddr + $offset",
(ins SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset),
asm#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset",
[(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc,
i64:$vaddr, u16imm:$offset)))]>;
i64:$vaddr, i16:$offset)))]>;
}
}
}
@ -945,9 +970,9 @@ multiclass MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass
def "" : MUBUF <
op, (outs),
(ins vdataClass:$vdata, SReg_128:$srsrc, VReg_32:$vaddr, SSrc_32:$soffset,
u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$slc,
i1imm:$tfe),
name#" $vdata, $srsrc, $vaddr, $soffset, $offset $offen $idxen $glc $slc $tfe",
mbuf_offset:$offset, offen:$offen, idxen:$idxen, glc:$glc, slc:$slc,
tfe:$tfe),
name#" $vdata, $vaddr, $srsrc, $soffset"#"$offen"#"$idxen"#"$offset"#"$glc"#"$slc"#"$tfe",
[]
> {
let addr64 = 0;
@ -955,10 +980,10 @@ multiclass MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass
def _ADDR64 : MUBUF <
op, (outs),
(ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr, u16imm:$offset),
name#" $vdata, $srsrc + $vaddr + $offset",
(ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset),
name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset",
[(st store_vt:$vdata,
(MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, u16imm:$offset))]> {
(MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i16:$offset))]> {
let mayLoad = 0;
let mayStore = 1;

View File

@ -15,7 +15,7 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index
; with the appropriate offset. We should fold this into the store.
; SI-ALLOCA: V_ADD_I32_e32 [[PTRREG:v[0-9]+]], 0, v{{[0-9]+}}
; SI-ALLOCA: BUFFER_STORE_DWORD {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], [[PTRREG]]
; SI-ALLOCA: BUFFER_STORE_DWORD {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}]
;
; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this
; alloca to a vector. It currently fails because it does not know how

View File

@ -236,8 +236,8 @@ define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspa
}
; FUNC-LABEL: @v_ctpop_i32_add_vvar_inv
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], {{.*}} + 0x0
; SI-DAG: BUFFER_LOAD_DWORD [[VAR:v[0-9]+]], {{.*}} + 0x10
; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 {{addr64$}}
; SI-DAG: BUFFER_LOAD_DWORD [[VAR:v[0-9]+]], {{.*}} offset:0x10
; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM

View File

@ -6,7 +6,7 @@
; MUBUF load with an immediate byte offset that fits into 12-bits
; CHECK-LABEL: @mubuf_load0
; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80
; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80
define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
%0 = getelementptr i32 addrspace(1)* %in, i64 1
@ -17,7 +17,7 @@ entry:
; MUBUF load with the largest possible immediate offset
; CHECK-LABEL: @mubuf_load1
; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0xfff ; encoding: [0xff,0x8f
; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0xfff ; encoding: [0xff,0x8f
define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%0 = getelementptr i8 addrspace(1)* %in, i64 4095
@ -28,7 +28,7 @@ entry:
; MUBUF load with an immediate byte offset that doesn't fit into 12-bits
; CHECK-LABEL: @mubuf_load2
; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x0 ; encoding: [0x00,0x80
; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 ; encoding: [0x00,0x80
define void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
%0 = getelementptr i32 addrspace(1)* %in, i64 1024
@ -40,7 +40,7 @@ entry:
; MUBUF load with a 12-bit immediate offset and a register offset
; CHECK-LABEL: @mubuf_load3
; CHECK-NOT: ADD
; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80
; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80
define void @mubuf_load3(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i64 %offset) {
entry:
%0 = getelementptr i32 addrspace(1)* %in, i64 %offset
@ -56,7 +56,7 @@ entry:
; MUBUF store with an immediate byte offset that fits into 12-bits
; CHECK-LABEL: @mubuf_store0
; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80
; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80
define void @mubuf_store0(i32 addrspace(1)* %out) {
entry:
%0 = getelementptr i32 addrspace(1)* %out, i64 1
@ -66,7 +66,7 @@ entry:
; MUBUF store with the largest possible immediate offset
; CHECK-LABEL: @mubuf_store1
; CHECK: BUFFER_STORE_BYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0xfff ; encoding: [0xff,0x8f
; CHECK: BUFFER_STORE_BYTE v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0xfff ; encoding: [0xff,0x8f
define void @mubuf_store1(i8 addrspace(1)* %out) {
entry:
@ -77,7 +77,7 @@ entry:
; MUBUF store with an immediate byte offset that doesn't fit into 12-bits
; CHECK-LABEL: @mubuf_store2
; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x0 ; encoding: [0x00,0x80
; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 ; encoding: [0x00,0x80
define void @mubuf_store2(i32 addrspace(1)* %out) {
entry:
%0 = getelementptr i32 addrspace(1)* %out, i64 1024
@ -88,7 +88,7 @@ entry:
; MUBUF store with a 12-bit immediate offset and a register offset
; CHECK-LABEL: @mubuf_store3
; CHECK-NOT: ADD
; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80
; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80
define void @mubuf_store3(i32 addrspace(1)* %out, i64 %offset) {
entry:
%0 = getelementptr i32 addrspace(1)* %out, i64 %offset

View File

@ -16,8 +16,8 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; SI-PROMOTE: DS_READ_B32
; SI-PROMOTE: DS_READ_B32
; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}
; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}
; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
entry:
%stack = alloca [5 x i32], align 4
@ -116,10 +116,10 @@ for.end:
; R600: MOVA_INT
; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}
; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}
; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
; SI-PROMOTE-NOT: MOVREL
; SI-PROMOTE: BUFFER_LOAD_SSHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] + v{{[0-9]+}} + s{{[0-9]+}}
; SI-PROMOTE: BUFFER_LOAD_SSHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}}
define void @short_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%0 = alloca [2 x i16]
@ -138,8 +138,8 @@ entry:
; R600: MOVA_INT
; SI-DAG: BUFFER_STORE_BYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}, 0x0
; SI-DAG: BUFFER_STORE_BYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}, 0x1
; SI-DAG: BUFFER_STORE_BYTE v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} {{offen$}}
; SI-DAG: BUFFER_STORE_BYTE v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:0x1
define void @char_array(i32 addrspace(1)* %out, i32 %index) {
entry:
%0 = alloca [2 x i8]

View File

@ -14,8 +14,8 @@
; Make sure we aren't using VGPRs for the srsrc operand of BUFFER_LOAD_*
; instructions
; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64
define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() #1