diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index 0927040cb5b..7451ff53eba 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -40,6 +40,50 @@ void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo, O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff); } +void AMDGPUInstPrinter::printOffen(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) + O << " offen"; +} + +void AMDGPUInstPrinter::printIdxen(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) + O << " idxen"; +} + +void AMDGPUInstPrinter::printAddr64(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) + O << " addr64"; +} + +void AMDGPUInstPrinter::printMBUFOffset(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) { + O << " offset:"; + printU16ImmOperand(MI, OpNo, O); + } +} + +void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) + O << " glc"; +} + +void AMDGPUInstPrinter::printSLC(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) + O << " slc"; +} + +void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) + O << " tfe"; +} + void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) { switch (reg) { case AMDGPU::VCC: diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h index 6ca717076cd..4236404d9b5 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h @@ -35,6 +35,13 @@ private: void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printU32ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printOffen(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printIdxen(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printAddr64(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printMBUFOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printGLC(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printSLC(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printTFE(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printRegOperand(unsigned RegNo, raw_ostream &O); void printImmediate(uint32_t Imm, raw_ostream &O); void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index f5cb8b3241f..eacfbc6a25b 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -161,6 +161,32 @@ def sopp_brtarget : Operand { include "SIInstrFormats.td" +let OperandType = "OPERAND_IMMEDIATE" in { + +def offen : Operand { + let PrintMethod = "printOffen"; +} +def idxen : Operand { + let PrintMethod = "printIdxen"; +} +def addr64 : Operand { + let PrintMethod = "printAddr64"; +} +def mbuf_offset : Operand { + let PrintMethod = "printMBUFOffset"; +} +def glc : Operand { + let PrintMethod = "printGLC"; +} +def slc : Operand { + let PrintMethod = "printSLC"; +} +def tfe : Operand { + let PrintMethod = "printTFE"; +} + +} // End OperandType = "OPERAND_IMMEDIATE" + //===----------------------------------------------------------------------===// // Complex patterns //===----------------------------------------------------------------------===// @@ -899,42 +925,41 @@ multiclass MUBUF_Load_Helper op, string asm, RegisterClass regClass, let offen = 0, idxen = 0, vaddr = 0 in { def _OFFSET : MUBUF ; + mbuf_offset:$offset, SSrc_32:$soffset, glc:$glc, + slc:$slc, tfe:$tfe), + asm#" $vdata, $srsrc, $soffset"#"$offset"#"$glc"#"$slc"#"$tfe", []>; } let offen = 1, idxen = 0 in { def _OFFEN : MUBUF ; + SSrc_32:$soffset, mbuf_offset:$offset, glc:$glc, slc:$slc, + tfe:$tfe), + asm#" $vdata, $vaddr, $srsrc, $soffset offen"#"$offset"#"$glc"#"$slc"#"$tfe", []>; } let offen = 0, idxen = 1 in { def _IDXEN : MUBUF ; + mbuf_offset:$offset, SSrc_32:$soffset, glc:$glc, + slc:$slc, tfe:$tfe), + asm#" $vdata, $vaddr, $srsrc, $soffset idxen"#"$offset"#"$glc"#"$slc"#"$tfe", []>; } let offen = 1, idxen = 1 in { def _BOTHEN : MUBUF ; + SSrc_32:$soffset, glc:$glc, slc:$slc, tfe:$tfe), + asm#" $vdata, $vaddr, $srsrc, $soffset, idxen offen"#"$glc"#"$slc"#"$tfe", []>; } } let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in { def _ADDR64 : MUBUF ; + i64:$vaddr, i16:$offset)))]>; } } } @@ -945,9 +970,9 @@ multiclass MUBUF_Store_Helper op, string name, RegisterClass vdataClass def "" : MUBUF < op, (outs), (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_32:$vaddr, SSrc_32:$soffset, - u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$slc, - i1imm:$tfe), - name#" $vdata, $srsrc, $vaddr, $soffset, $offset $offen $idxen $glc $slc $tfe", + mbuf_offset:$offset, offen:$offen, idxen:$idxen, glc:$glc, slc:$slc, + tfe:$tfe), + name#" $vdata, $vaddr, $srsrc, $soffset"#"$offen"#"$idxen"#"$offset"#"$glc"#"$slc"#"$tfe", [] > { let addr64 = 0; @@ -955,10 +980,10 @@ multiclass MUBUF_Store_Helper op, string name, RegisterClass vdataClass def _ADDR64 : MUBUF < op, (outs), - (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr, u16imm:$offset), - name#" $vdata, $srsrc + $vaddr + $offset", + (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr, mbuf_offset:$offset), + name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset", [(st store_vt:$vdata, - (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, u16imm:$offset))]> { + (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i16:$offset))]> { let mayLoad = 0; let mayStore = 1; diff --git a/test/CodeGen/R600/array-ptr-calc-i32.ll b/test/CodeGen/R600/array-ptr-calc-i32.ll index a2b69782351..1950074bbc3 100644 --- a/test/CodeGen/R600/array-ptr-calc-i32.ll +++ b/test/CodeGen/R600/array-ptr-calc-i32.ll @@ -15,7 +15,7 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate ; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index ; with the appropriate offset. We should fold this into the store. ; SI-ALLOCA: V_ADD_I32_e32 [[PTRREG:v[0-9]+]], 0, v{{[0-9]+}} -; SI-ALLOCA: BUFFER_STORE_DWORD {{v[0-9]+}}, s[{{[0-9]+:[0-9]+}}], [[PTRREG]] +; SI-ALLOCA: BUFFER_STORE_DWORD {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}] ; ; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this ; alloca to a vector. It currently fails because it does not know how diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/R600/ctpop.ll index 22a3022145f..f26d30d7b80 100644 --- a/test/CodeGen/R600/ctpop.ll +++ b/test/CodeGen/R600/ctpop.ll @@ -236,8 +236,8 @@ define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspa } ; FUNC-LABEL: @v_ctpop_i32_add_vvar_inv -; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], {{.*}} + 0x0 -; SI-DAG: BUFFER_LOAD_DWORD [[VAR:v[0-9]+]], {{.*}} + 0x10 +; SI-DAG: BUFFER_LOAD_DWORD [[VAL:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 {{addr64$}} +; SI-DAG: BUFFER_LOAD_DWORD [[VAR:v[0-9]+]], {{.*}} offset:0x10 ; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] ; SI: BUFFER_STORE_DWORD [[RESULT]], ; SI: S_ENDPGM diff --git a/test/CodeGen/R600/mubuf.ll b/test/CodeGen/R600/mubuf.ll index f465d3dad8f..75b1b6d0dc0 100644 --- a/test/CodeGen/R600/mubuf.ll +++ b/test/CodeGen/R600/mubuf.ll @@ -6,7 +6,7 @@ ; MUBUF load with an immediate byte offset that fits into 12-bits ; CHECK-LABEL: @mubuf_load0 -; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80 +; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80 define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { entry: %0 = getelementptr i32 addrspace(1)* %in, i64 1 @@ -17,7 +17,7 @@ entry: ; MUBUF load with the largest possible immediate offset ; CHECK-LABEL: @mubuf_load1 -; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0xfff ; encoding: [0xff,0x8f +; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0xfff ; encoding: [0xff,0x8f define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) { entry: %0 = getelementptr i8 addrspace(1)* %in, i64 4095 @@ -28,7 +28,7 @@ entry: ; MUBUF load with an immediate byte offset that doesn't fit into 12-bits ; CHECK-LABEL: @mubuf_load2 -; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x0 ; encoding: [0x00,0x80 +; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 ; encoding: [0x00,0x80 define void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { entry: %0 = getelementptr i32 addrspace(1)* %in, i64 1024 @@ -40,7 +40,7 @@ entry: ; MUBUF load with a 12-bit immediate offset and a register offset ; CHECK-LABEL: @mubuf_load3 ; CHECK-NOT: ADD -; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80 +; CHECK: BUFFER_LOAD_DWORD v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80 define void @mubuf_load3(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i64 %offset) { entry: %0 = getelementptr i32 addrspace(1)* %in, i64 %offset @@ -56,7 +56,7 @@ entry: ; MUBUF store with an immediate byte offset that fits into 12-bits ; CHECK-LABEL: @mubuf_store0 -; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80 +; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80 define void @mubuf_store0(i32 addrspace(1)* %out) { entry: %0 = getelementptr i32 addrspace(1)* %out, i64 1 @@ -66,7 +66,7 @@ entry: ; MUBUF store with the largest possible immediate offset ; CHECK-LABEL: @mubuf_store1 -; CHECK: BUFFER_STORE_BYTE v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0xfff ; encoding: [0xff,0x8f +; CHECK: BUFFER_STORE_BYTE v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0xfff ; encoding: [0xff,0x8f define void @mubuf_store1(i8 addrspace(1)* %out) { entry: @@ -77,7 +77,7 @@ entry: ; MUBUF store with an immediate byte offset that doesn't fit into 12-bits ; CHECK-LABEL: @mubuf_store2 -; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x0 ; encoding: [0x00,0x80 +; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 ; encoding: [0x00,0x80 define void @mubuf_store2(i32 addrspace(1)* %out) { entry: %0 = getelementptr i32 addrspace(1)* %out, i64 1024 @@ -88,7 +88,7 @@ entry: ; MUBUF store with a 12-bit immediate offset and a register offset ; CHECK-LABEL: @mubuf_store3 ; CHECK-NOT: ADD -; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, s[{{[0-9]:[0-9]}}] + v[{{[0-9]:[0-9]}}] + 0x4 ; encoding: [0x04,0x80 +; CHECK: BUFFER_STORE_DWORD v{{[0-9]}}, v[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0 addr64 offset:0x4 ; encoding: [0x04,0x80 define void @mubuf_store3(i32 addrspace(1)* %out, i64 %offset) { entry: %0 = getelementptr i32 addrspace(1)* %out, i64 %offset diff --git a/test/CodeGen/R600/private-memory.ll b/test/CodeGen/R600/private-memory.ll index 124d9fa6450..9da3d32bb8f 100644 --- a/test/CodeGen/R600/private-memory.ll +++ b/test/CodeGen/R600/private-memory.ll @@ -16,8 +16,8 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; SI-PROMOTE: DS_READ_B32 ; SI-PROMOTE: DS_READ_B32 -; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}} -; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}} +; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} +; SI-ALLOCA: BUFFER_STORE_DWORD v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { entry: %stack = alloca [5 x i32], align 4 @@ -116,10 +116,10 @@ for.end: ; R600: MOVA_INT -; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}} -; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}} +; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} +; SI-PROMOTE: BUFFER_STORE_SHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} ; SI-PROMOTE-NOT: MOVREL -; SI-PROMOTE: BUFFER_LOAD_SSHORT v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] + v{{[0-9]+}} + s{{[0-9]+}} +; SI-PROMOTE: BUFFER_LOAD_SSHORT v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} define void @short_array(i32 addrspace(1)* %out, i32 %index) { entry: %0 = alloca [2 x i16] @@ -138,8 +138,8 @@ entry: ; R600: MOVA_INT -; SI-DAG: BUFFER_STORE_BYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}, 0x0 -; SI-DAG: BUFFER_STORE_BYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, s{{[0-9]+}}, 0x1 +; SI-DAG: BUFFER_STORE_BYTE v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} {{offen$}} +; SI-DAG: BUFFER_STORE_BYTE v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen offset:0x1 define void @char_array(i32 addrspace(1)* %out, i32 %index) { entry: %0 = alloca [2 x i8] diff --git a/test/CodeGen/R600/salu-to-valu.ll b/test/CodeGen/R600/salu-to-valu.ll index e7719b6feb3..49ec62803ae 100644 --- a/test/CodeGen/R600/salu-to-valu.ll +++ b/test/CodeGen/R600/salu-to-valu.ll @@ -14,8 +14,8 @@ ; Make sure we aren't using VGPR's for the srsrc operand of BUFFER_LOAD_* ; instructions -; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] -; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] +; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 +; CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { entry: %0 = call i32 @llvm.r600.read.tidig.x() #1