From 28233d3a63f958f80ebd6f8095405420ba3dd3fa Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 30 Sep 2014 19:49:48 +0000 Subject: [PATCH] R600/SI: Fix printing of clamp and omod No tests for omod since nothing uses it yet, but this should get rid of the remaining annoying trailing zeros after some instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218692 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../R600/InstPrinter/AMDGPUInstPrinter.cpp | 17 +++++++ .../R600/InstPrinter/AMDGPUInstPrinter.h | 2 + lib/Target/R600/SIDefines.h | 9 ++++ lib/Target/R600/SIInstrInfo.td | 44 ++++++++++++------- test/CodeGen/R600/fneg.f64.ll | 2 +- test/CodeGen/R600/fneg.ll | 2 +- test/CodeGen/R600/frem.ll | 2 +- test/CodeGen/R600/imm.ll | 18 ++++---- test/CodeGen/R600/llvm.AMDGPU.clamp.ll | 6 +-- 9 files changed, 70 insertions(+), 32 deletions(-) diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index 9bb8beca7bb..3a2055678dd 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -294,6 +294,23 @@ void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo, printIfSet(MI, OpNo, O, "_SAT"); } +void AMDGPUInstPrinter::printClampSI(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).getImm()) + O << " clamp"; +} + +void AMDGPUInstPrinter::printOModSI(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + int Imm = MI->getOperand(OpNo).getImm(); + if (Imm == SIOutMods::MUL2) + O << " mul:2"; + else if (Imm == SIOutMods::MUL4) + O << " mul:4"; + else if (Imm == SIOutMods::DIV2) + O << " div:2"; +} + void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O) { int32_t Imm = MI->getOperand(OpNo).getImm(); diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h index 83f42a18a46..6954dafe7c3 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h @@ -52,6 +52,8 @@ private: StringRef Asm, StringRef Default = ""); static void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O); static void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O); + static void printClampSI(const MCInst *MI, unsigned OpNo, raw_ostream &O); + static void printOModSI(const MCInst *MI, unsigned OpNo, raw_ostream &O); static void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O); static void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O); static void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O); diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h index 29be2e67169..2e7dab6cb56 100644 --- a/lib/Target/R600/SIDefines.h +++ b/lib/Target/R600/SIDefines.h @@ -43,6 +43,15 @@ namespace SISrcMods { }; } +namespace SIOutMods { + enum { + NONE = 0, + MUL2 = 1, + MUL4 = 2, + DIV2 = 3 + }; +} + #define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028 #define R_00B02C_SPI_SHADER_PGM_RSRC2_PS 0x00B02C #define S_00B02C_EXTRA_LDS_SIZE(x) (((x) & 0xFF) << 8) diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index f2c13079325..6b7da98e4bc 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -185,6 +185,14 @@ def tfe : Operand { let PrintMethod = "printTFE"; } +def omod : Operand { + let PrintMethod = "printOModSI"; +} + +def ClampMod : Operand { + let PrintMethod = "printClampSI"; +} + } // End OperandType = "OPERAND_IMMEDIATE" //===----------------------------------------------------------------------===// @@ -399,7 +407,7 @@ class getIns64 { // instruction. class getAsm64 { string src0 = "$src0_modifiers,"; - string src1 = !if(!eq(NumSrcArgs, 1), "", " $src1_modifiers,"); - string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers,", ""); + string src1 = !if(!eq(NumSrcArgs, 1), "", + !if(!eq(NumSrcArgs, 2), " $src1_modifiers", + " $src1_modifiers,")); + string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", ""); string ret = !if(!eq(HasModifiers, 0), getAsm32.ret, - " $dst, "#src0#src1#src2#" $clamp, $omod"); + " $dst, "#src0#src1#src2#"$clamp"#"$omod"); } @@ -632,7 +642,7 @@ multiclass VOP1Inst op, string opName, VOPProfile P, P.Ins64, P.Asm64, !if(P.HasModifiers, [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, - i32:$src0_modifiers, i32:$clamp, i32:$omod))))], + i32:$src0_modifiers, i1:$clamp, i32:$omod))))], [(set P.DstVT:$dst, (node P.Src0VT:$src0))]), P.HasModifiers >; @@ -664,7 +674,7 @@ multiclass VOP2Inst op, string opName, VOPProfile P, !if(P.HasModifiers, [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, - i32:$clamp, i32:$omod)), + i1:$clamp, i32:$omod)), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]), revOp, P.HasModifiers @@ -692,7 +702,7 @@ multiclass VOP2bInst op, string opName, VOPProfile P, !if(P.HasModifiers, [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, - i32:$clamp, i32:$omod)), + i1:$clamp, i32:$omod)), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]), revOp, P.HasModifiers @@ -719,7 +729,7 @@ multiclass VOPCInst op, string opName, !if(P.HasModifiers, [(set i1:$dst, (setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, - i32:$clamp, i32:$omod)), + i1:$clamp, i32:$omod)), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), cond))], [(set i1:$dst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]), @@ -767,7 +777,7 @@ multiclass VOP3Inst op, string opName, VOPProfile P, !if(P.HasModifiers, [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, - i32:$clamp, i32:$omod)), + i1:$clamp, i32:$omod)), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))], [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1, @@ -776,14 +786,14 @@ multiclass VOP3Inst op, string opName, VOPProfile P, !if(P.HasModifiers, [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, - i32:$clamp, i32:$omod)), + i1:$clamp, i32:$omod)), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]) /* P.NumSrcArgs == 1 */, !if(P.HasModifiers, [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, - i32:$clamp, i32:$omod))))], + i1:$clamp, i32:$omod))))], [(set P.DstVT:$dst, (node P.Src0VT:$src0))]))), P.NumSrcArgs, P.HasModifiers >; @@ -795,8 +805,8 @@ multiclass VOP3b_Helper op, RegisterClass vrc, RegisterClass arc, (ins InputModsNoDefault:$src0_modifiers, arc:$src0, InputModsNoDefault:$src1_modifiers, arc:$src1, InputModsNoDefault:$src2_modifiers, arc:$src2, - i32imm:$clamp, i32imm:$omod), - opName#" $dst0, $dst1, $src0_modifiers, $src1_modifiers, $src2_modifiers, $clamp, $omod", pattern, + ClampMod:$clamp, i32imm:$omod), + opName#" $dst0, $dst1, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", pattern, opName, opName, 1, 1 >; @@ -808,13 +818,13 @@ multiclass VOP3b_32 op, string opName, list pattern> : class Vop3ModPat : Pat< - (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i32:$clamp, i32:$omod)), + (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))), (Inst i32:$src0_modifiers, P.Src0VT:$src0, i32:$src1_modifiers, P.Src1VT:$src1, i32:$src2_modifiers, P.Src2VT:$src2, - i32:$clamp, + i1:$clamp, i32:$omod)>; //===----------------------------------------------------------------------===// diff --git a/test/CodeGen/R600/fneg.f64.ll b/test/CodeGen/R600/fneg.f64.ll index f0b341b7a6b..b8a4a9fee83 100644 --- a/test/CodeGen/R600/fneg.f64.ll +++ b/test/CodeGen/R600/fneg.f64.ll @@ -39,7 +39,7 @@ define void @fneg_v4f64(<4 x double> addrspace(1)* nocapture %out, <4 x double> ; FUNC-LABEL: @fneg_free_f64 ; FIXME: Unnecessary copy to VGPRs -; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]}}, 0, 0 +; SI: V_ADD_F64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, -{{v\[[0-9]+:[0-9]+\]$}} define void @fneg_free_f64(double addrspace(1)* %out, i64 %in) { %bc = bitcast i64 %in to double %fsub = fsub double 0.0, %bc diff --git a/test/CodeGen/R600/fneg.ll b/test/CodeGen/R600/fneg.ll index 8631301b49c..4fd2239dddd 100644 --- a/test/CodeGen/R600/fneg.ll +++ b/test/CodeGen/R600/fneg.ll @@ -48,7 +48,7 @@ define void @fneg_v4f32(<4 x float> addrspace(1)* nocapture %out, <4 x float> %i ; R600: -KC0[2].Z ; XXX: We could use V_ADD_F32_e64 with the negate bit here instead. -; SI: V_SUB_F32_e64 v{{[0-9]}}, 0.0, s{{[0-9]}}, 0, 0 +; SI: V_SUB_F32_e64 v{{[0-9]}}, 0.0, s{{[0-9]+$}} define void @fneg_free_f32(float addrspace(1)* %out, i32 %in) { %bc = bitcast i32 %in to float %fsub = fsub float 0.0, %bc diff --git a/test/CodeGen/R600/frem.ll b/test/CodeGen/R600/frem.ll index d41538483c3..de2a5bcf419 100644 --- a/test/CodeGen/R600/frem.ll +++ b/test/CodeGen/R600/frem.ll @@ -27,7 +27,7 @@ define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, ; SI: V_RCP_F32_e32 [[INVY:v[0-9]+]], [[Y]] ; SI: V_MUL_F32_e32 [[DIV:v[0-9]+]], [[INVY]], [[X]] ; SI: V_TRUNC_F32_e32 [[TRUNC:v[0-9]+]], [[DIV]] -; SI: V_MAD_F32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]], +; SI: V_MAD_F32 [[RESULT:v[0-9]+]], -[[TRUNC]], [[Y]], [[X]] ; SI: BUFFER_STORE_DWORD [[RESULT]] ; SI: S_ENDPGM define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, diff --git a/test/CodeGen/R600/imm.ll b/test/CodeGen/R600/imm.ll index e1d680c4ddb..5eecb035f6a 100644 --- a/test/CodeGen/R600/imm.ll +++ b/test/CodeGen/R600/imm.ll @@ -104,7 +104,7 @@ define void @store_literal_imm_f32(float addrspace(1)* %out) { ; CHECK-LABEL: @add_inline_imm_0.0_f32 ; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]] -; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 0.0, +; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 0.0{{$}} ; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]] define void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) { %y = fadd float %x, 0.0 @@ -114,7 +114,7 @@ define void @add_inline_imm_0.0_f32(float addrspace(1)* %out, float %x) { ; CHECK-LABEL: @add_inline_imm_0.5_f32 ; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]] -; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 0.5, +; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 0.5{{$}} ; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]] define void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) { %y = fadd float %x, 0.5 @@ -124,7 +124,7 @@ define void @add_inline_imm_0.5_f32(float addrspace(1)* %out, float %x) { ; CHECK-LABEL: @add_inline_imm_neg_0.5_f32 ; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]] -; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], -0.5, +; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], -0.5{{$}} ; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]] define void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) { %y = fadd float %x, -0.5 @@ -134,7 +134,7 @@ define void @add_inline_imm_neg_0.5_f32(float addrspace(1)* %out, float %x) { ; CHECK-LABEL: @add_inline_imm_1.0_f32 ; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]] -; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 1.0, +; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 1.0{{$}} ; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]] define void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) { %y = fadd float %x, 1.0 @@ -144,7 +144,7 @@ define void @add_inline_imm_1.0_f32(float addrspace(1)* %out, float %x) { ; CHECK-LABEL: @add_inline_imm_neg_1.0_f32 ; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]] -; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], -1.0, +; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], -1.0{{$}} ; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]] define void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) { %y = fadd float %x, -1.0 @@ -154,7 +154,7 @@ define void @add_inline_imm_neg_1.0_f32(float addrspace(1)* %out, float %x) { ; CHECK-LABEL: @add_inline_imm_2.0_f32 ; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]] -; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 2.0, +; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 2.0{{$}} ; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]] define void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) { %y = fadd float %x, 2.0 @@ -164,7 +164,7 @@ define void @add_inline_imm_2.0_f32(float addrspace(1)* %out, float %x) { ; CHECK-LABEL: @add_inline_imm_neg_2.0_f32 ; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]] -; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], -2.0, +; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], -2.0{{$}} ; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]] define void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) { %y = fadd float %x, -2.0 @@ -174,7 +174,7 @@ define void @add_inline_imm_neg_2.0_f32(float addrspace(1)* %out, float %x) { ; CHECK-LABEL: @add_inline_imm_4.0_f32 ; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]] -; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 4.0, +; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], 4.0{{$}} ; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]] define void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) { %y = fadd float %x, 4.0 @@ -184,7 +184,7 @@ define void @add_inline_imm_4.0_f32(float addrspace(1)* %out, float %x) { ; CHECK-LABEL: @add_inline_imm_neg_4.0_f32 ; CHECK: S_LOAD_DWORD [[VAL:s[0-9]+]] -; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], -4.0, +; CHECK: V_ADD_F32_e64 [[REG:v[0-9]+]], [[VAL]], -4.0{{$}} ; CHECK-NEXT: BUFFER_STORE_DWORD [[REG]] define void @add_inline_imm_neg_4.0_f32(float addrspace(1)* %out, float %x) { %y = fadd float %x, -4.0 diff --git a/test/CodeGen/R600/llvm.AMDGPU.clamp.ll b/test/CodeGen/R600/llvm.AMDGPU.clamp.ll index cafca080df5..c12732c6004 100644 --- a/test/CodeGen/R600/llvm.AMDGPU.clamp.ll +++ b/test/CodeGen/R600/llvm.AMDGPU.clamp.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare float @llvm.AMDGPU.clamp.f32(float, float, float) nounwind readnone @@ -6,7 +6,7 @@ declare float @llvm.AMDIL.clamp.f32(float, float, float) nounwind readnone ; FUNC-LABEL: @clamp_0_1_f32 ; SI: S_LOAD_DWORD [[ARG:s[0-9]+]], -; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]], 1, 0 +; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}} ; SI: BUFFER_STORE_DWORD [[RESULT]] ; SI: S_ENDPGM @@ -19,7 +19,7 @@ define void @clamp_0_1_f32(float addrspace(1)* %out, float %src) nounwind { ; FUNC-LABEL: @clamp_0_1_amdil_legacy_f32 ; SI: S_LOAD_DWORD [[ARG:s[0-9]+]], -; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]], 1, 0 +; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], 0, [[ARG]] clamp{{$}} ; SI: BUFFER_STORE_DWORD [[RESULT]] define void @clamp_0_1_amdil_legacy_f32(float addrspace(1)* %out, float %src) nounwind { %clamp = call float @llvm.AMDIL.clamp.f32(float %src, float 0.0, float 1.0) nounwind readnone