From 1f4772305adabd0e9390afad61e0b6a92ab8b830 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 10 Jun 2014 19:18:28 +0000 Subject: [PATCH] R600: Use BCNT_INT for evergreen git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210569 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/AMDGPUISelLowering.cpp | 8 +++- lib/Target/R600/AMDGPUSubtarget.h | 8 ++++ lib/Target/R600/EvergreenInstructions.td | 2 + lib/Target/R600/SIISelLowering.cpp | 3 -- test/CodeGen/R600/ctpop.ll | 51 ++++++++++++++++++++++++ 5 files changed, 67 insertions(+), 5 deletions(-) diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index 8ee453de352..83d80f0d5c3 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -220,9 +220,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); } + if (!Subtarget->hasBCNT(32)) + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + + if (!Subtarget->hasBCNT(64)) + setOperationAction(ISD::CTPOP, MVT::i64, Expand); + for (MVT VT : { MVT::i32, MVT::i64 }) { - // TODO: Evergreen has BCNT_INT for CTPOP - setOperationAction(ISD::CTPOP, VT, Expand); setOperationAction(ISD::CTTZ, VT, Expand); setOperationAction(ISD::CTLZ, VT, Expand); } diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index b3b3a1a0edc..e1b5b3371fd 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -80,6 +80,14 @@ public: return hasBFE(); } + bool hasBCNT(unsigned Size) const { + if (Size == 32) + return (getGeneration() >= EVERGREEN); + + assert(Size == 64); + return (getGeneration() >= SOUTHERN_ISLANDS); + } + bool hasMulU24() const { return (getGeneration() >= EVERGREEN); } diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td index e09a0b2f786..0d0b9c751a4 100644 --- a/lib/Target/R600/EvergreenInstructions.td +++ b/lib/Target/R600/EvergreenInstructions.td @@ -326,6 +326,8 @@ def MUL_UINT24_eg : R600_2OP <0xB5, "MUL_UINT24", def DOT4_eg : DOT4_Common<0xBE>; defm CUBE_eg : CUBE_Common<0xC0>; +def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop>; + let hasSideEffects = 1 in { def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", [], VecALU>; } diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index d05d71add82..608aad2d3ca 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -211,9 +211,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::FRINT, MVT::f64, Legal); } - setOperationAction(ISD::CTPOP, MVT::i32, Legal); - setOperationAction(ISD::CTPOP, MVT::i64, Legal); - setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::SETCC); diff --git a/test/CodeGen/R600/ctpop.ll b/test/CodeGen/R600/ctpop.ll index 168b83cb3cd..e4d11e00369 100644 --- a/test/CodeGen/R600/ctpop.ll +++ b/test/CodeGen/R600/ctpop.ll @@ -13,6 +13,8 @@ declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone ; SI: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] ; SI: BUFFER_STORE_DWORD [[VRESULT]], ; SI: S_ENDPGM + +; EG: BCNT_INT define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone store i32 %ctpop, i32 addrspace(1)* %out, align 4 @@ -26,6 +28,8 @@ define void @s_ctpop_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { ; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VZERO]] ; SI: BUFFER_STORE_DWORD [[RESULT]], ; SI: S_ENDPGM + +; EG: BCNT_INT define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { %val = load i32 addrspace(1)* %in, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone @@ -42,6 +46,9 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali ; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]] ; SI: BUFFER_STORE_DWORD [[RESULT]], ; SI: S_ENDPGM + +; EG: BCNT_INT +; EG: BCNT_INT define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind { %val0 = load i32 addrspace(1)* %in0, align 4 %val1 = load i32 addrspace(1)* %in1, align 4 @@ -56,6 +63,9 @@ define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace ; SI: V_BCNT_U32_B32_e32 ; SI: V_BCNT_U32_B32_e32 ; SI: S_ENDPGM + +; EG: BCNT_INT +; EG: BCNT_INT define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind { %val = load <2 x i32> addrspace(1)* %in, align 8 %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone @@ -69,6 +79,11 @@ define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrs ; SI: V_BCNT_U32_B32_e32 ; SI: V_BCNT_U32_B32_e32 ; SI: S_ENDPGM + +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind { %val = load <4 x i32> addrspace(1)* %in, align 16 %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone @@ -86,6 +101,15 @@ define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrs ; SI: V_BCNT_U32_B32_e32 ; SI: V_BCNT_U32_B32_e32 ; SI: S_ENDPGM + +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind { %val = load <8 x i32> addrspace(1)* %in, align 32 %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone @@ -111,6 +135,23 @@ define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrs ; SI: V_BCNT_U32_B32_e32 ; SI: V_BCNT_U32_B32_e32 ; SI: S_ENDPGM + +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT +; EG: BCNT_INT define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind { %val = load <16 x i32> addrspace(1)* %in, align 32 %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone @@ -123,6 +164,8 @@ define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> ad ; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4 ; SI: BUFFER_STORE_DWORD [[RESULT]], ; SI: S_ENDPGM + +; EG: BCNT_INT define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { %val = load i32 addrspace(1)* %in, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone @@ -136,6 +179,8 @@ define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 ; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], 4 ; SI: BUFFER_STORE_DWORD [[RESULT]], ; SI: S_ENDPGM + +; EG: BCNT_INT define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { %val = load i32 addrspace(1)* %in, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone @@ -164,6 +209,8 @@ define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspa ; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] ; SI: BUFFER_STORE_DWORD [[RESULT]], ; SI: S_ENDPGM + +; EG: BCNT_INT define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind { %val = load i32 addrspace(1)* %in, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone @@ -178,6 +225,8 @@ define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1 ; SI: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] ; SI: BUFFER_STORE_DWORD [[RESULT]], ; SI: S_ENDPGM + +; EG: BCNT_INT define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind { %val = load i32 addrspace(1)* %in, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone @@ -192,6 +241,8 @@ define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspa ; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL]], [[VAR]] ; SI: BUFFER_STORE_DWORD [[RESULT]], ; SI: S_ENDPGM + +; EG: BCNT_INT define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind { %val = load i32 addrspace(1)* %in, align 4 %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone