From e5bd58468373cf04455015f4fbe34c962259e233 Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Thu, 11 Dec 2014 22:15:39 +0000
Subject: [PATCH] R600/SI: Make more unordered comparisons legal

This saves a second compare and an and / or by using
the unordered comparison instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224066 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/AMDGPUInstructions.td |  2 +-
 lib/Target/R600/SIISelLowering.cpp    |  9 ---------
 lib/Target/R600/SIInstructions.td     | 16 ++++++++--------
 test/CodeGen/R600/fcmp64.ll           |  8 ++++----
 test/CodeGen/R600/setcc.ll            | 27 +++++++++++----------------
 test/CodeGen/R600/setcc64.ll          | 25 +++++++++----------------
 6 files changed, 33 insertions(+), 54 deletions(-)

diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index c746d2e1f7e..4e536c37b0b 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -103,7 +103,7 @@ def COND_O : PatLeaf <(cond), [{return N->get() == ISD::SETO;}]>;
 def COND_UO : PatLeaf <(cond), [{return N->get() == ISD::SETUO;}]>;
 
 //===----------------------------------------------------------------------===//
-// PatLeafs for unsigned comparisons
+// PatLeafs for unsigned / unordered comparisons
 //===----------------------------------------------------------------------===//
 
 def COND_UEQ : PatLeaf <(cond), [{return N->get() == ISD::SETUEQ;}]>;
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 03068462290..197e368ab38 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -63,16 +63,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
 
   // Condition Codes
   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
-  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
-  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
-  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
-  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
-
   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
-  setCondCodeAction(ISD::SETUGE, MVT::f64, Expand);
-  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
-  setCondCodeAction(ISD::SETULE, MVT::f64, Expand);
-  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
 
   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 83861799cc1..c12ffe78bbc 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -510,12 +510,12 @@ defm V_CMP_LG_F32 : VOPC_F32 , "v_cmp_lg_f32", COND_ONE>;
 defm V_CMP_GE_F32 : VOPC_F32 , "v_cmp_ge_f32", COND_OGE>;
 defm V_CMP_O_F32 : VOPC_F32 , "v_cmp_o_f32", COND_O>;
 defm V_CMP_U_F32 : VOPC_F32 , "v_cmp_u_f32", COND_UO>;
-defm V_CMP_NGE_F32 : VOPC_F32 , "v_cmp_nge_f32">;
+defm V_CMP_NGE_F32 : VOPC_F32 , "v_cmp_nge_f32", COND_ULT>;
 defm V_CMP_NLG_F32 : VOPC_F32 , "v_cmp_nlg_f32">;
-defm V_CMP_NGT_F32 : VOPC_F32 , "v_cmp_ngt_f32">;
-defm V_CMP_NLE_F32 : VOPC_F32 , "v_cmp_nle_f32">;
+defm V_CMP_NGT_F32 : VOPC_F32 , "v_cmp_ngt_f32", COND_ULE>;
+defm V_CMP_NLE_F32 : VOPC_F32 , "v_cmp_nle_f32", COND_UGT>;
 defm V_CMP_NEQ_F32 : VOPC_F32 , "v_cmp_neq_f32", COND_UNE>;
-defm V_CMP_NLT_F32 : VOPC_F32 , "v_cmp_nlt_f32">;
+defm V_CMP_NLT_F32 : VOPC_F32 , "v_cmp_nlt_f32", COND_UGE>;
 defm V_CMP_TRU_F32 : VOPC_F32 , "v_cmp_tru_f32">;
 
 let hasSideEffects = 1 in {
@@ -548,12 +548,12 @@ defm V_CMP_LG_F64 : VOPC_F64 , "v_cmp_lg_f64", COND_ONE>;
 defm V_CMP_GE_F64 : VOPC_F64 , "v_cmp_ge_f64", COND_OGE>;
 defm V_CMP_O_F64 : VOPC_F64 , "v_cmp_o_f64", COND_O>;
 defm V_CMP_U_F64 : VOPC_F64 , "v_cmp_u_f64", COND_UO>;
-defm V_CMP_NGE_F64 : VOPC_F64 , "v_cmp_nge_f64">;
+defm V_CMP_NGE_F64 : VOPC_F64 , "v_cmp_nge_f64", COND_ULT>;
 defm V_CMP_NLG_F64 : VOPC_F64 , "v_cmp_nlg_f64">;
-defm V_CMP_NGT_F64 : VOPC_F64 , "v_cmp_ngt_f64">;
-defm V_CMP_NLE_F64 : VOPC_F64 , "v_cmp_nle_f64">;
+defm V_CMP_NGT_F64 : VOPC_F64 , "v_cmp_ngt_f64", COND_ULE>;
+defm V_CMP_NLE_F64 : VOPC_F64 , "v_cmp_nle_f64", COND_UGT>;
 defm V_CMP_NEQ_F64 : VOPC_F64 , "v_cmp_neq_f64", COND_UNE>;
-defm V_CMP_NLT_F64 : VOPC_F64 , "v_cmp_nlt_f64">;
+defm V_CMP_NLT_F64 : VOPC_F64 , "v_cmp_nlt_f64", COND_UGE>;
 defm V_CMP_TRU_F64 : VOPC_F64 , "v_cmp_tru_f64">;
 
 let hasSideEffects = 1 in {
diff --git a/test/CodeGen/R600/fcmp64.ll b/test/CodeGen/R600/fcmp64.ll
index dc24443504d..73cee669c43 100644
--- a/test/CodeGen/R600/fcmp64.ll
+++ b/test/CodeGen/R600/fcmp64.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=r600 -mcpu=tahiti -verify-machineinstrs | FileCheck %s
 
 ; CHECK-LABEL: {{^}}flt_f64:
-; CHECK: v_cmp_lt_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
+; CHECK: v_cmp_nge_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 define void @flt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
   %r0 = load double addrspace(1)* %in1
@@ -13,7 +13,7 @@ define void @flt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
 }
 
 ; CHECK-LABEL: {{^}}fle_f64:
-; CHECK: v_cmp_le_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
+; CHECK: v_cmp_ngt_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 define void @fle_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
   %r0 = load double addrspace(1)* %in1
@@ -25,7 +25,7 @@ define void @fle_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
 }
 
 ; CHECK-LABEL: {{^}}fgt_f64:
-; CHECK: v_cmp_gt_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
+; CHECK: v_cmp_nle_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 define void @fgt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
   %r0 = load double addrspace(1)* %in1
@@ -37,7 +37,7 @@ define void @fgt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
 }
 
 ; CHECK-LABEL: {{^}}fge_f64:
-; CHECK: v_cmp_ge_f64_e64 {{s[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
+; CHECK: v_cmp_nlt_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 define void @fge_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
   %r0 = load double addrspace(1)* %in1
diff --git a/test/CodeGen/R600/setcc.ll b/test/CodeGen/R600/setcc.ll
index d2842d440cb..2b19fcf7868 100644
--- a/test/CodeGen/R600/setcc.ll
+++ b/test/CodeGen/R600/setcc.ll
@@ -145,10 +145,8 @@ entry:
 ; FUNC-LABEL: {{^}}f32_ugt:
 ; R600: SETGE
 ; R600: SETE_DX10
-; SI: v_cmp_u_f32
-; SI: v_cmp_gt_f32
-; SI: s_or_b64
-; SI: v_cndmask_b32
+; SI: v_cmp_nle_f32_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
 define void @f32_ugt(i32 addrspace(1)* %out, float %a, float %b) {
 entry:
   %0 = fcmp ugt float %a, %b
@@ -160,10 +158,9 @@ entry:
 ; FUNC-LABEL: {{^}}f32_uge:
 ; R600: SETGT
 ; R600: SETE_DX10
-; SI: v_cmp_u_f32
-; SI: v_cmp_ge_f32
-; SI: s_or_b64
-; SI: v_cndmask_b32
+
+; SI: v_cmp_nlt_f32_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
 define void @f32_uge(i32 addrspace(1)* %out, float %a, float %b) {
 entry:
   %0 = fcmp uge float %a, %b
@@ -175,10 +172,9 @@ entry:
 ; FUNC-LABEL: {{^}}f32_ult:
 ; R600: SETGE
 ; R600: SETE_DX10
-; SI: v_cmp_u_f32
-; SI: v_cmp_lt_f32
-; SI: s_or_b64
-; SI: v_cndmask_b32
+
+; SI: v_cmp_nge_f32_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
 define void @f32_ult(i32 addrspace(1)* %out, float %a, float %b) {
 entry:
   %0 = fcmp ult float %a, %b
@@ -190,10 +186,9 @@ entry:
 ; FUNC-LABEL: {{^}}f32_ule:
 ; R600: SETGT
 ; R600: SETE_DX10
-; SI: v_cmp_u_f32
-; SI: v_cmp_le_f32
-; SI: s_or_b64
-; SI: v_cndmask_b32
+
+; SI: v_cmp_ngt_f32_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
 define void @f32_ule(i32 addrspace(1)* %out, float %a, float %b) {
 entry:
   %0 = fcmp ule float %a, %b
diff --git a/test/CodeGen/R600/setcc64.ll b/test/CodeGen/R600/setcc64.ll
index 6a1c5ef1af0..d9e982e09b2 100644
--- a/test/CodeGen/R600/setcc64.ll
+++ b/test/CodeGen/R600/setcc64.ll
@@ -91,10 +91,9 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}f64_ugt:
-; SI: v_cmp_u_f64
-; SI: v_cmp_gt_f64
-; SI: s_or_b64
-; SI: v_cndmask_b32
+
+; SI: v_cmp_nle_f64_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
 define void @f64_ugt(i32 addrspace(1)* %out, double %a, double %b) {
 entry:
   %0 = fcmp ugt double %a, %b
@@ -104,10 +103,8 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}f64_uge:
-; SI: v_cmp_u_f64
-; SI: v_cmp_ge_f64
-; SI: s_or_b64
-; SI: v_cndmask_b32
+; SI: v_cmp_nlt_f64_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
 define void @f64_uge(i32 addrspace(1)* %out, double %a, double %b) {
 entry:
   %0 = fcmp uge double %a, %b
@@ -117,10 +114,8 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}f64_ult:
-; SI: v_cmp_u_f64
-; SI: v_cmp_lt_f64
-; SI: s_or_b64
-; SI: v_cndmask_b32
+; SI: v_cmp_nge_f64_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
 define void @f64_ult(i32 addrspace(1)* %out, double %a, double %b) {
 entry:
   %0 = fcmp ult double %a, %b
@@ -130,10 +125,8 @@ entry:
 }
 
 ; FUNC-LABEL: {{^}}f64_ule:
-; SI: v_cmp_u_f64
-; SI: v_cmp_le_f64
-; SI: s_or_b64
-; SI: v_cndmask_b32
+; SI: v_cmp_ngt_f64_e32 vcc
+; SI-NEXT: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1, vcc
 define void @f64_ule(i32 addrspace(1)* %out, double %a, double %b) {
 entry:
   %0 = fcmp ule double %a, %b
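
For reference, a minimal sketch of what this change buys (illustrative, not
part of the patch; the function name below is made up, but the before/after
instruction sequences are the ones the updated setcc.ll checks encode):

; Before this patch, unordered condition codes such as SETULT were marked
; Expand, so SelectionDAG legalized the compare into a NaN test plus an
; ordered compare and OR'd the two result masks:
;   v_cmp_u_f32 + v_cmp_lt_f32 + s_or_b64 + v_cndmask_b32
; With SETULT legal and V_CMP_NGE_F32 mapped to COND_ULT, the same IR now
; selects a single unordered compare:
;   v_cmp_nge_f32_e32 vcc + v_cndmask_b32_e64
define void @f32_ult_example(i32 addrspace(1)* %out, float %a, float %b) {
entry:
  %cmp = fcmp ult float %a, %b            ; unordered-or-less-than
  %sel = select i1 %cmp, i32 -1, i32 0    ; lowers to v_cndmask_b32
  store i32 %sel, i32 addrspace(1)* %out
  ret void
}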