R600/SI: Match integer min / max instructions

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222015 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matt Arsenault 2014-11-14 18:30:06 +00:00
parent 8fd3b90c3f
commit 01213b1132
4 changed files with 284 additions and 29 deletions

View File

@ -1000,19 +1000,14 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
}
/// \brief Generate Min/Max node
SDValue AMDGPUTargetLowering::CombineMinMax(SDLoc DL,
EVT VT,
SDValue LHS,
SDValue RHS,
SDValue True,
SDValue False,
SDValue CC,
SelectionDAG &DAG) const {
if (VT != MVT::f32 &&
(VT != MVT::f64 ||
Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS))
return SDValue();
SDValue AMDGPUTargetLowering::CombineFMinMax(SDLoc DL,
EVT VT,
SDValue LHS,
SDValue RHS,
SDValue True,
SDValue False,
SDValue CC,
SelectionDAG &DAG) const {
if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
return SDValue();
@ -1057,6 +1052,45 @@ SDValue AMDGPUTargetLowering::CombineMinMax(SDLoc DL,
return SDValue();
}
/// \brief Fold an integer select-of-compare into a single min / max node.
///
/// \p LHS and \p RHS are the operands of the compare, \p CC is its condition
/// code, and \p True / \p False are the values selected when the compare
/// holds or fails. The fold only applies when the selected values are exactly
/// the compared operands, in either order.
///
/// \returns An AMDGPUISD::SMIN / SMAX / UMIN / UMAX node of type \p VT built
/// from \p LHS and \p RHS, or an empty SDValue when the operands do not line
/// up or \p CC is not one of the less / greater comparisons handled below.
SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL,
                                             EVT VT,
                                             SDValue LHS,
                                             SDValue RHS,
                                             SDValue True,
                                             SDValue False,
                                             SDValue CC,
                                             SelectionDAG &DAG) const {
  // The selected values must be the compared values themselves.
  const bool SameOrder = (LHS == True && RHS == False);
  const bool Swapped = (LHS == False && RHS == True);
  if (!(SameOrder || Swapped))
    return SDValue();

  // Classify the comparison by signedness and direction.
  bool IsUnsigned = false;
  bool IsLess = false;
  switch (cast<CondCodeSDNode>(CC)->get()) {
  case ISD::SETULT:
  case ISD::SETULE:
    IsUnsigned = true;
    IsLess = true;
    break;
  case ISD::SETLT:
  case ISD::SETLE:
    IsUnsigned = false;
    IsLess = true;
    break;
  case ISD::SETGT:
  case ISD::SETGE:
    IsUnsigned = false;
    IsLess = false;
    break;
  case ISD::SETUGT:
  case ISD::SETUGE:
    IsUnsigned = true;
    IsLess = false;
    break;
  default:
    return SDValue();
  }

  // "LHS < RHS ? LHS : RHS" and "LHS > RHS ? RHS : LHS" are both a minimum;
  // the two remaining combinations are a maximum.
  const bool PicksLHSWhenTrue = (LHS == True);
  const bool WantMin = (IsLess == PicksLHSWhenTrue);

  unsigned Opc;
  if (IsUnsigned)
    Opc = WantMin ? AMDGPUISD::UMIN : AMDGPUISD::UMAX;
  else
    Opc = WantMin ? AMDGPUISD::SMIN : AMDGPUISD::SMAX;

  return DAG.getNode(Opc, DL, VT, LHS, RHS);
}
SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op,
SelectionDAG &DAG) const {
LoadSDNode *Load = cast<LoadSDNode>(Op);
@ -2117,20 +2151,25 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue True = N->getOperand(2);
SDValue False = N->getOperand(3);
SDValue CC = N->getOperand(4);
if (VT == MVT::f32 ||
(VT == MVT::f64 &&
Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue True = N->getOperand(2);
SDValue False = N->getOperand(3);
SDValue CC = N->getOperand(4);
return CombineMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
return CombineFMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
}
break;
}
case ISD::SELECT: {
SDValue Cond = N->getOperand(0);
if (Cond.getOpcode() == ISD::SETCC) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue LHS = Cond.getOperand(0);
SDValue RHS = Cond.getOperand(1);
SDValue CC = Cond.getOperand(2);
@ -2138,8 +2177,17 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
SDValue True = N->getOperand(1);
SDValue False = N->getOperand(2);
if (VT == MVT::f32 ||
(VT == MVT::f64 &&
Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)) {
return CombineFMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
}
return CombineMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
// TODO: Implement min / max Evergreen instructions.
if (VT == MVT::i32 &&
Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
return CombineIMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
}
}
break;

View File

@ -140,14 +140,23 @@ public:
SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
SDValue CombineMinMax(SDLoc DL,
EVT VT,
SDValue LHS,
SDValue RHS,
SDValue True,
SDValue False,
SDValue CC,
SelectionDAG &DAG) const;
/// Min / max combine for a select on the compare of \p LHS and \p RHS with
/// condition code \p CC, choosing between \p True and \p False. The callers
/// in PerformDAGCombine invoke it only for f32, or for f64 on Southern
/// Islands and later. Returns an empty SDValue when \p True / \p False are
/// not exactly \p LHS / \p RHS in either order.
SDValue CombineFMinMax(SDLoc DL,
EVT VT,
SDValue LHS,
SDValue RHS,
SDValue True,
SDValue False,
SDValue CC,
SelectionDAG &DAG) const;
/// Integer min / max combine for a select on the compare of \p LHS and
/// \p RHS with condition code \p CC, choosing between \p True and \p False.
/// Produces an AMDGPUISD::SMIN / SMAX / UMIN / UMAX node of type \p VT for
/// the signed and unsigned less / greater condition codes, and an empty
/// SDValue when \p True / \p False are not exactly \p LHS / \p RHS in either
/// order or the condition code is not handled.
SDValue CombineIMinMax(SDLoc DL,
EVT VT,
SDValue LHS,
SDValue RHS,
SDValue True,
SDValue False,
SDValue CC,
SelectionDAG &DAG) const;
const char* getTargetNodeName(unsigned Opcode) const override;
virtual SDNode *PostISelFolding(MachineSDNode *N,

99
test/CodeGen/R600/max.ll Normal file
View File

@ -0,0 +1,99 @@
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; Signed max via sge, per-index operands. Loads %a and %b from %aptr / %bptr
; at the index returned by llvm.r600.read.tidig.x, selects %a when %a >= %b
; (signed) and %b otherwise, and stores the result at the same index of %out.
; The check below expects v_max_i32_e32.
; FUNC-LABEL: @v_test_imax_sge_i32
; SI: v_max_i32_e32
define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
%outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
%a = load i32 addrspace(1)* %gep0, align 4
%b = load i32 addrspace(1)* %gep1, align 4
%cmp = icmp sge i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
ret void
}
; Signed max via sge on the function's i32 arguments. Selects %a when
; %a >= %b (signed) and %b otherwise, then stores the result to %out.
; The check below expects s_max_i32.
; FUNC-LABEL: @s_test_imax_sge_i32
; SI: s_max_i32
define void @s_test_imax_sge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp sge i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
; Signed max via sgt, per-index operands. Loads %a and %b from %aptr / %bptr
; at the index returned by llvm.r600.read.tidig.x, selects %a when %a > %b
; (signed) and %b otherwise, and stores the result at the same index of %out.
; The check below expects v_max_i32_e32.
; FUNC-LABEL: @v_test_imax_sgt_i32
; SI: v_max_i32_e32
define void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
%outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
%a = load i32 addrspace(1)* %gep0, align 4
%b = load i32 addrspace(1)* %gep1, align 4
%cmp = icmp sgt i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
ret void
}
; Signed max via sgt on the function's i32 arguments. Selects %a when
; %a > %b (signed) and %b otherwise, then stores the result to %out.
; The check below expects s_max_i32.
; FUNC-LABEL: @s_test_imax_sgt_i32
; SI: s_max_i32
define void @s_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp sgt i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
; Unsigned max via uge, per-index operands. Loads %a and %b from %aptr /
; %bptr at the index returned by llvm.r600.read.tidig.x, selects %a when
; %a >= %b (unsigned) and %b otherwise, and stores the result at the same
; index of %out. The check below expects v_max_u32_e32.
; FUNC-LABEL: @v_test_umax_uge_i32
; SI: v_max_u32_e32
define void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
%outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
%a = load i32 addrspace(1)* %gep0, align 4
%b = load i32 addrspace(1)* %gep1, align 4
%cmp = icmp uge i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
ret void
}
; Unsigned max via uge on the function's i32 arguments. Selects %a when
; %a >= %b (unsigned) and %b otherwise, then stores the result to %out.
; The check below expects s_max_u32.
; FUNC-LABEL: @s_test_umax_uge_i32
; SI: s_max_u32
define void @s_test_umax_uge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp uge i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
; Unsigned max via ugt, per-index operands. Loads %a and %b from %aptr /
; %bptr at the index returned by llvm.r600.read.tidig.x, selects %a when
; %a > %b (unsigned) and %b otherwise, and stores the result at the same
; index of %out. The check below expects v_max_u32_e32.
; FUNC-LABEL: @v_test_umax_ugt_i32
; SI: v_max_u32_e32
define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
%outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
%a = load i32 addrspace(1)* %gep0, align 4
%b = load i32 addrspace(1)* %gep1, align 4
%cmp = icmp ugt i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
ret void
}
; Unsigned max via ugt on the function's i32 arguments. Selects %a when
; %a > %b (unsigned) and %b otherwise, then stores the result to %out.
; The check below expects s_max_u32.
; FUNC-LABEL: @s_test_umax_ugt_i32
; SI: s_max_u32
define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp ugt i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}

99
test/CodeGen/R600/min.ll Normal file
View File

@ -0,0 +1,99 @@
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; Signed min via sle, per-index operands. Loads %a and %b from %aptr / %bptr
; at the index returned by llvm.r600.read.tidig.x, selects %a when %a <= %b
; (signed) and %b otherwise, and stores the result at the same index of %out.
; The check below expects v_min_i32_e32.
; FUNC-LABEL: @v_test_imin_sle_i32
; SI: v_min_i32_e32
define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
%outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
%a = load i32 addrspace(1)* %gep0, align 4
%b = load i32 addrspace(1)* %gep1, align 4
%cmp = icmp sle i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
ret void
}
; Signed min via sle on the function's i32 arguments. Selects %a when
; %a <= %b (signed) and %b otherwise, then stores the result to %out.
; The check below expects s_min_i32.
; FUNC-LABEL: @s_test_imin_sle_i32
; SI: s_min_i32
define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp sle i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
; Signed min via slt, per-index operands. Loads %a and %b from %aptr / %bptr
; at the index returned by llvm.r600.read.tidig.x, selects %a when %a < %b
; (signed) and %b otherwise, and stores the result at the same index of %out.
; The check below expects v_min_i32_e32.
; FUNC-LABEL: @v_test_imin_slt_i32
; SI: v_min_i32_e32
define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
%outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
%a = load i32 addrspace(1)* %gep0, align 4
%b = load i32 addrspace(1)* %gep1, align 4
%cmp = icmp slt i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
ret void
}
; Signed min via slt on the function's i32 arguments. Selects %a when
; %a < %b (signed) and %b otherwise, then stores the result to %out.
; The check below expects s_min_i32.
; FUNC-LABEL: @s_test_imin_slt_i32
; SI: s_min_i32
define void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp slt i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
; Unsigned min via ule, per-index operands. Loads %a and %b from %aptr /
; %bptr at the index returned by llvm.r600.read.tidig.x, selects %a when
; %a <= %b (unsigned) and %b otherwise, and stores the result at the same
; index of %out. The check below expects v_min_u32_e32.
; FUNC-LABEL: @v_test_umin_ule_i32
; SI: v_min_u32_e32
define void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
%outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
%a = load i32 addrspace(1)* %gep0, align 4
%b = load i32 addrspace(1)* %gep1, align 4
%cmp = icmp ule i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
ret void
}
; Unsigned min via ule on the function's i32 arguments. Selects %a when
; %a <= %b (unsigned) and %b otherwise, then stores the result to %out.
; The check below expects s_min_u32.
; FUNC-LABEL: @s_test_umin_ule_i32
; SI: s_min_u32
define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp ule i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}
; Unsigned min via ult, per-index operands. Loads %a and %b from %aptr /
; %bptr at the index returned by llvm.r600.read.tidig.x, selects %a when
; %a < %b (unsigned) and %b otherwise, and stores the result at the same
; index of %out. The check below expects v_min_u32_e32.
; FUNC-LABEL: @v_test_umin_ult_i32
; SI: v_min_u32_e32
define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
%gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
%outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
%a = load i32 addrspace(1)* %gep0, align 4
%b = load i32 addrspace(1)* %gep1, align 4
%cmp = icmp ult i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %outgep, align 4
ret void
}
; Unsigned min via ult on the function's i32 arguments. Selects %a when
; %a < %b (unsigned) and %b otherwise, then stores the result to %out.
; The check below expects s_min_u32.
; FUNC-LABEL: @s_test_umin_ult_i32
; SI: s_min_u32
define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
%cmp = icmp ult i32 %a, %b
%val = select i1 %cmp, i32 %a, i32 %b
store i32 %val, i32 addrspace(1)* %out, align 4
ret void
}