R600/SI: match integer min / max in the select DAG combine.

Summary of the changes carried by this patch:
  * CombineMinMax is renamed to CombineFMinMax; its value-type guard (f32
    always, f64 only on SOUTHERN_ISLANDS or newer) moves to the call sites in
    PerformDAGCombine.
  * A new CombineIMinMax turns a compare-and-select of the same two operands
    into AMDGPUISD::SMIN / SMAX / UMIN / UMAX. It is only reached from the
    ISD::SELECT case, for i32 on SOUTHERN_ISLANDS or newer.
  * The ISD::SELECT_CC case now breaks out of the switch when the type is not
    a supported floating-point type.
  * New tests max.ll and min.ll cover the VALU and SALU forms for SI.

diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 6d608d130ff..f153991b842 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -1000,19 +1000,14 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
 }
 
 /// \brief Generate Min/Max node
-SDValue AMDGPUTargetLowering::CombineMinMax(SDLoc DL,
-                                            EVT VT,
-                                            SDValue LHS,
-                                            SDValue RHS,
-                                            SDValue True,
-                                            SDValue False,
-                                            SDValue CC,
-                                            SelectionDAG &DAG) const {
-  if (VT != MVT::f32 &&
-      (VT != MVT::f64 ||
-       Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS))
-    return SDValue();
-
+SDValue AMDGPUTargetLowering::CombineFMinMax(SDLoc DL,
+                                             EVT VT,
+                                             SDValue LHS,
+                                             SDValue RHS,
+                                             SDValue True,
+                                             SDValue False,
+                                             SDValue CC,
+                                             SelectionDAG &DAG) const {
   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
     return SDValue();
 
@@ -1057,6 +1052,45 @@ SDValue AMDGPUTargetLowering::CombineMinMax(SDLoc DL,
   return SDValue();
 }
 
+/// \brief Generate an integer Min/Max node (SMIN, SMAX, UMIN or UMAX)
+SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL,
+                                             EVT VT,
+                                             SDValue LHS,
+                                             SDValue RHS,
+                                             SDValue True,
+                                             SDValue False,
+                                             SDValue CC,
+                                             SelectionDAG &DAG) const {
+  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
+    return SDValue();
+
+  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+  switch (CCOpcode) {
+  case ISD::SETULE:
+  case ISD::SETULT: {
+    unsigned Opc = (LHS == True) ? AMDGPUISD::UMIN : AMDGPUISD::UMAX;
+    return DAG.getNode(Opc, DL, VT, LHS, RHS);
+  }
+  case ISD::SETLE:
+  case ISD::SETLT: {
+    unsigned Opc = (LHS == True) ? AMDGPUISD::SMIN : AMDGPUISD::SMAX;
+    return DAG.getNode(Opc, DL, VT, LHS, RHS);
+  }
+  case ISD::SETGT:
+  case ISD::SETGE: {
+    unsigned Opc = (LHS == True) ? AMDGPUISD::SMAX : AMDGPUISD::SMIN;
+    return DAG.getNode(Opc, DL, VT, LHS, RHS);
+  }
+  case ISD::SETUGE:
+  case ISD::SETUGT: {
+    unsigned Opc = (LHS == True) ? AMDGPUISD::UMAX : AMDGPUISD::UMIN;
+    return DAG.getNode(Opc, DL, VT, LHS, RHS);
+  }
+  default:
+    return SDValue();
+  }
+}
+
 SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op,
                                                   SelectionDAG &DAG) const {
   LoadSDNode *Load = cast<LoadSDNode>(Op);
@@ -2117,20 +2151,25 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
     SDLoc DL(N);
     EVT VT = N->getValueType(0);
 
-    SDValue LHS = N->getOperand(0);
-    SDValue RHS = N->getOperand(1);
-    SDValue True = N->getOperand(2);
-    SDValue False = N->getOperand(3);
-    SDValue CC = N->getOperand(4);
+    if (VT == MVT::f32 ||
+        (VT == MVT::f64 &&
+         Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)) {
+      SDValue LHS = N->getOperand(0);
+      SDValue RHS = N->getOperand(1);
+      SDValue True = N->getOperand(2);
+      SDValue False = N->getOperand(3);
+      SDValue CC = N->getOperand(4);
 
-    return CombineMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
+      return CombineFMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
+    }
+
+    break;
   }
   case ISD::SELECT: {
     SDValue Cond = N->getOperand(0);
     if (Cond.getOpcode() == ISD::SETCC) {
       SDLoc DL(N);
       EVT VT = N->getValueType(0);
-
       SDValue LHS = Cond.getOperand(0);
       SDValue RHS = Cond.getOperand(1);
       SDValue CC = Cond.getOperand(2);
@@ -2138,8 +2177,17 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
       SDValue True = N->getOperand(1);
       SDValue False = N->getOperand(2);
 
+      if (VT == MVT::f32 ||
+          (VT == MVT::f64 &&
+           Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)) {
+        return CombineFMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
+      }
 
-      return CombineMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
+      // TODO: Implement min / max Evergreen instructions.
+      if (VT == MVT::i32 &&
+          Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+        return CombineIMinMax(DL, VT, LHS, RHS, True, False, CC, DAG);
+      }
     }
 
     break;
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index 793c84754a3..e848d2cfb67 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -140,14 +140,23 @@ public:
 
   SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
-  SDValue CombineMinMax(SDLoc DL,
-                        EVT VT,
-                        SDValue LHS,
-                        SDValue RHS,
-                        SDValue True,
-                        SDValue False,
-                        SDValue CC,
-                        SelectionDAG &DAG) const;
+  SDValue CombineFMinMax(SDLoc DL,
+                         EVT VT,
+                         SDValue LHS,
+                         SDValue RHS,
+                         SDValue True,
+                         SDValue False,
+                         SDValue CC,
+                         SelectionDAG &DAG) const;
+  SDValue CombineIMinMax(SDLoc DL,
+                         EVT VT,
+                         SDValue LHS,
+                         SDValue RHS,
+                         SDValue True,
+                         SDValue False,
+                         SDValue CC,
+                         SelectionDAG &DAG) const;
+
   const char* getTargetNodeName(unsigned Opcode) const override;
 
   virtual SDNode *PostISelFolding(MachineSDNode *N,
diff --git a/test/CodeGen/R600/max.ll b/test/CodeGen/R600/max.ll
new file mode 100644
index 00000000000..d67ef4772dd
--- /dev/null
+++ b/test/CodeGen/R600/max.ll
@@ -0,0 +1,99 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; FUNC-LABEL: @v_test_imax_sge_i32
+; SI: v_max_i32_e32
+define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %cmp = icmp sge i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: @s_test_imax_sge_i32
+; SI: s_max_i32
+define void @s_test_imax_sge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp sge i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_test_imax_sgt_i32
+; SI: v_max_i32_e32
+define void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %cmp = icmp sgt i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: @s_test_imax_sgt_i32
+; SI: s_max_i32
+define void @s_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp sgt i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_test_umax_uge_i32
+; SI: v_max_u32_e32
+define void @v_test_umax_uge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %cmp = icmp uge i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: @s_test_umax_uge_i32
+; SI: s_max_u32
+define void @s_test_umax_uge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp uge i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_test_umax_ugt_i32
+; SI: v_max_u32_e32
+define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %cmp = icmp ugt i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: @s_test_umax_ugt_i32
+; SI: s_max_u32
+define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp ugt i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/R600/min.ll b/test/CodeGen/R600/min.ll
new file mode 100644
index 00000000000..88c0dfffa17
--- /dev/null
+++ b/test/CodeGen/R600/min.ll
@@ -0,0 +1,99 @@
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; FUNC-LABEL: @v_test_imin_sle_i32
+; SI: v_min_i32_e32
+define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %cmp = icmp sle i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: @s_test_imin_sle_i32
+; SI: s_min_i32
+define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp sle i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_test_imin_slt_i32
+; SI: v_min_i32_e32
+define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %cmp = icmp slt i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: @s_test_imin_slt_i32
+; SI: s_min_i32
+define void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp slt i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_test_umin_ule_i32
+; SI: v_min_u32_e32
+define void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %cmp = icmp ule i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: @s_test_umin_ule_i32
+; SI: s_min_u32
+define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp ule i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: @v_test_umin_ult_i32
+; SI: v_min_u32_e32
+define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
+  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
+  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
+  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
+  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
+  %a = load i32 addrspace(1)* %gep0, align 4
+  %b = load i32 addrspace(1)* %gep1, align 4
+  %cmp = icmp ult i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %outgep, align 4
+  ret void
+}
+
+; FUNC-LABEL: @s_test_umin_ult_i32
+; SI: s_min_u32
+define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+  %cmp = icmp ult i32 %a, %b
+  %val = select i1 %cmp, i32 %a, i32 %b
+  store i32 %val, i32 addrspace(1)* %out, align 4
+  ret void
+}