From 193c3e91b9e5c0d2fe68b29d4d4eb01f14ce36ee Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Mon, 31 Mar 2014 19:35:33 +0000
Subject: [PATCH] R600: Compute masked bits for min and max

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205242 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/R600/AMDGPUISelLowering.cpp | 44 ++++++++++++++++++++++++++
 test/CodeGen/R600/llvm.AMDGPU.umax.ll  | 15 +++++++++
 test/CodeGen/R600/llvm.AMDGPU.umin.ll  | 15 +++++++++
 3 files changed, 74 insertions(+)

diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 54ef2c403f2..183725cc217 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -1219,11 +1219,55 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   }
 }
 
+static void computeMaskedBitsForMinMax(const SDValue Op0,
+                                       const SDValue Op1,
+                                       APInt &KnownZero,
+                                       APInt &KnownOne,
+                                       const SelectionDAG &DAG,
+                                       unsigned Depth) {
+  APInt Op0Zero, Op0One;
+  APInt Op1Zero, Op1One;
+  DAG.ComputeMaskedBits(Op0, Op0Zero, Op0One, Depth);
+  DAG.ComputeMaskedBits(Op1, Op1Zero, Op1One, Depth);
+
+  KnownZero = Op0Zero & Op1Zero;
+  KnownOne = Op0One & Op1One;
+}
+
 void AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
   const SDValue Op,
   APInt &KnownZero,
   APInt &KnownOne,
   const SelectionDAG &DAG,
   unsigned Depth) const {
+  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
+  unsigned Opc = Op.getOpcode();
+  switch (Opc) {
+  case ISD::INTRINSIC_WO_CHAIN: {
+    // FIXME: The intrinsic should just use the node.
+    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
+    case AMDGPUIntrinsic::AMDGPU_imax:
+    case AMDGPUIntrinsic::AMDGPU_umax:
+    case AMDGPUIntrinsic::AMDGPU_imin:
+    case AMDGPUIntrinsic::AMDGPU_umin:
+      computeMaskedBitsForMinMax(Op.getOperand(1), Op.getOperand(2),
+                                 KnownZero, KnownOne, DAG, Depth);
+      break;
+    default:
+      break;
+    }
+
+    break;
+  }
+  case AMDGPUISD::SMAX:
+  case AMDGPUISD::UMAX:
+  case AMDGPUISD::SMIN:
+  case AMDGPUISD::UMIN:
+    computeMaskedBitsForMinMax(Op.getOperand(0), Op.getOperand(1),
+                               KnownZero, KnownOne, DAG, Depth);
+    break;
+  default:
+    break;
+  }
 }
 
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umax.ll b/test/CodeGen/R600/llvm.AMDGPU.umax.ll
index c3e1cfe9019..1b8da2e1553 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.umax.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.umax.ll
@@ -21,6 +21,21 @@ entry:
   ret void
 }
 
+; SI-LABEL: @trunc_zext_umax
+; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]],
+; SI: V_MAX_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
+; SI-NOT: AND
+; SI: BUFFER_STORE_SHORT [[RESULT]],
+define void @trunc_zext_umax(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
+  %tmp5 = load i8 addrspace(1)* %src, align 1
+  %tmp2 = zext i8 %tmp5 to i32
+  %tmp3 = tail call i32 @llvm.AMDGPU.umax(i32 %tmp2, i32 0) nounwind readnone
+  %tmp4 = trunc i32 %tmp3 to i8
+  %tmp6 = zext i8 %tmp4 to i16
+  store i16 %tmp6, i16 addrspace(1)* %out, align 2
+  ret void
+}
+
 ; Function Attrs: readnone
 declare i32 @llvm.AMDGPU.umax(i32, i32) #1
 
diff --git a/test/CodeGen/R600/llvm.AMDGPU.umin.ll b/test/CodeGen/R600/llvm.AMDGPU.umin.ll
index 460a7b2d425..08397f8356c 100644
--- a/test/CodeGen/R600/llvm.AMDGPU.umin.ll
+++ b/test/CodeGen/R600/llvm.AMDGPU.umin.ll
@@ -21,6 +21,21 @@ entry:
   ret void
 }
 
+; SI-LABEL: @trunc_zext_umin
+; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]],
+; SI: V_MIN_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
+; SI-NOT: AND
+; SI: BUFFER_STORE_SHORT [[RESULT]],
+define void @trunc_zext_umin(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
+  %tmp5 = load i8 addrspace(1)* %src, align 1
+  %tmp2 = zext i8 %tmp5 to i32
+  %tmp3 = tail call i32 @llvm.AMDGPU.umin(i32 %tmp2, i32 0) nounwind readnone
+  %tmp4 = trunc i32 %tmp3 to i8
+  %tmp6 = zext i8 %tmp4 to i16
+  store i16 %tmp6, i16 addrspace(1)* %out, align 2
+  ret void
+}
+
 ; Function Attrs: readnone
 declare i32 @llvm.AMDGPU.umin(i32, i32) #1
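
Note: the rule computeMaskedBitsForMinMax implements is that min and max always
return one of their two operands, so a bit is known in the result only if it is
known, with the same value, in both operands. Below is a minimal standalone C++
sketch of that rule, using plain uint32_t masks rather than LLVM's APInt; the
KnownBits32 and knownBitsForMinMax names are illustrative, not LLVM API.

    #include <cassert>
    #include <cstdint>

    struct KnownBits32 {
      uint32_t Zero; // Mask of bits known to be 0.
      uint32_t One;  // Mask of bits known to be 1.
    };

    // min/max (signed or unsigned) returns one of its operands, so only the
    // bits on which both operands agree remain known in the result.
    static KnownBits32 knownBitsForMinMax(KnownBits32 LHS, KnownBits32 RHS) {
      return { LHS.Zero & RHS.Zero, LHS.One & RHS.One };
    }

    int main() {
      // Model the trunc_zext test cases above: one operand is zext i8 -> i32
      // (bits 8-31 known zero), the other is the constant 0 (all bits known
      // zero). The intersection keeps bits 8-31 known zero, which is what
      // lets the masking AND before the i16 store be folded away.
      KnownBits32 ZextByte  = { 0xFFFFFF00u, 0x00000000u };
      KnownBits32 ConstZero = { 0xFFFFFFFFu, 0x00000000u };
      KnownBits32 Res = knownBitsForMinMax(ZextByte, ConstZero);
      assert(Res.Zero == 0xFFFFFF00u && Res.One == 0u);
      return 0;
    }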