R600: Compute masked bits for min and max

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@205242 91177308-0d34-0410-b5e6-96231b3b80d8
Matt Arsenault 2014-03-31 19:35:33 +00:00
parent c298307ae6
commit 193c3e91b9
3 changed files with 74 additions and 0 deletions

@@ -1219,11 +1219,55 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
  }
}

static void computeMaskedBitsForMinMax(const SDValue Op0,
                                       const SDValue Op1,
                                       APInt &KnownZero,
                                       APInt &KnownOne,
                                       const SelectionDAG &DAG,
                                       unsigned Depth) {
  APInt Op0Zero, Op0One;
  APInt Op1Zero, Op1One;
  DAG.ComputeMaskedBits(Op0, Op0Zero, Op0One, Depth);
  DAG.ComputeMaskedBits(Op1, Op1Zero, Op1One, Depth);

  KnownZero = Op0Zero & Op1Zero;
  KnownOne = Op0One & Op1One;
}

void AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
  const SDValue Op,
  APInt &KnownZero,
  APInt &KnownOne,
  const SelectionDAG &DAG,
  unsigned Depth) const {
  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
  unsigned Opc = Op.getOpcode();

  switch (Opc) {
  case ISD::INTRINSIC_WO_CHAIN: {
    // FIXME: The intrinsic should just use the node.
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
    case AMDGPUIntrinsic::AMDGPU_imax:
    case AMDGPUIntrinsic::AMDGPU_umax:
    case AMDGPUIntrinsic::AMDGPU_imin:
    case AMDGPUIntrinsic::AMDGPU_umin:
      computeMaskedBitsForMinMax(Op.getOperand(1), Op.getOperand(2),
                                 KnownZero, KnownOne, DAG, Depth);
      break;
    default:
      break;
    }

    break;
  }
  case AMDGPUISD::SMAX:
  case AMDGPUISD::UMAX:
  case AMDGPUISD::SMIN:
  case AMDGPUISD::UMIN:
    computeMaskedBitsForMinMax(Op.getOperand(0), Op.getOperand(1),
                               KnownZero, KnownOne, DAG, Depth);
    break;
  default:
    break;
  }
}
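
For reference, the merge performed by computeMaskedBitsForMinMax above is conservative: whichever operand the min/max ends up selecting, a result bit is only known if both inputs agree on it, hence the AND of the two known-bit sets. Below is a minimal standalone sketch of that rule using plain uint32_t masks instead of APInt; the Known struct, function name, and example values are illustrative, not taken from the patch.

#include <cstdint>
#include <cstdio>

// A bit set in Zero is known to be 0; a bit set in One is known to be 1.
struct Known {
  uint32_t Zero;
  uint32_t One;
};

// Conservative known bits for min/max of two values: only bits that are
// known the same way in both operands survive the merge.
static Known knownBitsForMinMax(Known A, Known B) {
  return {A.Zero & B.Zero, A.One & B.One};
}

int main() {
  // Example: both operands are zero-extended bytes, so bits 8..31 are known
  // zero in each; the min/max result therefore fits in 8 bits as well.
  Known A = {0xFFFFFF00u, 0x00000000u};
  Known B = {0xFFFFFF00u, 0x00000001u}; // low bit also known to be set in B
  Known R = knownBitsForMinMax(A, B);
  std::printf("KnownZero = 0x%08X, KnownOne = 0x%08X\n",
              static_cast<unsigned>(R.Zero), static_cast<unsigned>(R.One));
  return 0;
}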

@@ -21,6 +21,21 @@ entry:
  ret void
}

; SI-LABEL: @trunc_zext_umax
; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]],
; SI: V_MAX_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
; SI-NOT: AND
; SI: BUFFER_STORE_SHORT [[RESULT]],
define void @trunc_zext_umax(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
  %tmp5 = load i8 addrspace(1)* %src, align 1
  %tmp2 = zext i8 %tmp5 to i32
  %tmp3 = tail call i32 @llvm.AMDGPU.umax(i32 %tmp2, i32 0) nounwind readnone
  %tmp4 = trunc i32 %tmp3 to i8
  %tmp6 = zext i8 %tmp4 to i16
  store i16 %tmp6, i16 addrspace(1)* %out, align 2
  ret void
}

; Function Attrs: readnone
declare i32 @llvm.AMDGPU.umax(i32, i32) #1
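
An illustrative aside on what the checks above verify: BUFFER_LOAD_UBYTE zero-extends the loaded byte, and umax with 0 preserves the zeroed upper 24 bits, so the trunc-to-i8/zext-to-i16 round trip needs no AND before the 16-bit store. A scalar C++ sketch of the same computation, assuming nothing beyond the IR shown (the function name is made up for illustration):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Mirrors the IR: zext i8 -> i32, umax with 0, trunc to i8, zext to i16.
static uint16_t trunc_zext_umax(uint8_t Src) {
  uint32_t Ext = Src;                          // bits 8..31 are known zero
  uint32_t Max = std::max<uint32_t>(Ext, 0u);  // umax keeps them zero
  uint8_t Trunc = static_cast<uint8_t>(Max);   // trunc is therefore lossless
  return static_cast<uint16_t>(Trunc);         // zext to i16 needs no mask
}

int main() {
  for (unsigned V = 0; V != 256; ++V)
    assert(trunc_zext_umax(static_cast<uint8_t>(V)) == V);
  return 0;
}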

@@ -21,6 +21,21 @@ entry:
  ret void
}

; SI-LABEL: @trunc_zext_umin
; SI: BUFFER_LOAD_UBYTE [[VREG:v[0-9]+]],
; SI: V_MIN_U32_e32 [[RESULT:v[0-9]+]], 0, [[VREG]]
; SI-NOT: AND
; SI: BUFFER_STORE_SHORT [[RESULT]],
define void @trunc_zext_umin(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
  %tmp5 = load i8 addrspace(1)* %src, align 1
  %tmp2 = zext i8 %tmp5 to i32
  %tmp3 = tail call i32 @llvm.AMDGPU.umin(i32 %tmp2, i32 0) nounwind readnone
  %tmp4 = trunc i32 %tmp3 to i8
  %tmp6 = zext i8 %tmp4 to i16
  store i16 %tmp6, i16 addrspace(1)* %out, align 2
  ret void
}

; Function Attrs: readnone
declare i32 @llvm.AMDGPU.umin(i32, i32) #1