mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-11-06 21:05:51 +00:00
R600/SI: Pattern match isinf to v_cmp_class instructions
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225307 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
a5b2b64292
commit
42d9f7cf0a
@ -1400,6 +1400,37 @@ SDValue SITargetLowering::performMin3Max3Combine(SDNode *N,
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// Target-specific combine for SETCC nodes.
///
/// Recognizes the floating-point "is infinite" idiom
///   (fcmp oeq (fabs x), +inf)
/// on f32/f64 operands and rewrites it to
///   (fp_class x, (p_infinity | n_infinity))
///
/// \param N   The SETCC node; operands 0 and 1 are the compared values and
///            operand 2 is the condition code.
/// \param DCI Combiner state; only its SelectionDAG is used here.
/// \returns The replacement FP_CLASS node on a match, otherwise an empty
///          SDValue.
SDValue SITargetLowering::performSetCCCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SDValue Lhs = N->getOperand(0);
  SDValue Rhs = N->getOperand(1);
  EVT CmpVT = Lhs.getValueType();

  // Only single- and double-precision compares are candidates.
  if (!(CmpVT == MVT::f32 || CmpVT == MVT::f64))
    return SDValue();

  // The idiom requires an ordered-equal compare whose left side is a fabs.
  const ISD::CondCode Pred = cast<CondCodeSDNode>(N->getOperand(2))->get();
  if (Pred != ISD::SETOEQ || Lhs.getOpcode() != ISD::FABS)
    return SDValue();

  // The right side must be a floating-point constant...
  const ConstantFPSDNode *ConstRhs = dyn_cast<ConstantFPSDNode>(Rhs);
  if (!ConstRhs)
    return SDValue();

  // ...and that constant must be positive infinity specifically.
  const APFloat &RhsVal = ConstRhs->getValueAPF();
  if (!RhsVal.isInfinity() || RhsVal.isNegative())
    return SDValue();

  // fabs discards the sign, so the class test on the un-fabs'ed operand has
  // to accept both infinities.
  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);
  const unsigned InfClassMask =
      SIInstrFlags::P_INFINITY | SIInstrFlags::N_INFINITY;
  SDValue MaskOp = DAG.getConstant(InfClassMask, MVT::i32);
  return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1, Lhs.getOperand(0),
                     MaskOp);
}
|
||||
|
||||
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
|
||||
DAGCombinerInfo &DCI) const {
|
||||
SelectionDAG &DAG = DCI.DAG;
|
||||
@ -1408,6 +1439,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
|
||||
switch (N->getOpcode()) {
|
||||
default:
|
||||
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
|
||||
case ISD::SETCC:
|
||||
return performSetCCCombine(N, DCI);
|
||||
case ISD::FMAXNUM: // TODO: What about fmax_legacy?
|
||||
case ISD::FMINNUM:
|
||||
case AMDGPUISD::SMAX:
|
||||
|
@ -62,6 +62,7 @@ class SITargetLowering : public AMDGPUTargetLowering {
|
||||
SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
|
||||
SDValue performMin3Max3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
||||
|
||||
public:
|
||||
SITargetLowering(TargetMachine &tm);
|
||||
|
45
test/CodeGen/R600/fp-classify.ll
Normal file
45
test/CodeGen/R600/fp-classify.ll
Normal file
@ -0,0 +1,45 @@
|
||||
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
declare i1 @llvm.AMDGPU.class.f32(float, i32) #1
|
||||
declare i1 @llvm.AMDGPU.class.f64(double, i32) #1
|
||||
declare i32 @llvm.r600.read.tidig.x() #1
|
||||
declare float @llvm.fabs.f32(float) #1
|
||||
declare double @llvm.fabs.f64(double) #1
|
||||
|
||||
; Positive case: an ordered-equal compare of fabs(%x) against +infinity is the
; isinf idiom. The checks below expect one class-test instruction whose mask
; operand is the literal 0x204, and no further compare before the program end.
; SI-LABEL: {{^}}test_isinf_pattern:
; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x204{{$}}
; SI: v_cmp_class_f32_e32 vcc, s{{[0-9]+}}, [[MASK]]
; SI-NOT: v_cmp
; SI: s_endpgm
define void @test_isinf_pattern(i32 addrspace(1)* nocapture %out, float %x) #0 {
  %abs.x = tail call float @llvm.fabs.f32(float %x) #1
  %is.inf = fcmp oeq float %abs.x, 0x7FF0000000000000
  %is.inf.i32 = zext i1 %is.inf to i32
  store i32 %is.inf.i32, i32 addrspace(1)* %out, align 4
  ret void
}
|
||||
|
||||
; Negative case: same shape as the isinf idiom, but the predicate is the
; unordered 'ueq' rather than 'oeq', so no class-test instruction may appear.
; SI-LABEL: {{^}}test_not_isinf_pattern_0:
; SI-NOT: v_cmp_class
; SI: s_endpgm
define void @test_not_isinf_pattern_0(i32 addrspace(1)* nocapture %out, float %x) #0 {
  %abs.x = tail call float @llvm.fabs.f32(float %x) #1
  %ueq.inf = fcmp ueq float %abs.x, 0x7FF0000000000000
  %ueq.inf.i32 = zext i1 %ueq.inf to i32
  store i32 %ueq.inf.i32, i32 addrspace(1)* %out, align 4
  ret void
}
|
||||
|
||||
; Negative case: ordered-equal compare of fabs(%x), but against negative
; infinity instead of positive infinity, so no class-test instruction may
; appear.
; SI-LABEL: {{^}}test_not_isinf_pattern_1:
; SI-NOT: v_cmp_class
; SI: s_endpgm
define void @test_not_isinf_pattern_1(i32 addrspace(1)* nocapture %out, float %x) #0 {
  %abs.x = tail call float @llvm.fabs.f32(float %x) #1
  %eq.neg.inf = fcmp oeq float %abs.x, 0xFFF0000000000000
  %eq.neg.inf.i32 = zext i1 %eq.neg.inf to i32
  store i32 %eq.neg.inf.i32, i32 addrspace(1)* %out, align 4
  ret void
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind readnone }
|
Loading…
Reference in New Issue
Block a user