mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-06 06:33:24 +00:00
R600/SI: Use complex operand folding for div_scale
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@229238 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
febc8e20a5
commit
fd31a769ce
@ -783,6 +783,8 @@ SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
|
||||
return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
|
||||
}
|
||||
|
||||
// We need to handle this here because tablegen doesn't support matching
|
||||
// instructions with multiple outputs.
|
||||
SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
|
||||
SDLoc SL(N);
|
||||
EVT VT = N->getValueType(0);
|
||||
@ -792,19 +794,12 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
|
||||
unsigned Opc
|
||||
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
|
||||
|
||||
const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
|
||||
const SDValue False = CurDAG->getTargetConstant(0, MVT::i1);
|
||||
SDValue Ops[] = {
|
||||
Zero, // src0_modifiers
|
||||
N->getOperand(0), // src0
|
||||
Zero, // src1_modifiers
|
||||
N->getOperand(1), // src1
|
||||
Zero, // src2_modifiers
|
||||
N->getOperand(2), // src2
|
||||
False, // clamp
|
||||
Zero // omod
|
||||
};
|
||||
// src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
|
||||
SDValue Ops[8];
|
||||
|
||||
SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
|
||||
SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
|
||||
SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
|
||||
return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
|
||||
}
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
declare { float, i1 } @llvm.AMDGPU.div.scale.f32(float, float, i1) nounwind readnone
|
||||
declare { double, i1 } @llvm.AMDGPU.div.scale.f64(double, double, i1) nounwind readnone
|
||||
declare float @llvm.fabs.f32(float) nounwind readnone
|
||||
|
||||
; SI-LABEL: {{^}}test_div_scale_f32_1:
|
||||
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
|
||||
@ -285,3 +286,79 @@ define void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, double %
|
||||
store double %result0, double addrspace(1)* %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}test_div_scale_f32_inline_imm_num:
|
||||
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[A]], 1.0
|
||||
; SI: buffer_store_dword [[RESULT0]]
|
||||
; SI: s_endpgm
|
||||
define void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
|
||||
%a = load float addrspace(1)* %gep.0, align 4
|
||||
|
||||
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone
|
||||
%result0 = extractvalue { float, i1 } %result, 0
|
||||
store float %result0, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}test_div_scale_f32_inline_imm_den:
|
||||
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
|
||||
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], 2.0, 2.0, [[A]]
|
||||
; SI: buffer_store_dword [[RESULT0]]
|
||||
; SI: s_endpgm
|
||||
define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
|
||||
%a = load float addrspace(1)* %gep.0, align 4
|
||||
|
||||
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone
|
||||
%result0 = extractvalue { float, i1 } %result, 0
|
||||
store float %result0, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}test_div_scale_f32_fabs_num:
|
||||
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
|
||||
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], |[[A]]|
|
||||
; SI: buffer_store_dword [[RESULT0]]
|
||||
; SI: s_endpgm
|
||||
define void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
|
||||
|
||||
%a = load float addrspace(1)* %gep.0, align 4
|
||||
%b = load float addrspace(1)* %gep.1, align 4
|
||||
|
||||
%a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
|
||||
|
||||
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a.fabs, float %b, i1 false) nounwind readnone
|
||||
%result0 = extractvalue { float, i1 } %result, 0
|
||||
store float %result0, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}test_div_scale_f32_fabs_den:
|
||||
; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
|
||||
; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
|
||||
; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], |[[B]]|, |[[B]]|, [[A]]
|
||||
; SI: buffer_store_dword [[RESULT0]]
|
||||
; SI: s_endpgm
|
||||
define void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
|
||||
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
|
||||
%gep.0 = getelementptr float addrspace(1)* %in, i32 %tid
|
||||
%gep.1 = getelementptr float addrspace(1)* %gep.0, i32 1
|
||||
|
||||
%a = load float addrspace(1)* %gep.0, align 4
|
||||
%b = load float addrspace(1)* %gep.1, align 4
|
||||
|
||||
%b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
|
||||
|
||||
%result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b.fabs, i1 false) nounwind readnone
|
||||
%result0 = extractvalue { float, i1 } %result, 0
|
||||
store float %result0, float addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user