mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-13 04:30:23 +00:00
R600/SI: Allow the same SGPR to be used for multiple operands
Instead of always keeping the first SGPR operand and moving every SGPR that differs from it, when one SGPR is used for multiple operands, legalize the operands in the way that requires the fewest moves. This saves extra moves and is also required for some instructions that need the same register to be used for multiple operands. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218532 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
aed12d4bad
commit
d991d2217b
@ -1390,10 +1390,12 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
|
|||||||
// Legalize VOP3
|
// Legalize VOP3
|
||||||
if (isVOP3(MI->getOpcode())) {
|
if (isVOP3(MI->getOpcode())) {
|
||||||
const MCInstrDesc &Desc = get(MI->getOpcode());
|
const MCInstrDesc &Desc = get(MI->getOpcode());
|
||||||
unsigned SGPRReg = AMDGPU::NoRegister;
|
|
||||||
|
|
||||||
int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx };
|
int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx };
|
||||||
|
|
||||||
|
// Find the one SGPR operand we are allowed to use.
|
||||||
|
unsigned SGPRReg = AMDGPU::NoRegister;
|
||||||
|
|
||||||
for (const MachineOperand &MO : MI->implicit_operands()) {
|
for (const MachineOperand &MO : MI->implicit_operands()) {
|
||||||
// We only care about reads.
|
// We only care about reads.
|
||||||
if (MO.isDef())
|
if (MO.isDef())
|
||||||
@ -1410,8 +1412,9 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (SGPRReg == AMDGPU::NoRegister) {
|
if (SGPRReg == AMDGPU::NoRegister) {
|
||||||
|
unsigned UsedSGPRs[3] = { AMDGPU::NoRegister };
|
||||||
|
|
||||||
// First we need to consider the instruction's operand requirements before
|
// First we need to consider the instruction's operand requirements before
|
||||||
// legalizing. Some operands are required to be SGPRs, but we are still
|
// legalizing. Some operands are required to be SGPRs, but we are still
|
||||||
// bound by the constant bus requirement to only use one.
|
// bound by the constant bus requirement to only use one.
|
||||||
@ -1422,9 +1425,33 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
|
|||||||
if (Idx == -1)
|
if (Idx == -1)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass)) {
|
const MachineOperand &MO = MI->getOperand(Idx);
|
||||||
SGPRReg = MI->getOperand(Idx).getReg();
|
if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass))
|
||||||
break;
|
SGPRReg = MO.getReg();
|
||||||
|
|
||||||
|
if (MO.isReg() && RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
|
||||||
|
UsedSGPRs[i] = MO.getReg();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (SGPRReg == AMDGPU::NoRegister) {
|
||||||
|
// We don't have a required SGPR operand, so we have a bit more freedom in
|
||||||
|
// selecting operands to move.
|
||||||
|
|
||||||
|
// Try to select the most used SGPR. If an SGPR is equal to one of the
|
||||||
|
// others, we choose that.
|
||||||
|
//
|
||||||
|
// e.g.
|
||||||
|
// V_FMA_F32 v0, s0, s0, s0 -> No moves
|
||||||
|
// V_FMA_F32 v0, s0, s1, s0 -> Move s1
|
||||||
|
|
||||||
|
if (UsedSGPRs[0] != AMDGPU::NoRegister) {
|
||||||
|
if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2])
|
||||||
|
SGPRReg = UsedSGPRs[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
if (SGPRReg == AMDGPU::NoRegister && UsedSGPRs[1] != AMDGPU::NoRegister) {
|
||||||
|
if (UsedSGPRs[1] == UsedSGPRs[2])
|
||||||
|
SGPRReg = UsedSGPRs[1];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
96
test/CodeGen/R600/use-sgpr-multiple-times.ll
Normal file
96
test/CodeGen/R600/use-sgpr-multiple-times.ll
Normal file
@ -0,0 +1,96 @@
|
|||||||
|
; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
||||||
|
|
||||||
|
declare float @llvm.fma.f32(float, float, float) #1
|
||||||
|
declare float @llvm.fmuladd.f32(float, float, float) #1
|
||||||
|
declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) #1
|
||||||
|
|
||||||
|
|
||||||
|
; SI-LABEL: @test_sgpr_use_twice_binop:
|
||||||
|
; SI: S_LOAD_DWORD [[SGPR:s[0-9]+]],
|
||||||
|
; SI: V_ADD_F32_e64 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]]
|
||||||
|
; SI: BUFFER_STORE_DWORD [[RESULT]]
|
||||||
|
define void @test_sgpr_use_twice_binop(float addrspace(1)* %out, float %a) #0 {
|
||||||
|
%dbl = fadd float %a, %a
|
||||||
|
store float %dbl, float addrspace(1)* %out, align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; SI-LABEL: @test_sgpr_use_three_ternary_op:
|
||||||
|
; SI: S_LOAD_DWORD [[SGPR:s[0-9]+]],
|
||||||
|
; SI: V_FMA_F32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[SGPR]]
|
||||||
|
; SI: BUFFER_STORE_DWORD [[RESULT]]
|
||||||
|
define void @test_sgpr_use_three_ternary_op(float addrspace(1)* %out, float %a) #0 {
|
||||||
|
%fma = call float @llvm.fma.f32(float %a, float %a, float %a) #1
|
||||||
|
store float %fma, float addrspace(1)* %out, align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; SI-LABEL: @test_sgpr_use_twice_ternary_op_a_a_b:
|
||||||
|
; SI: S_LOAD_DWORD [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||||
|
; SI: S_LOAD_DWORD [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||||
|
; SI: V_MOV_B32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
|
||||||
|
; SI: V_FMA_F32 [[RESULT:v[0-9]+]], [[SGPR0]], [[SGPR0]], [[VGPR1]]
|
||||||
|
; SI: BUFFER_STORE_DWORD [[RESULT]]
|
||||||
|
define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, float %a, float %b) #0 {
|
||||||
|
%fma = call float @llvm.fma.f32(float %a, float %a, float %b) #1
|
||||||
|
store float %fma, float addrspace(1)* %out, align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; SI-LABEL: @test_sgpr_use_twice_ternary_op_a_b_a:
|
||||||
|
; SI: S_LOAD_DWORD [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||||
|
; SI: S_LOAD_DWORD [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||||
|
; SI: V_MOV_B32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
|
||||||
|
; SI: V_FMA_F32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
|
||||||
|
; SI: BUFFER_STORE_DWORD [[RESULT]]
|
||||||
|
define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
|
||||||
|
%fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1
|
||||||
|
store float %fma, float addrspace(1)* %out, align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; SI-LABEL: @test_sgpr_use_twice_ternary_op_b_a_a:
|
||||||
|
; SI: S_LOAD_DWORD [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
|
||||||
|
; SI: S_LOAD_DWORD [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
|
||||||
|
; SI: V_MOV_B32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
|
||||||
|
; SI: V_FMA_F32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
|
||||||
|
; SI: BUFFER_STORE_DWORD [[RESULT]]
|
||||||
|
define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
|
||||||
|
%fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1
|
||||||
|
store float %fma, float addrspace(1)* %out, align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; SI-LABEL: @test_sgpr_use_twice_ternary_op_a_a_imm:
|
||||||
|
; SI: S_LOAD_DWORD [[SGPR:s[0-9]+]]
|
||||||
|
; SI: V_FMA_F32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], 2.0
|
||||||
|
; SI: BUFFER_STORE_DWORD [[RESULT]]
|
||||||
|
define void @test_sgpr_use_twice_ternary_op_a_a_imm(float addrspace(1)* %out, float %a) #0 {
|
||||||
|
%fma = call float @llvm.fma.f32(float %a, float %a, float 2.0) #1
|
||||||
|
store float %fma, float addrspace(1)* %out, align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; SI-LABEL: @test_sgpr_use_twice_ternary_op_a_imm_a:
|
||||||
|
; SI: S_LOAD_DWORD [[SGPR:s[0-9]+]]
|
||||||
|
; SI: V_FMA_F32 [[RESULT:v[0-9]+]], [[SGPR]], 2.0, [[SGPR]]
|
||||||
|
; SI: BUFFER_STORE_DWORD [[RESULT]]
|
||||||
|
define void @test_sgpr_use_twice_ternary_op_a_imm_a(float addrspace(1)* %out, float %a) #0 {
|
||||||
|
%fma = call float @llvm.fma.f32(float %a, float 2.0, float %a) #1
|
||||||
|
store float %fma, float addrspace(1)* %out, align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Don't use fma since fma c, x, y is canonicalized to fma x, c, y
|
||||||
|
; SI-LABEL: @test_sgpr_use_twice_ternary_op_imm_a_a:
|
||||||
|
; SI: S_LOAD_DWORD [[SGPR:s[0-9]+]]
|
||||||
|
; SI: V_MAD_I32_I24 [[RESULT:v[0-9]+]], 2, [[SGPR]], [[SGPR]]
|
||||||
|
; SI: BUFFER_STORE_DWORD [[RESULT]]
|
||||||
|
define void @test_sgpr_use_twice_ternary_op_imm_a_a(i32 addrspace(1)* %out, i32 %a) #0 {
|
||||||
|
%fma = call i32 @llvm.AMDGPU.imad24(i32 2, i32 %a, i32 %a) #1
|
||||||
|
store i32 %fma, i32 addrspace(1)* %out, align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { nounwind }
|
||||||
|
attributes #1 = { nounwind readnone }
|
Loading…
Reference in New Issue
Block a user