mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-11-04 05:17:07 +00:00 
			
		
		
		
	R600/SI: Commute instructions to enable more folding opportunities
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@225410 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		@@ -56,10 +56,16 @@ struct FoldCandidate {
 | 
				
			|||||||
  uint64_t ImmToFold;
 | 
					  uint64_t ImmToFold;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
 | 
					  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
 | 
				
			||||||
      UseMI(MI), UseOpNo(OpNo), OpToFold(FoldOp), ImmToFold(0) { }
 | 
					                UseMI(MI), UseOpNo(OpNo) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  FoldCandidate(MachineInstr *MI, unsigned OpNo, uint64_t Imm) :
 | 
					    if (FoldOp->isImm()) {
 | 
				
			||||||
      UseMI(MI), UseOpNo(OpNo), OpToFold(nullptr), ImmToFold(Imm) { }
 | 
					      OpToFold = nullptr;
 | 
				
			||||||
 | 
					      ImmToFold = FoldOp->getImm();
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      assert(FoldOp->isReg());
 | 
				
			||||||
 | 
					      OpToFold = FoldOp;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  bool isImm() const {
 | 
					  bool isImm() const {
 | 
				
			||||||
    return !OpToFold;
 | 
					    return !OpToFold;
 | 
				
			||||||
@@ -119,6 +125,35 @@ static bool updateOperand(FoldCandidate &Fold,
 | 
				
			|||||||
  return false;
 | 
					  return false;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
 | 
				
			||||||
 | 
					                             MachineInstr *MI, unsigned OpNo,
 | 
				
			||||||
 | 
					                             MachineOperand *OpToFold,
 | 
				
			||||||
 | 
					                             const SIInstrInfo *TII) {
 | 
				
			||||||
 | 
					  if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {
 | 
				
			||||||
 | 
					    // Operand is not legal, so try to commute the instruction to
 | 
				
			||||||
 | 
					    // see if this makes it possible to fold.
 | 
				
			||||||
 | 
					    unsigned CommuteIdx0;
 | 
				
			||||||
 | 
					    unsigned CommuteIdx1;
 | 
				
			||||||
 | 
					    bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (CanCommute) {
 | 
				
			||||||
 | 
					      if (CommuteIdx0 == OpNo)
 | 
				
			||||||
 | 
					        OpNo = CommuteIdx1;
 | 
				
			||||||
 | 
					      else if (CommuteIdx1 == OpNo)
 | 
				
			||||||
 | 
					        OpNo = CommuteIdx0;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (!CanCommute || !TII->commuteInstruction(MI))
 | 
				
			||||||
 | 
					      return false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (!TII->isOperandLegal(MI, OpNo, OpToFold))
 | 
				
			||||||
 | 
					      return false;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
 | 
				
			||||||
 | 
					  return true;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
 | 
					bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
 | 
				
			||||||
  MachineRegisterInfo &MRI = MF.getRegInfo();
 | 
					  MachineRegisterInfo &MRI = MF.getRegInfo();
 | 
				
			||||||
  const SIInstrInfo *TII =
 | 
					  const SIInstrInfo *TII =
 | 
				
			||||||
@@ -140,6 +175,11 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
 | 
				
			|||||||
      MachineOperand &OpToFold = MI.getOperand(1);
 | 
					      MachineOperand &OpToFold = MI.getOperand(1);
 | 
				
			||||||
      bool FoldingImm = OpToFold.isImm() || OpToFold.isFPImm();
 | 
					      bool FoldingImm = OpToFold.isImm() || OpToFold.isFPImm();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      // FIXME: We could also be folding things like FrameIndexes and
 | 
				
			||||||
 | 
					      // TargetIndexes.
 | 
				
			||||||
 | 
					      if (!FoldingImm && !OpToFold.isReg())
 | 
				
			||||||
 | 
					        continue;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      // Folding immediates with more than one use will increase program size.
 | 
					      // Folding immediates with more than one use will increase program size.
 | 
				
			||||||
      // FIXME: This will also reduce register usage, which may be better
 | 
					      // FIXME: This will also reduce register usage, which may be better
 | 
				
			||||||
      // in some cases.  A better heuristic is needed.
 | 
					      // in some cases.  A better heuristic is needed.
 | 
				
			||||||
@@ -210,24 +250,13 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
 | 
				
			|||||||
            UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
 | 
					            UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1)
 | 
				
			||||||
          continue;
 | 
					          continue;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
        if (FoldingImm) {
 | 
					        if (FoldingImm) {
 | 
				
			||||||
          const MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
 | 
					          MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
 | 
				
			||||||
          if (TII->isOperandLegal(UseMI, Use.getOperandNo(), &ImmOp)) {
 | 
					          tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII);
 | 
				
			||||||
            FoldList.push_back(FoldCandidate(UseMI, Use.getOperandNo(),
 | 
					 | 
				
			||||||
                               Imm.getSExtValue()));
 | 
					 | 
				
			||||||
          }
 | 
					 | 
				
			||||||
          continue;
 | 
					          continue;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // Normal substitution with registers
 | 
					        tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII);
 | 
				
			||||||
        if (TII->isOperandLegal(UseMI, Use.getOperandNo(), &OpToFold)) {
 | 
					 | 
				
			||||||
          FoldList.push_back(FoldCandidate(UseMI, Use.getOperandNo(), &OpToFold));
 | 
					 | 
				
			||||||
          continue;
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        // FIXME: We could commute the instruction to create more opportunities
 | 
					 | 
				
			||||||
        // for folding.  This will only be useful if we have 32-bit instructions.
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // FIXME: We could try to change the instruction from 64-bit to 32-bit
 | 
					        // FIXME: We could try to change the instruction from 64-bit to 32-bit
 | 
				
			||||||
        // to enable more folding opportunities.  The shrink operands pass
 | 
					        // to enable more folding opportunities.  The shrink operands pass
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -709,6 +709,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
 | 
					MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
 | 
				
			||||||
                                              bool NewMI) const {
 | 
					                                              bool NewMI) const {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if (MI->getNumOperands() < 3)
 | 
					  if (MI->getNumOperands() < 3)
 | 
				
			||||||
    return nullptr;
 | 
					    return nullptr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -730,8 +731,9 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
 | 
				
			|||||||
  // Make sure it's legal to commute operands for VOP2.
 | 
					  // Make sure it's legal to commute operands for VOP2.
 | 
				
			||||||
  if (isVOP2(MI->getOpcode()) &&
 | 
					  if (isVOP2(MI->getOpcode()) &&
 | 
				
			||||||
      (!isOperandLegal(MI, Src0Idx, &Src1) ||
 | 
					      (!isOperandLegal(MI, Src0Idx, &Src1) ||
 | 
				
			||||||
       !isOperandLegal(MI, Src1Idx, &Src0)))
 | 
					       !isOperandLegal(MI, Src1Idx, &Src0))) {
 | 
				
			||||||
    return nullptr;
 | 
					    return nullptr;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if (!Src1.isReg()) {
 | 
					  if (!Src1.isReg()) {
 | 
				
			||||||
    // Allow commuting instructions with Imm or FPImm operands.
 | 
					    // Allow commuting instructions with Imm or FPImm operands.
 | 
				
			||||||
@@ -1471,6 +1473,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx,
 | 
				
			|||||||
    //
 | 
					    //
 | 
				
			||||||
    // s_sendmsg 0, s0 ; Operand defined as m0reg
 | 
					    // s_sendmsg 0, s0 ; Operand defined as m0reg
 | 
				
			||||||
    //                 ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
 | 
					    //                 ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
 | 
					    return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1,7 +1,7 @@
 | 
				
			|||||||
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 | 
					;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0xaaaaaaab
 | 
					;CHECK: v_mov_b32_e32 v{{[0-9]+}}, 0xaaaaaaab
 | 
				
			||||||
;CHECK: v_mul_hi_u32 v0, {{[sv][0-9]+}}, {{v[0-9]+}}
 | 
					;CHECK: v_mul_hi_u32 v0, {{v[0-9]+}}, {{s[0-9]+}}
 | 
				
			||||||
;CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v0
 | 
					;CHECK-NEXT: v_lshrrev_b32_e32 v0, 1, v0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
define void @test(i32 %p) {
 | 
					define void @test(i32 %p) {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -35,7 +35,7 @@ define void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 | 
				
			|||||||
; FUNC-LABEL: {{^}}slow_sdiv_i32_3435:
 | 
					; FUNC-LABEL: {{^}}slow_sdiv_i32_3435:
 | 
				
			||||||
; SI: buffer_load_dword [[VAL:v[0-9]+]],
 | 
					; SI: buffer_load_dword [[VAL:v[0-9]+]],
 | 
				
			||||||
; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b
 | 
					; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b
 | 
				
			||||||
; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[VAL]], [[MAGIC]]
 | 
					; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[MAGIC]], [[VAL]]
 | 
				
			||||||
; SI: v_add_i32
 | 
					; SI: v_add_i32
 | 
				
			||||||
; SI: v_lshrrev_b32
 | 
					; SI: v_lshrrev_b32
 | 
				
			||||||
; SI: v_ashrrev_i32
 | 
					; SI: v_ashrrev_i32
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -41,7 +41,7 @@ define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, floa
 | 
				
			|||||||
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
 | 
					; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
 | 
				
			||||||
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
 | 
					; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
 | 
				
			||||||
; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
 | 
					; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
 | 
				
			||||||
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
 | 
					; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
 | 
				
			||||||
; SI: buffer_store_dword [[RESULT]]
 | 
					; SI: buffer_store_dword [[RESULT]]
 | 
				
			||||||
define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
 | 
					define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, float %a, float %b) #0 {
 | 
				
			||||||
  %fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1
 | 
					  %fma = call float @llvm.fma.f32(float %a, float %b, float %a) #1
 | 
				
			||||||
@@ -53,7 +53,7 @@ define void @test_sgpr_use_twice_ternary_op_a_b_a(float addrspace(1)* %out, floa
 | 
				
			|||||||
; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
 | 
					; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
 | 
				
			||||||
; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
 | 
					; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
 | 
				
			||||||
; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
 | 
					; SI: v_mov_b32_e32 [[VGPR1:v[0-9]+]], [[SGPR1]]
 | 
				
			||||||
; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[VGPR1]], [[SGPR0]], [[SGPR0]]
 | 
					; SI: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR0]], [[VGPR1]], [[SGPR0]]
 | 
				
			||||||
; SI: buffer_store_dword [[RESULT]]
 | 
					; SI: buffer_store_dword [[RESULT]]
 | 
				
			||||||
define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
 | 
					define void @test_sgpr_use_twice_ternary_op_b_a_a(float addrspace(1)* %out, float %a, float %b) #0 {
 | 
				
			||||||
  %fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1
 | 
					  %fma = call float @llvm.fma.f32(float %b, float %a, float %a) #1
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user