mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-08-09 11:25:55 +00:00
R600: add a pass that merges clauses.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191790 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -29,6 +29,7 @@ FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
|
|||||||
FunctionPass *createR600TextureIntrinsicsReplacer();
|
FunctionPass *createR600TextureIntrinsicsReplacer();
|
||||||
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
|
FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
|
||||||
FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
|
FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
|
||||||
|
FunctionPass *createR600ClauseMergePass(TargetMachine &tm);
|
||||||
FunctionPass *createR600Packetizer(TargetMachine &tm);
|
FunctionPass *createR600Packetizer(TargetMachine &tm);
|
||||||
FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
|
FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
|
||||||
FunctionPass *createAMDGPUCFGStructurizerPass(TargetMachine &tm);
|
FunctionPass *createAMDGPUCFGStructurizerPass(TargetMachine &tm);
|
||||||
|
@@ -168,10 +168,11 @@ bool AMDGPUPassConfig::addPostRegAlloc() {
|
|||||||
bool AMDGPUPassConfig::addPreSched2() {
|
bool AMDGPUPassConfig::addPreSched2() {
|
||||||
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
|
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
|
||||||
|
|
||||||
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
|
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
|
||||||
addPass(createR600EmitClauseMarkers(*TM));
|
addPass(createR600EmitClauseMarkers(*TM));
|
||||||
}
|
|
||||||
addPass(&IfConverterID);
|
addPass(&IfConverterID);
|
||||||
|
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
|
||||||
|
addPass(createR600ClauseMergePass(*TM));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -28,6 +28,7 @@ add_llvm_target(R600CodeGen
|
|||||||
AMDGPUConvertToISA.cpp
|
AMDGPUConvertToISA.cpp
|
||||||
AMDGPUInstrInfo.cpp
|
AMDGPUInstrInfo.cpp
|
||||||
AMDGPURegisterInfo.cpp
|
AMDGPURegisterInfo.cpp
|
||||||
|
R600ClauseMergePass.cpp
|
||||||
R600ControlFlowFinalizer.cpp
|
R600ControlFlowFinalizer.cpp
|
||||||
R600EmitClauseMarkers.cpp
|
R600EmitClauseMarkers.cpp
|
||||||
R600ExpandSpecialInstrs.cpp
|
R600ExpandSpecialInstrs.cpp
|
||||||
|
204
lib/Target/R600/R600ClauseMergePass.cpp
Normal file
204
lib/Target/R600/R600ClauseMergePass.cpp
Normal file
@@ -0,0 +1,204 @@
|
|||||||
|
//===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
|
||||||
|
//
|
||||||
|
// The LLVM Compiler Infrastructure
|
||||||
|
//
|
||||||
|
// This file is distributed under the University of Illinois Open Source
|
||||||
|
// License. See LICENSE.TXT for details.
|
||||||
|
//
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
//
|
||||||
|
/// \file
|
||||||
|
/// The R600EmitClauseMarkers pass emits CFAlu instructions in a conservative manner.
|
||||||
|
/// This pass is merging consecutive CFAlus where applicable.
|
||||||
|
/// It needs to be called after IfCvt for best results.
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#define DEBUG_TYPE "r600mergeclause"
|
||||||
|
#include "AMDGPU.h"
|
||||||
|
#include "R600Defines.h"
|
||||||
|
#include "R600InstrInfo.h"
|
||||||
|
#include "R600MachineFunctionInfo.h"
|
||||||
|
#include "R600RegisterInfo.h"
|
||||||
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||||
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||||
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
|
#include "llvm/Support/Debug.h"
|
||||||
|
#include "llvm/Support/raw_ostream.h"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
static bool isCFAlu(const MachineInstr *MI) {
|
||||||
|
switch (MI->getOpcode()) {
|
||||||
|
case AMDGPU::CF_ALU:
|
||||||
|
case AMDGPU::CF_ALU_PUSH_BEFORE:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class R600ClauseMergePass : public MachineFunctionPass {
|
||||||
|
|
||||||
|
private:
|
||||||
|
static char ID;
|
||||||
|
const R600InstrInfo *TII;
|
||||||
|
|
||||||
|
unsigned getCFAluSize(const MachineInstr *MI) const;
|
||||||
|
bool isCFAluEnabled(const MachineInstr *MI) const;
|
||||||
|
|
||||||
|
/// IfCvt pass can generate "disabled" ALU clause marker that need to be
|
||||||
|
/// removed and their content affected to the previous alu clause.
|
||||||
|
/// This function parse instructions after CFAlu untill it find a disabled
|
||||||
|
/// CFAlu and merge the content, or an enabled CFAlu.
|
||||||
|
void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const;
|
||||||
|
|
||||||
|
/// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
|
||||||
|
/// it is the case.
|
||||||
|
bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu)
|
||||||
|
const;
|
||||||
|
|
||||||
|
public:
|
||||||
|
R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
|
||||||
|
|
||||||
|
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||||
|
|
||||||
|
const char *getPassName() const;
|
||||||
|
};
|
||||||
|
|
||||||
|
char R600ClauseMergePass::ID = 0;
|
||||||
|
|
||||||
|
unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const {
|
||||||
|
assert(isCFAlu(MI));
|
||||||
|
return MI->getOperand(
|
||||||
|
TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const {
|
||||||
|
assert(isCFAlu(MI));
|
||||||
|
return MI->getOperand(
|
||||||
|
TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Folds the disabled clause markers that follow \p CFAlu into it.
///
/// Starting right after \p CFAlu, the rest of the basic block is scanned for
/// CFAlu instructions. Each disabled one has its COUNT added to the COUNT of
/// \p CFAlu and is then erased from the block. The scan ends at the first
/// enabled CFAlu (which is left untouched) or at the end of the block.
void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu)
    const {
  int CountOpIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
  MachineBasicBlock::iterator End = CFAlu->getParent()->end();
  MachineBasicBlock::iterator Pos = CFAlu;
  ++Pos;

  while (Pos != End) {
    // Anything that is not a clause marker is simply stepped over.
    if (!isCFAlu(Pos)) {
      ++Pos;
      continue;
    }

    // Step past the marker before it is possibly erased below.
    MachineInstr *Marker = Pos;
    ++Pos;

    if (isCFAluEnabled(Marker))
      return;

    CFAlu->getOperand(CountOpIdx)
        .setImm(getCFAluSize(CFAlu) + getCFAluSize(Marker));
    Marker->eraseFromParent();
  }
}
|
||||||
|
|
||||||
|
bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu,
|
||||||
|
const MachineInstr *LatrCFAlu) const {
|
||||||
|
assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
|
||||||
|
int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
|
||||||
|
unsigned RootInstCount = getCFAluSize(RootCFAlu),
|
||||||
|
LaterInstCount = getCFAluSize(LatrCFAlu);
|
||||||
|
unsigned CumuledInsts = RootInstCount + LaterInstCount;
|
||||||
|
if (CumuledInsts >= TII->getMaxAlusPerClause()) {
|
||||||
|
DEBUG(dbgs() << "Excess inst counts\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
|
||||||
|
return false;
|
||||||
|
// Is KCache Bank 0 compatible ?
|
||||||
|
int Mode0Idx =
|
||||||
|
TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
|
||||||
|
int KBank0Idx =
|
||||||
|
TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
|
||||||
|
int KBank0LineIdx =
|
||||||
|
TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
|
||||||
|
if (LatrCFAlu->getOperand(Mode0Idx).getImm() &&
|
||||||
|
RootCFAlu->getOperand(Mode0Idx).getImm() &&
|
||||||
|
(LatrCFAlu->getOperand(KBank0Idx).getImm() !=
|
||||||
|
RootCFAlu->getOperand(KBank0Idx).getImm() ||
|
||||||
|
LatrCFAlu->getOperand(KBank0LineIdx).getImm() !=
|
||||||
|
RootCFAlu->getOperand(KBank0LineIdx).getImm())) {
|
||||||
|
DEBUG(dbgs() << "Wrong KC0\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Is KCache Bank 1 compatible ?
|
||||||
|
int Mode1Idx =
|
||||||
|
TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
|
||||||
|
int KBank1Idx =
|
||||||
|
TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
|
||||||
|
int KBank1LineIdx =
|
||||||
|
TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
|
||||||
|
if (LatrCFAlu->getOperand(Mode1Idx).getImm() &&
|
||||||
|
RootCFAlu->getOperand(Mode1Idx).getImm() &&
|
||||||
|
(LatrCFAlu->getOperand(KBank1Idx).getImm() !=
|
||||||
|
RootCFAlu->getOperand(KBank1Idx).getImm() ||
|
||||||
|
LatrCFAlu->getOperand(KBank1LineIdx).getImm() !=
|
||||||
|
RootCFAlu->getOperand(KBank1LineIdx).getImm())) {
|
||||||
|
DEBUG(dbgs() << "Wrong KC0\n");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (LatrCFAlu->getOperand(Mode0Idx).getImm()) {
|
||||||
|
RootCFAlu->getOperand(Mode0Idx).setImm(
|
||||||
|
LatrCFAlu->getOperand(Mode0Idx).getImm());
|
||||||
|
RootCFAlu->getOperand(KBank0Idx).setImm(
|
||||||
|
LatrCFAlu->getOperand(KBank0Idx).getImm());
|
||||||
|
RootCFAlu->getOperand(KBank0LineIdx).setImm(
|
||||||
|
LatrCFAlu->getOperand(KBank0LineIdx).getImm());
|
||||||
|
}
|
||||||
|
if (LatrCFAlu->getOperand(Mode1Idx).getImm()) {
|
||||||
|
RootCFAlu->getOperand(Mode1Idx).setImm(
|
||||||
|
LatrCFAlu->getOperand(Mode1Idx).getImm());
|
||||||
|
RootCFAlu->getOperand(KBank1Idx).setImm(
|
||||||
|
LatrCFAlu->getOperand(KBank1Idx).getImm());
|
||||||
|
RootCFAlu->getOperand(KBank1LineIdx).setImm(
|
||||||
|
LatrCFAlu->getOperand(KBank1LineIdx).getImm());
|
||||||
|
}
|
||||||
|
RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts);
|
||||||
|
RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode()));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Runs the clause merge over every basic block of \p MF.
///
/// Within a block, LatestCFAlu tracks the most recent clause marker that later
/// markers may still be merged into. It is reset whenever an instruction is
/// met that is neither a CFAlu nor something canBeConsideredALU accepts, or
/// whose opcode mustBeLastInClause. For each CFAlu found, trailing disabled
/// markers are folded into it first; it is then either merged into
/// LatestCFAlu and erased, or becomes the new LatestCFAlu.
///
/// \returns true if any CFAlu was visited (see the note on Changed below),
/// false otherwise.
bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
  bool Changed = false;

  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
       BB != BB_E; ++BB) {
    MachineBasicBlock &MBB = *BB;
    MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
    MachineBasicBlock::iterator LatestCFAlu = E;
    while (I != E) {
      MachineInstr *MI = I;
      if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
          TII->mustBeLastInClause(MI->getOpcode()))
        LatestCFAlu = E;
      if (!isCFAlu(MI)) {
        ++I;
        continue;
      }

      cleanPotentialDisabledCFAlu(MI);
      // cleanPotentialDisabledCFAlu rewrites MI and erases markers without
      // reporting what it did, so conservatively count every visited CFAlu
      // as a modification.
      Changed = true;

      // Advance only now: cleanPotentialDisabledCFAlu may have erased the
      // instruction that used to follow MI, so an iterator taken earlier
      // could be dangling. MI itself is never erased by that call.
      ++I;

      if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) {
        MI->eraseFromParent();
      } else {
        assert(isCFAluEnabled(MI) && "CF ALU instruction disabled");
        LatestCFAlu = MI;
      }
    }
  }
  return Changed;
}
|
||||||
|
|
||||||
|
/// \returns the human-readable name of this pass.
const char *R600ClauseMergePass::getPassName() const {
  static const char PassName[] = "R600 Merge Clause Markers Pass";
  return PassName;
}
|
||||||
|
|
||||||
|
} // end anonymous namespace
|
||||||
|
|
||||||
|
|
||||||
|
/// Factory for the R600 clause merge pass.
/// \returns a newly allocated R600ClauseMergePass constructed from \p TM.
llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) {
  R600ClauseMergePass *MergePass = new R600ClauseMergePass(TM);
  return MergePass;
}
|
@@ -153,6 +153,24 @@ bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
|
|||||||
(TargetFlags & R600_InstFlag::LDS_1A2D));
|
(TargetFlags & R600_InstFlag::LDS_1A2D));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const {
|
||||||
|
if (isALUInstr(MI->getOpcode()))
|
||||||
|
return true;
|
||||||
|
if (isVector(*MI) || isCubeOp(MI->getOpcode()))
|
||||||
|
return true;
|
||||||
|
switch (MI->getOpcode()) {
|
||||||
|
case AMDGPU::PRED_X:
|
||||||
|
case AMDGPU::INTERP_PAIR_XY:
|
||||||
|
case AMDGPU::INTERP_PAIR_ZW:
|
||||||
|
case AMDGPU::INTERP_VEC_LOAD:
|
||||||
|
case AMDGPU::COPY:
|
||||||
|
case AMDGPU::DOT_4:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
|
bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
|
||||||
if (ST.hasCaymanISA())
|
if (ST.hasCaymanISA())
|
||||||
return false;
|
return false;
|
||||||
|
@@ -66,6 +66,10 @@ namespace llvm {
|
|||||||
bool hasInstrModifiers(unsigned Opcode) const;
|
bool hasInstrModifiers(unsigned Opcode) const;
|
||||||
bool isLDSInstr(unsigned Opcode) const;
|
bool isLDSInstr(unsigned Opcode) const;
|
||||||
|
|
||||||
|
/// \returns true if \p MI represents an ALU instruction or an
|
||||||
|
/// instruction that will be lowered in ExpandSpecialInstrs Pass.
|
||||||
|
bool canBeConsideredALU(const MachineInstr *MI) const;
|
||||||
|
|
||||||
bool isTransOnly(unsigned Opcode) const;
|
bool isTransOnly(unsigned Opcode) const;
|
||||||
bool isTransOnly(const MachineInstr *MI) const;
|
bool isTransOnly(const MachineInstr *MI) const;
|
||||||
bool isVectorOnly(unsigned Opcode) const;
|
bool isVectorOnly(unsigned Opcode) const;
|
||||||
|
@@ -590,6 +590,7 @@ i32imm:$COUNT, i32imm:$Enabled),
|
|||||||
let ALT_CONST = 0;
|
let ALT_CONST = 0;
|
||||||
let WHOLE_QUAD_MODE = 0;
|
let WHOLE_QUAD_MODE = 0;
|
||||||
let BARRIER = 1;
|
let BARRIER = 1;
|
||||||
|
let UseNamedOperandTable = 1;
|
||||||
|
|
||||||
let Inst{31-0} = Word0;
|
let Inst{31-0} = Word0;
|
||||||
let Inst{63-32} = Word1;
|
let Inst{63-32} = Word1;
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||||
|
|
||||||
; CHECK: JUMP @10
|
; CHECK: JUMP @5
|
||||||
; CHECK: EXPORT
|
; CHECK: EXPORT
|
||||||
; CHECK-NOT: EXPORT
|
; CHECK-NOT: EXPORT
|
||||||
|
|
||||||
|
@@ -1,9 +1,9 @@
|
|||||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||||
|
|
||||||
;CHECK: ALU_PUSH
|
;CHECK: ALU_PUSH
|
||||||
;CHECK: LOOP_START_DX10 @13
|
;CHECK: LOOP_START_DX10 @11
|
||||||
;CHECK: LOOP_BREAK @12
|
;CHECK: LOOP_BREAK @10
|
||||||
;CHECK: POP @12
|
;CHECK: POP @10
|
||||||
|
|
||||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
|
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
|
||||||
target triple = "r600--"
|
target triple = "r600--"
|
||||||
|
@@ -30,7 +30,8 @@ ENDIF:
|
|||||||
|
|
||||||
; CHECK: @test_b
|
; CHECK: @test_b
|
||||||
; CHECK: SET{{[GTEQN]+}}_DX10
|
; CHECK: SET{{[GTEQN]+}}_DX10
|
||||||
; CHECK: PRED_
|
; CHECK-NEXT: PRED_
|
||||||
|
; CHECK-NEXT: ALU clause starting
|
||||||
define void @test_b(i32 addrspace(1)* %out, float %in) {
|
define void @test_b(i32 addrspace(1)* %out, float %in) {
|
||||||
entry:
|
entry:
|
||||||
%0 = fcmp olt float %in, 0.0
|
%0 = fcmp olt float %in, 0.0
|
||||||
|
Reference in New Issue
Block a user