mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-05-22 19:38:40 +00:00
R600: Factorize code handling Const Read Port limitation
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177078 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b4ba5e68e1
commit
3ab0ba3cd8
@ -365,17 +365,34 @@ bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
|
|||||||
SDValue Operand = Ops[OperandIdx[i] - 1];
|
SDValue Operand = Ops[OperandIdx[i] - 1];
|
||||||
switch (Operand.getOpcode()) {
|
switch (Operand.getOpcode()) {
|
||||||
case AMDGPUISD::CONST_ADDRESS: {
|
case AMDGPUISD::CONST_ADDRESS: {
|
||||||
if (i == 2)
|
|
||||||
break;
|
|
||||||
SDValue CstOffset;
|
SDValue CstOffset;
|
||||||
if (!Operand.getValueType().isVector() &&
|
if (Operand.getValueType().isVector() ||
|
||||||
SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) {
|
!SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset))
|
||||||
|
break;
|
||||||
|
|
||||||
|
// Gather the other constant values
|
||||||
|
std::vector<unsigned> Consts;
|
||||||
|
for (unsigned j = 0; j < 3; j++) {
|
||||||
|
int SrcIdx = OperandIdx[j];
|
||||||
|
if (SrcIdx < 0)
|
||||||
|
break;
|
||||||
|
if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
|
||||||
|
if (Reg->getReg() == AMDGPU::ALU_CONST) {
|
||||||
|
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
|
||||||
|
Consts.push_back(Cst->getZExtValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
|
||||||
|
Consts.push_back(Cst->getZExtValue());
|
||||||
|
if (!TII->fitsConstReadLimitations(Consts))
|
||||||
|
break;
|
||||||
|
|
||||||
Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
|
Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
|
||||||
Ops[SelIdx[i] - 1] = CstOffset;
|
Ops[SelIdx[i] - 1] = CstOffset;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
break;
|
|
||||||
case ISD::FNEG:
|
case ISD::FNEG:
|
||||||
if (NegIdx[i] < 0)
|
if (NegIdx[i] < 0)
|
||||||
break;
|
break;
|
||||||
|
@ -139,6 +139,60 @@ bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
|
|||||||
(TargetFlags & R600_InstFlag::OP3));
|
(TargetFlags & R600_InstFlag::OP3));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
|
||||||
|
const {
|
||||||
|
assert (Consts.size() <= 12 && "Too many operands in instructions group");
|
||||||
|
unsigned Pair1 = 0, Pair2 = 0;
|
||||||
|
for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
|
||||||
|
unsigned ReadConstHalf = Consts[i] & 2;
|
||||||
|
unsigned ReadConstIndex = Consts[i] & (~3);
|
||||||
|
unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
|
||||||
|
if (!Pair1) {
|
||||||
|
Pair1 = ReadHalfConst;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (Pair1 == ReadHalfConst)
|
||||||
|
continue;
|
||||||
|
if (!Pair2) {
|
||||||
|
Pair2 = ReadHalfConst;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (Pair2 != ReadHalfConst)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
|
||||||
|
std::vector<unsigned> Consts;
|
||||||
|
for (unsigned i = 0, n = MIs.size(); i < n; i++) {
|
||||||
|
const MachineInstr *MI = MIs[i];
|
||||||
|
|
||||||
|
const R600Operands::Ops OpTable[3][2] = {
|
||||||
|
{R600Operands::SRC0, R600Operands::SRC0_SEL},
|
||||||
|
{R600Operands::SRC1, R600Operands::SRC1_SEL},
|
||||||
|
{R600Operands::SRC2, R600Operands::SRC2_SEL},
|
||||||
|
};
|
||||||
|
|
||||||
|
if (!isALUInstr(MI->getOpcode()))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
for (unsigned j = 0; j < 3; j++) {
|
||||||
|
int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
|
||||||
|
if (SrcIdx < 0)
|
||||||
|
break;
|
||||||
|
if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) {
|
||||||
|
unsigned Const = MI->getOperand(
|
||||||
|
getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
|
||||||
|
Consts.push_back(Const);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return fitsConstReadLimitations(Consts);
|
||||||
|
}
|
||||||
|
|
||||||
DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
|
DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
|
||||||
const ScheduleDAG *DAG) const {
|
const ScheduleDAG *DAG) const {
|
||||||
const InstrItineraryData *II = TM->getInstrItineraryData();
|
const InstrItineraryData *II = TM->getInstrItineraryData();
|
||||||
|
@ -53,6 +53,9 @@ namespace llvm {
|
|||||||
/// \returns true if this \p Opcode represents an ALU instruction.
|
/// \returns true if this \p Opcode represents an ALU instruction.
|
||||||
bool isALUInstr(unsigned Opcode) const;
|
bool isALUInstr(unsigned Opcode) const;
|
||||||
|
|
||||||
|
bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
|
||||||
|
bool canBundle(const std::vector<MachineInstr *> &) const;
|
||||||
|
|
||||||
/// \brief Vector instructions are instructions that must fill all
|
/// \brief Vector instructions are instructions that must fill all
|
||||||
/// instruction slots within an instruction group.
|
/// instruction slots within an instruction group.
|
||||||
bool isVector(const MachineInstr &MI) const;
|
bool isVector(const MachineInstr &MI) const;
|
||||||
|
@ -37,7 +37,6 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
|
|||||||
CurInstKind = IDOther;
|
CurInstKind = IDOther;
|
||||||
CurEmitted = 0;
|
CurEmitted = 0;
|
||||||
OccupedSlotsMask = 15;
|
OccupedSlotsMask = 15;
|
||||||
memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
|
|
||||||
InstKindLimit[IDAlu] = 120; // 120 minus 8 for security
|
InstKindLimit[IDAlu] = 120; // 120 minus 8 for security
|
||||||
|
|
||||||
|
|
||||||
@ -288,79 +287,19 @@ int R600SchedStrategy::getInstKind(SUnit* SU) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class ConstPairs {
|
|
||||||
private:
|
|
||||||
unsigned XYPair;
|
|
||||||
unsigned ZWPair;
|
|
||||||
public:
|
|
||||||
ConstPairs(unsigned ReadConst[3]) : XYPair(0), ZWPair(0) {
|
|
||||||
for (unsigned i = 0; i < 3; i++) {
|
|
||||||
unsigned ReadConstChan = ReadConst[i] & 3;
|
|
||||||
unsigned ReadConstIndex = ReadConst[i] & (~3);
|
|
||||||
if (ReadConstChan < 2) {
|
|
||||||
if (!XYPair) {
|
|
||||||
XYPair = ReadConstIndex;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (!ZWPair) {
|
|
||||||
ZWPair = ReadConstIndex;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool isCompatibleWith(const ConstPairs& CP) const {
|
|
||||||
return (!XYPair || !CP.XYPair || CP.XYPair == XYPair) &&
|
|
||||||
(!ZWPair || !CP.ZWPair || CP.ZWPair == ZWPair);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
static
|
|
||||||
const ConstPairs getPairs(const R600InstrInfo *TII, const MachineInstr& MI) {
|
|
||||||
unsigned ReadConsts[3] = {0, 0, 0};
|
|
||||||
R600Operands::Ops OpTable[3][2] = {
|
|
||||||
{R600Operands::SRC0, R600Operands::SRC0_SEL},
|
|
||||||
{R600Operands::SRC1, R600Operands::SRC1_SEL},
|
|
||||||
{R600Operands::SRC2, R600Operands::SRC2_SEL},
|
|
||||||
};
|
|
||||||
|
|
||||||
if (!TII->isALUInstr(MI.getOpcode()))
|
|
||||||
return ConstPairs(ReadConsts);
|
|
||||||
|
|
||||||
for (unsigned i = 0; i < 3; i++) {
|
|
||||||
int SrcIdx = TII->getOperandIdx(MI.getOpcode(), OpTable[i][0]);
|
|
||||||
if (SrcIdx < 0)
|
|
||||||
break;
|
|
||||||
if (MI.getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST)
|
|
||||||
ReadConsts[i] =MI.getOperand(
|
|
||||||
TII->getOperandIdx(MI.getOpcode(), OpTable[i][1])).getImm();
|
|
||||||
}
|
|
||||||
return ConstPairs(ReadConsts);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
|
||||||
R600SchedStrategy::isBundleable(const MachineInstr& MI) {
|
|
||||||
const ConstPairs &MIPair = getPairs(TII, MI);
|
|
||||||
for (unsigned i = 0; i < 4; i++) {
|
|
||||||
if (!InstructionsGroupCandidate[i])
|
|
||||||
continue;
|
|
||||||
const ConstPairs &IGPair = getPairs(TII,
|
|
||||||
*InstructionsGroupCandidate[i]->getInstr());
|
|
||||||
if (!IGPair.isCompatibleWith(MIPair))
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
|
SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
|
||||||
if (Q.empty())
|
if (Q.empty())
|
||||||
return NULL;
|
return NULL;
|
||||||
for (std::set<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end();
|
for (std::set<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end();
|
||||||
It != E; ++It) {
|
It != E; ++It) {
|
||||||
SUnit *SU = *It;
|
SUnit *SU = *It;
|
||||||
if (isBundleable(*SU->getInstr())) {
|
InstructionsGroupCandidate.push_back(SU->getInstr());
|
||||||
|
if (TII->canBundle(InstructionsGroupCandidate)) {
|
||||||
|
InstructionsGroupCandidate.pop_back();
|
||||||
Q.erase(It);
|
Q.erase(It);
|
||||||
return SU;
|
return SU;
|
||||||
|
} else {
|
||||||
|
InstructionsGroupCandidate.pop_back();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -381,7 +320,7 @@ void R600SchedStrategy::PrepareNextSlot() {
|
|||||||
DEBUG(dbgs() << "New Slot\n");
|
DEBUG(dbgs() << "New Slot\n");
|
||||||
assert (OccupedSlotsMask && "Slot wasn't filled");
|
assert (OccupedSlotsMask && "Slot wasn't filled");
|
||||||
OccupedSlotsMask = 0;
|
OccupedSlotsMask = 0;
|
||||||
memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
|
InstructionsGroupCandidate.clear();
|
||||||
LoadAlu();
|
LoadAlu();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -462,7 +401,7 @@ SUnit* R600SchedStrategy::pickAlu() {
|
|||||||
SUnit *SU = AttemptFillSlot(Chan);
|
SUnit *SU = AttemptFillSlot(Chan);
|
||||||
if (SU) {
|
if (SU) {
|
||||||
OccupedSlotsMask |= (1 << Chan);
|
OccupedSlotsMask |= (1 << Chan);
|
||||||
InstructionsGroupCandidate[Chan] = SU;
|
InstructionsGroupCandidate.push_back(SU->getInstr());
|
||||||
return SU;
|
return SU;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -98,7 +98,7 @@ public:
|
|||||||
virtual void releaseBottomNode(SUnit *SU);
|
virtual void releaseBottomNode(SUnit *SU);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
SUnit *InstructionsGroupCandidate[4];
|
std::vector<MachineInstr *> InstructionsGroupCandidate;
|
||||||
|
|
||||||
int getInstKind(SUnit *SU);
|
int getInstKind(SUnit *SU);
|
||||||
bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
|
bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
|
||||||
@ -112,7 +112,6 @@ private:
|
|||||||
void AssignSlot(MachineInstr *MI, unsigned Slot);
|
void AssignSlot(MachineInstr *MI, unsigned Slot);
|
||||||
SUnit* pickAlu();
|
SUnit* pickAlu();
|
||||||
SUnit* pickOther(int QID);
|
SUnit* pickOther(int QID);
|
||||||
bool isBundleable(const MachineInstr& MI);
|
|
||||||
void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst);
|
void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||||
|
|
||||||
|
; CHECK: @main1
|
||||||
; CHECK: MOV T{{[0-9]+\.[XYZW], CBuf0\[[0-9]+\]\.[XYZW]}}
|
; CHECK: MOV T{{[0-9]+\.[XYZW], CBuf0\[[0-9]+\]\.[XYZW]}}
|
||||||
|
define void @main1() {
|
||||||
define void @main() {
|
|
||||||
main_body:
|
main_body:
|
||||||
%0 = load <4 x float> addrspace(8)* null
|
%0 = load <4 x float> addrspace(8)* null
|
||||||
%1 = extractelement <4 x float> %0, i32 0
|
%1 = extractelement <4 x float> %0, i32 0
|
||||||
@ -48,5 +48,53 @@ main_body:
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; CHECK: @main2
|
||||||
|
; CHECK-NOT: MOV
|
||||||
|
define void @main2() {
|
||||||
|
main_body:
|
||||||
|
%0 = load <4 x float> addrspace(8)* null
|
||||||
|
%1 = extractelement <4 x float> %0, i32 0
|
||||||
|
%2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
|
||||||
|
%3 = extractelement <4 x float> %2, i32 0
|
||||||
|
%4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
|
||||||
|
%5 = extractelement <4 x float> %4, i32 1
|
||||||
|
%6 = fcmp ult float %1, 0.000000e+00
|
||||||
|
%7 = select i1 %6, float %3, float %5
|
||||||
|
%8 = load <4 x float> addrspace(8)* null
|
||||||
|
%9 = extractelement <4 x float> %8, i32 1
|
||||||
|
%10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
|
||||||
|
%11 = extractelement <4 x float> %10, i32 0
|
||||||
|
%12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
|
||||||
|
%13 = extractelement <4 x float> %12, i32 1
|
||||||
|
%14 = fcmp ult float %9, 0.000000e+00
|
||||||
|
%15 = select i1 %14, float %11, float %13
|
||||||
|
%16 = load <4 x float> addrspace(8)* null
|
||||||
|
%17 = extractelement <4 x float> %16, i32 2
|
||||||
|
%18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
|
||||||
|
%19 = extractelement <4 x float> %18, i32 3
|
||||||
|
%20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
|
||||||
|
%21 = extractelement <4 x float> %20, i32 2
|
||||||
|
%22 = fcmp ult float %17, 0.000000e+00
|
||||||
|
%23 = select i1 %22, float %19, float %21
|
||||||
|
%24 = load <4 x float> addrspace(8)* null
|
||||||
|
%25 = extractelement <4 x float> %24, i32 3
|
||||||
|
%26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
|
||||||
|
%27 = extractelement <4 x float> %26, i32 3
|
||||||
|
%28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
|
||||||
|
%29 = extractelement <4 x float> %28, i32 2
|
||||||
|
%30 = fcmp ult float %25, 0.000000e+00
|
||||||
|
%31 = select i1 %30, float %27, float %29
|
||||||
|
%32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
|
||||||
|
%33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
|
||||||
|
%34 = call float @llvm.AMDIL.clamp.(float %23, float 0.000000e+00, float 1.000000e+00)
|
||||||
|
%35 = call float @llvm.AMDIL.clamp.(float %31, float 0.000000e+00, float 1.000000e+00)
|
||||||
|
%36 = insertelement <4 x float> undef, float %32, i32 0
|
||||||
|
%37 = insertelement <4 x float> %36, float %33, i32 1
|
||||||
|
%38 = insertelement <4 x float> %37, float %34, i32 2
|
||||||
|
%39 = insertelement <4 x float> %38, float %35, i32 3
|
||||||
|
call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0)
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
declare float @llvm.AMDIL.clamp.(float, float, float) readnone
|
declare float @llvm.AMDIL.clamp.(float, float, float) readnone
|
||||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user