mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-13 04:30:23 +00:00
R600: Support schedule and packetization of trans-only inst
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185268 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
7d1a0d4e3e
commit
8f9fbd67c3
@ -250,8 +250,9 @@ R600InstrInfo::getSrcs(MachineInstr *MI) const {
|
||||
|
||||
std::vector<std::pair<int, unsigned> >
|
||||
R600InstrInfo::ExtractSrcs(MachineInstr *MI,
|
||||
const DenseMap<unsigned, unsigned> &PV)
|
||||
const {
|
||||
const DenseMap<unsigned, unsigned> &PV,
|
||||
unsigned &ConstCount) const {
|
||||
ConstCount = 0;
|
||||
const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
|
||||
const std::pair<int, unsigned> DummyPair(-1, 0);
|
||||
std::vector<std::pair<int, unsigned> > Result;
|
||||
@ -259,18 +260,20 @@ R600InstrInfo::ExtractSrcs(MachineInstr *MI,
|
||||
for (unsigned n = Srcs.size(); i < n; ++i) {
|
||||
unsigned Reg = Srcs[i].first->getReg();
|
||||
unsigned Index = RI.getEncodingValue(Reg) & 0xff;
|
||||
unsigned Chan = RI.getHWRegChan(Reg);
|
||||
if (Reg == AMDGPU::OQAP) {
|
||||
Result.push_back(std::pair<int, unsigned>(Index, 0));
|
||||
}
|
||||
if (Index > 127) {
|
||||
Result.push_back(DummyPair);
|
||||
continue;
|
||||
}
|
||||
if (PV.find(Reg) != PV.end()) {
|
||||
// 255 is used to tell that it's a PS/PV reg
|
||||
Result.push_back(std::pair<int, unsigned>(255, 0));
|
||||
continue;
|
||||
}
|
||||
if (Index > 127) {
|
||||
ConstCount++;
|
||||
Result.push_back(DummyPair);
|
||||
continue;
|
||||
}
|
||||
unsigned Chan = RI.getHWRegChan(Reg);
|
||||
Result.push_back(std::pair<int, unsigned>(Index, Chan));
|
||||
}
|
||||
for (; i < 3; ++i)
|
||||
@ -305,23 +308,51 @@ Swizzle(std::vector<std::pair<int, unsigned> > Src,
|
||||
return Src;
|
||||
}
|
||||
|
||||
bool
|
||||
R600InstrInfo::isLegal(
|
||||
const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
|
||||
const std::vector<R600InstrInfo::BankSwizzle> &Swz,
|
||||
unsigned CheckedSize) const {
|
||||
static unsigned
|
||||
getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
|
||||
switch (Swz) {
|
||||
case R600InstrInfo::ALU_VEC_012_SCL_210: {
|
||||
unsigned Cycles[3] = { 2, 1, 0};
|
||||
return Cycles[Op];
|
||||
}
|
||||
case R600InstrInfo::ALU_VEC_021_SCL_122: {
|
||||
unsigned Cycles[3] = { 1, 2, 2};
|
||||
return Cycles[Op];
|
||||
}
|
||||
case R600InstrInfo::ALU_VEC_120_SCL_212: {
|
||||
unsigned Cycles[3] = { 2, 1, 2};
|
||||
return Cycles[Op];
|
||||
}
|
||||
case R600InstrInfo::ALU_VEC_102_SCL_221: {
|
||||
unsigned Cycles[3] = { 2, 2, 1};
|
||||
return Cycles[Op];
|
||||
}
|
||||
default:
|
||||
llvm_unreachable("Wrong Swizzle for Trans Slot");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed
|
||||
/// in the same Instruction Group while meeting read port limitations given a
|
||||
/// Swz swizzle sequence.
|
||||
unsigned R600InstrInfo::isLegalUpTo(
|
||||
const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
|
||||
const std::vector<R600InstrInfo::BankSwizzle> &Swz,
|
||||
const std::vector<std::pair<int, unsigned> > &TransSrcs,
|
||||
R600InstrInfo::BankSwizzle TransSwz) const {
|
||||
int Vector[4][3];
|
||||
memset(Vector, -1, sizeof(Vector));
|
||||
for (unsigned i = 0; i < CheckedSize; i++) {
|
||||
for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
|
||||
const std::vector<std::pair<int, unsigned> > &Srcs =
|
||||
Swizzle(IGSrcs[i], Swz[i]);
|
||||
for (unsigned j = 0; j < 3; j++) {
|
||||
const std::pair<int, unsigned> &Src = Srcs[j];
|
||||
if (Src.first < 0)
|
||||
if (Src.first < 0 || Src.first == 255)
|
||||
continue;
|
||||
if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
|
||||
if (Swz[i] != R600InstrInfo::ALU_VEC_012 &&
|
||||
Swz[i] != R600InstrInfo::ALU_VEC_021) {
|
||||
if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
|
||||
Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
|
||||
// The value from output queue A (denoted by register OQAP) can
|
||||
// only be fetched during the first cycle.
|
||||
return false;
|
||||
@ -332,51 +363,126 @@ R600InstrInfo::isLegal(
|
||||
if (Vector[Src.second][j] < 0)
|
||||
Vector[Src.second][j] = Src.first;
|
||||
if (Vector[Src.second][j] != Src.first)
|
||||
return false;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
// Now check Trans Alu
|
||||
for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
|
||||
const std::pair<int, unsigned> &Src = TransSrcs[i];
|
||||
unsigned Cycle = getTransSwizzle(TransSwz, i);
|
||||
if (Src.first < 0)
|
||||
continue;
|
||||
if (Src.first == 255)
|
||||
continue;
|
||||
if (Vector[Src.second][Cycle] < 0)
|
||||
Vector[Src.second][Cycle] = Src.first;
|
||||
if (Vector[Src.second][Cycle] != Src.first)
|
||||
return IGSrcs.size() - 1;
|
||||
}
|
||||
return IGSrcs.size();
|
||||
}
|
||||
|
||||
/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
|
||||
/// (in lexicographic term) swizzle sequence assuming that all swizzles after
|
||||
/// Idx can be skipped
|
||||
static bool
|
||||
NextPossibleSolution(
|
||||
std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
|
||||
unsigned Idx) {
|
||||
assert(Idx < SwzCandidate.size());
|
||||
int ResetIdx = Idx;
|
||||
while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
|
||||
ResetIdx --;
|
||||
for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
|
||||
SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
|
||||
}
|
||||
if (ResetIdx == -1)
|
||||
return false;
|
||||
SwzCandidate[ResetIdx]++;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Enumerate all possible Swizzle sequence to find one that can meet all
|
||||
/// read port requirements.
|
||||
bool R600InstrInfo::FindSwizzleForVectorSlot(
|
||||
const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
|
||||
std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
|
||||
const std::vector<std::pair<int, unsigned> > &TransSrcs,
|
||||
R600InstrInfo::BankSwizzle TransSwz) const {
|
||||
unsigned ValidUpTo = 0;
|
||||
do {
|
||||
ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
|
||||
if (ValidUpTo == IGSrcs.size())
|
||||
return true;
|
||||
} while (NextPossibleSolution(SwzCandidate, ValidUpTo));
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Instructions in Trans slot can't read gpr at cycle 0 if they also read
|
||||
/// a const, and can't read a gpr at cycle 1 if they read 2 const.
|
||||
static bool
|
||||
isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
|
||||
const std::vector<std::pair<int, unsigned> > &TransOps,
|
||||
unsigned ConstCount) {
|
||||
for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
|
||||
const std::pair<int, unsigned> &Src = TransOps[i];
|
||||
unsigned Cycle = getTransSwizzle(TransSwz, i);
|
||||
if (Src.first < 0)
|
||||
continue;
|
||||
if (ConstCount > 0 && Cycle == 0)
|
||||
return false;
|
||||
if (ConstCount > 1 && Cycle == 1)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
R600InstrInfo::recursiveFitsFPLimitation(
|
||||
const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
|
||||
std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
|
||||
unsigned Depth) const {
|
||||
if (!isLegal(IGSrcs, SwzCandidate, Depth))
|
||||
return false;
|
||||
if (IGSrcs.size() == Depth)
|
||||
return true;
|
||||
unsigned i = SwzCandidate[Depth];
|
||||
for (; i < 6; i++) {
|
||||
SwzCandidate[Depth] = (R600InstrInfo::BankSwizzle) i;
|
||||
if (recursiveFitsFPLimitation(IGSrcs, SwzCandidate, Depth + 1))
|
||||
return true;
|
||||
}
|
||||
SwzCandidate[Depth] = R600InstrInfo::ALU_VEC_012;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
|
||||
const DenseMap<unsigned, unsigned> &PV,
|
||||
std::vector<BankSwizzle> &ValidSwizzle)
|
||||
const DenseMap<unsigned, unsigned> &PV,
|
||||
std::vector<BankSwizzle> &ValidSwizzle,
|
||||
bool isLastAluTrans)
|
||||
const {
|
||||
//Todo : support shared src0 - src1 operand
|
||||
|
||||
std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
|
||||
ValidSwizzle.clear();
|
||||
unsigned ConstCount;
|
||||
BankSwizzle TransBS;
|
||||
for (unsigned i = 0, e = IG.size(); i < e; ++i) {
|
||||
IGSrcs.push_back(ExtractSrcs(IG[i], PV));
|
||||
IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
|
||||
unsigned Op = getOperandIdx(IG[i]->getOpcode(),
|
||||
AMDGPU::OpName::bank_swizzle);
|
||||
ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
|
||||
IG[i]->getOperand(Op).getImm());
|
||||
}
|
||||
bool Result = recursiveFitsFPLimitation(IGSrcs, ValidSwizzle);
|
||||
if (!Result)
|
||||
return false;
|
||||
return true;
|
||||
std::vector<std::pair<int, unsigned> > TransOps;
|
||||
if (!isLastAluTrans)
|
||||
return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
|
||||
|
||||
TransOps = IGSrcs.back();
|
||||
IGSrcs.pop_back();
|
||||
ValidSwizzle.pop_back();
|
||||
|
||||
static const R600InstrInfo::BankSwizzle TransSwz[] = {
|
||||
ALU_VEC_012_SCL_210,
|
||||
ALU_VEC_021_SCL_122,
|
||||
ALU_VEC_120_SCL_212,
|
||||
ALU_VEC_102_SCL_221
|
||||
};
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
TransBS = TransSwz[i];
|
||||
if (!isConstCompatible(TransBS, TransOps, ConstCount))
|
||||
continue;
|
||||
bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
|
||||
TransBS);
|
||||
if (Result) {
|
||||
ValidSwizzle.push_back(TransBS);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -406,7 +512,8 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
|
||||
}
|
||||
|
||||
bool
|
||||
R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
|
||||
R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
|
||||
const {
|
||||
std::vector<unsigned> Consts;
|
||||
for (unsigned i = 0, n = MIs.size(); i < n; i++) {
|
||||
MachineInstr *MI = MIs[i];
|
||||
|
@ -84,26 +84,38 @@ namespace llvm {
|
||||
SmallVector<std::pair<MachineOperand *, int64_t>, 3>
|
||||
getSrcs(MachineInstr *MI) const;
|
||||
|
||||
bool isLegal(
|
||||
const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
|
||||
const std::vector<R600InstrInfo::BankSwizzle> &Swz,
|
||||
unsigned CheckedSize) const;
|
||||
bool recursiveFitsFPLimitation(
|
||||
const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
|
||||
std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
|
||||
unsigned Depth = 0) const;
|
||||
unsigned isLegalUpTo(
|
||||
const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
|
||||
const std::vector<R600InstrInfo::BankSwizzle> &Swz,
|
||||
const std::vector<std::pair<int, unsigned> > &TransSrcs,
|
||||
R600InstrInfo::BankSwizzle TransSwz) const;
|
||||
|
||||
bool FindSwizzleForVectorSlot(
|
||||
const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
|
||||
std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
|
||||
const std::vector<std::pair<int, unsigned> > &TransSrcs,
|
||||
R600InstrInfo::BankSwizzle TransSwz) const;
|
||||
|
||||
/// Given the order VEC_012 < VEC_021 < VEC_120 < VEC_102 < VEC_201 < VEC_210
|
||||
/// returns true and the first (in lexical order) BankSwizzle assignment
|
||||
/// starting from the one already provided in the Instruction Group MIs that
|
||||
/// fits Read Port limitations in BS if available. Otherwise returns false
|
||||
/// and undefined content in BS.
|
||||
/// isLastAluTrans should be set if the last Alu of MIs will be executed on
|
||||
/// Trans ALU. In this case, the last element of BS is the BankSwizzle value
|
||||
/// to apply to the last instruction.
|
||||
/// PV holds GPR to PV registers in the Instruction Group MIs.
|
||||
bool fitsReadPortLimitations(const std::vector<MachineInstr *> &MIs,
|
||||
const DenseMap<unsigned, unsigned> &PV,
|
||||
std::vector<BankSwizzle> &BS) const;
|
||||
std::vector<BankSwizzle> &BS,
|
||||
bool isLastAluTrans) const;
|
||||
|
||||
/// An instruction group can only access 2 channel pairs (either [XY] or [ZW])
|
||||
/// from KCache bank on R700+. This function checks whether the MI set given as
|
||||
/// input meets this limitation.
|
||||
bool fitsConstReadLimitations(const std::vector<MachineInstr *> &) const;
|
||||
/// Same but using const index set instead of MI set.
|
||||
bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
|
||||
bool canBundle(const std::vector<MachineInstr *> &) const;
|
||||
|
||||
/// \brief Vector instructions are instructions that must fill all
|
||||
/// instruction slots within an instruction group.
|
||||
|
@ -1489,6 +1489,8 @@ let hasSideEffects = 1 in {
|
||||
|
||||
def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
|
||||
let Pattern = [];
|
||||
let TransOnly = 0;
|
||||
let Itinerary = AnyALU;
|
||||
}
|
||||
|
||||
def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;
|
||||
|
@ -32,7 +32,7 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
|
||||
MRI = &DAG->MRI;
|
||||
CurInstKind = IDOther;
|
||||
CurEmitted = 0;
|
||||
OccupedSlotsMask = 15;
|
||||
OccupedSlotsMask = 31;
|
||||
InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
|
||||
InstKindLimit[IDOther] = 32;
|
||||
|
||||
@ -160,7 +160,7 @@ void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
|
||||
if (NextInstKind != CurInstKind) {
|
||||
DEBUG(dbgs() << "Instruction Type Switch\n");
|
||||
if (NextInstKind != IDAlu)
|
||||
OccupedSlotsMask = 15;
|
||||
OccupedSlotsMask |= 31;
|
||||
CurEmitted = 0;
|
||||
CurInstKind = NextInstKind;
|
||||
}
|
||||
@ -251,6 +251,9 @@ bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
|
||||
R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
|
||||
MachineInstr *MI = SU->getInstr();
|
||||
|
||||
if (TII->isTransOnly(MI))
|
||||
return AluTrans;
|
||||
|
||||
switch (MI->getOpcode()) {
|
||||
case AMDGPU::PRED_X:
|
||||
return AluPredX;
|
||||
@ -346,7 +349,7 @@ SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) {
|
||||
It != E; ++It) {
|
||||
SUnit *SU = *It;
|
||||
InstructionsGroupCandidate.push_back(SU->getInstr());
|
||||
if (TII->canBundle(InstructionsGroupCandidate)) {
|
||||
if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)) {
|
||||
InstructionsGroupCandidate.pop_back();
|
||||
Q.erase((It + 1).base());
|
||||
return SU;
|
||||
@ -421,7 +424,8 @@ unsigned R600SchedStrategy::AvailablesAluCount() const {
|
||||
return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
|
||||
AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
|
||||
AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
|
||||
AvailableAlus[AluDiscarded].size() + AvailableAlus[AluPredX].size();
|
||||
AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +
|
||||
AvailableAlus[AluPredX].size();
|
||||
}
|
||||
|
||||
SUnit* R600SchedStrategy::pickAlu() {
|
||||
@ -429,20 +433,27 @@ SUnit* R600SchedStrategy::pickAlu() {
|
||||
if (!OccupedSlotsMask) {
|
||||
// Bottom-up scheduling: predX must come first
|
||||
if (!AvailableAlus[AluPredX].empty()) {
|
||||
OccupedSlotsMask = 15;
|
||||
OccupedSlotsMask |= 31;
|
||||
return PopInst(AvailableAlus[AluPredX]);
|
||||
}
|
||||
// Flush physical reg copies (RA will discard them)
|
||||
if (!AvailableAlus[AluDiscarded].empty()) {
|
||||
OccupedSlotsMask = 15;
|
||||
OccupedSlotsMask |= 31;
|
||||
return PopInst(AvailableAlus[AluDiscarded]);
|
||||
}
|
||||
// If there is a T_XYZW alu available, use it
|
||||
if (!AvailableAlus[AluT_XYZW].empty()) {
|
||||
OccupedSlotsMask = 15;
|
||||
OccupedSlotsMask |= 15;
|
||||
return PopInst(AvailableAlus[AluT_XYZW]);
|
||||
}
|
||||
}
|
||||
bool TransSlotOccuped = OccupedSlotsMask & 16;
|
||||
if (!TransSlotOccuped) {
|
||||
if (!AvailableAlus[AluTrans].empty()) {
|
||||
OccupedSlotsMask |= 16;
|
||||
return PopInst(AvailableAlus[AluTrans]);
|
||||
}
|
||||
}
|
||||
for (int Chan = 3; Chan > -1; --Chan) {
|
||||
bool isOccupied = OccupedSlotsMask & (1 << Chan);
|
||||
if (!isOccupied) {
|
||||
|
@ -46,6 +46,7 @@ class R600SchedStrategy : public MachineSchedStrategy {
|
||||
AluT_W,
|
||||
AluT_XYZW,
|
||||
AluPredX,
|
||||
AluTrans,
|
||||
AluDiscarded, // LLVM Instructions that are going to be eliminated
|
||||
AluLast
|
||||
};
|
||||
|
@ -77,8 +77,6 @@ private:
|
||||
do {
|
||||
if (TII->isPredicated(BI))
|
||||
continue;
|
||||
if (TII->isTransOnly(BI))
|
||||
continue;
|
||||
int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
|
||||
if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
|
||||
continue;
|
||||
@ -87,6 +85,10 @@ private:
|
||||
continue;
|
||||
}
|
||||
unsigned Dst = BI->getOperand(DstIdx).getReg();
|
||||
if (TII->isTransOnly(BI)) {
|
||||
Result[Dst] = AMDGPU::PS;
|
||||
continue;
|
||||
}
|
||||
if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
|
||||
BI->getOpcode() == AMDGPU::DOT4_eg) {
|
||||
Result[Dst] = AMDGPU::PV_X;
|
||||
@ -157,10 +159,6 @@ public:
|
||||
return true;
|
||||
if (!TII->isALUInstr(MI->getOpcode()))
|
||||
return true;
|
||||
if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TRANS_ONLY)
|
||||
return true;
|
||||
if (TII->isTransOnly(MI))
|
||||
return true;
|
||||
if (MI->getOpcode() == AMDGPU::GROUP_BARRIER)
|
||||
return true;
|
||||
return false;
|
||||
@ -170,7 +168,7 @@ public:
|
||||
// together.
|
||||
bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
|
||||
MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
|
||||
if (getSlot(MII) <= getSlot(MIJ))
|
||||
if (getSlot(MII) <= getSlot(MIJ) && !TII->isTransOnly(MII))
|
||||
return false;
|
||||
// Do MII and MIJ share the same pred_sel?
|
||||
int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
|
||||
@ -204,11 +202,16 @@ public:
|
||||
MI->getOperand(LastOp).setImm(Bit);
|
||||
}
|
||||
|
||||
MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {
|
||||
bool isBundlableWithCurrentPMI(MachineInstr *MI,
|
||||
const DenseMap<unsigned, unsigned> &PV,
|
||||
std::vector<R600InstrInfo::BankSwizzle> &BS,
|
||||
bool &isTransSlot) {
|
||||
isTransSlot = TII->isTransOnly(MI);
|
||||
|
||||
// Are the Constants limitations met ?
|
||||
CurrentPacketMIs.push_back(MI);
|
||||
bool FitsConstLimits = TII->canBundle(CurrentPacketMIs);
|
||||
DEBUG(
|
||||
if (!FitsConstLimits) {
|
||||
if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
|
||||
DEBUG(
|
||||
dbgs() << "Couldn't pack :\n";
|
||||
MI->dump();
|
||||
dbgs() << "with the following packets :\n";
|
||||
@ -217,14 +220,15 @@ public:
|
||||
dbgs() << "\n";
|
||||
}
|
||||
dbgs() << "because of Consts read limitations\n";
|
||||
});
|
||||
const DenseMap<unsigned, unsigned> &PV =
|
||||
getPreviousVector(CurrentPacketMIs.front());
|
||||
std::vector<R600InstrInfo::BankSwizzle> BS;
|
||||
bool FitsReadPortLimits =
|
||||
TII->fitsReadPortLimitations(CurrentPacketMIs, PV, BS);
|
||||
DEBUG(
|
||||
if (!FitsReadPortLimits) {
|
||||
);
|
||||
CurrentPacketMIs.pop_back();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Is there a BankSwizzle set that meets Read Port limitations?
|
||||
if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
|
||||
PV, BS, isTransSlot)) {
|
||||
DEBUG(
|
||||
dbgs() << "Couldn't pack :\n";
|
||||
MI->dump();
|
||||
dbgs() << "with the following packets :\n";
|
||||
@ -233,25 +237,43 @@ public:
|
||||
dbgs() << "\n";
|
||||
}
|
||||
dbgs() << "because of Read port limitations\n";
|
||||
});
|
||||
bool isBundlable = FitsConstLimits && FitsReadPortLimits;
|
||||
if (isBundlable) {
|
||||
);
|
||||
CurrentPacketMIs.pop_back();
|
||||
return false;
|
||||
}
|
||||
|
||||
CurrentPacketMIs.pop_back();
|
||||
return true;
|
||||
}
|
||||
|
||||
MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {
|
||||
MachineBasicBlock::iterator FirstInBundle =
|
||||
CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front();
|
||||
const DenseMap<unsigned, unsigned> &PV =
|
||||
getPreviousVector(FirstInBundle);
|
||||
std::vector<R600InstrInfo::BankSwizzle> BS;
|
||||
bool isTransSlot;
|
||||
|
||||
if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
|
||||
for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
|
||||
MachineInstr *MI = CurrentPacketMIs[i];
|
||||
unsigned Op = TII->getOperandIdx(MI->getOpcode(),
|
||||
AMDGPU::OpName::bank_swizzle);
|
||||
MI->getOperand(Op).setImm(BS[i]);
|
||||
unsigned Op = TII->getOperandIdx(MI->getOpcode(),
|
||||
AMDGPU::OpName::bank_swizzle);
|
||||
MI->getOperand(Op).setImm(BS[i]);
|
||||
}
|
||||
unsigned Op = TII->getOperandIdx(MI->getOpcode(),
|
||||
AMDGPU::OpName::bank_swizzle);
|
||||
MI->getOperand(Op).setImm(BS.back());
|
||||
if (!CurrentPacketMIs.empty())
|
||||
setIsLastBit(CurrentPacketMIs.back(), 0);
|
||||
substitutePV(MI, PV);
|
||||
MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI);
|
||||
if (isTransSlot) {
|
||||
endPacket(llvm::next(It)->getParent(), llvm::next(It));
|
||||
}
|
||||
return It;
|
||||
}
|
||||
CurrentPacketMIs.pop_back();
|
||||
if (!isBundlable) {
|
||||
endPacket(MI->getParent(), MI);
|
||||
substitutePV(MI, getPreviousVector(MI));
|
||||
return VLIWPacketizerList::addToPacket(MI);
|
||||
}
|
||||
if (!CurrentPacketMIs.empty())
|
||||
setIsLastBit(CurrentPacketMIs.back(), 0);
|
||||
substitutePV(MI, PV);
|
||||
endPacket(MI->getParent(), MI);
|
||||
return VLIWPacketizerList::addToPacket(MI);
|
||||
}
|
||||
};
|
||||
|
@ -96,6 +96,7 @@ def PV_X : R600RegWithChan<"PV.X", 254, "X">;
|
||||
def PV_Y : R600RegWithChan<"PV.Y", 254, "Y">;
|
||||
def PV_Z : R600RegWithChan<"PV.Z", 254, "Z">;
|
||||
def PV_W : R600RegWithChan<"PV.W", 254, "W">;
|
||||
def PS: R600Reg<"PS", 255>;
|
||||
def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
|
||||
def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
|
||||
def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;
|
||||
|
@ -1,13 +1,13 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||
|
||||
;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
|
||||
;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
|
||||
;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
|
||||
;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
|
||||
|
||||
define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
|
||||
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
|
||||
|
@ -1,10 +1,10 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||
|
||||
; CHECK: @fp_to_sint_v4i32
|
||||
; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
||||
; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
||||
; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
||||
; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
||||
|
||||
define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
|
||||
%value = load <4 x float> addrspace(1) * %in
|
||||
|
@ -1,10 +1,10 @@
|
||||
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||
|
||||
; CHECK: @fp_to_uint_v4i32
|
||||
; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
||||
; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
||||
; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
||||
; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
||||
|
||||
define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
|
||||
%value = load <4 x float> addrspace(1) * %in
|
||||
|
@ -1,6 +1,6 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||
|
||||
;CHECK: COS * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
||||
|
||||
define void @test() {
|
||||
%r0 = call float @llvm.R600.load.input(i32 0)
|
||||
|
@ -1,8 +1,8 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||
|
||||
;CHECK: LOG_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
|
||||
;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
||||
|
||||
define void @test() {
|
||||
%r0 = call float @llvm.R600.load.input(i32 0)
|
||||
|
@ -1,6 +1,6 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
|
||||
|
||||
;CHECK: SIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
||||
|
||||
define void @test() {
|
||||
%r0 = call float @llvm.R600.load.input(i32 0)
|
||||
|
Loading…
Reference in New Issue
Block a user