R600: Support schedule and packetization of trans-only inst

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185268 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Vincent Lejeune 2013-06-29 19:32:43 +00:00
parent 7d1a0d4e3e
commit 8f9fbd67c3
13 changed files with 267 additions and 111 deletions

View File

@ -250,8 +250,9 @@ R600InstrInfo::getSrcs(MachineInstr *MI) const {
std::vector<std::pair<int, unsigned> >
R600InstrInfo::ExtractSrcs(MachineInstr *MI,
const DenseMap<unsigned, unsigned> &PV)
const {
const DenseMap<unsigned, unsigned> &PV,
unsigned &ConstCount) const {
ConstCount = 0;
const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
const std::pair<int, unsigned> DummyPair(-1, 0);
std::vector<std::pair<int, unsigned> > Result;
@ -259,18 +260,20 @@ R600InstrInfo::ExtractSrcs(MachineInstr *MI,
for (unsigned n = Srcs.size(); i < n; ++i) {
unsigned Reg = Srcs[i].first->getReg();
unsigned Index = RI.getEncodingValue(Reg) & 0xff;
unsigned Chan = RI.getHWRegChan(Reg);
if (Reg == AMDGPU::OQAP) {
Result.push_back(std::pair<int, unsigned>(Index, 0));
}
if (Index > 127) {
Result.push_back(DummyPair);
continue;
}
if (PV.find(Reg) != PV.end()) {
// 255 is used to tell that it is a PS/PV reg
Result.push_back(std::pair<int, unsigned>(255, 0));
continue;
}
if (Index > 127) {
ConstCount++;
Result.push_back(DummyPair);
continue;
}
unsigned Chan = RI.getHWRegChan(Reg);
Result.push_back(std::pair<int, unsigned>(Index, Chan));
}
for (; i < 3; ++i)
@ -305,23 +308,51 @@ Swizzle(std::vector<std::pair<int, unsigned> > Src,
return Src;
}
bool
R600InstrInfo::isLegal(
const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
const std::vector<R600InstrInfo::BankSwizzle> &Swz,
unsigned CheckedSize) const {
/// Returns the read cycle used by source operand \p Op of an instruction
/// placed in the Trans slot when bank swizzle \p Swz is selected.
///
/// \param Swz one of the four *_SCL_* swizzles handled below; any other value
///            reaches llvm_unreachable.
/// \param Op  source operand index; must be 0, 1 or 2 because every table
///            below holds exactly three entries.
static unsigned
getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
  // Guard the table lookups below against an out-of-range operand index.
  assert(Op < 3 && "Trans slot operand index out of range");
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012_SCL_210: {
    const unsigned Cycles[3] = { 2, 1, 0 };
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_021_SCL_122: {
    const unsigned Cycles[3] = { 1, 2, 2 };
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_120_SCL_212: {
    const unsigned Cycles[3] = { 2, 1, 2 };
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_102_SCL_221: {
    const unsigned Cycles[3] = { 2, 2, 1 };
    return Cycles[Op];
  }
  default:
    llvm_unreachable("Wrong Swizzle for Trans Slot");
    return 0;
  }
}
/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed
/// in the same Instruction Group while meeting read port limitations given a
/// Swz swizzle sequence.
unsigned R600InstrInfo::isLegalUpTo(
const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
const std::vector<R600InstrInfo::BankSwizzle> &Swz,
const std::vector<std::pair<int, unsigned> > &TransSrcs,
R600InstrInfo::BankSwizzle TransSwz) const {
int Vector[4][3];
memset(Vector, -1, sizeof(Vector));
for (unsigned i = 0; i < CheckedSize; i++) {
for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
const std::vector<std::pair<int, unsigned> > &Srcs =
Swizzle(IGSrcs[i], Swz[i]);
for (unsigned j = 0; j < 3; j++) {
const std::pair<int, unsigned> &Src = Srcs[j];
if (Src.first < 0)
if (Src.first < 0 || Src.first == 255)
continue;
if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
if (Swz[i] != R600InstrInfo::ALU_VEC_012 &&
Swz[i] != R600InstrInfo::ALU_VEC_021) {
if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
// The value from output queue A (denoted by register OQAP) can
// only be fetched during the first cycle.
return false;
@ -332,51 +363,126 @@ R600InstrInfo::isLegal(
if (Vector[Src.second][j] < 0)
Vector[Src.second][j] = Src.first;
if (Vector[Src.second][j] != Src.first)
return false;
return i;
}
}
// Now check Trans Alu
for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
const std::pair<int, unsigned> &Src = TransSrcs[i];
unsigned Cycle = getTransSwizzle(TransSwz, i);
if (Src.first < 0)
continue;
if (Src.first == 255)
continue;
if (Vector[Src.second][Cycle] < 0)
Vector[Src.second][Cycle] = Src.first;
if (Vector[Src.second][Cycle] != Src.first)
return IGSrcs.size() - 1;
}
return IGSrcs.size();
}
/// Given a swizzle sequence \p SwzCandidate and an index \p Idx, advances
/// \p SwzCandidate to the next swizzle sequence (in lexicographic order),
/// assuming that all sequences differing only after \p Idx can be skipped.
///
/// Walking from \p Idx towards index 0, the first entry that is not already
/// ALU_VEC_210 is replaced by its successor, and every entry after it is reset
/// to ALU_VEC_012_SCL_210.
///
/// \returns true if a new candidate was produced; false if every entry from 0
///          to \p Idx was already ALU_VEC_210 (all entries are then reset to
///          ALU_VEC_012_SCL_210).
static bool
NextPossibleSolution(
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    unsigned Idx) {
  assert(Idx < SwzCandidate.size());
  int ResetIdx = Idx;
  while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
    --ResetIdx;
  for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++)
    SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
  if (ResetIdx == -1)
    return false;
  // Enumerations have no built-in operator++, so step through int explicitly.
  const int NextSwizzle = SwzCandidate[ResetIdx] + 1;
  SwzCandidate[ResetIdx] = static_cast<R600InstrInfo::BankSwizzle>(NextSwizzle);
  return true;
}
/// Enumerates swizzle sequences, starting from the one held in
/// \p SwzCandidate, until one meets all read port requirements.
///
/// \param IGSrcs       sources of the vector-slot instructions, one entry per
///                     instruction.
/// \param SwzCandidate in: first sequence to try; out: the accepted sequence
///                     when the function returns true.
/// \param TransSrcs    sources of the Trans-slot instruction (may be empty).
/// \param TransSwz     swizzle applied to the Trans-slot instruction.
/// \returns true if a legal sequence was found, false once the enumeration is
///          exhausted.
bool R600InstrInfo::FindSwizzleForVectorSlot(
    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    const std::vector<std::pair<int, unsigned> > &TransSrcs,
    R600InstrInfo::BankSwizzle TransSwz) const {
  // With no vector-slot instruction there is no sequence to permute. A Trans
  // conflict is reported by isLegalUpTo as IGSrcs.size() - 1, which wraps
  // around when IGSrcs is empty and must not be used as an index by
  // NextPossibleSolution; a single check decides the outcome here.
  if (IGSrcs.empty())
    return isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz) == 0;
  unsigned ValidUpTo = 0;
  do {
    ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
    if (ValidUpTo == IGSrcs.size())
      return true;
    // Skip every sequence sharing the prefix that already failed at ValidUpTo.
  } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
  return false;
}
/// A Trans-slot instruction that also reads a constant cannot read a GPR at
/// cycle 0, and one that reads two constants cannot read a GPR at cycle 1.
///
/// \returns true when, under swizzle \p TransSwz, no operand of \p TransOps
///          with a non-negative first member lands on a forbidden cycle for
///          the given \p ConstCount.
static bool
isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
                  const std::vector<std::pair<int, unsigned> > &TransOps,
                  unsigned ConstCount) {
  const bool CycleZeroBlocked = ConstCount > 0;
  const bool CycleOneBlocked = ConstCount > 1;
  const unsigned NumOps = TransOps.size();
  for (unsigned OpIdx = 0; OpIdx != NumOps; ++OpIdx) {
    // The cycle is resolved before the operand is inspected so that the
    // swizzle is looked up for every operand, including skipped ones.
    const unsigned ReadCycle = getTransSwizzle(TransSwz, OpIdx);
    // Operands with a negative first member are ignored.
    if (TransOps[OpIdx].first < 0)
      continue;
    if ((CycleZeroBlocked && ReadCycle == 0) ||
        (CycleOneBlocked && ReadCycle == 1))
      return false;
  }
  return true;
}
/// Recursive search for a bank swizzle assignment accepted by isLegal.
///
/// First checks, via isLegal, the candidate with Depth as the checked size;
/// then tries each swizzle value for position Depth, starting from the value
/// already stored there, and recurses on Depth + 1.
///
/// \returns true with the accepted assignment left in SwzCandidate; false
///          otherwise, in which case SwzCandidate[Depth] has been reset to
///          ALU_VEC_012 if the loop below was reached.
bool
R600InstrInfo::recursiveFitsFPLimitation(
const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
unsigned Depth) const {
// Prune: the entries chosen so far are already rejected.
if (!isLegal(IGSrcs, SwzCandidate, Depth))
return false;
// Every instruction has been given a swizzle and the check above passed.
if (IGSrcs.size() == Depth)
return true;
// Resume from the swizzle currently stored at this position.
unsigned i = SwzCandidate[Depth];
// NOTE(review): 6 is presumably the number of ALU_VEC_* swizzle values
// (VEC_012 .. VEC_210) -- confirm against the BankSwizzle enum.
for (; i < 6; i++) {
SwzCandidate[Depth] = (R600InstrInfo::BankSwizzle) i;
if (recursiveFitsFPLimitation(IGSrcs, SwzCandidate, Depth + 1))
return true;
}
// No value worked here: restore the first swizzle before backtracking.
SwzCandidate[Depth] = R600InstrInfo::ALU_VEC_012;
return false;
}
bool
R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
const DenseMap<unsigned, unsigned> &PV,
std::vector<BankSwizzle> &ValidSwizzle)
const DenseMap<unsigned, unsigned> &PV,
std::vector<BankSwizzle> &ValidSwizzle,
bool isLastAluTrans)
const {
//Todo : support shared src0 - src1 operand
std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
ValidSwizzle.clear();
unsigned ConstCount;
BankSwizzle TransBS;
for (unsigned i = 0, e = IG.size(); i < e; ++i) {
IGSrcs.push_back(ExtractSrcs(IG[i], PV));
IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
unsigned Op = getOperandIdx(IG[i]->getOpcode(),
AMDGPU::OpName::bank_swizzle);
ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
IG[i]->getOperand(Op).getImm());
}
bool Result = recursiveFitsFPLimitation(IGSrcs, ValidSwizzle);
if (!Result)
return false;
return true;
std::vector<std::pair<int, unsigned> > TransOps;
if (!isLastAluTrans)
return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);
TransOps = IGSrcs.back();
IGSrcs.pop_back();
ValidSwizzle.pop_back();
static const R600InstrInfo::BankSwizzle TransSwz[] = {
ALU_VEC_012_SCL_210,
ALU_VEC_021_SCL_122,
ALU_VEC_120_SCL_212,
ALU_VEC_102_SCL_221
};
for (unsigned i = 0; i < 4; i++) {
TransBS = TransSwz[i];
if (!isConstCompatible(TransBS, TransOps, ConstCount))
continue;
bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
TransBS);
if (Result) {
ValidSwizzle.push_back(TransBS);
return true;
}
}
return false;
}
@ -406,7 +512,8 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
}
bool
R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
const {
std::vector<unsigned> Consts;
for (unsigned i = 0, n = MIs.size(); i < n; i++) {
MachineInstr *MI = MIs[i];

View File

@ -84,26 +84,38 @@ namespace llvm {
SmallVector<std::pair<MachineOperand *, int64_t>, 3>
getSrcs(MachineInstr *MI) const;
bool isLegal(
const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
const std::vector<R600InstrInfo::BankSwizzle> &Swz,
unsigned CheckedSize) const;
bool recursiveFitsFPLimitation(
const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
unsigned Depth = 0) const;
unsigned isLegalUpTo(
const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
const std::vector<R600InstrInfo::BankSwizzle> &Swz,
const std::vector<std::pair<int, unsigned> > &TransSrcs,
R600InstrInfo::BankSwizzle TransSwz) const;
bool FindSwizzleForVectorSlot(
const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
const std::vector<std::pair<int, unsigned> > &TransSrcs,
R600InstrInfo::BankSwizzle TransSwz) const;
/// Given the order VEC_012 < VEC_021 < VEC_120 < VEC_102 < VEC_201 < VEC_210
/// returns true and the first (in lexical order) BankSwizzle assignment
/// starting from the one already provided in the Instruction Group MIs that
/// fits Read Port limitations in BS if available. Otherwise returns false
/// and undefined content in BS.
/// isLastAluTrans should be set if the last Alu of MIs will be executed on
/// Trans ALU. In this case, the last element of BS holds the BankSwizzle
/// value to apply to that last instruction.
/// PV holds GPR to PV registers in the Instruction Group MIs.
bool fitsReadPortLimitations(const std::vector<MachineInstr *> &MIs,
const DenseMap<unsigned, unsigned> &PV,
std::vector<BankSwizzle> &BS) const;
std::vector<BankSwizzle> &BS,
bool isLastAluTrans) const;
/// An instruction group can only access 2 channel pairs (either [XY] or [ZW])
/// from KCache bank on R700+. This function checks whether the set of MIs
/// given as input meets this limitation.
bool fitsConstReadLimitations(const std::vector<MachineInstr *> &) const;
/// Same but using const index set instead of MI set.
bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
bool canBundle(const std::vector<MachineInstr *> &) const;
/// \brief Vector instructions are instructions that must fill all
/// instruction slots within an instruction group.

View File

@ -1489,6 +1489,8 @@ let hasSideEffects = 1 in {
def FLT_TO_INT_eg : FLT_TO_INT_Common<0x50> {
let Pattern = [];
let TransOnly = 0;
let Itinerary = AnyALU;
}
def INT_TO_FLT_eg : INT_TO_FLT_Common<0x9B>;

View File

@ -32,7 +32,7 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
MRI = &DAG->MRI;
CurInstKind = IDOther;
CurEmitted = 0;
OccupedSlotsMask = 15;
OccupedSlotsMask = 31;
InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
InstKindLimit[IDOther] = 32;
@ -160,7 +160,7 @@ void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
if (NextInstKind != CurInstKind) {
DEBUG(dbgs() << "Instruction Type Switch\n");
if (NextInstKind != IDAlu)
OccupedSlotsMask = 15;
OccupedSlotsMask |= 31;
CurEmitted = 0;
CurInstKind = NextInstKind;
}
@ -251,6 +251,9 @@ bool R600SchedStrategy::regBelongsToClass(unsigned Reg,
R600SchedStrategy::AluKind R600SchedStrategy::getAluKind(SUnit *SU) const {
MachineInstr *MI = SU->getInstr();
if (TII->isTransOnly(MI))
return AluTrans;
switch (MI->getOpcode()) {
case AMDGPU::PRED_X:
return AluPredX;
@ -346,7 +349,7 @@ SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) {
It != E; ++It) {
SUnit *SU = *It;
InstructionsGroupCandidate.push_back(SU->getInstr());
if (TII->canBundle(InstructionsGroupCandidate)) {
if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)) {
InstructionsGroupCandidate.pop_back();
Q.erase((It + 1).base());
return SU;
@ -421,7 +424,8 @@ unsigned R600SchedStrategy::AvailablesAluCount() const {
return AvailableAlus[AluAny].size() + AvailableAlus[AluT_XYZW].size() +
AvailableAlus[AluT_X].size() + AvailableAlus[AluT_Y].size() +
AvailableAlus[AluT_Z].size() + AvailableAlus[AluT_W].size() +
AvailableAlus[AluDiscarded].size() + AvailableAlus[AluPredX].size();
AvailableAlus[AluTrans].size() + AvailableAlus[AluDiscarded].size() +
AvailableAlus[AluPredX].size();
}
SUnit* R600SchedStrategy::pickAlu() {
@ -429,20 +433,27 @@ SUnit* R600SchedStrategy::pickAlu() {
if (!OccupedSlotsMask) {
// Bottom up scheduling : predX must come first
if (!AvailableAlus[AluPredX].empty()) {
OccupedSlotsMask = 15;
OccupedSlotsMask |= 31;
return PopInst(AvailableAlus[AluPredX]);
}
// Flush physical reg copies (RA will discard them)
if (!AvailableAlus[AluDiscarded].empty()) {
OccupedSlotsMask = 15;
OccupedSlotsMask |= 31;
return PopInst(AvailableAlus[AluDiscarded]);
}
// If there is a T_XYZW alu available, use it
if (!AvailableAlus[AluT_XYZW].empty()) {
OccupedSlotsMask = 15;
OccupedSlotsMask |= 15;
return PopInst(AvailableAlus[AluT_XYZW]);
}
}
bool TransSlotOccuped = OccupedSlotsMask & 16;
if (!TransSlotOccuped) {
if (!AvailableAlus[AluTrans].empty()) {
OccupedSlotsMask |= 16;
return PopInst(AvailableAlus[AluTrans]);
}
}
for (int Chan = 3; Chan > -1; --Chan) {
bool isOccupied = OccupedSlotsMask & (1 << Chan);
if (!isOccupied) {

View File

@ -46,6 +46,7 @@ class R600SchedStrategy : public MachineSchedStrategy {
AluT_W,
AluT_XYZW,
AluPredX,
AluTrans,
AluDiscarded, // LLVM Instructions that are going to be eliminated
AluLast
};

View File

@ -77,8 +77,6 @@ private:
do {
if (TII->isPredicated(BI))
continue;
if (TII->isTransOnly(BI))
continue;
int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
continue;
@ -87,6 +85,10 @@ private:
continue;
}
unsigned Dst = BI->getOperand(DstIdx).getReg();
if (TII->isTransOnly(BI)) {
Result[Dst] = AMDGPU::PS;
continue;
}
if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
BI->getOpcode() == AMDGPU::DOT4_eg) {
Result[Dst] = AMDGPU::PV_X;
@ -157,10 +159,6 @@ public:
return true;
if (!TII->isALUInstr(MI->getOpcode()))
return true;
if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TRANS_ONLY)
return true;
if (TII->isTransOnly(MI))
return true;
if (MI->getOpcode() == AMDGPU::GROUP_BARRIER)
return true;
return false;
@ -170,7 +168,7 @@ public:
// together.
bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
if (getSlot(MII) <= getSlot(MIJ))
if (getSlot(MII) <= getSlot(MIJ) && !TII->isTransOnly(MII))
return false;
// Does MII and MIJ share the same pred_sel ?
int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
@ -204,11 +202,16 @@ public:
MI->getOperand(LastOp).setImm(Bit);
}
MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {
bool isBundlableWithCurrentPMI(MachineInstr *MI,
const DenseMap<unsigned, unsigned> &PV,
std::vector<R600InstrInfo::BankSwizzle> &BS,
bool &isTransSlot) {
isTransSlot = TII->isTransOnly(MI);
// Are the Constants limitations met ?
CurrentPacketMIs.push_back(MI);
bool FitsConstLimits = TII->canBundle(CurrentPacketMIs);
DEBUG(
if (!FitsConstLimits) {
if (!TII->fitsConstReadLimitations(CurrentPacketMIs)) {
DEBUG(
dbgs() << "Couldn't pack :\n";
MI->dump();
dbgs() << "with the following packets :\n";
@ -217,14 +220,15 @@ public:
dbgs() << "\n";
}
dbgs() << "because of Consts read limitations\n";
});
const DenseMap<unsigned, unsigned> &PV =
getPreviousVector(CurrentPacketMIs.front());
std::vector<R600InstrInfo::BankSwizzle> BS;
bool FitsReadPortLimits =
TII->fitsReadPortLimitations(CurrentPacketMIs, PV, BS);
DEBUG(
if (!FitsReadPortLimits) {
);
CurrentPacketMIs.pop_back();
return false;
}
// Is there a BankSwizzle set that meet Read Port limitations ?
if (!TII->fitsReadPortLimitations(CurrentPacketMIs,
PV, BS, isTransSlot)) {
DEBUG(
dbgs() << "Couldn't pack :\n";
MI->dump();
dbgs() << "with the following packets :\n";
@ -233,25 +237,43 @@ public:
dbgs() << "\n";
}
dbgs() << "because of Read port limitations\n";
});
bool isBundlable = FitsConstLimits && FitsReadPortLimits;
if (isBundlable) {
);
CurrentPacketMIs.pop_back();
return false;
}
CurrentPacketMIs.pop_back();
return true;
}
MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {
MachineBasicBlock::iterator FirstInBundle =
CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front();
const DenseMap<unsigned, unsigned> &PV =
getPreviousVector(FirstInBundle);
std::vector<R600InstrInfo::BankSwizzle> BS;
bool isTransSlot;
if (isBundlableWithCurrentPMI(MI, PV, BS, isTransSlot)) {
for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
MachineInstr *MI = CurrentPacketMIs[i];
unsigned Op = TII->getOperandIdx(MI->getOpcode(),
AMDGPU::OpName::bank_swizzle);
MI->getOperand(Op).setImm(BS[i]);
unsigned Op = TII->getOperandIdx(MI->getOpcode(),
AMDGPU::OpName::bank_swizzle);
MI->getOperand(Op).setImm(BS[i]);
}
unsigned Op = TII->getOperandIdx(MI->getOpcode(),
AMDGPU::OpName::bank_swizzle);
MI->getOperand(Op).setImm(BS.back());
if (!CurrentPacketMIs.empty())
setIsLastBit(CurrentPacketMIs.back(), 0);
substitutePV(MI, PV);
MachineBasicBlock::iterator It = VLIWPacketizerList::addToPacket(MI);
if (isTransSlot) {
endPacket(llvm::next(It)->getParent(), llvm::next(It));
}
return It;
}
CurrentPacketMIs.pop_back();
if (!isBundlable) {
endPacket(MI->getParent(), MI);
substitutePV(MI, getPreviousVector(MI));
return VLIWPacketizerList::addToPacket(MI);
}
if (!CurrentPacketMIs.empty())
setIsLastBit(CurrentPacketMIs.back(), 0);
substitutePV(MI, PV);
endPacket(MI->getParent(), MI);
return VLIWPacketizerList::addToPacket(MI);
}
};

View File

@ -96,6 +96,7 @@ def PV_X : R600RegWithChan<"PV.X", 254, "X">;
def PV_Y : R600RegWithChan<"PV.Y", 254, "Y">;
def PV_Z : R600RegWithChan<"PV.Z", 254, "Z">;
def PV_W : R600RegWithChan<"PV.W", 254, "W">;
def PS: R600Reg<"PS", 255>;
def PREDICATE_BIT : R600Reg<"PredicateBit", 0>;
def PRED_SEL_OFF: R600Reg<"Pred_sel_off", 0>;
def PRED_SEL_ZERO : R600Reg<"Pred_sel_zero", 2>;

View File

@ -1,13 +1,13 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1

View File

@ -1,10 +1,10 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
; CHECK: @fp_to_sint_v4i32
; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%value = load <4 x float> addrspace(1) * %in

View File

@ -1,10 +1,10 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
; CHECK: @fp_to_uint_v4i32
; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
define void @fp_to_uint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%value = load <4 x float> addrspace(1) * %in

View File

@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
;CHECK: COS * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
define void @test() {
%r0 = call float @llvm.R600.load.input(i32 0)

View File

@ -1,8 +1,8 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
;CHECK: LOG_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
define void @test() {
%r0 = call float @llvm.R600.load.input(i32 0)

View File

@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
;CHECK: SIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
define void @test() {
%r0 = call float @llvm.R600.load.input(i32 0)