Revert "R600: Non vector only instruction can be scheduled on trans unit"

This reverts commit 98ce62780e.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187526 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tom Stellard 2013-07-31 20:43:27 +00:00
parent 5519dc9de8
commit 6b3f6a744a
29 changed files with 92 additions and 245 deletions

View File

@ -459,9 +459,6 @@ static bool
isConstCompatible(R600InstrInfo::BankSwizzle TransSwz, isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
const std::vector<std::pair<int, unsigned> > &TransOps, const std::vector<std::pair<int, unsigned> > &TransOps,
unsigned ConstCount) { unsigned ConstCount) {
// TransALU can't read 3 constants
if (ConstCount > 2)
return false;
for (unsigned i = 0, e = TransOps.size(); i < e; ++i) { for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
const std::pair<int, unsigned> &Src = TransOps[i]; const std::pair<int, unsigned> &Src = TransOps[i];
unsigned Cycle = getTransSwizzle(TransSwz, i); unsigned Cycle = getTransSwizzle(TransSwz, i);

View File

@ -9,6 +9,7 @@
// //
/// \file /// \file
/// \brief R600 Machine Scheduler interface /// \brief R600 Machine Scheduler interface
// TODO: Scheduling is optimised for VLIW4 arch, modify it to support TRANS slot
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -28,7 +29,6 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
DAG = dag; DAG = dag;
TII = static_cast<const R600InstrInfo*>(DAG->TII); TII = static_cast<const R600InstrInfo*>(DAG->TII);
TRI = static_cast<const R600RegisterInfo*>(DAG->TRI); TRI = static_cast<const R600RegisterInfo*>(DAG->TRI);
VLIW5 = !DAG->MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
MRI = &DAG->MRI; MRI = &DAG->MRI;
CurInstKind = IDOther; CurInstKind = IDOther;
CurEmitted = 0; CurEmitted = 0;
@ -342,16 +342,14 @@ int R600SchedStrategy::getInstKind(SUnit* SU) {
} }
} }
SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) { SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q) {
if (Q.empty()) if (Q.empty())
return NULL; return NULL;
for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend(); for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
It != E; ++It) { It != E; ++It) {
SUnit *SU = *It; SUnit *SU = *It;
InstructionsGroupCandidate.push_back(SU->getInstr()); InstructionsGroupCandidate.push_back(SU->getInstr());
if (TII->fitsConstReadLimitations(InstructionsGroupCandidate) if (TII->fitsConstReadLimitations(InstructionsGroupCandidate)) {
&& (!AnyALU || !TII->isVectorOnly(SU->getInstr()))
) {
InstructionsGroupCandidate.pop_back(); InstructionsGroupCandidate.pop_back();
Q.erase((It + 1).base()); Q.erase((It + 1).base());
return SU; return SU;
@ -375,8 +373,6 @@ void R600SchedStrategy::PrepareNextSlot() {
DEBUG(dbgs() << "New Slot\n"); DEBUG(dbgs() << "New Slot\n");
assert (OccupedSlotsMask && "Slot wasn't filled"); assert (OccupedSlotsMask && "Slot wasn't filled");
OccupedSlotsMask = 0; OccupedSlotsMask = 0;
// if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
// OccupedSlotsMask |= 16;
InstructionsGroupCandidate.clear(); InstructionsGroupCandidate.clear();
LoadAlu(); LoadAlu();
} }
@ -413,12 +409,12 @@ void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
} }
} }
SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot, bool AnyAlu) { SUnit *R600SchedStrategy::AttemptFillSlot(unsigned Slot) {
static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W}; static const AluKind IndexToID[] = {AluT_X, AluT_Y, AluT_Z, AluT_W};
SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]], AnyAlu); SUnit *SlotedSU = PopInst(AvailableAlus[IndexToID[Slot]]);
if (SlotedSU) if (SlotedSU)
return SlotedSU; return SlotedSU;
SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny], AnyAlu); SUnit *UnslotedSU = PopInst(AvailableAlus[AluAny]);
if (UnslotedSU) if (UnslotedSU)
AssignSlot(UnslotedSU->getInstr(), Slot); AssignSlot(UnslotedSU->getInstr(), Slot);
return UnslotedSU; return UnslotedSU;
@ -438,35 +434,30 @@ SUnit* R600SchedStrategy::pickAlu() {
// Bottom up scheduling : predX must comes first // Bottom up scheduling : predX must comes first
if (!AvailableAlus[AluPredX].empty()) { if (!AvailableAlus[AluPredX].empty()) {
OccupedSlotsMask |= 31; OccupedSlotsMask |= 31;
return PopInst(AvailableAlus[AluPredX], false); return PopInst(AvailableAlus[AluPredX]);
} }
// Flush physical reg copies (RA will discard them) // Flush physical reg copies (RA will discard them)
if (!AvailableAlus[AluDiscarded].empty()) { if (!AvailableAlus[AluDiscarded].empty()) {
OccupedSlotsMask |= 31; OccupedSlotsMask |= 31;
return PopInst(AvailableAlus[AluDiscarded], false); return PopInst(AvailableAlus[AluDiscarded]);
} }
// If there is a T_XYZW alu available, use it // If there is a T_XYZW alu available, use it
if (!AvailableAlus[AluT_XYZW].empty()) { if (!AvailableAlus[AluT_XYZW].empty()) {
OccupedSlotsMask |= 15; OccupedSlotsMask |= 15;
return PopInst(AvailableAlus[AluT_XYZW], false); return PopInst(AvailableAlus[AluT_XYZW]);
} }
} }
bool TransSlotOccuped = OccupedSlotsMask & 16; bool TransSlotOccuped = OccupedSlotsMask & 16;
if (!TransSlotOccuped && VLIW5) { if (!TransSlotOccuped) {
if (!AvailableAlus[AluTrans].empty()) { if (!AvailableAlus[AluTrans].empty()) {
OccupedSlotsMask |= 16; OccupedSlotsMask |= 16;
return PopInst(AvailableAlus[AluTrans], false); return PopInst(AvailableAlus[AluTrans]);
}
SUnit *SU = AttemptFillSlot(3, true);
if (SU) {
OccupedSlotsMask |= 16;
return SU;
} }
} }
for (int Chan = 3; Chan > -1; --Chan) { for (int Chan = 3; Chan > -1; --Chan) {
bool isOccupied = OccupedSlotsMask & (1 << Chan); bool isOccupied = OccupedSlotsMask & (1 << Chan);
if (!isOccupied) { if (!isOccupied) {
SUnit *SU = AttemptFillSlot(Chan, false); SUnit *SU = AttemptFillSlot(Chan);
if (SU) { if (SU) {
OccupedSlotsMask |= (1 << Chan); OccupedSlotsMask |= (1 << Chan);
InstructionsGroupCandidate.push_back(SU->getInstr()); InstructionsGroupCandidate.push_back(SU->getInstr());

View File

@ -84,16 +84,15 @@ public:
private: private:
std::vector<MachineInstr *> InstructionsGroupCandidate; std::vector<MachineInstr *> InstructionsGroupCandidate;
bool VLIW5;
int getInstKind(SUnit *SU); int getInstKind(SUnit *SU);
bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const; bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
AluKind getAluKind(SUnit *SU) const; AluKind getAluKind(SUnit *SU) const;
void LoadAlu(); void LoadAlu();
unsigned AvailablesAluCount() const; unsigned AvailablesAluCount() const;
SUnit *AttemptFillSlot (unsigned Slot, bool AnyAlu); SUnit *AttemptFillSlot (unsigned Slot);
void PrepareNextSlot(); void PrepareNextSlot();
SUnit *PopInst(std::vector<SUnit*> &Q, bool AnyALU); SUnit *PopInst(std::vector<SUnit*> &Q);
void AssignSlot(MachineInstr *MI, unsigned Slot); void AssignSlot(MachineInstr *MI, unsigned Slot);
SUnit* pickAlu(); SUnit* pickAlu();

View File

@ -58,8 +58,6 @@ class R600PacketizerList : public VLIWPacketizerList {
private: private:
const R600InstrInfo *TII; const R600InstrInfo *TII;
const R600RegisterInfo &TRI; const R600RegisterInfo &TRI;
bool VLIW5;
bool ConsideredInstUsesAlreadyWrittenVectorElement;
unsigned getSlot(const MachineInstr *MI) const { unsigned getSlot(const MachineInstr *MI) const {
return TRI.getHWRegChan(MI->getOperand(0).getReg()); return TRI.getHWRegChan(MI->getOperand(0).getReg());
@ -76,13 +74,7 @@ private:
MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
if (I->isBundle()) if (I->isBundle())
BI++; BI++;
int LastDstChan = -1;
do { do {
bool isTrans = false;
int BISlot = getSlot(BI);
if (LastDstChan >= BISlot)
isTrans = true;
LastDstChan = BISlot;
if (TII->isPredicated(BI)) if (TII->isPredicated(BI))
continue; continue;
int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write); int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
@ -93,7 +85,7 @@ private:
continue; continue;
} }
unsigned Dst = BI->getOperand(DstIdx).getReg(); unsigned Dst = BI->getOperand(DstIdx).getReg();
if (isTrans || TII->isTransOnly(BI)) { if (TII->isTransOnly(BI)) {
Result[Dst] = AMDGPU::PS; Result[Dst] = AMDGPU::PS;
continue; continue;
} }
@ -150,14 +142,10 @@ public:
MachineDominatorTree &MDT) MachineDominatorTree &MDT)
: VLIWPacketizerList(MF, MLI, MDT, true), : VLIWPacketizerList(MF, MLI, MDT, true),
TII (static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo())), TII (static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo())),
TRI(TII->getRegisterInfo()) { TRI(TII->getRegisterInfo()) { }
VLIW5 = !MF.getTarget().getSubtarget<AMDGPUSubtarget>().hasCaymanISA();
}
// initPacketizerState - initialize some internal flags. // initPacketizerState - initialize some internal flags.
void initPacketizerState() { void initPacketizerState() { }
ConsideredInstUsesAlreadyWrittenVectorElement = false;
}
// ignorePseudoInstruction - Ignore bundling of pseudo instructions. // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) { bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) {
@ -184,8 +172,8 @@ public:
// together. // together.
bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr(); MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
if (getSlot(MII) == getSlot(MIJ)) if (getSlot(MII) <= getSlot(MIJ) && !TII->isTransOnly(MII))
ConsideredInstUsesAlreadyWrittenVectorElement = true; return false;
// Does MII and MIJ share the same pred_sel ? // Does MII and MIJ share the same pred_sel ?
int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel), int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel); OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
@ -223,20 +211,6 @@ public:
std::vector<R600InstrInfo::BankSwizzle> &BS, std::vector<R600InstrInfo::BankSwizzle> &BS,
bool &isTransSlot) { bool &isTransSlot) {
isTransSlot = TII->isTransOnly(MI); isTransSlot = TII->isTransOnly(MI);
assert (!isTransSlot || VLIW5);
// Is the dst reg sequence legal ?
if (!isTransSlot && !CurrentPacketMIs.empty()) {
if (getSlot(MI) <= getSlot(CurrentPacketMIs.back())) {
if (ConsideredInstUsesAlreadyWrittenVectorElement &&
!TII->isVectorOnly(MI) && VLIW5) {
isTransSlot = true;
DEBUG(dbgs() << "Considering as Trans Inst :"; MI->dump(););
}
else
return false;
}
}
// Are the Constants limitations met ? // Are the Constants limitations met ?
CurrentPacketMIs.push_back(MI); CurrentPacketMIs.push_back(MI);
@ -304,8 +278,6 @@ public:
return It; return It;
} }
endPacket(MI->getParent(), MI); endPacket(MI->getParent(), MI);
if (TII->isTransOnly(MI))
return MI;
return VLIWPacketizerList::addToPacket(MI); return VLIWPacketizerList::addToPacket(MI);
} }
}; };

View File

@ -21,7 +21,7 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
;EG-CHECK: @test4 ;EG-CHECK: @test4
;EG-CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: AND_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: AND_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK: @test4 ;SI-CHECK: @test4

View File

@ -18,7 +18,7 @@ declare void @llvm.AMDGPU.store.output(float, i32)
; CHECK: @fadd_v4f32 ; CHECK: @fadd_v4f32
; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {

View File

@ -2,7 +2,7 @@
;Not checking arguments 2 and 3 to CNDE, because they may change between ;Not checking arguments 2 and 3 to CNDE, because they may change between
;registers and literal.x depending on what the optimizer does. ;registers and literal.x depending on what the optimizer does.
;CHECK: CNDE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;CHECK: CNDE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) { define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
entry: entry:

View File

@ -1,7 +1,7 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
; CHECK: @fcmp_sext ; CHECK: @fcmp_sext
; CHECK: SETE_DX10 T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) { define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) {
entry: entry:

View File

@ -1,13 +1,13 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}} ;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}
;CHECK-DAG: MUL_IEEE T{{[0-9]+\.[XYZW]}} ;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}}
;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}} ;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}
;CHECK-DAG: MUL_IEEE T{{[0-9]+\.[XYZW]}} ;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}}
;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}} ;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}
;CHECK-DAG: MUL_IEEE T{{[0-9]+\.[XYZW]}} ;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}}
;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}} ;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}
;CHECK-DAG: MUL_IEEE T{{[0-9]+\.[XYZW]}} ;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}}
define void @test(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) { define void @test(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) {
entry: entry:

View File

@ -18,7 +18,7 @@ declare void @llvm.AMDGPU.store.output(float, i32)
; CHECK: @fmul_v4f32 ; CHECK: @fmul_v4f32
; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {

View File

@ -2,7 +2,7 @@
;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {

View File

@ -12,7 +12,7 @@ entry:
; CHECK: @fneg_v4 ; CHECK: @fneg_v4
; CHECK: -PV ; CHECK: -PV
; CHECK: -T ; CHECK: -PV
; CHECK: -PV ; CHECK: -PV
; CHECK: -PV ; CHECK: -PV
define void @fneg_v4(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) { define void @fneg_v4(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) {

View File

@ -2,9 +2,9 @@
; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK ; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
; R600-CHECK: @fp_to_sint_v4i32 ; R600-CHECK: @fp_to_sint_v4i32
; R600-CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; R600-CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; R600-CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW]}} ; R600-CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; R600-CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; R600-CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; R600-CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; R600-CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; SI-CHECK: @fp_to_sint_v4i32 ; SI-CHECK: @fp_to_sint_v4i32
; SI-CHECK: V_CVT_I32_F32_e32 ; SI-CHECK: V_CVT_I32_F32_e32

View File

@ -2,7 +2,7 @@
; CHECK: @fp_to_uint_v4i32 ; CHECK: @fp_to_uint_v4i32
; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW]}} ; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}} ; CHECK: FLT_TO_UINT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}

View File

@ -18,7 +18,7 @@ declare void @llvm.AMDGPU.store.output(float, i32)
; CHECK: @fsub_v4f32 ; CHECK: @fsub_v4f32
; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; CHECK: ADD T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; CHECK: ADD * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) { define void @fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {

View File

@ -3,7 +3,7 @@
;Test that a select with reversed True/False values is correctly lowered ;Test that a select with reversed True/False values is correctly lowered
;to a SETNE_INT. There should only be one SETNE_INT instruction. ;to a SETNE_INT. There should only be one SETNE_INT instruction.
;CHECK: SETNE_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;CHECK: SETNE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK-NOT: SETNE_INT ;CHECK-NOT: SETNE_INT
define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {

View File

@ -1,7 +1,7 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
; CHECK: @main1 ; CHECK: @main1
; CHECK: MOV * T{{[0-9]+\.[XYZW], KC0}} ; CHECK: MOV T{{[0-9]+\.[XYZW], KC0}}
define void @main1() { define void @main1() {
main_body: main_body:
%0 = load <4 x float> addrspace(8)* null %0 = load <4 x float> addrspace(8)* null

View File

@ -7,8 +7,7 @@
; ADD_INT literal.x KC0[2].Z, 5 ; ADD_INT literal.x KC0[2].Z, 5
; CHECK: @i32_literal ; CHECK: @i32_literal
; CHECK: ADD_INT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x ; CHECK: ADD_INT * T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
; CHECK-NEXT: LSHR
; CHECK-NEXT: 5 ; CHECK-NEXT: 5
define void @i32_literal(i32 addrspace(1)* %out, i32 %in) { define void @i32_literal(i32 addrspace(1)* %out, i32 %in) {
entry: entry:
@ -24,8 +23,7 @@ entry:
; ADD literal.x KC0[2].Z, 5.0 ; ADD literal.x KC0[2].Z, 5.0
; CHECK: @float_literal ; CHECK: @float_literal
; CHECK: ADD T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x ; CHECK: ADD * T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.0 ; CHECK-NEXT: 1084227584(5.0
define void @float_literal(float addrspace(1)* %out, float %in) { define void @float_literal(float addrspace(1)* %out, float %in) {
entry: entry:

View File

@ -2,7 +2,7 @@
; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s ; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s
; R600-CHECK: @amdgpu_trunc ; R600-CHECK: @amdgpu_trunc
; R600-CHECK: TRUNC T{{[0-9]+\.[XYZW]}}, KC0[2].Z ; R600-CHECK: TRUNC * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK: @amdgpu_trunc ; SI-CHECK: @amdgpu_trunc
; SI-CHECK: V_TRUNC_F32 ; SI-CHECK: V_TRUNC_F32

View File

@ -12,7 +12,7 @@
; CHECK-NEXT: .long 8 ; CHECK-NEXT: .long 8
; Make sure the lds writes are using different addresses. ; Make sure the lds writes are using different addresses.
; CHECK: LDS_WRITE {{\** *}}{{PV|T}}[[ADDRW:[0-9]*\.[XYZW]]] ; CHECK: LDS_WRITE {{[*]*}} {{PV|T}}[[ADDRW:[0-9]*\.[XYZW]]]
; CHECK-NOT: LDS_WRITE {{[*]*}} T[[ADDRW]] ; CHECK-NOT: LDS_WRITE {{[*]*}} T[[ADDRW]]
; GROUP_BARRIER must be the last instruction in a clause ; GROUP_BARRIER must be the last instruction in a clause

View File

@ -19,8 +19,7 @@ entry:
; R600-CHECK: @rotl ; R600-CHECK: @rotl
; R600-CHECK: SUB_INT {{\** T[0-9]+\.[XYZW]}}, literal.x ; R600-CHECK: SUB_INT {{\** T[0-9]+\.[XYZW]}}, literal.x
; R600-CHECK-NEXT: 32 ; R600-CHECK-NEXT: 32
; R600-CHECK: BIT_ALIGN_INT {{T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}} ; R600-CHECK: BIT_ALIGN_INT {{\** T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}}
; SI-CHECK: @rotl ; SI-CHECK: @rotl
; SI-CHECK: V_SUB_I32_e64 [[DST:VGPR[0-9]+]], 32, {{[SV]GPR[0-9]+}} ; SI-CHECK: V_SUB_I32_e64 [[DST:VGPR[0-9]+]], 32, {{[SV]GPR[0-9]+}}

View File

@ -1,8 +1,8 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
;CHECK-NOT: SETE ;CHECK-NOT: SETE
;CHECK: CNDE {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1.0, literal.x, ;CHECK: CNDE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1.0, literal.x,
;CHECK: 1073741824 ;CHECK-NEXT: {{[-0-9]+\(2.0}}
define void @test(float addrspace(1)* %out, float addrspace(1)* %in) { define void @test(float addrspace(1)* %out, float addrspace(1)* %in) {
%1 = load float addrspace(1)* %in %1 = load float addrspace(1)* %in
%2 = fcmp oeq float %1, 0.0 %2 = fcmp oeq float %1, 0.0

View File

@ -1,7 +1,7 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
;CHECK-NOT: SETE_INT ;CHECK-NOT: SETE_INT
;CHECK: CNDE_INT {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, literal.x, ;CHECK: CNDE_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, literal.x,
;CHECK-NEXT: 2 ;CHECK-NEXT: 2
define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%1 = load i32 addrspace(1)* %in %1 = load i32 addrspace(1)* %in

View File

@ -5,8 +5,7 @@
; SET*DX10 instructions. ; SET*DX10 instructions.
; CHECK: @fcmp_une_select_fptosi ; CHECK: @fcmp_une_select_fptosi
; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, ; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00) ; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) { define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry: entry:
@ -19,8 +18,7 @@ entry:
} }
; CHECK: @fcmp_une_select_i32 ; CHECK: @fcmp_une_select_i32
; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, ; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00) ; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) { define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
entry: entry:
@ -31,8 +29,7 @@ entry:
} }
; CHECK: @fcmp_ueq_select_fptosi ; CHECK: @fcmp_ueq_select_fptosi
; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, ; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00) ; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) { define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry: entry:
@ -45,8 +42,7 @@ entry:
} }
; CHECK: @fcmp_ueq_select_i32 ; CHECK: @fcmp_ueq_select_i32
; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, ; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00) ; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) { define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) {
entry: entry:
@ -57,8 +53,7 @@ entry:
} }
; CHECK: @fcmp_ugt_select_fptosi ; CHECK: @fcmp_ugt_select_fptosi
; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, ; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00) ; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) { define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry: entry:
@ -71,8 +66,7 @@ entry:
} }
; CHECK: @fcmp_ugt_select_i32 ; CHECK: @fcmp_ugt_select_i32
; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, ; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00) ; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) { define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) {
entry: entry:
@ -83,8 +77,7 @@ entry:
} }
; CHECK: @fcmp_uge_select_fptosi ; CHECK: @fcmp_uge_select_fptosi
; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, ; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00) ; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) { define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry: entry:
@ -97,8 +90,7 @@ entry:
} }
; CHECK: @fcmp_uge_select_i32 ; CHECK: @fcmp_uge_select_i32
; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, ; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00) ; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) { define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) {
entry: entry:
@ -109,8 +101,7 @@ entry:
} }
; CHECK: @fcmp_ule_select_fptosi ; CHECK: @fcmp_ule_select_fptosi
; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, ; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00) ; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) { define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry: entry:
@ -123,8 +114,7 @@ entry:
} }
; CHECK: @fcmp_ule_select_i32 ; CHECK: @fcmp_ule_select_i32
; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, ; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00) ; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) { define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) {
entry: entry:
@ -135,8 +125,7 @@ entry:
} }
; CHECK: @fcmp_ult_select_fptosi ; CHECK: @fcmp_ult_select_fptosi
; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, ; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00) ; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) { define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry: entry:
@ -149,8 +138,7 @@ entry:
} }
; CHECK: @fcmp_ult_select_i32 ; CHECK: @fcmp_ult_select_i32
; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, ; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00) ; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) { define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) {
entry: entry:

View File

@ -2,8 +2,8 @@
;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s ;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s
;EG-CHECK: @test2 ;EG-CHECK: @test2
;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: SUB_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK: @test2 ;SI-CHECK: @test2
;SI-CHECK: V_SUB_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} ;SI-CHECK: V_SUB_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}}
@ -19,10 +19,10 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
} }
;EG-CHECK: @test4 ;EG-CHECK: @test4
;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: SUB_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: SUB_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK: @test4 ;SI-CHECK: @test4
;SI-CHECK: V_SUB_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}} ;SI-CHECK: V_SUB_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}}

View File

@ -3,8 +3,7 @@
; These tests are for condition codes that are not supported by the hardware ; These tests are for condition codes that are not supported by the hardware
; CHECK: @slt ; CHECK: @slt
; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z ; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR
; CHECK-NEXT: 5(7.006492e-45) ; CHECK-NEXT: 5(7.006492e-45)
define void @slt(i32 addrspace(1)* %out, i32 %in) { define void @slt(i32 addrspace(1)* %out, i32 %in) {
entry: entry:
@ -15,8 +14,7 @@ entry:
} }
; CHECK: @ult_i32 ; CHECK: @ult_i32
; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z ; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR
; CHECK-NEXT: 5(7.006492e-45) ; CHECK-NEXT: 5(7.006492e-45)
define void @ult_i32(i32 addrspace(1)* %out, i32 %in) { define void @ult_i32(i32 addrspace(1)* %out, i32 %in) {
entry: entry:
@ -27,8 +25,7 @@ entry:
} }
; CHECK: @ult_float ; CHECK: @ult_float
; CHECK: SETGT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z ; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00) ; CHECK-NEXT: 1084227584(5.000000e+00)
define void @ult_float(float addrspace(1)* %out, float %in) { define void @ult_float(float addrspace(1)* %out, float %in) {
entry: entry:
@ -39,8 +36,7 @@ entry:
} }
; CHECK: @olt ; CHECK: @olt
; CHECK: SETGT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z ; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR
;CHECK-NEXT: 1084227584(5.000000e+00) ;CHECK-NEXT: 1084227584(5.000000e+00)
define void @olt(float addrspace(1)* %out, float %in) { define void @olt(float addrspace(1)* %out, float %in) {
entry: entry:
@ -51,8 +47,7 @@ entry:
} }
; CHECK: @sle ; CHECK: @sle
; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z ; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR
; CHECK-NEXT: 6(8.407791e-45) ; CHECK-NEXT: 6(8.407791e-45)
define void @sle(i32 addrspace(1)* %out, i32 %in) { define void @sle(i32 addrspace(1)* %out, i32 %in) {
entry: entry:
@ -63,8 +58,7 @@ entry:
} }
; CHECK: @ule_i32 ; CHECK: @ule_i32
; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z ; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR
; CHECK-NEXT: 6(8.407791e-45) ; CHECK-NEXT: 6(8.407791e-45)
define void @ule_i32(i32 addrspace(1)* %out, i32 %in) { define void @ule_i32(i32 addrspace(1)* %out, i32 %in) {
entry: entry:
@ -75,8 +69,7 @@ entry:
} }
; CHECK: @ule_float ; CHECK: @ule_float
; CHECK: SETGE {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z ; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR
; CHECK-NEXT: 1084227584(5.000000e+00) ; CHECK-NEXT: 1084227584(5.000000e+00)
define void @ule_float(float addrspace(1)* %out, float %in) { define void @ule_float(float addrspace(1)* %out, float %in) {
entry: entry:
@ -87,8 +80,7 @@ entry:
} }
; CHECK: @ole ; CHECK: @ole
; CHECK: SETGE {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z ; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: LSHR
; CHECK-NEXT:1084227584(5.000000e+00) ; CHECK-NEXT:1084227584(5.000000e+00)
define void @ole(float addrspace(1)* %out, float %in) { define void @ole(float addrspace(1)* %out, float %in) {
entry: entry:

View File

@ -2,8 +2,8 @@
;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI-CHECK %s ;RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI-CHECK %s
;EG-CHECK: @test_select_v2i32 ;EG-CHECK: @test_select_v2i32
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK: @test_select_v2i32 ;SI-CHECK: @test_select_v2i32
;SI-CHECK: V_CNDMASK_B32_e64 ;SI-CHECK: V_CNDMASK_B32_e64
@ -20,8 +20,8 @@ entry:
} }
;EG-CHECK: @test_select_v2f32 ;EG-CHECK: @test_select_v2f32
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK: @test_select_v2f32 ;SI-CHECK: @test_select_v2f32
;SI-CHECK: V_CNDMASK_B32_e64 ;SI-CHECK: V_CNDMASK_B32_e64
@ -38,10 +38,10 @@ entry:
} }
;EG-CHECK: @test_select_v4i32 ;EG-CHECK: @test_select_v4i32
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK: @test_select_v4i32 ;SI-CHECK: @test_select_v4i32
;SI-CHECK: V_CNDMASK_B32_e64 ;SI-CHECK: V_CNDMASK_B32_e64
@ -60,10 +60,10 @@ entry:
} }
;EG-CHECK: @test_select_v4f32 ;EG-CHECK: @test_select_v4f32
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG-CHECK: CNDE_INT {{\*? *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in0, <4 x float> addrspace(1)* %in1) { define void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in0, <4 x float> addrspace(1)* %in1) {
entry: entry:

View File

@ -3,7 +3,7 @@
; R600-CHECK: @ngroups_x ; R600-CHECK: @ngroups_x
; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV {{\** *}}[[VAL]], KC0[0].X ; R600-CHECK: MOV * [[VAL]], KC0[0].X
; SI-CHECK: @ngroups_x ; SI-CHECK: @ngroups_x
; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 0 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 0
; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@ -17,7 +17,7 @@ entry:
; R600-CHECK: @ngroups_y ; R600-CHECK: @ngroups_y
; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV {{\** *}}[[VAL]], KC0[0].Y ; R600-CHECK: MOV * [[VAL]], KC0[0].Y
; SI-CHECK: @ngroups_y ; SI-CHECK: @ngroups_y
; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 1 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 1
; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@ -31,7 +31,7 @@ entry:
; R600-CHECK: @ngroups_z ; R600-CHECK: @ngroups_z
; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV {{\** *}}[[VAL]], KC0[0].Z ; R600-CHECK: MOV * [[VAL]], KC0[0].Z
; SI-CHECK: @ngroups_z ; SI-CHECK: @ngroups_z
; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 2 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 2
; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@ -45,7 +45,7 @@ entry:
; R600-CHECK: @global_size_x ; R600-CHECK: @global_size_x
; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV {{\** *}}[[VAL]], KC0[0].W ; R600-CHECK: MOV * [[VAL]], KC0[0].W
; SI-CHECK: @global_size_x ; SI-CHECK: @global_size_x
; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 3 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 3
; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@ -59,7 +59,7 @@ entry:
; R600-CHECK: @global_size_y ; R600-CHECK: @global_size_y
; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV {{\** *}}[[VAL]], KC0[1].X ; R600-CHECK: MOV * [[VAL]], KC0[1].X
; SI-CHECK: @global_size_y ; SI-CHECK: @global_size_y
; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 4 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 4
; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@ -73,7 +73,7 @@ entry:
; R600-CHECK: @global_size_z ; R600-CHECK: @global_size_z
; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV {{\** *}}[[VAL]], KC0[1].Y ; R600-CHECK: MOV * [[VAL]], KC0[1].Y
; SI-CHECK: @global_size_z ; SI-CHECK: @global_size_z
; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 5 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 5
; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@ -87,7 +87,7 @@ entry:
; R600-CHECK: @local_size_x ; R600-CHECK: @local_size_x
; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV {{\** *}}[[VAL]], KC0[1].Z ; R600-CHECK: MOV * [[VAL]], KC0[1].Z
; SI-CHECK: @local_size_x ; SI-CHECK: @local_size_x
; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 6 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 6
; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@ -101,7 +101,7 @@ entry:
; R600-CHECK: @local_size_y ; R600-CHECK: @local_size_y
; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV {{\** *}}[[VAL]], KC0[1].W ; R600-CHECK: MOV * [[VAL]], KC0[1].W
; SI-CHECK: @local_size_y ; SI-CHECK: @local_size_y
; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 7 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 7
; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@ -115,7 +115,7 @@ entry:
; R600-CHECK: @local_size_z ; R600-CHECK: @local_size_z
; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]] ; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
; R600-CHECK: MOV {{\** *}}[[VAL]], KC0[2].X ; R600-CHECK: MOV * [[VAL]], KC0[2].X
; SI-CHECK: @local_size_z ; SI-CHECK: @local_size_z
; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 8 ; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 8
; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]] ; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]

View File

@ -1,89 +0,0 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
; We want all MULLO_INT inst to be last in their instruction group
;CHECK: @fill3d
;CHECK-NOT: MULLO_INT T[0-9]+
; ModuleID = 'radeon'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
target triple = "r600--"
; Function Attrs: nounwind
; Test kernel: stores the product of the three global-size values into %out at
; a linear index built from the tgid / local.size / tidig intrinsics.
; The body is a chain of i32 multiplies and adds; the header of this test says
; the point is that every MULLO_INT ends up last in its instruction group.
; NOTE(review): tgid / tidig / local.size presumably mean work-group id,
; work-item id within the group, and work-group size - confirm against the
; R600 intrinsic definitions.
define void @fill3d(i32 addrspace(1)* nocapture %out) #0 {
entry:
; Stored value: %mul3 = global.size.y * global.size.x * global.size.z.
%x.i = tail call i32 @llvm.r600.read.global.size.x() #1
%y.i18 = tail call i32 @llvm.r600.read.global.size.y() #1
%mul = mul i32 %y.i18, %x.i
%z.i17 = tail call i32 @llvm.r600.read.global.size.z() #1
%mul3 = mul i32 %mul, %z.i17
; X term: %add.i16 = tidig.x + local.size.x * tgid.x.
%x.i.i = tail call i32 @llvm.r600.read.tgid.x() #1
%x.i12.i = tail call i32 @llvm.r600.read.local.size.x() #1
%mul26.i = mul i32 %x.i12.i, %x.i.i
%x.i4.i = tail call i32 @llvm.r600.read.tidig.x() #1
%add.i16 = add i32 %x.i4.i, %mul26.i
; Scale the X term by global.size.y before folding in the Y term.
%mul7 = mul i32 %add.i16, %y.i18
; Y term: local.size.y * tgid.y, plus tidig.y added two instructions later.
%y.i.i = tail call i32 @llvm.r600.read.tgid.y() #1
%y.i14.i = tail call i32 @llvm.r600.read.local.size.y() #1
%mul30.i = mul i32 %y.i14.i, %y.i.i
%y.i6.i = tail call i32 @llvm.r600.read.tidig.y() #1
%add.i14 = add i32 %mul30.i, %mul7
; Despite its name, %mul819 is produced by an add (it adds tidig.y).
%mul819 = add i32 %add.i14, %y.i6.i
; Despite its name, %add is produced by a mul (scales by global.size.z).
%add = mul i32 %mul819, %z.i17
; Z term: %add.i = tidig.z + local.size.z * tgid.z.
%z.i.i = tail call i32 @llvm.r600.read.tgid.z() #1
%z.i16.i = tail call i32 @llvm.r600.read.local.size.z() #1
%mul33.i = mul i32 %z.i16.i, %z.i.i
%z.i8.i = tail call i32 @llvm.r600.read.tidig.z() #1
%add.i = add i32 %z.i8.i, %mul33.i
; Final element index: ((X term * global.size.y) + Y term) * global.size.z + Z term.
%add13 = add i32 %add.i, %add
%arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %add13
; Write the global-size product to out[%add13].
store i32 %mul3, i32 addrspace(1)* %arrayidx, align 4, !tbaa !3
ret void
}
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tgid.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tgid.y() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tgid.z() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.local.size.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.local.size.y() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.local.size.z() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tidig.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tidig.y() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.tidig.z() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.global.size.x() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.global.size.y() #1
; Function Attrs: nounwind readnone
declare i32 @llvm.r600.read.global.size.z() #1
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!opencl.kernels = !{!0, !1, !2}
!0 = metadata !{null}
!1 = metadata !{null}
!2 = metadata !{void (i32 addrspace(1)*)* @fill3d}
!3 = metadata !{metadata !"int", metadata !4}
!4 = metadata !{metadata !"omnipotent char", metadata !5}
!5 = metadata !{metadata !"Simple C/C++ TBAA"}