R600: Fix JUMP handling so that MachineInstr verification can occur

This allows the R600 target to use the newly created -verify-misched llc flag.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176819 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Vincent Lejeune 2013-03-11 18:15:06 +00:00
parent 48f8015121
commit fd49dac48f
10 changed files with 553 additions and 45 deletions

View File

@ -2595,6 +2595,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
static int getBranchNzeroOpcode(int oldOpcode) {
switch(oldOpcode) {
case AMDGPU::JUMP_COND:
case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
case AMDGPU::BRANCH_COND_i32:
case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
@ -2606,6 +2607,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
static int getBranchZeroOpcode(int oldOpcode) {
switch(oldOpcode) {
case AMDGPU::JUMP_COND:
case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
case AMDGPU::BRANCH_COND_i32:
case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
@ -2617,6 +2619,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
static int getContinueNzeroOpcode(int oldOpcode) {
switch(oldOpcode) {
case AMDGPU::JUMP_COND:
case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
default:
assert(0 && "internal error");
@ -2626,6 +2629,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
static int getContinueZeroOpcode(int oldOpcode) {
switch(oldOpcode) {
case AMDGPU::JUMP_COND:
case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
default:
assert(0 && "internal error");
@ -2654,8 +2658,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
static bool isCondBranch(MachineInstr *instr) {
switch (instr->getOpcode()) {
case AMDGPU::JUMP:
return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0;
case AMDGPU::JUMP_COND:
case AMDGPU::BRANCH_COND_i32:
case AMDGPU::BRANCH_COND_f32:
break;
@ -2668,7 +2671,6 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
static bool isUncondBranch(MachineInstr *instr) {
switch (instr->getOpcode()) {
case AMDGPU::JUMP:
return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0;
case AMDGPU::BRANCH:
return true;
default:

View File

@ -221,8 +221,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
case AMDGPU::BRANCH:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
.addOperand(MI->getOperand(0))
.addReg(0);
.addOperand(MI->getOperand(0));
break;
case AMDGPU::BRANCH_COND_f32: {
@ -233,7 +232,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
.addImm(OPCODE_IS_NOT_ZERO)
.addImm(0); // Flags
TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
.addOperand(MI->getOperand(0))
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
break;
@ -247,7 +246,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
.addImm(OPCODE_IS_NOT_ZERO_INT)
.addImm(0); // Flags
TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
.addOperand(MI->getOperand(0))
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
break;

View File

@ -168,6 +168,11 @@ findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
return NULL;
}
static
bool isJump(unsigned Opcode) {
return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
}
bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
@ -186,7 +191,7 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
return false;
--I;
}
if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
return false;
}
@ -196,22 +201,20 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// If there is only one terminator instruction, process it.
unsigned LastOpc = LastInst->getOpcode();
if (I == MBB.begin() ||
static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
!isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
if (LastOpc == AMDGPU::JUMP) {
if(!isPredicated(LastInst)) {
TBB = LastInst->getOperand(0).getMBB();
return false;
} else {
MachineInstr *predSet = I;
while (!isPredicateSetter(predSet->getOpcode())) {
predSet = --I;
}
TBB = LastInst->getOperand(0).getMBB();
Cond.push_back(predSet->getOperand(1));
Cond.push_back(predSet->getOperand(2));
Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
return false;
TBB = LastInst->getOperand(0).getMBB();
return false;
} else if (LastOpc == AMDGPU::JUMP_COND) {
MachineInstr *predSet = I;
while (!isPredicateSetter(predSet->getOpcode())) {
predSet = --I;
}
TBB = LastInst->getOperand(0).getMBB();
Cond.push_back(predSet->getOperand(1));
Cond.push_back(predSet->getOperand(2));
Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
return false;
}
return true; // Can't handle indirect branch.
}
@ -221,10 +224,7 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
unsigned SecondLastOpc = SecondLastInst->getOpcode();
// If the block ends with a B and a Bcc, handle it.
if (SecondLastOpc == AMDGPU::JUMP &&
isPredicated(SecondLastInst) &&
LastOpc == AMDGPU::JUMP &&
!isPredicated(LastInst)) {
if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
MachineInstr *predSet = --I;
while (!isPredicateSetter(predSet->getOpcode())) {
predSet = --I;
@ -261,7 +261,7 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
if (FBB == 0) {
if (Cond.empty()) {
BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
return 1;
} else {
MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
@ -269,7 +269,7 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
addFlag(PredSet, 0, MO_FLAG_PUSH);
PredSet->getOperand(2).setImm(Cond[1].getImm());
BuildMI(&MBB, DL, get(AMDGPU::JUMP))
BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
.addMBB(TBB)
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
return 1;
@ -279,10 +279,10 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
assert(PredSet && "No previous predicate !");
addFlag(PredSet, 0, MO_FLAG_PUSH);
PredSet->getOperand(2).setImm(Cond[1].getImm());
BuildMI(&MBB, DL, get(AMDGPU::JUMP))
BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
.addMBB(TBB)
.addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
return 2;
}
}
@ -302,11 +302,13 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
switch (I->getOpcode()) {
default:
return 0;
case AMDGPU::JUMP_COND: {
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
clearFlag(predSet, 0, MO_FLAG_PUSH);
I->eraseFromParent();
break;
}
case AMDGPU::JUMP:
if (isPredicated(I)) {
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
clearFlag(predSet, 0, MO_FLAG_PUSH);
}
I->eraseFromParent();
break;
}
@ -320,11 +322,13 @@ R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
// FIXME: only one case??
default:
return 1;
case AMDGPU::JUMP_COND: {
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
clearFlag(predSet, 0, MO_FLAG_PUSH);
I->eraseFromParent();
break;
}
case AMDGPU::JUMP:
if (isPredicated(I)) {
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
clearFlag(predSet, 0, MO_FLAG_PUSH);
}
I->eraseFromParent();
break;
}

View File

@ -1587,19 +1587,28 @@ def PRED_X : InstR600 <
(ins R600_Reg32:$src0, i32imm:$src1, i32imm:$flags),
"", [], NullALU> {
let FlagOperandIdx = 3;
let isTerminator = 1;
}
let isTerminator = 1, isBranch = 1, isBarrier = 1 in {
def JUMP : InstR600 <0x10,
let isTerminator = 1, isBranch = 1 in {
def JUMP_COND : InstR600 <0x10,
(outs),
(ins brtarget:$target, R600_Pred:$p),
(ins brtarget:$target, R600_Predicate_Bit:$p),
"JUMP $target ($p)",
[], AnyALU
>;
} // End isTerminator = 1, isBranch = 1, isBarrier = 1
def JUMP : InstR600 <0x10,
(outs),
(ins brtarget:$target),
"JUMP $target",
[], AnyALU
>
{
let isPredicable = 1;
let isBarrier = 1;
}
} // End isTerminator = 1, isBranch = 1
let usesCustomInserter = 1 in {
@ -1639,7 +1648,7 @@ def FNEG_R600 : FNEG<R600_Reg32>;
//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//
let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1,
let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
usesCustomInserter = 1 in {
def RETURN : ILFormat<(outs), (ins variable_ops),
"RETURN", [(IL_retflag)]>;

View File

@ -0,0 +1,82 @@
;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
define void @main() {
main_body:
%0 = call float @llvm.R600.interp.input(i32 0, i32 0)
%1 = call float @llvm.R600.interp.input(i32 1, i32 0)
%2 = call float @llvm.R600.interp.input(i32 2, i32 0)
%3 = call float @llvm.R600.interp.input(i32 3, i32 0)
%4 = fcmp ult float %1, 0.000000e+00
%5 = select i1 %4, float 1.000000e+00, float 0.000000e+00
%6 = fsub float -0.000000e+00, %5
%7 = fptosi float %6 to i32
%8 = bitcast i32 %7 to float
%9 = fcmp ult float %0, 5.700000e+01
%10 = select i1 %9, float 1.000000e+00, float 0.000000e+00
%11 = fsub float -0.000000e+00, %10
%12 = fptosi float %11 to i32
%13 = bitcast i32 %12 to float
%14 = bitcast float %8 to i32
%15 = bitcast float %13 to i32
%16 = and i32 %14, %15
%17 = bitcast i32 %16 to float
%18 = bitcast float %17 to i32
%19 = icmp ne i32 %18, 0
%20 = fcmp ult float %0, 0.000000e+00
%21 = select i1 %20, float 1.000000e+00, float 0.000000e+00
%22 = fsub float -0.000000e+00, %21
%23 = fptosi float %22 to i32
%24 = bitcast i32 %23 to float
%25 = bitcast float %24 to i32
%26 = icmp ne i32 %25, 0
br i1 %19, label %IF, label %ELSE
IF: ; preds = %main_body
%. = select i1 %26, float 0.000000e+00, float 1.000000e+00
%.18 = select i1 %26, float 1.000000e+00, float 0.000000e+00
br label %ENDIF
ELSE: ; preds = %main_body
br i1 %26, label %ENDIF, label %ELSE17
ENDIF: ; preds = %ELSE17, %ELSE, %IF
%temp1.0 = phi float [ %., %IF ], [ %48, %ELSE17 ], [ 0.000000e+00, %ELSE ]
%temp2.0 = phi float [ 0.000000e+00, %IF ], [ %49, %ELSE17 ], [ 1.000000e+00, %ELSE ]
%temp.0 = phi float [ %.18, %IF ], [ %47, %ELSE17 ], [ 0.000000e+00, %ELSE ]
%27 = call float @llvm.AMDIL.clamp.(float %temp.0, float 0.000000e+00, float 1.000000e+00)
%28 = call float @llvm.AMDIL.clamp.(float %temp1.0, float 0.000000e+00, float 1.000000e+00)
%29 = call float @llvm.AMDIL.clamp.(float %temp2.0, float 0.000000e+00, float 1.000000e+00)
%30 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
%31 = insertelement <4 x float> undef, float %27, i32 0
%32 = insertelement <4 x float> %31, float %28, i32 1
%33 = insertelement <4 x float> %32, float %29, i32 2
%34 = insertelement <4 x float> %33, float %30, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %34, i32 0, i32 0)
ret void
ELSE17: ; preds = %ELSE
%35 = fadd float 0.000000e+00, 0x3FC99999A0000000
%36 = fadd float 0.000000e+00, 0x3FC99999A0000000
%37 = fadd float 0.000000e+00, 0x3FC99999A0000000
%38 = fadd float %35, 0x3FC99999A0000000
%39 = fadd float %36, 0x3FC99999A0000000
%40 = fadd float %37, 0x3FC99999A0000000
%41 = fadd float %38, 0x3FC99999A0000000
%42 = fadd float %39, 0x3FC99999A0000000
%43 = fadd float %40, 0x3FC99999A0000000
%44 = fadd float %41, 0x3FC99999A0000000
%45 = fadd float %42, 0x3FC99999A0000000
%46 = fadd float %43, 0x3FC99999A0000000
%47 = fadd float %44, 0x3FC99999A0000000
%48 = fadd float %45, 0x3FC99999A0000000
%49 = fadd float %46, 0x3FC99999A0000000
br label %ENDIF
}
declare float @llvm.R600.interp.input(i32, i32) #0
declare float @llvm.AMDIL.clamp.(float, float, float) #0
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
attributes #0 = { readnone }

View File

@ -0,0 +1,87 @@
;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
define void @main() {
main_body:
%0 = load <4 x float> addrspace(9)* null
%1 = extractelement <4 x float> %0, i32 3
%2 = fptosi float %1 to i32
%3 = bitcast i32 %2 to float
%4 = bitcast float %3 to i32
%5 = sdiv i32 %4, 4
%6 = bitcast i32 %5 to float
%7 = bitcast float %6 to i32
%8 = mul i32 %7, 4
%9 = bitcast i32 %8 to float
%10 = bitcast float %9 to i32
%11 = sub i32 0, %10
%12 = bitcast i32 %11 to float
%13 = bitcast float %3 to i32
%14 = bitcast float %12 to i32
%15 = add i32 %13, %14
%16 = bitcast i32 %15 to float
%17 = load <4 x float> addrspace(9)* null
%18 = extractelement <4 x float> %17, i32 0
%19 = load <4 x float> addrspace(9)* null
%20 = extractelement <4 x float> %19, i32 1
%21 = load <4 x float> addrspace(9)* null
%22 = extractelement <4 x float> %21, i32 2
br label %LOOP
LOOP: ; preds = %IF31, %main_body
%temp12.0 = phi float [ 0.000000e+00, %main_body ], [ %47, %IF31 ]
%temp6.0 = phi float [ %22, %main_body ], [ %temp6.1, %IF31 ]
%temp5.0 = phi float [ %20, %main_body ], [ %temp5.1, %IF31 ]
%temp4.0 = phi float [ %18, %main_body ], [ %temp4.1, %IF31 ]
%23 = bitcast float %temp12.0 to i32
%24 = bitcast float %6 to i32
%25 = icmp sge i32 %23, %24
%26 = sext i1 %25 to i32
%27 = bitcast i32 %26 to float
%28 = bitcast float %27 to i32
%29 = icmp ne i32 %28, 0
br i1 %29, label %IF, label %LOOP29
IF: ; preds = %LOOP
%30 = call float @llvm.AMDIL.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00)
%31 = call float @llvm.AMDIL.clamp.(float %temp5.0, float 0.000000e+00, float 1.000000e+00)
%32 = call float @llvm.AMDIL.clamp.(float %temp6.0, float 0.000000e+00, float 1.000000e+00)
%33 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
%34 = insertelement <4 x float> undef, float %30, i32 0
%35 = insertelement <4 x float> %34, float %31, i32 1
%36 = insertelement <4 x float> %35, float %32, i32 2
%37 = insertelement <4 x float> %36, float %33, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %37, i32 0, i32 0)
ret void
LOOP29: ; preds = %LOOP, %ENDIF30
%temp6.1 = phi float [ %temp4.1, %ENDIF30 ], [ %temp6.0, %LOOP ]
%temp5.1 = phi float [ %temp6.1, %ENDIF30 ], [ %temp5.0, %LOOP ]
%temp4.1 = phi float [ %temp5.1, %ENDIF30 ], [ %temp4.0, %LOOP ]
%temp20.0 = phi float [ %50, %ENDIF30 ], [ 0.000000e+00, %LOOP ]
%38 = bitcast float %temp20.0 to i32
%39 = bitcast float %16 to i32
%40 = icmp sge i32 %38, %39
%41 = sext i1 %40 to i32
%42 = bitcast i32 %41 to float
%43 = bitcast float %42 to i32
%44 = icmp ne i32 %43, 0
br i1 %44, label %IF31, label %ENDIF30
IF31: ; preds = %LOOP29
%45 = bitcast float %temp12.0 to i32
%46 = add i32 %45, 1
%47 = bitcast i32 %46 to float
br label %LOOP
ENDIF30: ; preds = %LOOP29
%48 = bitcast float %temp20.0 to i32
%49 = add i32 %48, 1
%50 = bitcast i32 %49 to float
br label %LOOP29
}
declare float @llvm.AMDIL.clamp.(float, float, float) #0
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
attributes #0 = { readnone }

View File

@ -0,0 +1,54 @@
;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
define void @main() {
main_body:
%0 = load <4 x float> addrspace(9)* null
%1 = extractelement <4 x float> %0, i32 3
%2 = fptosi float %1 to i32
%3 = bitcast i32 %2 to float
%4 = load <4 x float> addrspace(9)* null
%5 = extractelement <4 x float> %4, i32 0
%6 = load <4 x float> addrspace(9)* null
%7 = extractelement <4 x float> %6, i32 1
%8 = load <4 x float> addrspace(9)* null
%9 = extractelement <4 x float> %8, i32 2
br label %LOOP
LOOP: ; preds = %ENDIF, %main_body
%temp4.0 = phi float [ %5, %main_body ], [ %temp5.0, %ENDIF ]
%temp5.0 = phi float [ %7, %main_body ], [ %temp6.0, %ENDIF ]
%temp6.0 = phi float [ %9, %main_body ], [ %temp4.0, %ENDIF ]
%temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %27, %ENDIF ]
%10 = bitcast float %temp8.0 to i32
%11 = bitcast float %3 to i32
%12 = icmp sge i32 %10, %11
%13 = sext i1 %12 to i32
%14 = bitcast i32 %13 to float
%15 = bitcast float %14 to i32
%16 = icmp ne i32 %15, 0
br i1 %16, label %IF, label %ENDIF
IF: ; preds = %LOOP
%17 = call float @llvm.AMDIL.clamp.(float %temp4.0, float 0.000000e+00, float 1.000000e+00)
%18 = call float @llvm.AMDIL.clamp.(float %temp5.0, float 0.000000e+00, float 1.000000e+00)
%19 = call float @llvm.AMDIL.clamp.(float %temp6.0, float 0.000000e+00, float 1.000000e+00)
%20 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
%21 = insertelement <4 x float> undef, float %17, i32 0
%22 = insertelement <4 x float> %21, float %18, i32 1
%23 = insertelement <4 x float> %22, float %19, i32 2
%24 = insertelement <4 x float> %23, float %20, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %24, i32 0, i32 0)
ret void
ENDIF: ; preds = %LOOP
%25 = bitcast float %temp8.0 to i32
%26 = add i32 %25, 1
%27 = bitcast i32 %26 to float
br label %LOOP
}
declare float @llvm.AMDIL.clamp.(float, float, float) #0
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
attributes #0 = { readnone }

View File

@ -0,0 +1,93 @@
;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
define void @main() {
main_body:
%0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
%1 = extractelement <4 x float> %0, i32 0
%2 = fadd float 1.000000e+03, %1
%3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%4 = extractelement <4 x float> %3, i32 0
%5 = bitcast float %4 to i32
%6 = icmp eq i32 %5, 0
%7 = sext i1 %6 to i32
%8 = bitcast i32 %7 to float
%9 = bitcast float %8 to i32
%10 = icmp ne i32 %9, 0
br i1 %10, label %IF, label %ELSE
IF: ; preds = %main_body
%11 = call float @fabs(float %2)
%12 = fcmp ueq float %11, 0x7FF0000000000000
%13 = select i1 %12, float 1.000000e+00, float 0.000000e+00
%14 = fsub float -0.000000e+00, %13
%15 = fptosi float %14 to i32
%16 = bitcast i32 %15 to float
%17 = bitcast float %16 to i32
%18 = icmp ne i32 %17, 0
%. = select i1 %18, float 0x36A0000000000000, float 0.000000e+00
%19 = fcmp une float %2, %2
%20 = select i1 %19, float 1.000000e+00, float 0.000000e+00
%21 = fsub float -0.000000e+00, %20
%22 = fptosi float %21 to i32
%23 = bitcast i32 %22 to float
%24 = bitcast float %23 to i32
%25 = icmp ne i32 %24, 0
%temp8.0 = select i1 %25, float 0x36A0000000000000, float 0.000000e+00
%26 = bitcast float %. to i32
%27 = sitofp i32 %26 to float
%28 = bitcast float %temp8.0 to i32
%29 = sitofp i32 %28 to float
%30 = fcmp ugt float %2, 0.000000e+00
%31 = select i1 %30, float 1.000000e+00, float %2
%32 = fcmp uge float %31, 0.000000e+00
%33 = select i1 %32, float %31, float -1.000000e+00
%34 = fadd float %33, 1.000000e+00
%35 = fmul float %34, 5.000000e-01
br label %ENDIF
ELSE: ; preds = %main_body
%36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%37 = extractelement <4 x float> %36, i32 0
%38 = bitcast float %37 to i32
%39 = icmp eq i32 %38, 1
%40 = sext i1 %39 to i32
%41 = bitcast i32 %40 to float
%42 = bitcast float %41 to i32
%43 = icmp ne i32 %42, 0
br i1 %43, label %IF23, label %ENDIF
ENDIF: ; preds = %IF23, %ELSE, %IF
%temp4.0 = phi float [ %2, %IF ], [ %56, %IF23 ], [ 0.000000e+00, %ELSE ]
%temp5.0 = phi float [ %27, %IF ], [ %60, %IF23 ], [ 0.000000e+00, %ELSE ]
%temp6.0 = phi float [ %29, %IF ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF23 ]
%temp7.0 = phi float [ %35, %IF ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF23 ]
%44 = insertelement <4 x float> undef, float %temp4.0, i32 0
%45 = insertelement <4 x float> %44, float %temp5.0, i32 1
%46 = insertelement <4 x float> %45, float %temp6.0, i32 2
%47 = insertelement <4 x float> %46, float %temp7.0, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %47, i32 0, i32 0)
ret void
IF23: ; preds = %ELSE
%48 = fcmp ult float 0.000000e+00, %2
%49 = select i1 %48, float 1.000000e+00, float 0.000000e+00
%50 = fsub float -0.000000e+00, %49
%51 = fptosi float %50 to i32
%52 = bitcast i32 %51 to float
%53 = bitcast float %52 to i32
%54 = icmp ne i32 %53, 0
%.28 = select i1 %54, float 0x36A0000000000000, float 0.000000e+00
%55 = bitcast float %.28 to i32
%56 = sitofp i32 %55 to float
%57 = load <4 x float> addrspace(8)* null
%58 = extractelement <4 x float> %57, i32 0
%59 = fsub float -0.000000e+00, %58
%60 = fadd float %2, %59
br label %ENDIF
}
declare float @fabs(float) #0
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
attributes #0 = { readonly }

View File

@ -0,0 +1,45 @@
;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
define void @main() {
main_body:
%0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%1 = extractelement <4 x float> %0, i32 0
%2 = bitcast float %1 to i32
%3 = icmp eq i32 %2, 0
%4 = sext i1 %3 to i32
%5 = bitcast i32 %4 to float
%6 = bitcast float %5 to i32
%7 = icmp ne i32 %6, 0
br i1 %7, label %ENDIF, label %ELSE
ELSE: ; preds = %main_body
%8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%9 = extractelement <4 x float> %8, i32 0
%10 = bitcast float %9 to i32
%11 = icmp eq i32 %10, 1
%12 = sext i1 %11 to i32
%13 = bitcast i32 %12 to float
%14 = bitcast float %13 to i32
%15 = icmp ne i32 %14, 0
br i1 %15, label %IF13, label %ENDIF
ENDIF: ; preds = %IF13, %ELSE, %main_body
%temp.0 = phi float [ 1.000000e+03, %main_body ], [ 1.000000e+00, %IF13 ], [ 0.000000e+00, %ELSE ]
%temp1.0 = phi float [ 0.000000e+00, %main_body ], [ %23, %IF13 ], [ 0.000000e+00, %ELSE ]
%temp3.0 = phi float [ 1.000000e+00, %main_body ], [ 0.000000e+00, %ELSE ], [ 0.000000e+00, %IF13 ]
%16 = insertelement <4 x float> undef, float %temp.0, i32 0
%17 = insertelement <4 x float> %16, float %temp1.0, i32 1
%18 = insertelement <4 x float> %17, float 0.000000e+00, i32 2
%19 = insertelement <4 x float> %18, float %temp3.0, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %19, i32 0, i32 0)
ret void
IF13: ; preds = %ELSE
%20 = load <4 x float> addrspace(8)* null
%21 = extractelement <4 x float> %20, i32 0
%22 = fsub float -0.000000e+00, %21
%23 = fadd float 1.000000e+03, %22
br label %ENDIF
}
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)

View File

@ -0,0 +1,133 @@
;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
define void @main() {
main_body:
%0 = call float @llvm.R600.load.input(i32 4)
%1 = call float @llvm.R600.load.input(i32 5)
%2 = call float @llvm.R600.load.input(i32 6)
%3 = call float @llvm.R600.load.input(i32 7)
%4 = fcmp ult float %0, 0.000000e+00
%5 = select i1 %4, float 1.000000e+00, float 0.000000e+00
%6 = fsub float -0.000000e+00, %5
%7 = fptosi float %6 to i32
%8 = bitcast i32 %7 to float
%9 = bitcast float %8 to i32
%10 = icmp ne i32 %9, 0
br i1 %10, label %LOOP, label %ENDIF
ENDIF: ; preds = %ENDIF16, %LOOP, %main_body
%temp.0 = phi float [ 0.000000e+00, %main_body ], [ %temp.1, %LOOP ], [ %temp.1, %ENDIF16 ]
%temp1.0 = phi float [ 1.000000e+00, %main_body ], [ %temp1.1, %LOOP ], [ %temp1.1, %ENDIF16 ]
%temp2.0 = phi float [ 0.000000e+00, %main_body ], [ %temp2.1, %LOOP ], [ %temp2.1, %ENDIF16 ]
%temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %temp3.1, %LOOP ], [ %temp3.1, %ENDIF16 ]
%11 = load <4 x float> addrspace(9)* null
%12 = extractelement <4 x float> %11, i32 0
%13 = fmul float %12, %0
%14 = load <4 x float> addrspace(9)* null
%15 = extractelement <4 x float> %14, i32 1
%16 = fmul float %15, %0
%17 = load <4 x float> addrspace(9)* null
%18 = extractelement <4 x float> %17, i32 2
%19 = fmul float %18, %0
%20 = load <4 x float> addrspace(9)* null
%21 = extractelement <4 x float> %20, i32 3
%22 = fmul float %21, %0
%23 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
%24 = extractelement <4 x float> %23, i32 0
%25 = fmul float %24, %1
%26 = fadd float %25, %13
%27 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
%28 = extractelement <4 x float> %27, i32 1
%29 = fmul float %28, %1
%30 = fadd float %29, %16
%31 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
%32 = extractelement <4 x float> %31, i32 2
%33 = fmul float %32, %1
%34 = fadd float %33, %19
%35 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
%36 = extractelement <4 x float> %35, i32 3
%37 = fmul float %36, %1
%38 = fadd float %37, %22
%39 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
%40 = extractelement <4 x float> %39, i32 0
%41 = fmul float %40, %2
%42 = fadd float %41, %26
%43 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
%44 = extractelement <4 x float> %43, i32 1
%45 = fmul float %44, %2
%46 = fadd float %45, %30
%47 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
%48 = extractelement <4 x float> %47, i32 2
%49 = fmul float %48, %2
%50 = fadd float %49, %34
%51 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
%52 = extractelement <4 x float> %51, i32 3
%53 = fmul float %52, %2
%54 = fadd float %53, %38
%55 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
%56 = extractelement <4 x float> %55, i32 0
%57 = fmul float %56, %3
%58 = fadd float %57, %42
%59 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
%60 = extractelement <4 x float> %59, i32 1
%61 = fmul float %60, %3
%62 = fadd float %61, %46
%63 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
%64 = extractelement <4 x float> %63, i32 2
%65 = fmul float %64, %3
%66 = fadd float %65, %50
%67 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
%68 = extractelement <4 x float> %67, i32 3
%69 = fmul float %68, %3
%70 = fadd float %69, %54
%71 = insertelement <4 x float> undef, float %58, i32 0
%72 = insertelement <4 x float> %71, float %62, i32 1
%73 = insertelement <4 x float> %72, float %66, i32 2
%74 = insertelement <4 x float> %73, float %70, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %74, i32 60, i32 1)
%75 = insertelement <4 x float> undef, float %temp.0, i32 0
%76 = insertelement <4 x float> %75, float %temp1.0, i32 1
%77 = insertelement <4 x float> %76, float %temp2.0, i32 2
%78 = insertelement <4 x float> %77, float %temp3.0, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %78, i32 0, i32 2)
ret void
LOOP: ; preds = %main_body, %ENDIF19
%temp.1 = phi float [ %93, %ENDIF19 ], [ 0.000000e+00, %main_body ]
%temp1.1 = phi float [ %94, %ENDIF19 ], [ 1.000000e+00, %main_body ]
%temp2.1 = phi float [ %95, %ENDIF19 ], [ 0.000000e+00, %main_body ]
%temp3.1 = phi float [ %96, %ENDIF19 ], [ 0.000000e+00, %main_body ]
%temp4.0 = phi float [ %97, %ENDIF19 ], [ -2.000000e+00, %main_body ]
%79 = fcmp uge float %temp4.0, %0
%80 = select i1 %79, float 1.000000e+00, float 0.000000e+00
%81 = fsub float -0.000000e+00, %80
%82 = fptosi float %81 to i32
%83 = bitcast i32 %82 to float
%84 = bitcast float %83 to i32
%85 = icmp ne i32 %84, 0
br i1 %85, label %ENDIF, label %ENDIF16
ENDIF16: ; preds = %LOOP
%86 = fcmp une float %2, %temp4.0
%87 = select i1 %86, float 1.000000e+00, float 0.000000e+00
%88 = fsub float -0.000000e+00, %87
%89 = fptosi float %88 to i32
%90 = bitcast i32 %89 to float
%91 = bitcast float %90 to i32
%92 = icmp ne i32 %91, 0
br i1 %92, label %ENDIF, label %ENDIF19
ENDIF19: ; preds = %ENDIF16
%93 = fadd float %temp.1, 1.000000e+00
%94 = fadd float %temp1.1, 0.000000e+00
%95 = fadd float %temp2.1, 0.000000e+00
%96 = fadd float %temp3.1, 0.000000e+00
%97 = fadd float %temp4.0, 1.000000e+00
br label %LOOP
}
declare float @llvm.R600.load.input(i32) #0
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
attributes #0 = { readnone }