R600: Add CF_END

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180123 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Vincent Lejeune 2013-04-23 17:34:00 +00:00
parent a7d9a6ee63
commit 7a28d8afa7
6 changed files with 80 additions and 47 deletions

View File

@ -281,7 +281,11 @@ void R600MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case AMDGPU::EG_ExportSwz: case AMDGPU::EG_ExportSwz:
case AMDGPU::R600_ExportSwz: case AMDGPU::R600_ExportSwz:
case AMDGPU::EG_ExportBuf: case AMDGPU::EG_ExportBuf:
case AMDGPU::R600_ExportBuf: { case AMDGPU::R600_ExportBuf:
case AMDGPU::PAD:
case AMDGPU::CF_END_R600:
case AMDGPU::CF_END_EG:
case AMDGPU::CF_END_CM: {
uint64_t Inst = getBinaryCodeForInstr(MI, Fixups); uint64_t Inst = getBinaryCodeForInstr(MI, Fixups);
EmitByte(INSTR_NATIVE, OS); EmitByte(INSTR_NATIVE, OS);
Emit(Inst, OS); Emit(Inst, OS);

View File

@ -39,7 +39,8 @@ private:
CF_LOOP_CONTINUE, CF_LOOP_CONTINUE,
CF_JUMP, CF_JUMP,
CF_ELSE, CF_ELSE,
CF_POP CF_POP,
CF_END
}; };
static char ID; static char ID;
@ -91,49 +92,46 @@ private:
} }
// getHWInstrDesc: maps an abstract ControlFlowInstruction value to the
// MCInstrDesc of the matching hardware opcode.
//
// The lines below are a side-by-side diff rendering: the left part of each
// line is the removed implementation (two per-generation switches that return
// TII->get(...) directly), the right part is the replacement.
//
// Replacement logic: isEg is true when the device generation is >= HD5XXX;
// each case picks the *_EG opcode when isEg is set and the *_R600 opcode
// otherwise, stores it in Opcode, and the function returns TII->get(Opcode)
// after asserting that Opcode is non-zero. CF_END is the one exception: when
// the generation equals HD6XXX it selects CF_END_CM instead of CF_END_EG.
//
// NOTE(review): CF_END_CM is chosen for every HD6XXX-generation device; if
// that generation also covers non-Cayman parts they would presumably need
// CF_END_EG -- confirm against the device/subtarget definitions.
const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const { const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD4XXX) { unsigned Opcode = 0;
switch (CFI) { bool isEg = (ST.device()->getGeneration() >= AMDGPUDeviceInfo::HD5XXX);
case CF_TC: switch (CFI) {
return TII->get(AMDGPU::CF_TC_R600); case CF_TC:
case CF_CALL_FS: Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
return TII->get(AMDGPU::CF_CALL_FS_R600); break;
case CF_WHILE_LOOP: case CF_CALL_FS:
return TII->get(AMDGPU::WHILE_LOOP_R600); Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
case CF_END_LOOP: break;
return TII->get(AMDGPU::END_LOOP_R600); case CF_WHILE_LOOP:
case CF_LOOP_BREAK: Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
return TII->get(AMDGPU::LOOP_BREAK_R600); break;
case CF_LOOP_CONTINUE: case CF_END_LOOP:
return TII->get(AMDGPU::CF_CONTINUE_R600); Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
case CF_JUMP: break;
return TII->get(AMDGPU::CF_JUMP_R600); case CF_LOOP_BREAK:
case CF_ELSE: Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
return TII->get(AMDGPU::CF_ELSE_R600); break;
case CF_POP: case CF_LOOP_CONTINUE:
return TII->get(AMDGPU::POP_R600); Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
} break;
} else { case CF_JUMP:
switch (CFI) { Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
case CF_TC: break;
return TII->get(AMDGPU::CF_TC_EG); case CF_ELSE:
case CF_CALL_FS: Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
return TII->get(AMDGPU::CF_CALL_FS_EG); break;
case CF_WHILE_LOOP: case CF_POP:
return TII->get(AMDGPU::WHILE_LOOP_EG); Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
case CF_END_LOOP: break;
return TII->get(AMDGPU::END_LOOP_EG); case CF_END:
case CF_LOOP_BREAK: if (ST.device()->getGeneration() == AMDGPUDeviceInfo::HD6XXX) {
return TII->get(AMDGPU::LOOP_BREAK_EG); Opcode = AMDGPU::CF_END_CM;
case CF_LOOP_CONTINUE: break;
return TII->get(AMDGPU::CF_CONTINUE_EG);
case CF_JUMP:
return TII->get(AMDGPU::CF_JUMP_EG);
case CF_ELSE:
return TII->get(AMDGPU::CF_ELSE_EG);
case CF_POP:
return TII->get(AMDGPU::POP_EG);
} }
Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
break;
} }
assert (Opcode && "No opcode selected");
return TII->get(Opcode);
} }
MachineBasicBlock::iterator MachineBasicBlock::iterator
@ -310,6 +308,15 @@ public:
CfCount++; CfCount++;
break; break;
} }
case AMDGPU::RETURN: {
  // Replace the RETURN pseudo with the generation-specific CF_END
  // instruction, inserted in front of MI so it takes RETURN's position.
  BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END));
  CfCount++;
  // When the CF instruction count is odd, append a PAD so it becomes even.
  // This must run while MI is still in the block: findDebugLoc(MI) reads
  // MI, so calling it after eraseFromParent() would touch an erased
  // instruction. Inserting PAD before I and then removing MI leaves the
  // same final order (CF_END, PAD) as erasing first.
  // NOTE(review): assumes I already points past MI -- confirm at the loop head.
  if (CfCount % 2) {
    BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD));
    CfCount++;
  }
  MI->eraseFromParent();
  // Explicit break instead of silently falling through into `default`.
  break;
}
default: default:
break; break;
} }

View File

@ -897,6 +897,7 @@ class CF_WORD1_EG {
bits<2> COND; bits<2> COND;
bits<6> COUNT; bits<6> COUNT;
bits<1> VALID_PIXEL_MODE; bits<1> VALID_PIXEL_MODE;
bits<1> END_OF_PROGRAM;
bits<8> CF_INST; bits<8> CF_INST;
bits<1> BARRIER; bits<1> BARRIER;
@ -919,6 +920,7 @@ ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
let CF_CONST = 0; let CF_CONST = 0;
let VALID_PIXEL_MODE = 0; let VALID_PIXEL_MODE = 0;
let COND = 0; let COND = 0;
let END_OF_PROGRAM = 0;
let Inst{31-0} = Word0; let Inst{31-0} = Word0;
let Inst{63-32} = Word1; let Inst{63-32} = Word1;
@ -934,6 +936,10 @@ def STACK_SIZE : AMDGPUInst <(outs),
let Inst = num; let Inst = num;
} }
// PAD: instruction record with no output or input operands, the asm string
// "PAD" and an empty selection-pattern list. It declares a 64-bit Inst field
// but assigns no bits to it in this definition.
// NOTE(review): presumably emitted as a filler word after CF_END when the
// control-flow instruction count is odd -- confirm how unset Inst bits are
// encoded by the code emitter.
def PAD : AMDGPUInst <(outs), (ins), "PAD", [] > {
field bits<64> Inst;
}
let Predicates = [isR600toCayman] in { let Predicates = [isR600toCayman] in {
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -1486,6 +1492,12 @@ let Predicates = [isR600] in {
"POP @$ADDR POP:$POP_COUNT"> { "POP @$ADDR POP:$POP_COUNT"> {
let COUNT = 0; let COUNT = 0;
} }
// CF_END_R600: CF_CLAUSE_R600 record with no input operands and the asm
// string "CF_END". COUNT, POP_COUNT and ADDR are fixed to 0 and the
// END_OF_PROGRAM bit is set to 1.
// NOTE(review): the first template argument (0) is presumably the CF_INST
// opcode value -- confirm against the CF_CLAUSE_R600 class definition.
def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> {
let COUNT = 0;
let POP_COUNT = 0;
let ADDR = 0;
let END_OF_PROGRAM = 1;
}
} }
@ -1690,7 +1702,12 @@ let hasSideEffects = 1 in {
"POP @$ADDR POP:$POP_COUNT"> { "POP @$ADDR POP:$POP_COUNT"> {
let COUNT = 0; let COUNT = 0;
} }
// CF_END_EG: CF_CLAUSE_EG record with no input operands and the asm string
// "CF_END". COUNT, POP_COUNT and ADDR are fixed to 0 and the END_OF_PROGRAM
// bit is set to 1.
// NOTE(review): the first template argument (0) is presumably the CF_INST
// opcode value -- confirm against the CF_CLAUSE_EG class definition.
def CF_END_EG : CF_CLAUSE_EG<0, (ins), "CF_END"> {
let COUNT = 0;
let POP_COUNT = 0;
let ADDR = 0;
let END_OF_PROGRAM = 1;
}
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// Memory read/write instructions // Memory read/write instructions
@ -1935,6 +1952,11 @@ def : Pat <
(MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1))) (MOV_IMM_I32 CONST.FP_UINT_MAX_PLUS_1)))
>; >;
// CF_END_CM: CF_CLAUSE_EG record with no input operands and the asm string
// "CF_END", instantiated with 32 as the first template argument. ADDR,
// POP_COUNT and COUNT are fixed to 0.
// END_OF_PROGRAM is not overridden here (unlike CF_END_R600 / CF_END_EG), so
// it keeps whatever value CF_CLAUSE_EG supplies.
// NOTE(review): presumably 32 is the CF_INST opcode for the Cayman CF_END and
// the class default for END_OF_PROGRAM is 0 -- confirm against CF_CLAUSE_EG.
def CF_END_CM : CF_CLAUSE_EG<32, (ins), "CF_END"> {
let ADDR = 0;
let POP_COUNT = 0;
let COUNT = 0;
}
def : Pat<(fsqrt R600_Reg32:$src), def : Pat<(fsqrt R600_Reg32:$src),
(MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>; (MUL R600_Reg32:$src, (RECIPSQRT_CLAMPED_cm R600_Reg32:$src))>;

View File

@ -9,7 +9,7 @@
; This was fixed by adding an additional pattern in R600Instructions.td to ; This was fixed by adding an additional pattern in R600Instructions.td to
; match this pattern with a CNDGE_INT. ; match this pattern with a CNDGE_INT.
; CHECK: RETURN ; CHECK: CF_END
define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%den_ptr = getelementptr i32 addrspace(1)* %in, i32 1 %den_ptr = getelementptr i32 addrspace(1)* %in, i32 1

View File

@ -3,7 +3,7 @@
;The code generated by udiv is long and complex and may frequently change. ;The code generated by udiv is long and complex and may frequently change.
;The goal of this test is to make sure the ISel doesn't fail when it gets ;The goal of this test is to make sure the ISel doesn't fail when it gets
;a v4i32 udiv ;a v4i32 udiv
;CHECK: RETURN ;CHECK: CF_END
define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1

View File

@ -3,7 +3,7 @@
;The code generated by urem is long and complex and may frequently change. ;The code generated by urem is long and complex and may frequently change.
;The goal of this test is to make sure the ISel doesn't fail when it gets ;The goal of this test is to make sure the ISel doesn't fail when it gets
;a v4i32 urem ;a v4i32 urem
;CHECK: RETURN ;CHECK: CF_END
define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { define void @test(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1 %b_ptr = getelementptr <4 x i32> addrspace(1)* %in, i32 1