diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index cfaa36e1227..235013088b2 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -67,6 +67,13 @@ private: case AMDGPU::TEX_SAMPLE_C_G: case AMDGPU::TXD: case AMDGPU::TXD_SHADOW: + case AMDGPU::VTX_READ_GLOBAL_8_eg: + case AMDGPU::VTX_READ_GLOBAL_32_eg: + case AMDGPU::VTX_READ_GLOBAL_128_eg: + case AMDGPU::VTX_READ_PARAM_8_eg: + case AMDGPU::VTX_READ_PARAM_16_eg: + case AMDGPU::VTX_READ_PARAM_32_eg: + case AMDGPU::VTX_READ_PARAM_128_eg: return true; default: return false; @@ -207,6 +214,8 @@ public: case AMDGPU::EG_ExportSwz: case AMDGPU::R600_ExportBuf: case AMDGPU::R600_ExportSwz: + case AMDGPU::RAT_WRITE_CACHELESS_32_eg: + case AMDGPU::RAT_WRITE_CACHELESS_128_eg: DEBUG(dbgs() << CfCount << ":"; MI->dump();); CfCount++; break; @@ -215,7 +224,7 @@ public: MaxStack = std::max(MaxStack, CurrentStack); MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_WHILE_LOOP)) - .addImm(2); + .addImm(1); std::pair > Pair(CfCount, std::set()); Pair.second.insert(MIb); diff --git a/test/CodeGen/R600/loop-adress.ll b/test/CodeGen/R600/loop-adress.ll new file mode 100644 index 00000000000..dc9295e8e77 --- /dev/null +++ b/test/CodeGen/R600/loop-adress.ll @@ -0,0 +1,44 @@ +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s + +;CHECK: TEX +;CHECK: ALU_PUSH +;CHECK: JUMP @4 +;CHECK: ELSE @16 +;CHECK: TEX +;CHECK: LOOP_START_DX10 @15 +;CHECK: LOOP_BREAK @14 +;CHECK: POP @16 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64" +target triple = "r600--" + +define void @loop_ge(i32 addrspace(1)* nocapture %out, i32 %iterations) #0 { +entry: + %cmp5 = icmp sgt i32 %iterations, 0 + br i1 %cmp5, label %for.body, label %for.end + +for.body: ; preds = %for.body, %entry + %i.07.in = phi i32 [ %i.07, %for.body ], [ %iterations, %entry ] + %ai.06 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %i.07 = add nsw i32 %i.07.in, -1 + %arrayidx = getelementptr inbounds i32 addrspace(1)* %out, i32 %ai.06 + store i32 %i.07, i32 addrspace(1)* %arrayidx, align 4, !tbaa !4 + %add = add nsw i32 %ai.06, 1 + %exitcond = icmp eq i32 %add, %iterations + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +attributes #0 = { nounwind "fp-contract-model"="standard" "relocation-model"="pic" "ssp-buffers-size"="8" } + +!opencl.kernels = !{!0, !1, !2, !3} + +!0 = metadata !{void (i32 addrspace(1)*, i32)* @loop_ge} +!1 = metadata !{null} +!2 = metadata !{null} +!3 = metadata !{null} +!4 = metadata !{metadata !"int", metadata !5} +!5 = metadata !{metadata !"omnipotent char", metadata !6} +!6 = metadata !{metadata !"Simple C/C++ TBAA"}