mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-18 10:31:57 +00:00
f15b696b79
Summary: The CUDA driver can unroll loops when JIT-compiling PTX. To prevent the CUDA driver from unrolling a loop marked with llvm.loop.unroll.disable, we need to emit .pragma "nounroll" at the header of that loop. This patch also extracts the retrieval of unroll metadata from loop ID metadata into a shared helper function. Test Plan: test/CodeGen/NVPTX/nounroll.ll Reviewers: eliben, meheff, jholewinski Reviewed By: jholewinski Subscribers: jholewinski, llvm-commits Differential Revision: http://reviews.llvm.org/D7041 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227703 91177308-0d34-0410-b5e6-96231b3b80d8
38 lines
1.0 KiB
LLVM
38 lines
1.0 KiB
LLVM
; Test driver line (next line): llc compiles this file (%s) for nvptx64 with
; -mcpu=sm_20 and pipes its output to FileCheck, which matches that output
; against the check directives embedded in this same file.
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
|
|
|
; Module-level target settings. The triple below names the same nvptx64
; target that is selected on the llc command line.
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
|
|
target triple = "nvptx64-unknown-unknown"
|
|
|
|
; Compiled from the following CUDA code:
|
|
;
|
|
; #pragma nounroll
|
|
; for (int i = 0; i < 2; ++i)
|
|
; output[i] = input[i];
|
|
; @nounroll: copies input[i] to output[i] for i = 0 and i = 1 using a
; single-block loop (%for.body) whose back-edge branch carries the loop
; metadata attachment !llvm.loop !0.
; The embedded FileCheck directives expect the generated code to contain
; .pragma "nounroll" after the function label and before the loop's load,
; followed by exactly one f32 load / f32 store pair.
define void @nounroll(float* %input, float* %output) {
|
|
; CHECK-LABEL: .visible .func nounroll(
|
|
entry:
|
|
br label %for.body
|
|
|
|
for.body:
|
|
; CHECK: .pragma "nounroll"
|
|
; Loop counter: 0 when arriving from %entry, %inc when arriving via the
; back edge from %for.body.
%i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
|
; Sign-extend the i32 counter to i64; the result indexes both pointers below.
%idxprom = sext i32 %i.06 to i64
|
|
%arrayidx = getelementptr inbounds float* %input, i64 %idxprom
|
|
%0 = load float* %arrayidx, align 4
|
|
; CHECK: ld.f32
|
|
%arrayidx2 = getelementptr inbounds float* %output, i64 %idxprom
|
|
store float %0, float* %arrayidx2, align 4
|
|
; CHECK: st.f32
|
|
%inc = add nuw nsw i32 %i.06, 1
|
|
; Exit to %for.end once the incremented counter equals 2; otherwise loop.
%exitcond = icmp eq i32 %inc, 2
|
|
; The !llvm.loop attachment on this branch ties the loop to loop ID !0.
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
|
|
; The two negative checks below require that no further ld.f32 / st.f32
; appears after the pair matched above, i.e. the loop body is emitted only
; once (presumably guarding against the body being duplicated by unrolling).
; CHECK-NOT: ld.f32
|
|
; CHECK-NOT: st.f32
|
|
|
|
for.end:
|
|
ret void
|
|
}
|
|
|
|
; Loop ID node named by the !llvm.loop attachment on the back-edge branch in
; @nounroll. Its first operand is a reference to itself; its second operand
; is the hint node !1.
!0 = distinct !{!0, !1}
|
|
; Loop hint carried by !0: the "llvm.loop.unroll.disable" string.
!1 = !{!"llvm.loop.unroll.disable"}
|