llvm-6502/test/CodeGen/NVPTX/nounroll.ll
Jingyue Wu f15b696b79 [NVPTX] Emit .pragma "nounroll" for loops marked with nounroll
Summary:
The CUDA driver can unroll loops when JIT-compiling PTX. To prevent the
CUDA driver from unrolling a loop marked with llvm.loop.unroll.disable,
we need to emit .pragma "nounroll" at the header of that loop.

This patch also extracts getting unroll metadata from loop ID metadata
into a shared helper function.

Test Plan: test/CodeGen/NVPTX/nounroll.ll

Reviewers: eliben, meheff, jholewinski

Reviewed By: jholewinski

Subscribers: jholewinski, llvm-commits

Differential Revision: http://reviews.llvm.org/D7041

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227703 91177308-0d34-0410-b5e6-96231b3b80d8
2015-02-01 02:27:45 +00:00

38 lines
1.0 KiB
LLVM

; Verifies that llc, targeting nvptx64, emits .pragma "nounroll" for a loop
; whose back-edge branch carries llvm.loop.unroll.disable metadata.
;
; The line below compiles this file to PTX for sm_20 and pipes the result to
; FileCheck, which matches it against the directives embedded in this file.
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
; Data layout and triple selecting the 64-bit NVPTX target.
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-unknown-unknown"
; @nounroll copies two floats from %input to %output in a single-block loop
; (induction variable starts at 0, loop exits once the incremented value
; equals 2). The loop's back-edge branch is tagged with !llvm.loop !0.
;
; The FileCheck directives below expect, in order: the function header, a
; .pragma "nounroll" line, one ld.f32, one st.f32, and then no further
; ld.f32 / st.f32 in the remaining output.
;
; Compiled from the following CUDA code:
;
; #pragma nounroll
; for (int i = 0; i < 2; ++i)
; output[i] = input[i];
define void @nounroll(float* %input, float* %output) {
; CHECK-LABEL: .visible .func nounroll(
entry:
br label %for.body
for.body:
; The pragma is expected in the output before the loop's load and store.
; CHECK: .pragma "nounroll"
; Loop counter: 0 on entry, %inc when arriving from the back edge.
%i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
; Widen the 32-bit counter to a 64-bit index shared by both GEPs.
%idxprom = sext i32 %i.06 to i64
%arrayidx = getelementptr inbounds float* %input, i64 %idxprom
%0 = load float* %arrayidx, align 4
; CHECK: ld.f32
%arrayidx2 = getelementptr inbounds float* %output, i64 %idxprom
store float %0, float* %arrayidx2, align 4
; CHECK: st.f32
%inc = add nuw nsw i32 %i.06, 1
%exitcond = icmp eq i32 %inc, 2
; Back edge; the !llvm.loop attachment associates loop metadata !0 with it.
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
; A second load or store after the first pair would presumably mean the loop
; body had been duplicated (unrolled) during compilation.
; CHECK-NOT: ld.f32
; CHECK-NOT: st.f32
for.end:
ret void
}
; !0 is the loop metadata node attached to the back-edge branch in @nounroll.
; Its first operand refers to itself; its second operand is !1.
!0 = distinct !{!0, !1}
; !1 holds the single string "llvm.loop.unroll.disable".
!1 = !{!"llvm.loop.unroll.disable"}