/* Run three OpenACC kernels regions that differ only in how the
   parallelism dimensions are specified (left to the compiler/runtime,
   given as run-time expressions, given as literals), then verify the
   results on the host.  Any mismatch terminates the program via abort.  */

#include <stdlib.h>

/* Number of elements in each of the three arrays.  */
#define N (1024 * 512)

/* Type used for every loop counter in this file.  */
#define COUNTERTYPE unsigned int

int
main (void)
{
  unsigned int *__restrict a;
  unsigned int *__restrict b;
  unsigned int *__restrict c;

  a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
  b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));
  c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int));

  /* Fail in the same way as a result mismatch instead of handing a null
     pointer to the data clauses below.  */
  if (a == NULL || b == NULL || c == NULL)
    abort ();

  /* Parallelism dimensions: compiler/runtime decides.  */
#pragma acc kernels copyout (a[0:N])
  {
    for (COUNTERTYPE i = 0; i < N; i++)
      a[i] = i * 2;
  }

  /* Parallelism dimensions: variable.  The clause arguments are computed
     at run time from elements of a, which the previous region filled in
     and copied out.  */
#pragma acc kernels copyout (b[0:N]) \
  num_gangs (3 + a[3]) num_workers (5 + a[5]) vector_length (7 + a[7])
  /* { dg-prune-output "using vector_length \\(32\\), ignoring runtime setting" } */
  {
    for (COUNTERTYPE i = 0; i < N; i++)
      b[i] = i * 4;
  }

  /* Parallelism dimensions: literal.  */
#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) \
  num_gangs (3) num_workers (5) vector_length (7)
  /* { dg-prune-output "using vector_length \\(32\\), ignoring 7" } */
  {
    for (COUNTERTYPE ii = 0; ii < N; ii++)
      c[ii] = a[ii] + b[ii];
  }

  /* Host-side check of all three arrays against the values the regions
     above are expected to have produced.  */
  for (COUNTERTYPE i = 0; i < N; i++)
    {
      if (a[i] != i * 2)
	abort ();
      if (b[i] != i * 4)
	abort ();
      if (c[i] != a[i] + b[i])
	abort ();
    }

  free (a);
  free (b);
  free (c);

  return 0;
}