mirror of
https://github.com/autc04/Retro68.git
synced 2024-11-28 21:49:33 +00:00
184 lines
3.7 KiB
C
184 lines
3.7 KiB
C
/* { dg-require-effective-target size32plus } */
|
|
/* { dg-additional-options "-O2 -fopenmp -fdump-tree-vect-details" } */
|
|
/* { dg-additional-options "-msse2" { target sse2_runtime } } */
|
|
/* { dg-additional-options "-mavx" { target avx_runtime } } */
|
|
/* { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { target sse2_runtime } } } */
|
|
|
|
extern void abort (void);
|
|
int r, a[1024], b[1024];
|
|
unsigned short r2, b2[1024];
|
|
unsigned char r3, b3[1024];
|
|
|
|
__attribute__((noipa)) void
|
|
foo (int *a, int *b, unsigned short *b2, unsigned char *b3)
|
|
{
|
|
#pragma omp for simd reduction (inscan, +:r, r2, r3) if (simd:0)
|
|
for (int i = 0; i < 1024; i++)
|
|
{
|
|
{ r += a[i]; r2 += a[i]; r3 += a[i]; }
|
|
#pragma omp scan inclusive(r, r2, r3)
|
|
{
|
|
b[i] = r;
|
|
b2[i] = r2;
|
|
b3[i] = r3;
|
|
}
|
|
}
|
|
}
|
|
|
|
__attribute__((noipa)) int
|
|
bar (unsigned short *s2p, unsigned char *s3p)
|
|
{
|
|
int s = 0;
|
|
unsigned short s2 = 0;
|
|
unsigned char s3 = 0;
|
|
#pragma omp parallel
|
|
#pragma omp for simd reduction (inscan, +:s, s2, s3) simdlen (1)
|
|
for (int i = 0; i < 1024; i++)
|
|
{
|
|
{
|
|
s += 2 * a[i];
|
|
s2 += 2 * a[i];
|
|
s3 += 2 * a[i];
|
|
}
|
|
#pragma omp scan inclusive(s, s2, s3)
|
|
{ b[i] = s; b2[i] = s2; b3[i] = s3; }
|
|
}
|
|
*s2p = s2;
|
|
*s3p = s3;
|
|
return s;
|
|
}
|
|
|
|
__attribute__((noipa)) void
|
|
baz (int *a, int *b, unsigned short *b2, unsigned char *b3)
|
|
{
|
|
#pragma omp parallel for simd reduction (inscan, +:r, r2, r3)
|
|
for (int i = 0; i < 1024; i++)
|
|
{
|
|
{
|
|
r += a[i];
|
|
r2 += a[i];
|
|
r3 += a[i];
|
|
}
|
|
#pragma omp scan inclusive(r, r2, r3)
|
|
{
|
|
b[i] = r;
|
|
b2[i] = r2;
|
|
b3[i] = r3;
|
|
}
|
|
}
|
|
}
|
|
|
|
__attribute__((noipa)) int
|
|
qux (unsigned short *s2p, unsigned char *s3p)
|
|
{
|
|
int s = 0;
|
|
unsigned short s2 = 0;
|
|
unsigned char s3 = 0;
|
|
#pragma omp parallel for simd reduction (inscan, +:s, s2, s3)
|
|
for (int i = 0; i < 1024; i++)
|
|
{
|
|
{ s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; }
|
|
#pragma omp scan inclusive(s, s2, s3)
|
|
{ b[i] = s; b2[i] = s2; b3[i] = s3; }
|
|
}
|
|
*s2p = s2;
|
|
*s3p = s3;
|
|
return s;
|
|
}
|
|
|
|
int
|
|
main ()
|
|
{
|
|
int s = 0;
|
|
unsigned short s2;
|
|
unsigned char s3;
|
|
for (int i = 0; i < 1024; ++i)
|
|
{
|
|
a[i] = i;
|
|
b[i] = -1;
|
|
b2[i] = -1;
|
|
b3[i] = -1;
|
|
asm ("" : "+g" (i));
|
|
}
|
|
#pragma omp parallel
|
|
foo (a, b, b2, b3);
|
|
if (r != 1024 * 1023 / 2
|
|
|| r2 != (unsigned short) r
|
|
|| r3 != (unsigned char) r)
|
|
abort ();
|
|
for (int i = 0; i < 1024; ++i)
|
|
{
|
|
s += i;
|
|
if (b[i] != s
|
|
|| b2[i] != (unsigned short) s
|
|
|| b3[i] != (unsigned char) s)
|
|
abort ();
|
|
else
|
|
{
|
|
b[i] = 25;
|
|
b2[i] = 24;
|
|
b3[i] = 26;
|
|
}
|
|
}
|
|
if (bar (&s2, &s3) != 1024 * 1023)
|
|
abort ();
|
|
if (s2 != (unsigned short) (1024 * 1023)
|
|
|| s3 != (unsigned char) (1024 * 1023))
|
|
abort ();
|
|
s = 0;
|
|
for (int i = 0; i < 1024; ++i)
|
|
{
|
|
s += 2 * i;
|
|
if (b[i] != s
|
|
|| b2[i] != (unsigned short) s
|
|
|| b3[i] != (unsigned char) s)
|
|
abort ();
|
|
else
|
|
{
|
|
b[i] = -1;
|
|
b2[i] = -1;
|
|
b3[i] = -1;
|
|
}
|
|
}
|
|
r = 0;
|
|
r2 = 0;
|
|
r3 = 0;
|
|
baz (a, b, b2, b3);
|
|
if (r != 1024 * 1023 / 2
|
|
|| r2 != (unsigned short) r
|
|
|| r3 != (unsigned char) r)
|
|
abort ();
|
|
s = 0;
|
|
for (int i = 0; i < 1024; ++i)
|
|
{
|
|
s += i;
|
|
if (b[i] != s
|
|
|| b2[i] != (unsigned short) s
|
|
|| b3[i] != (unsigned char) s)
|
|
abort ();
|
|
else
|
|
{
|
|
b[i] = 25;
|
|
b2[i] = 24;
|
|
b3[i] = 26;
|
|
}
|
|
}
|
|
s2 = 0;
|
|
s3 = 0;
|
|
if (qux (&s2, &s3) != 1024 * 1023)
|
|
abort ();
|
|
if (s2 != (unsigned short) (1024 * 1023)
|
|
|| s3 != (unsigned char) (1024 * 1023))
|
|
abort ();
|
|
s = 0;
|
|
for (int i = 0; i < 1024; ++i)
|
|
{
|
|
s += 2 * i;
|
|
if (b[i] != s
|
|
|| b2[i] != (unsigned short) s
|
|
|| b3[i] != (unsigned char) s)
|
|
abort ();
|
|
}
|
|
return 0;
|
|
}
|