/* Affinity tests.
   Copyright (C) 2013-2015 Free Software Foundation, Inc.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* { dg-do run } */
/* { dg-set-target-env-var OMP_PROC_BIND "false" } */
/* { dg-additional-options "-DINTERPOSE_GETAFFINITY -DDO_FORK -ldl" { target *-*-linux* } } */

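/* Each parallel region below prints the binding of its threads via
   print_affinity () and, when the place list in use is one of the
   entries of places_array, checks it against the expected places.  */
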
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include "config.h"
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#ifdef DO_FORK
#include <signal.h>
#include <sys/wait.h>
#endif
#ifdef HAVE_PTHREAD_AFFINITY_NP
#include <sched.h>
#include <pthread.h>
#ifdef INTERPOSE_GETAFFINITY
#include <dlfcn.h>
#endif
#endif

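/* Table of OMP_PLACES settings exercised by the test: each entry gives
   the place list string, the number of places it contains and the
   { start, len } CPU intervals those places are expected to expand to.  */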
struct place
{
  int start, len;
};
struct places
{
  char name[40];
  int count;
  struct place places[8];
} places_array[] = {
  { "", 1, { { -1, -1 } } },
  { "{0}:8", 8,
    { { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 },
      { 4, 1 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } },
  { "{7,6}:2:-3", 2, { { 6, 2 }, { 3, 2 } } },
  { "{6,7}:4:-2,!{2,3}", 3, { { 6, 2 }, { 4, 2 }, { 0, 2 } } },
  { "{1}:7:1", 7,
    { { 1, 1 }, { 2, 1 }, { 3, 1 },
      { 4, 1 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } },
  { "{0,1},{3,2,4},{6,5,!6},{6},{7:2:-1,!6}", 5,
    { { 0, 2 }, { 2, 3 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } }
};

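/* Number of contiguous CPUs (starting from CPU 0) in the initial affinity
   mask and the cpuset size it was reported with; filled in by the
   interposed pthread_getaffinity_np () below when INTERPOSE_GETAFFINITY
   is in use.  */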
unsigned long contig_cpucount;
unsigned long min_cpusetsize;

#if defined (HAVE_PTHREAD_AFFINITY_NP) && defined (_SC_NPROCESSORS_CONF) \
    && defined (CPU_ALLOC_SIZE)

#if defined (RTLD_NEXT) && defined (INTERPOSE_GETAFFINITY)
int (*orig_getaffinity_np) (pthread_t, size_t, cpu_set_t *);

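/* Interposed pthread_getaffinity_np: look up the real function with
   dlsym (RTLD_NEXT), forward the call, and on the first successful query
   record how many CPUs starting from 0 are set in the mask
   (contig_cpucount) together with the cpuset size used (min_cpusetsize).  */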
int
pthread_getaffinity_np (pthread_t thread, size_t cpusetsize, cpu_set_t *cpuset)
{
  int ret;
  unsigned long i, max;
  if (orig_getaffinity_np == NULL)
    {
      orig_getaffinity_np = (int (*) (pthread_t, size_t, cpu_set_t *))
        dlsym (RTLD_NEXT, "pthread_getaffinity_np");
      if (orig_getaffinity_np == NULL)
        exit (0);
    }
  ret = orig_getaffinity_np (thread, cpusetsize, cpuset);
  if (ret != 0)
    return ret;
  if (contig_cpucount == 0)
    {
      max = 8 * cpusetsize;
      for (i = 0; i < max; i++)
        if (!CPU_ISSET_S (i, cpusetsize, cpuset))
          break;
      contig_cpucount = i;
      min_cpusetsize = cpusetsize;
    }
  return ret;
}
#endif

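/* Print the calling thread's affinity mask in the { start : len } notation
   used by OMP_PLACES and compare it with the expected place P; P.start of
   -1 means no specific binding is expected.  */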
void
print_affinity (struct place p)
{
  static unsigned long size;
  if (size == 0)
    {
      if (min_cpusetsize)
        size = min_cpusetsize;
      else
        {
          size = sysconf (_SC_NPROCESSORS_CONF);
          size = CPU_ALLOC_SIZE (size);
          if (size < sizeof (cpu_set_t))
            size = sizeof (cpu_set_t);
        }
    }
  cpu_set_t *cpusetp = (cpu_set_t *) __builtin_alloca (size);
  if (pthread_getaffinity_np (pthread_self (), size, cpusetp) == 0)
    {
      unsigned long i, len, max = 8 * size;
      int notfirst = 0, unexpected = 1;

      printf (" bound to {");
      for (i = 0, len = 0; i < max; i++)
        if (CPU_ISSET_S (i, size, cpusetp))
          {
            if (len == 0)
              {
                if (notfirst)
                  {
                    unexpected = 1;
                    printf (",");
                  }
                else if (i == (unsigned long) p.start)
                  unexpected = 0;
                notfirst = 1;
                printf ("%lu", i);
              }
            ++len;
          }
        else
          {
            if (len && len != (unsigned long) p.len)
              unexpected = 1;
            if (len > 1)
              printf (":%lu", len);
            len = 0;
          }
      if (len && len != (unsigned long) p.len)
        unexpected = 1;
      if (len > 1)
        printf (":%lu", len);
      printf ("}");
      if (p.start != -1 && unexpected)
        {
          printf (", expected {%d", p.start);
          if (p.len != 1)
            printf (":%d", p.len);
          printf ("} instead");
        }
      else if (p.start != -1)
        printf (", verified");
    }
}
#else
void
print_affinity (struct place p)
{
  (void) p.start;
  (void) p.len;
}
#endif

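/* Which expectations apply depends on OMP_PROC_BIND in the environment:
   "false" (the default set above via dg-set-target-env-var), "true" or
   "spread,master,close".  */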
int
main ()
{
  char *env_proc_bind = getenv ("OMP_PROC_BIND");
  int test_false = env_proc_bind && strcmp (env_proc_bind, "false") == 0;
  int test_true = env_proc_bind && strcmp (env_proc_bind, "true") == 0;
  int test_spread_master_close
    = env_proc_bind && strcmp (env_proc_bind, "spread,master,close") == 0;
  char *env_places = getenv ("OMP_PLACES");
  int test_places = 0;

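  /* In the default OMP_PROC_BIND=false run with at least 8 contiguous CPUs,
     re-execute this test via fork ()/execl () for every entry of
     places_array, once with OMP_PROC_BIND=true and once with
     OMP_PROC_BIND=spread,master,close.  */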
#ifdef DO_FORK
  if (env_places == NULL && contig_cpucount >= 8 && test_false
      && getenv ("GOMP_AFFINITY") == NULL)
    {
      int i, j, status;
      pid_t pid;
      for (j = 0; j < 2; j++)
        {
          if (setenv ("OMP_PROC_BIND", j ? "spread,master,close" : "true", 1)
              < 0)
            break;
          for (i = sizeof (places_array) / sizeof (places_array[0]) - 1;
               i; --i)
            {
              if (setenv ("OMP_PLACES", places_array[i].name, 1) < 0)
                break;
              pid = fork ();
              if (pid == -1)
                break;
              if (pid == 0)
                {
                  execl ("/proc/self/exe", "affinity-1.exe", NULL);
                  _exit (1);
                }
              if (waitpid (pid, &status, 0) < 0)
                break;
              if (WIFSIGNALED (status) && WTERMSIG (status) == SIGABRT)
                abort ();
              else if (!WIFEXITED (status) || WEXITSTATUS (status) != 0)
                break;
            }
          if (i)
            break;
        }
    }
#endif

  int first = 1;
  if (env_proc_bind)
    {
      printf ("OMP_PROC_BIND='%s'", env_proc_bind);
      first = 0;
    }
  if (env_places)
    printf ("%sOMP_PLACES='%s'", first ? "" : " ", env_places);
  printf ("\n");

  if (env_places && contig_cpucount >= 8
      && (test_true || test_spread_master_close))
    {
      for (test_places = sizeof (places_array) / sizeof (places_array[0]) - 1;
           test_places; --test_places)
        if (strcmp (env_places, places_array[test_places].name) == 0)
          break;
    }

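/* Abort unless omp_get_proc_bind () matches what the active OMP_PROC_BIND
   setting implies: "false" always expects omp_proc_bind_false, "true"
   expects IF_TRUE and "spread,master,close" expects IF_S_M_C.  */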
#define verify(if_true, if_s_m_c) \
  if (test_false && omp_get_proc_bind () != omp_proc_bind_false) \
    abort (); \
  if (test_true && omp_get_proc_bind () != if_true) \
    abort (); \
  if (test_spread_master_close && omp_get_proc_bind () != if_s_m_c) \
    abort ();

  verify (omp_proc_bind_true, omp_proc_bind_spread);

  printf ("Initial thread");
  print_affinity (places_array[test_places].places[0]);
  printf ("\n");
  omp_set_nested (1);
  omp_set_dynamic (0);

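  /* Serial (if (0)) nested regions: no additional threads are started,
     only the inherited and explicitly requested bind-var values are
     checked.  */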
  #pragma omp parallel if (0)
  {
    verify (omp_proc_bind_true, omp_proc_bind_master);
    #pragma omp parallel if (0)
    {
      verify (omp_proc_bind_true, omp_proc_bind_close);
      #pragma omp parallel if (0)
      {
        verify (omp_proc_bind_true, omp_proc_bind_close);
      }
      #pragma omp parallel if (0) proc_bind (spread)
      {
        verify (omp_proc_bind_spread, omp_proc_bind_spread);
      }
    }
    #pragma omp parallel if (0) proc_bind (master)
    {
      verify (omp_proc_bind_master, omp_proc_bind_close);
      #pragma omp parallel if (0)
      {
        verify (omp_proc_bind_master, omp_proc_bind_close);
      }
      #pragma omp parallel if (0) proc_bind (spread)
      {
        verify (omp_proc_bind_spread, omp_proc_bind_spread);
      }
    }
  }

  /* True/spread */
  #pragma omp parallel num_threads (4)
  {
    verify (omp_proc_bind_true, omp_proc_bind_master);
    #pragma omp critical
    {
      struct place p = places_array[0].places[0];
      int thr = omp_get_thread_num ();
      printf ("#1 thread %d", thr);
      if (omp_get_num_threads () == 4 && test_spread_master_close)
        switch (places_array[test_places].count)
          {
          case 8:
            /* T = 4, P = 8, each subpartition has 2 places.  */
          case 7:
            /* T = 4, P = 7, each subpartition has 2 places, but the
               last partition has just one place.  */
            p = places_array[test_places].places[2 * thr];
            break;
          case 5:
            /* T = 4, P = 5, first subpartition has 2 places, the
               rest just one.  */
            p = places_array[test_places].places[thr ? 1 + thr : 0];
            break;
          case 3:
            /* T = 4, P = 3, unit sized subpartitions, first gets
               thr0 and thr3, second thr1, third thr2.  */
            p = places_array[test_places].places[thr == 3 ? 0 : thr];
            break;
          case 2:
            /* T = 4, P = 2, unit sized subpartitions, each with
               2 threads.  */
            p = places_array[test_places].places[thr / 2];
            break;
          }
      print_affinity (p);
      printf ("\n");
    }
    #pragma omp barrier
    if (omp_get_thread_num () == 3)
      {
        /* True/spread, true/master.  */
        #pragma omp parallel num_threads (3)
        {
          verify (omp_proc_bind_true, omp_proc_bind_close);
          #pragma omp critical
          {
            struct place p = places_array[0].places[0];
            int thr = omp_get_thread_num ();
            printf ("#1,#1 thread 3,%d", thr);
            if (omp_get_num_threads () == 3 && test_spread_master_close)
              /* Outer is spread, inner master, so just bind to the
                 place of the master thread, which is thr 3 above.  */
              switch (places_array[test_places].count)
                {
                case 8:
                case 7:
                  p = places_array[test_places].places[6];
                  break;
                case 5:
                  p = places_array[test_places].places[4];
                  break;
                case 3:
                  p = places_array[test_places].places[0];
                  break;
                case 2:
                  p = places_array[test_places].places[1];
                  break;
                }
            print_affinity (p);
            printf ("\n");
          }
        }
        /* True/spread, spread.  */
        #pragma omp parallel num_threads (5) proc_bind (spread)
        {
          verify (omp_proc_bind_spread, omp_proc_bind_close);
          #pragma omp critical
          {
            struct place p = places_array[0].places[0];
            int thr = omp_get_thread_num ();
            printf ("#1,#2 thread 3,%d", thr);
            if (omp_get_num_threads () == 5 && test_spread_master_close)
              /* Outer is spread, inner spread.  */
              switch (places_array[test_places].count)
                {
                case 8:
                  /* T = 5, P = 2, unit sized subpartitions.  */
                  p = places_array[test_places].places[thr == 4 ? 6
                                                       : 6 + thr / 2];
                  break;
                /* The rest are T = 5, P = 1.  */
                case 7:
                  p = places_array[test_places].places[6];
                  break;
                case 5:
                  p = places_array[test_places].places[4];
                  break;
                case 3:
                  p = places_array[test_places].places[0];
                  break;
                case 2:
                  p = places_array[test_places].places[1];
                  break;
                }
            print_affinity (p);
            printf ("\n");
          }
          #pragma omp barrier
          if (omp_get_thread_num () == 3)
            {
              /* True/spread, spread, close.  */
              #pragma omp parallel num_threads (5) proc_bind (close)
              {
                verify (omp_proc_bind_close, omp_proc_bind_close);
                #pragma omp critical
                {
                  struct place p = places_array[0].places[0];
                  int thr = omp_get_thread_num ();
                  printf ("#1,#2,#1 thread 3,3,%d", thr);
                  if (omp_get_num_threads () == 5 && test_spread_master_close)
                    /* Outer is spread, inner spread, innermost close.  */
                    switch (places_array[test_places].count)
                      {
                      /* All are T = 5, P = 1.  */
                      case 8:
                        p = places_array[test_places].places[7];
                        break;
                      case 7:
                        p = places_array[test_places].places[6];
                        break;
                      case 5:
                        p = places_array[test_places].places[4];
                        break;
                      case 3:
                        p = places_array[test_places].places[0];
                        break;
                      case 2:
                        p = places_array[test_places].places[1];
                        break;
                      }
                  print_affinity (p);
                  printf ("\n");
                }
              }
            }
        }
        /* True/spread, master.  */
        #pragma omp parallel num_threads (4) proc_bind(master)
        {
          verify (omp_proc_bind_master, omp_proc_bind_close);
          #pragma omp critical
          {
            struct place p = places_array[0].places[0];
            int thr = omp_get_thread_num ();
            printf ("#1,#3 thread 3,%d", thr);
            if (omp_get_num_threads () == 4 && test_spread_master_close)
              /* Outer is spread, inner master, so just bind to the
                 place of the master thread, which is thr 3 above.  */
              switch (places_array[test_places].count)
                {
                case 8:
                case 7:
                  p = places_array[test_places].places[6];
                  break;
                case 5:
                  p = places_array[test_places].places[4];
                  break;
                case 3:
                  p = places_array[test_places].places[0];
                  break;
                case 2:
                  p = places_array[test_places].places[1];
                  break;
                }
            print_affinity (p);
            printf ("\n");
          }
        }
        /* True/spread, close.  */
        #pragma omp parallel num_threads (6) proc_bind (close)
        {
          verify (omp_proc_bind_close, omp_proc_bind_close);
          #pragma omp critical
          {
            struct place p = places_array[0].places[0];
            int thr = omp_get_thread_num ();
            printf ("#1,#4 thread 3,%d", thr);
            if (omp_get_num_threads () == 6 && test_spread_master_close)
              /* Outer is spread, inner close.  */
              switch (places_array[test_places].count)
                {
                case 8:
                  /* T = 6, P = 2, unit sized subpartitions.  */
                  p = places_array[test_places].places[6 + thr / 3];
                  break;
                /* The rest are T = 6, P = 1.  */
                case 7:
                  p = places_array[test_places].places[6];
                  break;
                case 5:
                  p = places_array[test_places].places[4];
                  break;
                case 3:
                  p = places_array[test_places].places[0];
                  break;
                case 2:
                  p = places_array[test_places].places[1];
                  break;
                }
            print_affinity (p);
            printf ("\n");
          }
        }
      }
  }

  /* Spread.  */
  #pragma omp parallel num_threads (5) proc_bind(spread)
  {
    verify (omp_proc_bind_spread, omp_proc_bind_master);
    #pragma omp critical
    {
      struct place p = places_array[0].places[0];
      int thr = omp_get_thread_num ();
      printf ("#2 thread %d", thr);
      if (omp_get_num_threads () == 5
          && (test_spread_master_close || test_true))
        switch (places_array[test_places].count)
          {
          case 8:
            /* T = 5, P = 8, first 3 subpartitions have 2 places, last
               2 one place.  */
            p = places_array[test_places].places[thr < 3 ? 2 * thr : 3 + thr];
            break;
          case 7:
            /* T = 5, P = 7, first 2 subpartitions have 2 places, last
               3 one place.  */
            p = places_array[test_places].places[thr < 2 ? 2 * thr : 2 + thr];
            break;
          case 5:
            /* T = 5, P = 5, unit sized subpartitions, each one with one
               thread.  */
            p = places_array[test_places].places[thr];
            break;
          case 3:
            /* T = 5, P = 3, unit sized subpartitions, first gets
               thr0 and thr3, second thr1 and thr4, third thr2.  */
            p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr];
            break;
          case 2:
            /* T = 5, P = 2, unit sized subpartitions, first with
               thr{0,1,4} and second with thr{2,3}.  */
            p = places_array[test_places].places[thr == 4 ? 0 : thr / 2];
            break;
          }
      print_affinity (p);
      printf ("\n");
    }
    #pragma omp barrier
    if (omp_get_thread_num () == 3)
      {
        int pp = 0;
        switch (places_array[test_places].count)
          {
          case 8: pp = 6; break;
          case 7: pp = 5; break;
          case 5: pp = 3; break;
          case 2: pp = 1; break;
          }
        /* Spread, spread/master.  */
        #pragma omp parallel num_threads (3) firstprivate (pp)
        {
          verify (omp_proc_bind_spread, omp_proc_bind_close);
          #pragma omp critical
          {
            struct place p = places_array[0].places[0];
            int thr = omp_get_thread_num ();
            printf ("#2,#1 thread 3,%d", thr);
            if (test_spread_master_close || test_true)
              /* Outer is spread, inner spread resp. master, but we have
                 just unit sized partitions.  */
              p = places_array[test_places].places[pp];
            print_affinity (p);
            printf ("\n");
          }
        }
        /* Spread, spread.  */
        #pragma omp parallel num_threads (5) proc_bind (spread) \
            firstprivate (pp)
        {
          verify (omp_proc_bind_spread, omp_proc_bind_close);
          #pragma omp critical
          {
            struct place p = places_array[0].places[0];
            int thr = omp_get_thread_num ();
            printf ("#2,#2 thread 3,%d", thr);
            if (test_spread_master_close || test_true)
              /* Outer is spread, inner spread, but we have
                 just unit sized partitions.  */
              p = places_array[test_places].places[pp];
            print_affinity (p);
            printf ("\n");
          }
        }
        /* Spread, master.  */
        #pragma omp parallel num_threads (4) proc_bind(master) \
            firstprivate(pp)
        {
          verify (omp_proc_bind_master, omp_proc_bind_close);
          #pragma omp critical
          {
            struct place p = places_array[0].places[0];
            int thr = omp_get_thread_num ();
            printf ("#2,#3 thread 3,%d", thr);
            if (test_spread_master_close || test_true)
              /* Outer is spread, inner master, but we have
                 just unit sized partitions.  */
              p = places_array[test_places].places[pp];
            print_affinity (p);
            printf ("\n");
          }
        }
        /* Spread, close.  */
        #pragma omp parallel num_threads (6) proc_bind (close) \
            firstprivate (pp)
        {
          verify (omp_proc_bind_close, omp_proc_bind_close);
          #pragma omp critical
          {
            struct place p = places_array[0].places[0];
            int thr = omp_get_thread_num ();
            printf ("#2,#4 thread 3,%d", thr);
            if (test_spread_master_close || test_true)
              /* Outer is spread, inner close, but we have
                 just unit sized partitions.  */
              p = places_array[test_places].places[pp];
            print_affinity (p);
            printf ("\n");
          }
        }
      }
  }

  /* Master.  */
  #pragma omp parallel num_threads (3) proc_bind(master)
  {
    verify (omp_proc_bind_master, omp_proc_bind_master);
    #pragma omp critical
    {
      struct place p = places_array[0].places[0];
      int thr = omp_get_thread_num ();
      printf ("#3 thread %d", thr);
      if (test_spread_master_close || test_true)
        p = places_array[test_places].places[0];
      print_affinity (p);
      printf ("\n");
    }
    #pragma omp barrier
    if (omp_get_thread_num () == 2)
      {
        /* Master, master.  */
        #pragma omp parallel num_threads (4)
        {
          verify (omp_proc_bind_master, omp_proc_bind_close);
          #pragma omp critical
          {
            struct place p = places_array[0].places[0];
            int thr = omp_get_thread_num ();
            printf ("#3,#1 thread 2,%d", thr);
            if (test_spread_master_close || test_true)
              /* Outer is master, inner is master.  */
              p = places_array[test_places].places[0];
            print_affinity (p);
            printf ("\n");
          }
        }
        /* Master, spread.  */
        #pragma omp parallel num_threads (4) proc_bind (spread)
        {
          verify (omp_proc_bind_spread, omp_proc_bind_close);
          #pragma omp critical
          {
            struct place p = places_array[0].places[0];
            int thr = omp_get_thread_num ();
            printf ("#3,#2 thread 2,%d", thr);
            if (omp_get_num_threads () == 4
                && (test_spread_master_close || test_true))
              /* Outer is master, inner is spread.  */
              switch (places_array[test_places].count)
                {
                case 8:
                  /* T = 4, P = 8, each subpartition has 2 places.  */
                case 7:
                  /* T = 4, P = 7, each subpartition has 2 places, but the
                     last partition has just one place.  */
                  p = places_array[test_places].places[2 * thr];
                  break;
                case 5:
                  /* T = 4, P = 5, first subpartition has 2 places, the
                     rest just one.  */
                  p = places_array[test_places].places[thr ? 1 + thr : 0];
                  break;
                case 3:
                  /* T = 4, P = 3, unit sized subpartitions, first gets
                     thr0 and thr3, second thr1, third thr2.  */
                  p = places_array[test_places].places[thr == 3 ? 0 : thr];
                  break;
                case 2:
                  /* T = 4, P = 2, unit sized subpartitions, each with
                     2 threads.  */
                  p = places_array[test_places].places[thr / 2];
                  break;
                }
            print_affinity (p);
            printf ("\n");
          }
          #pragma omp barrier
          if (omp_get_thread_num () == 0)
            {
              /* Master, spread, close.  */
              #pragma omp parallel num_threads (5) proc_bind (close)
              {
                verify (omp_proc_bind_close, omp_proc_bind_close);
                #pragma omp critical
                {
                  struct place p = places_array[0].places[0];
                  int thr = omp_get_thread_num ();
                  printf ("#3,#2,#1 thread 2,0,%d", thr);
                  if (omp_get_num_threads () == 5
                      && (test_spread_master_close || test_true))
                    /* Outer is master, inner spread, innermost close.  */
                    switch (places_array[test_places].count)
                      {
                      /* First 3 are T = 5, P = 2.  */
                      case 8:
                      case 7:
                      case 5:
                        p = places_array[test_places].places[(thr & 2) / 2];
                        break;
                      /* All the rest are T = 5, P = 1.  */
                      case 3:
                      case 2:
                        p = places_array[test_places].places[0];
                        break;
                      }
                  print_affinity (p);
                  printf ("\n");
                }
              }
            }
          #pragma omp barrier
          if (omp_get_thread_num () == 3)
            {
              /* Master, spread, close.  */
              #pragma omp parallel num_threads (5) proc_bind (close)
              {
                verify (omp_proc_bind_close, omp_proc_bind_close);
                #pragma omp critical
                {
                  struct place p = places_array[0].places[0];
                  int thr = omp_get_thread_num ();
                  printf ("#3,#2,#2 thread 2,3,%d", thr);
                  if (omp_get_num_threads () == 5
                      && (test_spread_master_close || test_true))
                    /* Outer is master, inner spread, innermost close.  */
                    switch (places_array[test_places].count)
                      {
                      case 8:
                        /* T = 5, P = 2.  */
                        p = places_array[test_places].places[6
                                                             + (thr & 2) / 2];
                        break;
                      /* All the rest are T = 5, P = 1.  */
                      case 7:
                        p = places_array[test_places].places[6];
                        break;
                      case 5:
                        p = places_array[test_places].places[4];
                        break;
                      case 3:
                        p = places_array[test_places].places[0];
                        break;
                      case 2:
                        p = places_array[test_places].places[1];
                        break;
                      }
                  print_affinity (p);
                  printf ("\n");
                }
              }
            }
        }
        /* Master, master.  */
        #pragma omp parallel num_threads (4) proc_bind(master)
        {
          verify (omp_proc_bind_master, omp_proc_bind_close);
          #pragma omp critical
          {
            struct place p = places_array[0].places[0];
            int thr = omp_get_thread_num ();
            printf ("#3,#3 thread 2,%d", thr);
            if (test_spread_master_close || test_true)
              /* Outer is master, inner master.  */
              p = places_array[test_places].places[0];
            print_affinity (p);
            printf ("\n");
          }
        }
        /* Master, close.  */
        #pragma omp parallel num_threads (6) proc_bind (close)
        {
          verify (omp_proc_bind_close, omp_proc_bind_close);
          #pragma omp critical
          {
            struct place p = places_array[0].places[0];
            int thr = omp_get_thread_num ();
            printf ("#3,#4 thread 2,%d", thr);
            if (omp_get_num_threads () == 6
                && (test_spread_master_close || test_true))
              switch (places_array[test_places].count)
                {
                case 8:
                  /* T = 6, P = 8.  */
                case 7:
                  /* T = 6, P = 7.  */
                  p = places_array[test_places].places[thr];
                  break;
                case 5:
                  /* T = 6, P = 5.  thr{0,5} go into the first place.  */
                  p = places_array[test_places].places[thr == 5 ? 0 : thr];
                  break;
                case 3:
                  /* T = 6, P = 3, two threads into each place.  */
                  p = places_array[test_places].places[thr / 2];
                  break;
                case 2:
                  /* T = 6, P = 2, 3 threads into each place.  */
                  p = places_array[test_places].places[thr / 3];
                  break;
                }
            print_affinity (p);
            printf ("\n");
          }
        }
      }
  }

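  /* Close.  */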
  #pragma omp parallel num_threads (5) proc_bind(close)
  {
    verify (omp_proc_bind_close, omp_proc_bind_master);
    #pragma omp critical
    {
      struct place p = places_array[0].places[0];
      int thr = omp_get_thread_num ();
      printf ("#4 thread %d", thr);
      if (omp_get_num_threads () == 5
          && (test_spread_master_close || test_true))
        switch (places_array[test_places].count)
          {
          case 8:
            /* T = 5, P = 8.  */
          case 7:
            /* T = 5, P = 7.  */
          case 5:
            /* T = 5, P = 5.  */
            p = places_array[test_places].places[thr];
            break;
          case 3:
            /* T = 5, P = 3, thr{0,3} in first place, thr{1,4} in second,
               thr2 in third.  */
            p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr];
            break;
          case 2:
            /* T = 5, P = 2, thr{0,1,4} in first place, thr{2,3} in second.  */
            p = places_array[test_places].places[thr == 4 ? 0 : thr / 2];
            break;
          }
      print_affinity (p);
      printf ("\n");
    }
    #pragma omp barrier
    if (omp_get_thread_num () == 2)
      {
        int pp = 0;
        switch (places_array[test_places].count)
          {
          case 8:
          case 7:
          case 5:
          case 3:
            pp = 2;
            break;
          case 2:
            pp = 1;
            break;
          }
        /* Close, close/master.  */
        #pragma omp parallel num_threads (4) firstprivate (pp)
        {
          verify (omp_proc_bind_close, omp_proc_bind_close);
          #pragma omp critical
          {
            struct place p = places_array[0].places[0];
            int thr = omp_get_thread_num ();
            printf ("#4,#1 thread 2,%d", thr);
            if (test_spread_master_close)
              /* Outer is close, inner is master.  */
              p = places_array[test_places].places[pp];
            else if (omp_get_num_threads () == 4 && test_true)
              /* Outer is close, inner is close.  */
              switch (places_array[test_places].count)
                {
                case 8:
                  /* T = 4, P = 8.  */
                case 7:
                  /* T = 4, P = 7.  */
                  p = places_array[test_places].places[2 + thr];
                  break;
                case 5:
                  /* T = 4, P = 5.  There is wrap-around for thr3.  */
                  p = places_array[test_places].places[thr == 3 ? 0 : 2 + thr];
                  break;
                case 3:
                  /* T = 4, P = 3, thr{0,3} go into p2, thr1 into p0, thr2
                     into p1.  */
                  p = places_array[test_places].places[(2 + thr) % 3];
                  break;
                case 2:
                  /* T = 4, P = 2, 2 threads into each place.  */
                  p = places_array[test_places].places[1 - thr / 2];
                  break;
                }

            print_affinity (p);
            printf ("\n");
          }
        }
        /* Close, spread.  */
        #pragma omp parallel num_threads (4) proc_bind (spread)
        {
          verify (omp_proc_bind_spread, omp_proc_bind_close);
          #pragma omp critical
          {
            struct place p = places_array[0].places[0];
            int thr = omp_get_thread_num ();
            printf ("#4,#2 thread 2,%d", thr);
            if (omp_get_num_threads () == 4
                && (test_spread_master_close || test_true))
              /* Outer is close, inner is spread.  */
              switch (places_array[test_places].count)
                {
                case 8:
                  /* T = 4, P = 8, each subpartition has 2 places.  */
                case 7:
                  /* T = 4, P = 7, each subpartition has 2 places, but the
                     last partition has just one place.  */
                  p = places_array[test_places].places[thr == 3 ? 0
                                                       : 2 + 2 * thr];
                  break;
                case 5:
                  /* T = 4, P = 5, first subpartition has 2 places, the
                     rest just one.  */
                  p = places_array[test_places].places[thr == 3 ? 0
                                                       : 2 + thr];
                  break;
                case 3:
                  /* T = 4, P = 3, unit sized subpartitions, third gets
                     thr0 and thr3, first thr1, second thr2.  */
                  p = places_array[test_places].places[thr == 0 ? 2 : thr - 1];
                  break;
                case 2:
                  /* T = 4, P = 2, unit sized subpartitions, each with
                     2 threads.  */
                  p = places_array[test_places].places[1 - thr / 2];
                  break;
                }
            print_affinity (p);
            printf ("\n");
          }
          #pragma omp barrier
          if (omp_get_thread_num () == 0)
            {
              /* Close, spread, close.  */
              #pragma omp parallel num_threads (5) proc_bind (close)
              {
                verify (omp_proc_bind_close, omp_proc_bind_close);
                #pragma omp critical
                {
                  struct place p = places_array[0].places[0];
                  int thr = omp_get_thread_num ();
                  printf ("#4,#2,#1 thread 2,0,%d", thr);
                  if (omp_get_num_threads () == 5
                      && (test_spread_master_close || test_true))
                    /* Outer is close, inner spread, innermost close.  */
                    switch (places_array[test_places].count)
                      {
                      case 8:
                      case 7:
                        /* T = 5, P = 2.  */
                        p = places_array[test_places].places[2
                                                             + (thr & 2) / 2];
                        break;
                      /* All the rest are T = 5, P = 1.  */
                      case 5:
                      case 3:
                        p = places_array[test_places].places[2];
                        break;
                      case 2:
                        p = places_array[test_places].places[1];
                        break;
                      }
                  print_affinity (p);
                  printf ("\n");
                }
              }
            }
          #pragma omp barrier
          if (omp_get_thread_num () == 2)
            {
              /* Close, spread, close.  */
              #pragma omp parallel num_threads (5) proc_bind (close)
              {
                verify (omp_proc_bind_close, omp_proc_bind_close);
                #pragma omp critical
                {
                  struct place p = places_array[0].places[0];
                  int thr = omp_get_thread_num ();
                  printf ("#4,#2,#2 thread 2,2,%d", thr);
                  if (omp_get_num_threads () == 5
                      && (test_spread_master_close || test_true))
                    /* Outer is close, inner spread, innermost close.  */
                    switch (places_array[test_places].count)
                      {
                      case 8:
                        /* T = 5, P = 2.  */
                        p = places_array[test_places].places[6
                                                             + (thr & 2) / 2];
                        break;
                      /* All the rest are T = 5, P = 1.  */
                      case 7:
                        p = places_array[test_places].places[6];
                        break;
                      case 5:
                        p = places_array[test_places].places[4];
                        break;
                      case 3:
                        p = places_array[test_places].places[1];
                        break;
                      case 2:
                        p = places_array[test_places].places[0];
                        break;
                      }
                  print_affinity (p);
                  printf ("\n");
                }
              }
            }
          #pragma omp barrier
          if (omp_get_thread_num () == 3)
            {
              /* Close, spread, close.  */
              #pragma omp parallel num_threads (5) proc_bind (close)
              {
                verify (omp_proc_bind_close, omp_proc_bind_close);
                #pragma omp critical
                {
                  struct place p = places_array[0].places[0];
                  int thr = omp_get_thread_num ();
                  printf ("#4,#2,#3 thread 2,3,%d", thr);
                  if (omp_get_num_threads () == 5
                      && (test_spread_master_close || test_true))
                    /* Outer is close, inner spread, innermost close.  */
                    switch (places_array[test_places].count)
                      {
                      case 8:
                      case 7:
                      case 5:
                        /* T = 5, P = 2.  */
                        p = places_array[test_places].places[(thr & 2) / 2];
                        break;
                      /* All the rest are T = 5, P = 1.  */
                      case 3:
                        p = places_array[test_places].places[2];
                        break;
                      case 2:
                        p = places_array[test_places].places[0];
                        break;
                      }
                  print_affinity (p);
                  printf ("\n");
                }
              }
            }
        }
        /* Close, master.  */
        #pragma omp parallel num_threads (4) proc_bind(master) \
            firstprivate (pp)
        {
          verify (omp_proc_bind_master, omp_proc_bind_close);
          #pragma omp critical
          {
            struct place p = places_array[0].places[0];
            int thr = omp_get_thread_num ();
            printf ("#4,#3 thread 2,%d", thr);
            if (test_spread_master_close || test_true)
              /* Outer is close, inner master.  */
              p = places_array[test_places].places[pp];
            print_affinity (p);
            printf ("\n");
          }
        }
        /* Close, close.  */
        #pragma omp parallel num_threads (6) proc_bind (close)
        {
          verify (omp_proc_bind_close, omp_proc_bind_close);
          #pragma omp critical
          {
            struct place p = places_array[0].places[0];
            int thr = omp_get_thread_num ();
            printf ("#4,#4 thread 2,%d", thr);
            if (omp_get_num_threads () == 6
                && (test_spread_master_close || test_true))
              switch (places_array[test_places].count)
                {
                case 8:
                  /* T = 6, P = 8.  */
                  p = places_array[test_places].places[2 + thr];
                  break;
                case 7:
                  /* T = 6, P = 7.  */
                  p = places_array[test_places].places[thr == 5 ? 0 : 2 + thr];
                  break;
                case 5:
                  /* T = 6, P = 5.  thr{0,5} go into the third place.  */
                  p = places_array[test_places].places[thr >= 3 ? thr - 3
                                                       : 2 + thr];
                  break;
                case 3:
                  /* T = 6, P = 3, two threads into each place.  */
                  p = places_array[test_places].places[thr < 2 ? 2
                                                       : thr / 2 - 1];
                  break;
                case 2:
                  /* T = 6, P = 2, 3 threads into each place.  */
                  p = places_array[test_places].places[1 - thr / 3];
                  break;
                }
            print_affinity (p);
            printf ("\n");
          }
        }
      }
  }

  return 0;
}