mirror of
https://github.com/autc04/Retro68.git
synced 2024-12-29 05:29:47 +00:00
380 lines
7.6 KiB
C
380 lines
7.6 KiB
C
|
/* Test and benchmark of a couple of parallel sorting algorithms.
|
||
|
Copyright (C) 2008 Free Software Foundation, Inc.
|
||
|
|
||
|
GCC is free software; you can redistribute it and/or modify it under
|
||
|
the terms of the GNU General Public License as published by the Free
|
||
|
Software Foundation; either version 3, or (at your option) any later
|
||
|
version.
|
||
|
|
||
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||
|
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||
|
for more details.
|
||
|
|
||
|
You should have received a copy of the GNU General Public License
|
||
|
along with GCC; see the file COPYING3. If not see
|
||
|
<http://www.gnu.org/licenses/>. */
|
||
|
|
||
|
#include <limits.h>
|
||
|
#include <omp.h>
|
||
|
#include <stdbool.h>
|
||
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
|
||
|
int failures;
|
||
|
|
||
|
#define THRESHOLD 100
|
||
|
|
||
|
static void
|
||
|
verify (const char *name, double stime, int *array, int count)
|
||
|
{
|
||
|
int i;
|
||
|
double etime = omp_get_wtime ();
|
||
|
|
||
|
printf ("%s: %g\n", name, etime - stime);
|
||
|
for (i = 1; i < count; i++)
|
||
|
if (array[i] < array[i - 1])
|
||
|
{
|
||
|
printf ("%s: incorrectly sorted\n", name);
|
||
|
failures = 1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
insertsort (int *array, int s, int e)
|
||
|
{
|
||
|
int i, j, val;
|
||
|
for (i = s + 1; i <= e; i++)
|
||
|
{
|
||
|
val = array[i];
|
||
|
j = i;
|
||
|
while (j-- > s && val < array[j])
|
||
|
array[j + 1] = array[j];
|
||
|
array[j + 1] = val;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
struct int_pair
|
||
|
{
|
||
|
int lo;
|
||
|
int hi;
|
||
|
};
|
||
|
|
||
|
struct int_pair_stack
|
||
|
{
|
||
|
struct int_pair *top;
|
||
|
#define STACK_SIZE 4 * CHAR_BIT * sizeof (int)
|
||
|
struct int_pair arr[STACK_SIZE];
|
||
|
};
|
||
|
|
||
|
static inline void
|
||
|
init_int_pair_stack (struct int_pair_stack *stack)
|
||
|
{
|
||
|
stack->top = &stack->arr[0];
|
||
|
}
|
||
|
|
||
|
static inline void
|
||
|
push_int_pair_stack (struct int_pair_stack *stack, int lo, int hi)
|
||
|
{
|
||
|
stack->top->lo = lo;
|
||
|
stack->top->hi = hi;
|
||
|
stack->top++;
|
||
|
}
|
||
|
|
||
|
static inline void
|
||
|
pop_int_pair_stack (struct int_pair_stack *stack, int *lo, int *hi)
|
||
|
{
|
||
|
stack->top--;
|
||
|
*lo = stack->top->lo;
|
||
|
*hi = stack->top->hi;
|
||
|
}
|
||
|
|
||
|
static inline int
|
||
|
size_int_pair_stack (struct int_pair_stack *stack)
|
||
|
{
|
||
|
return stack->top - &stack->arr[0];
|
||
|
}
|
||
|
|
||
|
static inline void
|
||
|
busy_wait (void)
|
||
|
{
|
||
|
#if defined __i386__ || defined __x86_64__
|
||
|
__asm volatile ("rep; nop" : : : "memory");
|
||
|
#elif defined __ia64__
|
||
|
__asm volatile ("hint @pause" : : : "memory");
|
||
|
#elif defined __sparc__ && (defined __arch64__ || defined __sparc_v9__)
|
||
|
__asm volatile ("membar #LoadLoad" : : : "memory");
|
||
|
#else
|
||
|
__asm volatile ("" : : : "memory");
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
static inline void
|
||
|
swap (int *array, int a, int b)
|
||
|
{
|
||
|
int val = array[a];
|
||
|
array[a] = array[b];
|
||
|
array[b] = val;
|
||
|
}
|
||
|
|
||
|
static inline int
|
||
|
choose_pivot (int *array, int lo, int hi)
|
||
|
{
|
||
|
int mid = (lo + hi) / 2;
|
||
|
|
||
|
if (array[mid] < array[lo])
|
||
|
swap (array, lo, mid);
|
||
|
if (array[hi] < array[mid])
|
||
|
{
|
||
|
swap (array, mid, hi);
|
||
|
if (array[mid] < array[lo])
|
||
|
swap (array, lo, mid);
|
||
|
}
|
||
|
return array[mid];
|
||
|
}
|
||
|
|
||
|
static inline int
|
||
|
partition (int *array, int lo, int hi)
|
||
|
{
|
||
|
int pivot = choose_pivot (array, lo, hi);
|
||
|
int left = lo;
|
||
|
int right = hi;
|
||
|
|
||
|
for (;;)
|
||
|
{
|
||
|
while (array[++left] < pivot);
|
||
|
while (array[--right] > pivot);
|
||
|
if (left >= right)
|
||
|
break;
|
||
|
swap (array, left, right);
|
||
|
}
|
||
|
return left;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
sort1 (int *array, int count)
|
||
|
{
|
||
|
omp_lock_t lock;
|
||
|
struct int_pair_stack global_stack;
|
||
|
int busy = 1;
|
||
|
int num_threads;
|
||
|
|
||
|
omp_init_lock (&lock);
|
||
|
init_int_pair_stack (&global_stack);
|
||
|
#pragma omp parallel firstprivate (array, count)
|
||
|
{
|
||
|
int lo = 0, hi = 0, mid, next_lo, next_hi;
|
||
|
bool idle = true;
|
||
|
struct int_pair_stack local_stack;
|
||
|
|
||
|
init_int_pair_stack (&local_stack);
|
||
|
if (omp_get_thread_num () == 0)
|
||
|
{
|
||
|
num_threads = omp_get_num_threads ();
|
||
|
hi = count - 1;
|
||
|
idle = false;
|
||
|
}
|
||
|
|
||
|
for (;;)
|
||
|
{
|
||
|
if (hi - lo < THRESHOLD)
|
||
|
{
|
||
|
insertsort (array, lo, hi);
|
||
|
lo = hi;
|
||
|
}
|
||
|
if (lo >= hi)
|
||
|
{
|
||
|
if (size_int_pair_stack (&local_stack) == 0)
|
||
|
{
|
||
|
again:
|
||
|
omp_set_lock (&lock);
|
||
|
if (size_int_pair_stack (&global_stack) == 0)
|
||
|
{
|
||
|
if (!idle)
|
||
|
busy--;
|
||
|
if (busy == 0)
|
||
|
{
|
||
|
omp_unset_lock (&lock);
|
||
|
break;
|
||
|
}
|
||
|
omp_unset_lock (&lock);
|
||
|
idle = true;
|
||
|
while (size_int_pair_stack (&global_stack) == 0
|
||
|
&& busy)
|
||
|
busy_wait ();
|
||
|
goto again;
|
||
|
}
|
||
|
if (idle)
|
||
|
busy++;
|
||
|
pop_int_pair_stack (&global_stack, &lo, &hi);
|
||
|
omp_unset_lock (&lock);
|
||
|
idle = false;
|
||
|
}
|
||
|
else
|
||
|
pop_int_pair_stack (&local_stack, &lo, &hi);
|
||
|
}
|
||
|
|
||
|
mid = partition (array, lo, hi);
|
||
|
if (mid - lo < hi - mid)
|
||
|
{
|
||
|
next_lo = mid;
|
||
|
next_hi = hi;
|
||
|
hi = mid - 1;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
next_lo = lo;
|
||
|
next_hi = mid - 1;
|
||
|
lo = mid;
|
||
|
}
|
||
|
|
||
|
if (next_hi - next_lo < THRESHOLD)
|
||
|
insertsort (array, next_lo, next_hi);
|
||
|
else
|
||
|
{
|
||
|
if (size_int_pair_stack (&global_stack) < num_threads - 1)
|
||
|
{
|
||
|
int size;
|
||
|
|
||
|
omp_set_lock (&lock);
|
||
|
size = size_int_pair_stack (&global_stack);
|
||
|
if (size < num_threads - 1 && size < STACK_SIZE)
|
||
|
push_int_pair_stack (&global_stack, next_lo, next_hi);
|
||
|
else
|
||
|
push_int_pair_stack (&local_stack, next_lo, next_hi);
|
||
|
omp_unset_lock (&lock);
|
||
|
}
|
||
|
else
|
||
|
push_int_pair_stack (&local_stack, next_lo, next_hi);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
omp_destroy_lock (&lock);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
sort2_1 (int *array, int lo, int hi, int num_threads, int *busy)
|
||
|
{
|
||
|
int mid;
|
||
|
|
||
|
if (hi - lo < THRESHOLD)
|
||
|
{
|
||
|
insertsort (array, lo, hi);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
mid = partition (array, lo, hi);
|
||
|
|
||
|
if (*busy >= num_threads)
|
||
|
{
|
||
|
sort2_1 (array, lo, mid - 1, num_threads, busy);
|
||
|
sort2_1 (array, mid, hi, num_threads, busy);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
#pragma omp atomic
|
||
|
*busy += 1;
|
||
|
|
||
|
#pragma omp parallel num_threads (2) \
|
||
|
firstprivate (array, lo, hi, mid, num_threads, busy)
|
||
|
{
|
||
|
if (omp_get_thread_num () == 0)
|
||
|
sort2_1 (array, lo, mid - 1, num_threads, busy);
|
||
|
else
|
||
|
{
|
||
|
sort2_1 (array, mid, hi, num_threads, busy);
|
||
|
#pragma omp atomic
|
||
|
*busy -= 1;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
sort2 (int *array, int count)
|
||
|
{
|
||
|
int num_threads;
|
||
|
int busy = 1;
|
||
|
|
||
|
#pragma omp parallel
|
||
|
#pragma omp single nowait
|
||
|
num_threads = omp_get_num_threads ();
|
||
|
|
||
|
sort2_1 (array, 0, count - 1, num_threads, &busy);
|
||
|
}
|
||
|
|
||
|
#if _OPENMP >= 200805
|
||
|
static void
|
||
|
sort3_1 (int *array, int lo, int hi)
|
||
|
{
|
||
|
int mid;
|
||
|
|
||
|
if (hi - lo < THRESHOLD)
|
||
|
{
|
||
|
insertsort (array, lo, hi);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
mid = partition (array, lo, hi);
|
||
|
#pragma omp task
|
||
|
sort3_1 (array, lo, mid - 1);
|
||
|
sort3_1 (array, mid, hi);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
sort3 (int *array, int count)
|
||
|
{
|
||
|
#pragma omp parallel
|
||
|
#pragma omp single
|
||
|
sort3_1 (array, 0, count - 1);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
int
|
||
|
main (int argc, char **argv)
|
||
|
{
|
||
|
int i, count = 1000000;
|
||
|
double stime;
|
||
|
int *unsorted, *sorted, num_threads;
|
||
|
if (argc >= 2)
|
||
|
count = strtoul (argv[1], NULL, 0);
|
||
|
|
||
|
unsorted = malloc (count * sizeof (int));
|
||
|
sorted = malloc (count * sizeof (int));
|
||
|
if (unsorted == NULL || sorted == NULL)
|
||
|
{
|
||
|
puts ("allocation failure");
|
||
|
exit (1);
|
||
|
}
|
||
|
|
||
|
srand (0xdeadbeef);
|
||
|
for (i = 0; i < count; i++)
|
||
|
unsorted[i] = rand ();
|
||
|
|
||
|
omp_set_nested (1);
|
||
|
omp_set_dynamic (0);
|
||
|
#pragma omp parallel
|
||
|
#pragma omp single nowait
|
||
|
num_threads = omp_get_num_threads ();
|
||
|
printf ("Threads: %d\n", num_threads);
|
||
|
|
||
|
memcpy (sorted, unsorted, count * sizeof (int));
|
||
|
stime = omp_get_wtime ();
|
||
|
sort1 (sorted, count);
|
||
|
verify ("sort1", stime, sorted, count);
|
||
|
|
||
|
memcpy (sorted, unsorted, count * sizeof (int));
|
||
|
stime = omp_get_wtime ();
|
||
|
sort2 (sorted, count);
|
||
|
verify ("sort2", stime, sorted, count);
|
||
|
|
||
|
#if _OPENMP >= 200805
|
||
|
memcpy (sorted, unsorted, count * sizeof (int));
|
||
|
stime = omp_get_wtime ();
|
||
|
sort3 (sorted, count);
|
||
|
verify ("sort3", stime, sorted, count);
|
||
|
#endif
|
||
|
|
||
|
return 0;
|
||
|
}
|