testsuite/libgomp.c/sort-1.c

Go to the documentation of this file.
00001 /* Test and benchmark of a couple of parallel sorting algorithms.
00002    Copyright (C) 2008 Free Software Foundation, Inc.
00003 
00004    GCC is free software; you can redistribute it and/or modify it under
00005    the terms of the GNU General Public License as published by the Free
00006    Software Foundation; either version 3, or (at your option) any later
00007    version.
00008 
00009    GCC is distributed in the hope that it will be useful, but WITHOUT ANY
00010    WARRANTY; without even the implied warranty of MERCHANTABILITY or
00011    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
00012    for more details.
00013 
00014    You should have received a copy of the GNU General Public License
00015    along with GCC; see the file COPYING3.  If not see
00016    <http://www.gnu.org/licenses/>.  */
00017 
00018 #include <limits.h>
00019 #include <omp.h>
00020 #include <stdbool.h>
00021 #include <stdio.h>
00022 #include <stdlib.h>
00023 #include <string.h>
00024 
00025 int failures;
00026 
00027 #define THRESHOLD 100
00028 
00029 static void
00030 verify (const char *name, double stime, int *array, int count)
00031 {
00032   int i;
00033   double etime = omp_get_wtime ();
00034 
00035   printf ("%s: %g\n", name, etime - stime);
00036   for (i = 1; i < count; i++)
00037     if (array[i] < array[i - 1])
00038       {
00039     printf ("%s: incorrectly sorted\n", name);
00040     failures = 1;
00041       }
00042 }
00043 
00044 static void
00045 insertsort (int *array, int s, int e)
00046 {
00047   int i, j, val;
00048   for (i = s + 1; i <= e; i++)
00049     {
00050       val = array[i];
00051       j = i;
00052       while (j-- > s && val < array[j])
00053     array[j + 1] = array[j];
00054       array[j + 1] = val;
00055     }
00056 }
00057 
00058 struct int_pair
00059 {
00060   int lo;
00061   int hi;
00062 };
00063 
00064 struct int_pair_stack
00065 {
00066   struct int_pair *top;
00067 #define STACK_SIZE 4 * CHAR_BIT * sizeof (int)
00068   struct int_pair arr[STACK_SIZE];
00069 };
00070 
00071 static inline void
00072 init_int_pair_stack (struct int_pair_stack *stack)
00073 {
00074   stack->top = &stack->arr[0];
00075 }
00076 
00077 static inline void
00078 push_int_pair_stack (struct int_pair_stack *stack, int lo, int hi)
00079 {
00080   stack->top->lo = lo;
00081   stack->top->hi = hi;
00082   stack->top++;
00083 }
00084 
00085 static inline void
00086 pop_int_pair_stack (struct int_pair_stack *stack, int *lo, int *hi)
00087 {
00088   stack->top--;
00089   *lo = stack->top->lo;
00090   *hi = stack->top->hi;
00091 }
00092 
00093 static inline int
00094 size_int_pair_stack (struct int_pair_stack *stack)
00095 {
00096   return stack->top - &stack->arr[0];
00097 }
00098 
00099 static inline void
00100 busy_wait (void)
00101 {
00102 #if defined __i386__ || defined __x86_64__
00103   __asm volatile ("rep; nop" : : : "memory");
00104 #elif defined __ia64__
00105   __asm volatile ("hint @pause" : : : "memory");
00106 #elif defined __sparc__ && (defined __arch64__ || defined __sparc_v9__)
00107   __asm volatile ("membar #LoadLoad" : : : "memory");
00108 #else
00109   __asm volatile ("" : : : "memory");
00110 #endif
00111 }
00112 
00113 static inline void
00114 swap (int *array, int a, int b)
00115 {
00116   int val = array[a];
00117   array[a] = array[b];
00118   array[b] = val;
00119 }
00120 
00121 static inline int
00122 choose_pivot (int *array, int lo, int hi)
00123 {
00124   int mid = (lo + hi) / 2;
00125 
00126   if (array[mid] < array[lo])
00127     swap (array, lo, mid);
00128   if (array[hi] < array[mid])
00129     {
00130       swap (array, mid, hi);
00131       if (array[mid] < array[lo])
00132     swap (array, lo, mid);
00133     }
00134   return array[mid];
00135 }
00136 
00137 static inline int
00138 partition (int *array, int lo, int hi)
00139 {
00140   int pivot = choose_pivot (array, lo, hi);
00141   int left = lo;
00142   int right = hi;
00143 
00144   for (;;)
00145     {
00146       while (array[++left] < pivot);
00147       while (array[--right] > pivot);
00148       if (left >= right)
00149     break;
00150       swap (array, left, right);
00151     }
00152   return left;
00153 }
00154 
00155 static void
00156 sort1 (int *array, int count)
00157 {
00158   omp_lock_t lock;
00159   struct int_pair_stack global_stack;
00160   int busy = 1;
00161   int num_threads;
00162 
00163   omp_init_lock (&lock);
00164   init_int_pair_stack (&global_stack);
00165   #pragma omp parallel firstprivate (array, count)
00166   {
00167     int lo = 0, hi = 0, mid, next_lo, next_hi;
00168     bool idle = true;
00169     struct int_pair_stack local_stack;
00170 
00171     init_int_pair_stack (&local_stack);
00172     if (omp_get_thread_num () == 0)
00173       {
00174     num_threads = omp_get_num_threads ();
00175     hi = count - 1;
00176     idle = false;
00177       }
00178 
00179     for (;;)
00180       {
00181     if (hi - lo < THRESHOLD)
00182       {
00183         insertsort (array, lo, hi);
00184         lo = hi;
00185       }
00186     if (lo >= hi)
00187       {
00188         if (size_int_pair_stack (&local_stack) == 0)
00189           {
00190           again:
00191         omp_set_lock (&lock);
00192         if (size_int_pair_stack (&global_stack) == 0)
00193           {
00194             if (!idle)
00195               busy--;
00196             if (busy == 0)
00197               {
00198             omp_unset_lock (&lock);
00199             break;
00200               }
00201             omp_unset_lock (&lock);
00202             idle = true;
00203             while (size_int_pair_stack (&global_stack) == 0
00204                && busy)
00205               busy_wait ();
00206             goto again;
00207           }
00208         if (idle)
00209           busy++;
00210         pop_int_pair_stack (&global_stack, &lo, &hi);
00211         omp_unset_lock (&lock);
00212         idle = false;
00213           }
00214         else
00215           pop_int_pair_stack (&local_stack, &lo, &hi);
00216       }
00217 
00218     mid = partition (array, lo, hi);
00219     if (mid - lo < hi - mid)
00220       {
00221         next_lo = mid;
00222         next_hi = hi;
00223         hi = mid - 1;
00224       }
00225     else
00226       {
00227         next_lo = lo;
00228         next_hi = mid - 1;
00229         lo = mid;
00230       }
00231 
00232     if (next_hi - next_lo < THRESHOLD)
00233       insertsort (array, next_lo, next_hi);
00234     else
00235       {
00236         if (size_int_pair_stack (&global_stack) < num_threads - 1)
00237           {
00238         int size;
00239 
00240         omp_set_lock (&lock);
00241         size = size_int_pair_stack (&global_stack);
00242         if (size < num_threads - 1 && size < STACK_SIZE)
00243           push_int_pair_stack (&global_stack, next_lo, next_hi);
00244         else
00245           push_int_pair_stack (&local_stack, next_lo, next_hi);
00246         omp_unset_lock (&lock);
00247           }
00248         else
00249           push_int_pair_stack (&local_stack, next_lo, next_hi);
00250       }
00251       }
00252     }
00253   omp_destroy_lock (&lock);
00254 }
00255 
00256 static void
00257 sort2_1 (int *array, int lo, int hi, int num_threads, int *busy)
00258 {
00259   int mid;
00260 
00261   if (hi - lo < THRESHOLD)
00262     {
00263       insertsort (array, lo, hi);
00264       return;
00265     }
00266 
00267   mid = partition (array, lo, hi);
00268 
00269   if (*busy >= num_threads)
00270     {
00271       sort2_1 (array, lo, mid - 1, num_threads, busy);
00272       sort2_1 (array, mid, hi, num_threads, busy);
00273       return;
00274     }
00275 
00276   #pragma omp atomic
00277     *busy += 1;
00278 
00279   #pragma omp parallel num_threads (2) \
00280                firstprivate (array, lo, hi, mid, num_threads, busy)
00281   {
00282     if (omp_get_thread_num () == 0)
00283       sort2_1 (array, lo, mid - 1, num_threads, busy);
00284     else
00285       {
00286     sort2_1 (array, mid, hi, num_threads, busy);
00287     #pragma omp atomic
00288       *busy -= 1;
00289       }
00290   }
00291 }
00292 
00293 static void
00294 sort2 (int *array, int count)
00295 {
00296   int num_threads;
00297   int busy = 1;
00298 
00299   #pragma omp parallel
00300     #pragma omp single nowait
00301       num_threads = omp_get_num_threads ();
00302 
00303   sort2_1 (array, 0, count - 1, num_threads, &busy);
00304 }
00305 
00306 #if _OPENMP >= 200805
00307 static void
00308 sort3_1 (int *array, int lo, int hi)
00309 {
00310   int mid;
00311 
00312   if (hi - lo < THRESHOLD)
00313     {
00314       insertsort (array, lo, hi);
00315       return;
00316     }
00317 
00318   mid = partition (array, lo, hi);
00319   #pragma omp task
00320     sort3_1 (array, lo, mid - 1);
00321   sort3_1 (array, mid, hi);
00322 }
00323 
00324 static void
00325 sort3 (int *array, int count)
00326 {
00327   #pragma omp parallel
00328     #pragma omp single
00329       sort3_1 (array, 0, count - 1);
00330 }
00331 #endif
00332 
00333 int
00334 main (int argc, char **argv)
00335 {
00336   int i, count = 1000000;
00337   double stime;
00338   int *unsorted, *sorted, num_threads;
00339   if (argc >= 2)
00340     count = strtoul (argv[1], NULL, 0);
00341 
00342   unsorted = malloc (count * sizeof (int));
00343   sorted = malloc (count * sizeof (int));
00344   if (unsorted == NULL || sorted == NULL)
00345     {
00346       puts ("allocation failure");
00347       exit (1);
00348     }
00349 
00350   srand (0xdeadbeef);
00351   for (i = 0; i < count; i++)
00352     unsorted[i] = rand ();
00353 
00354   omp_set_nested (1);
00355   omp_set_dynamic (0);
00356   #pragma omp parallel
00357     #pragma omp single nowait
00358       num_threads = omp_get_num_threads ();
00359   printf ("Threads: %d\n", num_threads);
00360 
00361   memcpy (sorted, unsorted, count * sizeof (int));
00362   stime = omp_get_wtime ();
00363   sort1 (sorted, count);
00364   verify ("sort1", stime, sorted, count);
00365 
00366   memcpy (sorted, unsorted, count * sizeof (int));
00367   stime = omp_get_wtime ();
00368   sort2 (sorted, count);
00369   verify ("sort2", stime, sorted, count);
00370 
00371 #if _OPENMP >= 200805
00372   memcpy (sorted, unsorted, count * sizeof (int));
00373   stime = omp_get_wtime ();
00374   sort3 (sorted, count);
00375   verify ("sort3", stime, sorted, count);
00376 #endif
00377 
00378   return 0;
00379 }

Generated on Fri Apr 5 05:38:10 2013 for Libgomp by  doxygen 1.4.7