00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include <limits.h>
00019 #include <omp.h>
00020 #include <stdbool.h>
00021 #include <stdio.h>
00022 #include <stdlib.h>
00023 #include <string.h>
00024
00025 int failures;
00026
00027 #define THRESHOLD 100
00028
00029 static void
00030 verify (const char *name, double stime, int *array, int count)
00031 {
00032 int i;
00033 double etime = omp_get_wtime ();
00034
00035 printf ("%s: %g\n", name, etime - stime);
00036 for (i = 1; i < count; i++)
00037 if (array[i] < array[i - 1])
00038 {
00039 printf ("%s: incorrectly sorted\n", name);
00040 failures = 1;
00041 }
00042 }
00043
00044 static void
00045 insertsort (int *array, int s, int e)
00046 {
00047 int i, j, val;
00048 for (i = s + 1; i <= e; i++)
00049 {
00050 val = array[i];
00051 j = i;
00052 while (j-- > s && val < array[j])
00053 array[j + 1] = array[j];
00054 array[j + 1] = val;
00055 }
00056 }
00057
00058 struct int_pair
00059 {
00060 int lo;
00061 int hi;
00062 };
00063
00064 struct int_pair_stack
00065 {
00066 struct int_pair *top;
00067 #define STACK_SIZE 4 * CHAR_BIT * sizeof (int)
00068 struct int_pair arr[STACK_SIZE];
00069 };
00070
00071 static inline void
00072 init_int_pair_stack (struct int_pair_stack *stack)
00073 {
00074 stack->top = &stack->arr[0];
00075 }
00076
00077 static inline void
00078 push_int_pair_stack (struct int_pair_stack *stack, int lo, int hi)
00079 {
00080 stack->top->lo = lo;
00081 stack->top->hi = hi;
00082 stack->top++;
00083 }
00084
00085 static inline void
00086 pop_int_pair_stack (struct int_pair_stack *stack, int *lo, int *hi)
00087 {
00088 stack->top--;
00089 *lo = stack->top->lo;
00090 *hi = stack->top->hi;
00091 }
00092
00093 static inline int
00094 size_int_pair_stack (struct int_pair_stack *stack)
00095 {
00096 return stack->top - &stack->arr[0];
00097 }
00098
00099 static inline void
00100 busy_wait (void)
00101 {
00102 #if defined __i386__ || defined __x86_64__
00103 __asm volatile ("rep; nop" : : : "memory");
00104 #elif defined __ia64__
00105 __asm volatile ("hint @pause" : : : "memory");
00106 #elif defined __sparc__ && (defined __arch64__ || defined __sparc_v9__)
00107 __asm volatile ("membar #LoadLoad" : : : "memory");
00108 #else
00109 __asm volatile ("" : : : "memory");
00110 #endif
00111 }
00112
00113 static inline void
00114 swap (int *array, int a, int b)
00115 {
00116 int val = array[a];
00117 array[a] = array[b];
00118 array[b] = val;
00119 }
00120
00121 static inline int
00122 choose_pivot (int *array, int lo, int hi)
00123 {
00124 int mid = (lo + hi) / 2;
00125
00126 if (array[mid] < array[lo])
00127 swap (array, lo, mid);
00128 if (array[hi] < array[mid])
00129 {
00130 swap (array, mid, hi);
00131 if (array[mid] < array[lo])
00132 swap (array, lo, mid);
00133 }
00134 return array[mid];
00135 }
00136
00137 static inline int
00138 partition (int *array, int lo, int hi)
00139 {
00140 int pivot = choose_pivot (array, lo, hi);
00141 int left = lo;
00142 int right = hi;
00143
00144 for (;;)
00145 {
00146 while (array[++left] < pivot);
00147 while (array[--right] > pivot);
00148 if (left >= right)
00149 break;
00150 swap (array, left, right);
00151 }
00152 return left;
00153 }
00154
00155 static void
00156 sort1 (int *array, int count)
00157 {
00158 omp_lock_t lock;
00159 struct int_pair_stack global_stack;
00160 int busy = 1;
00161 int num_threads;
00162
00163 omp_init_lock (&lock);
00164 init_int_pair_stack (&global_stack);
00165 #pragma omp parallel firstprivate (array, count)
00166 {
00167 int lo = 0, hi = 0, mid, next_lo, next_hi;
00168 bool idle = true;
00169 struct int_pair_stack local_stack;
00170
00171 init_int_pair_stack (&local_stack);
00172 if (omp_get_thread_num () == 0)
00173 {
00174 num_threads = omp_get_num_threads ();
00175 hi = count - 1;
00176 idle = false;
00177 }
00178
00179 for (;;)
00180 {
00181 if (hi - lo < THRESHOLD)
00182 {
00183 insertsort (array, lo, hi);
00184 lo = hi;
00185 }
00186 if (lo >= hi)
00187 {
00188 if (size_int_pair_stack (&local_stack) == 0)
00189 {
00190 again:
00191 omp_set_lock (&lock);
00192 if (size_int_pair_stack (&global_stack) == 0)
00193 {
00194 if (!idle)
00195 busy--;
00196 if (busy == 0)
00197 {
00198 omp_unset_lock (&lock);
00199 break;
00200 }
00201 omp_unset_lock (&lock);
00202 idle = true;
00203 while (size_int_pair_stack (&global_stack) == 0
00204 && busy)
00205 busy_wait ();
00206 goto again;
00207 }
00208 if (idle)
00209 busy++;
00210 pop_int_pair_stack (&global_stack, &lo, &hi);
00211 omp_unset_lock (&lock);
00212 idle = false;
00213 }
00214 else
00215 pop_int_pair_stack (&local_stack, &lo, &hi);
00216 }
00217
00218 mid = partition (array, lo, hi);
00219 if (mid - lo < hi - mid)
00220 {
00221 next_lo = mid;
00222 next_hi = hi;
00223 hi = mid - 1;
00224 }
00225 else
00226 {
00227 next_lo = lo;
00228 next_hi = mid - 1;
00229 lo = mid;
00230 }
00231
00232 if (next_hi - next_lo < THRESHOLD)
00233 insertsort (array, next_lo, next_hi);
00234 else
00235 {
00236 if (size_int_pair_stack (&global_stack) < num_threads - 1)
00237 {
00238 int size;
00239
00240 omp_set_lock (&lock);
00241 size = size_int_pair_stack (&global_stack);
00242 if (size < num_threads - 1 && size < STACK_SIZE)
00243 push_int_pair_stack (&global_stack, next_lo, next_hi);
00244 else
00245 push_int_pair_stack (&local_stack, next_lo, next_hi);
00246 omp_unset_lock (&lock);
00247 }
00248 else
00249 push_int_pair_stack (&local_stack, next_lo, next_hi);
00250 }
00251 }
00252 }
00253 omp_destroy_lock (&lock);
00254 }
00255
00256 static void
00257 sort2_1 (int *array, int lo, int hi, int num_threads, int *busy)
00258 {
00259 int mid;
00260
00261 if (hi - lo < THRESHOLD)
00262 {
00263 insertsort (array, lo, hi);
00264 return;
00265 }
00266
00267 mid = partition (array, lo, hi);
00268
00269 if (*busy >= num_threads)
00270 {
00271 sort2_1 (array, lo, mid - 1, num_threads, busy);
00272 sort2_1 (array, mid, hi, num_threads, busy);
00273 return;
00274 }
00275
00276 #pragma omp atomic
00277 *busy += 1;
00278
00279 #pragma omp parallel num_threads (2) \
00280 firstprivate (array, lo, hi, mid, num_threads, busy)
00281 {
00282 if (omp_get_thread_num () == 0)
00283 sort2_1 (array, lo, mid - 1, num_threads, busy);
00284 else
00285 {
00286 sort2_1 (array, mid, hi, num_threads, busy);
00287 #pragma omp atomic
00288 *busy -= 1;
00289 }
00290 }
00291 }
00292
00293 static void
00294 sort2 (int *array, int count)
00295 {
00296 int num_threads;
00297 int busy = 1;
00298
00299 #pragma omp parallel
00300 #pragma omp single nowait
00301 num_threads = omp_get_num_threads ();
00302
00303 sort2_1 (array, 0, count - 1, num_threads, &busy);
00304 }
00305
00306 #if _OPENMP >= 200805
00307 static void
00308 sort3_1 (int *array, int lo, int hi)
00309 {
00310 int mid;
00311
00312 if (hi - lo < THRESHOLD)
00313 {
00314 insertsort (array, lo, hi);
00315 return;
00316 }
00317
00318 mid = partition (array, lo, hi);
00319 #pragma omp task
00320 sort3_1 (array, lo, mid - 1);
00321 sort3_1 (array, mid, hi);
00322 }
00323
00324 static void
00325 sort3 (int *array, int count)
00326 {
00327 #pragma omp parallel
00328 #pragma omp single
00329 sort3_1 (array, 0, count - 1);
00330 }
00331 #endif
00332
00333 int
00334 main (int argc, char **argv)
00335 {
00336 int i, count = 1000000;
00337 double stime;
00338 int *unsorted, *sorted, num_threads;
00339 if (argc >= 2)
00340 count = strtoul (argv[1], NULL, 0);
00341
00342 unsorted = malloc (count * sizeof (int));
00343 sorted = malloc (count * sizeof (int));
00344 if (unsorted == NULL || sorted == NULL)
00345 {
00346 puts ("allocation failure");
00347 exit (1);
00348 }
00349
00350 srand (0xdeadbeef);
00351 for (i = 0; i < count; i++)
00352 unsorted[i] = rand ();
00353
00354 omp_set_nested (1);
00355 omp_set_dynamic (0);
00356 #pragma omp parallel
00357 #pragma omp single nowait
00358 num_threads = omp_get_num_threads ();
00359 printf ("Threads: %d\n", num_threads);
00360
00361 memcpy (sorted, unsorted, count * sizeof (int));
00362 stime = omp_get_wtime ();
00363 sort1 (sorted, count);
00364 verify ("sort1", stime, sorted, count);
00365
00366 memcpy (sorted, unsorted, count * sizeof (int));
00367 stime = omp_get_wtime ();
00368 sort2 (sorted, count);
00369 verify ("sort2", stime, sorted, count);
00370
00371 #if _OPENMP >= 200805
00372 memcpy (sorted, unsorted, count * sizeof (int));
00373 stime = omp_get_wtime ();
00374 sort3 (sorted, count);
00375 verify ("sort3", stime, sorted, count);
00376 #endif
00377
00378 return 0;
00379 }