loop.c

Go to the documentation of this file.
00001 /* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
00002    Contributed by Richard Henderson <rth@redhat.com>.
00003 
00004    This file is part of the GNU OpenMP Library (libgomp).
00005 
00006    Libgomp is free software; you can redistribute it and/or modify it
00007    under the terms of the GNU General Public License as published by
00008    the Free Software Foundation; either version 3, or (at your option)
00009    any later version.
00010 
00011    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
00012    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00013    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
00014    more details.
00015 
00016    Under Section 7 of GPL version 3, you are granted additional
00017    permissions described in the GCC Runtime Library Exception, version
00018    3.1, as published by the Free Software Foundation.
00019 
00020    You should have received a copy of the GNU General Public License and
00021    a copy of the GCC Runtime Library Exception along with this program;
00022    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023    <http://www.gnu.org/licenses/>.  */
00024 
00025 /* This file handles the LOOP (FOR/DO) construct.  */
00026 
00027 #include <limits.h>
00028 #include <stdlib.h>
00029 #include "libgomp.h"
00030 
00031 
00032 /* Initialize the given work share construct from the given arguments.  */
00033 
00034 static inline void
00035 gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr,
00036         enum gomp_schedule_type sched, long chunk_size)
00037 {
00038   ws->sched = sched;
00039   ws->chunk_size = chunk_size;
00040   /* Canonicalize loops that have zero iterations to ->next == ->end.  */
00041   ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end))
00042         ? start : end;
00043   ws->incr = incr;
00044   ws->next = start;
00045   if (sched == GFS_DYNAMIC)
00046     {
00047       ws->chunk_size *= incr;
00048 
00049 #ifdef HAVE_SYNC_BUILTINS
00050       {
00051     /* For dynamic scheduling prepare things to make each iteration
00052        faster.  */
00053     struct gomp_thread *thr = gomp_thread ();
00054     struct gomp_team *team = thr->ts.team;
00055     long nthreads = team ? team->nthreads : 1;
00056 
00057     if (__builtin_expect (incr > 0, 1))
00058       {
00059         /* Cheap overflow protection.  */
00060         if (__builtin_expect ((nthreads | ws->chunk_size)
00061                   >= 1UL << (sizeof (long)
00062                          * __CHAR_BIT__ / 2 - 1), 0))
00063           ws->mode = 0;
00064         else
00065           ws->mode = ws->end < (LONG_MAX
00066                     - (nthreads + 1) * ws->chunk_size);
00067       }
00068     /* Cheap overflow protection.  */
00069     else if (__builtin_expect ((nthreads | -ws->chunk_size)
00070                    >= 1UL << (sizeof (long)
00071                           * __CHAR_BIT__ / 2 - 1), 0))
00072       ws->mode = 0;
00073     else
00074       ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
00075       }
00076 #endif
00077     }
00078 }
00079 
00080 /* The *_start routines are called when first encountering a loop construct
00081    that is not bound directly to a parallel construct.  The first thread 
00082    that arrives will create the work-share construct; subsequent threads
00083    will see the construct exists and allocate work from it.
00084 
00085    START, END, INCR are the bounds of the loop; due to the restrictions of
00086    OpenMP, these values must be the same in every thread.  This is not 
00087    verified (nor is it entirely verifiable, since START is not necessarily
00088    retained intact in the work-share data structure).  CHUNK_SIZE is the
00089    scheduling parameter; again this must be identical in all threads.
00090 
00091    Returns true if there's any work for this thread to perform.  If so,
00092    *ISTART and *IEND are filled with the bounds of the iteration block
00093    allocated to this thread.  Returns false if all work was assigned to
00094    other threads prior to this thread's arrival.  */
00095 
00096 static bool
00097 gomp_loop_static_start (long start, long end, long incr, long chunk_size,
00098             long *istart, long *iend)
00099 {
00100   struct gomp_thread *thr = gomp_thread ();
00101 
00102   thr->ts.static_trip = 0;
00103   if (gomp_work_share_start (false))
00104     {
00105       gomp_loop_init (thr->ts.work_share, start, end, incr,
00106               GFS_STATIC, chunk_size);
00107       gomp_work_share_init_done ();
00108     }
00109 
00110   return !gomp_iter_static_next (istart, iend);
00111 }
00112 
00113 static bool
00114 gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
00115              long *istart, long *iend)
00116 {
00117   struct gomp_thread *thr = gomp_thread ();
00118   bool ret;
00119 
00120   if (gomp_work_share_start (false))
00121     {
00122       gomp_loop_init (thr->ts.work_share, start, end, incr,
00123               GFS_DYNAMIC, chunk_size);
00124       gomp_work_share_init_done ();
00125     }
00126 
00127 #ifdef HAVE_SYNC_BUILTINS
00128   ret = gomp_iter_dynamic_next (istart, iend);
00129 #else
00130   gomp_mutex_lock (&thr->ts.work_share->lock);
00131   ret = gomp_iter_dynamic_next_locked (istart, iend);
00132   gomp_mutex_unlock (&thr->ts.work_share->lock);
00133 #endif
00134 
00135   return ret;
00136 }
00137 
00138 static bool
00139 gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
00140             long *istart, long *iend)
00141 {
00142   struct gomp_thread *thr = gomp_thread ();
00143   bool ret;
00144 
00145   if (gomp_work_share_start (false))
00146     {
00147       gomp_loop_init (thr->ts.work_share, start, end, incr,
00148               GFS_GUIDED, chunk_size);
00149       gomp_work_share_init_done ();
00150     }
00151 
00152 #ifdef HAVE_SYNC_BUILTINS
00153   ret = gomp_iter_guided_next (istart, iend);
00154 #else
00155   gomp_mutex_lock (&thr->ts.work_share->lock);
00156   ret = gomp_iter_guided_next_locked (istart, iend);
00157   gomp_mutex_unlock (&thr->ts.work_share->lock);
00158 #endif
00159 
00160   return ret;
00161 }
00162 
00163 bool
00164 GOMP_loop_runtime_start (long start, long end, long incr,
00165              long *istart, long *iend)
00166 {
00167   struct gomp_task_icv *icv = gomp_icv (false);
00168   switch (icv->run_sched_var)
00169     {
00170     case GFS_STATIC:
00171       return gomp_loop_static_start (start, end, incr, icv->run_sched_modifier,
00172                      istart, iend);
00173     case GFS_DYNAMIC:
00174       return gomp_loop_dynamic_start (start, end, incr, icv->run_sched_modifier,
00175                       istart, iend);
00176     case GFS_GUIDED:
00177       return gomp_loop_guided_start (start, end, incr, icv->run_sched_modifier,
00178                      istart, iend);
00179     case GFS_AUTO:
00180       /* For now map to schedule(static), later on we could play with feedback
00181      driven choice.  */
00182       return gomp_loop_static_start (start, end, incr, 0, istart, iend);
00183     default:
00184       abort ();
00185     }
00186 }
00187 
00188 /* The *_ordered_*_start routines are similar.  The only difference is that
00189    this work-share construct is initialized to expect an ORDERED section.  */
00190 
00191 static bool
00192 gomp_loop_ordered_static_start (long start, long end, long incr,
00193                 long chunk_size, long *istart, long *iend)
00194 {
00195   struct gomp_thread *thr = gomp_thread ();
00196 
00197   thr->ts.static_trip = 0;
00198   if (gomp_work_share_start (true))
00199     {
00200       gomp_loop_init (thr->ts.work_share, start, end, incr,
00201               GFS_STATIC, chunk_size);
00202       gomp_ordered_static_init ();
00203       gomp_work_share_init_done ();
00204     }
00205 
00206   return !gomp_iter_static_next (istart, iend);
00207 }
00208 
00209 static bool
00210 gomp_loop_ordered_dynamic_start (long start, long end, long incr,
00211                  long chunk_size, long *istart, long *iend)
00212 {
00213   struct gomp_thread *thr = gomp_thread ();
00214   bool ret;
00215 
00216   if (gomp_work_share_start (true))
00217     {
00218       gomp_loop_init (thr->ts.work_share, start, end, incr,
00219               GFS_DYNAMIC, chunk_size);
00220       gomp_mutex_lock (&thr->ts.work_share->lock);
00221       gomp_work_share_init_done ();
00222     }
00223   else
00224     gomp_mutex_lock (&thr->ts.work_share->lock);
00225 
00226   ret = gomp_iter_dynamic_next_locked (istart, iend);
00227   if (ret)
00228     gomp_ordered_first ();
00229   gomp_mutex_unlock (&thr->ts.work_share->lock);
00230 
00231   return ret;
00232 }
00233 
00234 static bool
00235 gomp_loop_ordered_guided_start (long start, long end, long incr,
00236                 long chunk_size, long *istart, long *iend)
00237 {
00238   struct gomp_thread *thr = gomp_thread ();
00239   bool ret;
00240 
00241   if (gomp_work_share_start (true))
00242     {
00243       gomp_loop_init (thr->ts.work_share, start, end, incr,
00244               GFS_GUIDED, chunk_size);
00245       gomp_mutex_lock (&thr->ts.work_share->lock);
00246       gomp_work_share_init_done ();
00247     }
00248   else
00249     gomp_mutex_lock (&thr->ts.work_share->lock);
00250 
00251   ret = gomp_iter_guided_next_locked (istart, iend);
00252   if (ret)
00253     gomp_ordered_first ();
00254   gomp_mutex_unlock (&thr->ts.work_share->lock);
00255 
00256   return ret;
00257 }
00258 
00259 bool
00260 GOMP_loop_ordered_runtime_start (long start, long end, long incr,
00261                  long *istart, long *iend)
00262 {
00263   struct gomp_task_icv *icv = gomp_icv (false);
00264   switch (icv->run_sched_var)
00265     {
00266     case GFS_STATIC:
00267       return gomp_loop_ordered_static_start (start, end, incr,
00268                          icv->run_sched_modifier,
00269                          istart, iend);
00270     case GFS_DYNAMIC:
00271       return gomp_loop_ordered_dynamic_start (start, end, incr,
00272                           icv->run_sched_modifier,
00273                           istart, iend);
00274     case GFS_GUIDED:
00275       return gomp_loop_ordered_guided_start (start, end, incr,
00276                          icv->run_sched_modifier,
00277                          istart, iend);
00278     case GFS_AUTO:
00279       /* For now map to schedule(static), later on we could play with feedback
00280      driven choice.  */
00281       return gomp_loop_ordered_static_start (start, end, incr,
00282                          0, istart, iend);
00283     default:
00284       abort ();
00285     }
00286 }
00287 
00288 /* The *_next routines are called when the thread completes processing of 
00289    the iteration block currently assigned to it.  If the work-share 
00290    construct is bound directly to a parallel construct, then the iteration
00291    bounds may have been set up before the parallel.  In which case, this
00292    may be the first iteration for the thread.
00293 
00294    Returns true if there is work remaining to be performed; *ISTART and
00295    *IEND are filled with a new iteration block.  Returns false if all work
00296    has been assigned.  */
00297 
/* Fetch this thread's next block of a statically scheduled loop into
   *ISTART / *IEND.  Returns true exactly when the static iterator reports
   zero.  */

static bool
gomp_loop_static_next (long *istart, long *iend)
{
  return gomp_iter_static_next (istart, iend) == 0;
}
00303 
00304 static bool
00305 gomp_loop_dynamic_next (long *istart, long *iend)
00306 {
00307   bool ret;
00308 
00309 #ifdef HAVE_SYNC_BUILTINS
00310   ret = gomp_iter_dynamic_next (istart, iend);
00311 #else
00312   struct gomp_thread *thr = gomp_thread ();
00313   gomp_mutex_lock (&thr->ts.work_share->lock);
00314   ret = gomp_iter_dynamic_next_locked (istart, iend);
00315   gomp_mutex_unlock (&thr->ts.work_share->lock);
00316 #endif
00317 
00318   return ret;
00319 }
00320 
00321 static bool
00322 gomp_loop_guided_next (long *istart, long *iend)
00323 {
00324   bool ret;
00325 
00326 #ifdef HAVE_SYNC_BUILTINS
00327   ret = gomp_iter_guided_next (istart, iend);
00328 #else
00329   struct gomp_thread *thr = gomp_thread ();
00330   gomp_mutex_lock (&thr->ts.work_share->lock);
00331   ret = gomp_iter_guided_next_locked (istart, iend);
00332   gomp_mutex_unlock (&thr->ts.work_share->lock);
00333 #endif
00334 
00335   return ret;
00336 }
00337 
00338 bool
00339 GOMP_loop_runtime_next (long *istart, long *iend)
00340 {
00341   struct gomp_thread *thr = gomp_thread ();
00342   
00343   switch (thr->ts.work_share->sched)
00344     {
00345     case GFS_STATIC:
00346     case GFS_AUTO:
00347       return gomp_loop_static_next (istart, iend);
00348     case GFS_DYNAMIC:
00349       return gomp_loop_dynamic_next (istart, iend);
00350     case GFS_GUIDED:
00351       return gomp_loop_guided_next (istart, iend);
00352     default:
00353       abort ();
00354     }
00355 }
00356 
00357 /* The *_ordered_*_next routines are called when the thread completes
00358    processing of the iteration block currently assigned to it.
00359 
00360    Returns true if there is work remaining to be performed; *ISTART and
00361    *IEND are filled with a new iteration block.  Returns false if all work
00362    has been assigned.  */
00363 
00364 static bool
00365 gomp_loop_ordered_static_next (long *istart, long *iend)
00366 {
00367   struct gomp_thread *thr = gomp_thread ();
00368   int test;
00369 
00370   gomp_ordered_sync ();
00371   gomp_mutex_lock (&thr->ts.work_share->lock);
00372   test = gomp_iter_static_next (istart, iend);
00373   if (test >= 0)
00374     gomp_ordered_static_next ();
00375   gomp_mutex_unlock (&thr->ts.work_share->lock);
00376 
00377   return test == 0;
00378 }
00379 
00380 static bool
00381 gomp_loop_ordered_dynamic_next (long *istart, long *iend)
00382 {
00383   struct gomp_thread *thr = gomp_thread ();
00384   bool ret;
00385 
00386   gomp_ordered_sync ();
00387   gomp_mutex_lock (&thr->ts.work_share->lock);
00388   ret = gomp_iter_dynamic_next_locked (istart, iend);
00389   if (ret)
00390     gomp_ordered_next ();
00391   else
00392     gomp_ordered_last ();
00393   gomp_mutex_unlock (&thr->ts.work_share->lock);
00394 
00395   return ret;
00396 }
00397 
00398 static bool
00399 gomp_loop_ordered_guided_next (long *istart, long *iend)
00400 {
00401   struct gomp_thread *thr = gomp_thread ();
00402   bool ret;
00403 
00404   gomp_ordered_sync ();
00405   gomp_mutex_lock (&thr->ts.work_share->lock);
00406   ret = gomp_iter_guided_next_locked (istart, iend);
00407   if (ret)
00408     gomp_ordered_next ();
00409   else
00410     gomp_ordered_last ();
00411   gomp_mutex_unlock (&thr->ts.work_share->lock);
00412 
00413   return ret;
00414 }
00415 
00416 bool
00417 GOMP_loop_ordered_runtime_next (long *istart, long *iend)
00418 {
00419   struct gomp_thread *thr = gomp_thread ();
00420   
00421   switch (thr->ts.work_share->sched)
00422     {
00423     case GFS_STATIC:
00424     case GFS_AUTO:
00425       return gomp_loop_ordered_static_next (istart, iend);
00426     case GFS_DYNAMIC:
00427       return gomp_loop_ordered_dynamic_next (istart, iend);
00428     case GFS_GUIDED:
00429       return gomp_loop_ordered_guided_next (istart, iend);
00430     default:
00431       abort ();
00432     }
00433 }
00434 
00435 /* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
00436    to avoid one synchronization once we get into the loop.  */
00437 
00438 static void
00439 gomp_parallel_loop_start (void (*fn) (void *), void *data,
00440               unsigned num_threads, long start, long end,
00441               long incr, enum gomp_schedule_type sched,
00442               long chunk_size)
00443 {
00444   struct gomp_team *team;
00445 
00446   num_threads = gomp_resolve_num_threads (num_threads, 0);
00447   team = gomp_new_team (num_threads);
00448   gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
00449   gomp_team_start (fn, data, num_threads, team);
00450 }
00451 
00452 void
00453 GOMP_parallel_loop_static_start (void (*fn) (void *), void *data,
00454                  unsigned num_threads, long start, long end,
00455                  long incr, long chunk_size)
00456 {
00457   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
00458                 GFS_STATIC, chunk_size);
00459 }
00460 
00461 void
00462 GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data,
00463                   unsigned num_threads, long start, long end,
00464                   long incr, long chunk_size)
00465 {
00466   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
00467                 GFS_DYNAMIC, chunk_size);
00468 }
00469 
00470 void
00471 GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data,
00472                  unsigned num_threads, long start, long end,
00473                  long incr, long chunk_size)
00474 {
00475   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
00476                 GFS_GUIDED, chunk_size);
00477 }
00478 
00479 void
00480 GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
00481                   unsigned num_threads, long start, long end,
00482                   long incr)
00483 {
00484   struct gomp_task_icv *icv = gomp_icv (false);
00485   gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
00486                 icv->run_sched_var, icv->run_sched_modifier);
00487 }
00488 
00489 /* The GOMP_loop_end* routines are called after the thread is told that
00490    all loop iterations are complete.  This first version synchronizes
00491    all threads; the nowait version does not.  */
00492 
/* Finish the current loop construct by ending its work share; this is the
   variant that synchronizes all threads.  */

void
GOMP_loop_end (void)
{
  gomp_work_share_end ();
}
00498 
/* Finish the current loop construct by ending its work share without
   synchronizing the threads (the NOWAIT variant).  */

void
GOMP_loop_end_nowait (void)
{
  gomp_work_share_end_nowait ();
}
00504 
00505 
00506 /* We use static functions above so that we're sure that the "runtime"
00507    function can defer to the proper routine without interposition.  We
00508    export the static function with a strong alias when possible, or with
00509    a wrapper function otherwise.  */
00510 
00511 #ifdef HAVE_ATTRIBUTE_ALIAS
00512 extern __typeof(gomp_loop_static_start) GOMP_loop_static_start
00513     __attribute__((alias ("gomp_loop_static_start")));
00514 extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start
00515     __attribute__((alias ("gomp_loop_dynamic_start")));
00516 extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
00517     __attribute__((alias ("gomp_loop_guided_start")));
00518 
00519 extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
00520     __attribute__((alias ("gomp_loop_ordered_static_start")));
00521 extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
00522     __attribute__((alias ("gomp_loop_ordered_dynamic_start")));
00523 extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
00524     __attribute__((alias ("gomp_loop_ordered_guided_start")));
00525 
00526 extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
00527     __attribute__((alias ("gomp_loop_static_next")));
00528 extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
00529     __attribute__((alias ("gomp_loop_dynamic_next")));
00530 extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
00531     __attribute__((alias ("gomp_loop_guided_next")));
00532 
00533 extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
00534     __attribute__((alias ("gomp_loop_ordered_static_next")));
00535 extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next
00536     __attribute__((alias ("gomp_loop_ordered_dynamic_next")));
00537 extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next
00538     __attribute__((alias ("gomp_loop_ordered_guided_next")));
00539 #else
/* Public wrapper around gomp_loop_static_start, compiled when strong
   aliases are not available.  */
bool
GOMP_loop_static_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  bool have_work
    = gomp_loop_static_start (start, end, incr, chunk_size, istart, iend);

  return have_work;
}
00546 
/* Public wrapper around gomp_loop_dynamic_start, compiled when strong
   aliases are not available.  */
bool
GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size,
                         long *istart, long *iend)
{
  bool have_work
    = gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);

  return have_work;
}
00553 
/* Public wrapper around gomp_loop_guided_start, compiled when strong
   aliases are not available.  */
bool
GOMP_loop_guided_start (long start, long end, long incr, long chunk_size,
                        long *istart, long *iend)
{
  bool have_work
    = gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);

  return have_work;
}
00560 
/* Public wrapper around gomp_loop_ordered_static_start, compiled when
   strong aliases are not available.  */
bool
GOMP_loop_ordered_static_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  bool have_work
    = gomp_loop_ordered_static_start (start, end, incr, chunk_size,
                                      istart, iend);

  return have_work;
}
00568 
/* Public wrapper around gomp_loop_ordered_dynamic_start, compiled when
   strong aliases are not available.  */
bool
GOMP_loop_ordered_dynamic_start (long start, long end, long incr,
                                 long chunk_size, long *istart, long *iend)
{
  bool have_work
    = gomp_loop_ordered_dynamic_start (start, end, incr, chunk_size,
                                       istart, iend);

  return have_work;
}
00576 
/* Public wrapper around gomp_loop_ordered_guided_start, compiled when
   strong aliases are not available.  */
bool
GOMP_loop_ordered_guided_start (long start, long end, long incr,
                                long chunk_size, long *istart, long *iend)
{
  bool have_work
    = gomp_loop_ordered_guided_start (start, end, incr, chunk_size,
                                      istart, iend);

  return have_work;
}
00584 
/* Public wrapper around gomp_loop_static_next, compiled when strong
   aliases are not available.  */
bool
GOMP_loop_static_next (long *istart, long *iend)
{
  bool more = gomp_loop_static_next (istart, iend);

  return more;
}
00590 
/* Public wrapper around gomp_loop_dynamic_next, compiled when strong
   aliases are not available.  */
bool
GOMP_loop_dynamic_next (long *istart, long *iend)
{
  bool more = gomp_loop_dynamic_next (istart, iend);

  return more;
}
00596 
/* Public wrapper around gomp_loop_guided_next, compiled when strong
   aliases are not available.  */
bool
GOMP_loop_guided_next (long *istart, long *iend)
{
  bool more = gomp_loop_guided_next (istart, iend);

  return more;
}
00602 
/* Public wrapper around gomp_loop_ordered_static_next, compiled when
   strong aliases are not available.  */
bool
GOMP_loop_ordered_static_next (long *istart, long *iend)
{
  bool more = gomp_loop_ordered_static_next (istart, iend);

  return more;
}
00608 
/* Public wrapper around gomp_loop_ordered_dynamic_next, compiled when
   strong aliases are not available.  */
bool
GOMP_loop_ordered_dynamic_next (long *istart, long *iend)
{
  bool more = gomp_loop_ordered_dynamic_next (istart, iend);

  return more;
}
00614 
/* Public wrapper around gomp_loop_ordered_guided_next, compiled when
   strong aliases are not available.  */
bool
GOMP_loop_ordered_guided_next (long *istart, long *iend)
{
  bool more = gomp_loop_ordered_guided_next (istart, iend);

  return more;
}
00620 #endif

Generated on Fri Apr 5 05:38:10 2013 for Libgomp by  doxygen 1.4.7