iter.c — libgomp work-share iteration routines (loops and sections).

Source listing of this file (generated documentation page).
00001 /* Copyright (C) 2005, 2008, 2009 Free Software Foundation, Inc.
00002    Contributed by Richard Henderson <rth@redhat.com>.
00003 
00004    This file is part of the GNU OpenMP Library (libgomp).
00005 
00006    Libgomp is free software; you can redistribute it and/or modify it
00007    under the terms of the GNU General Public License as published by
00008    the Free Software Foundation; either version 3, or (at your option)
00009    any later version.
00010 
00011    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
00012    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00013    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
00014    more details.
00015 
00016    Under Section 7 of GPL version 3, you are granted additional
00017    permissions described in the GCC Runtime Library Exception, version
00018    3.1, as published by the Free Software Foundation.
00019 
00020    You should have received a copy of the GNU General Public License and
00021    a copy of the GCC Runtime Library Exception along with this program;
00022    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023    <http://www.gnu.org/licenses/>.  */
00024 
00025 /* This file contains routines for managing work-share iteration, both
00026    for loops and sections.  */
00027 
00028 #include "libgomp.h"
00029 #include <stdlib.h>
00030 
00031 
00032 /* This function implements the STATIC scheduling method.  The caller should
00033    iterate *pstart <= x < *pend.  Return zero if there are more iterations
00034    to perform; nonzero if not.  Return less than 0 if this thread had
00035    received the absolutely last iteration.  */
00036 
int
gomp_iter_static_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned long nthreads = team ? team->nthreads : 1;

  /* static_trip == -1 marks that this thread already handed out the
     absolutely last iterations; report completion.  */
  if (thr->ts.static_trip == -1)
    return -1;

  /* Quick test for degenerate teams and orphaned constructs.  */
  if (nthreads == 1)
    {
      /* A single thread takes the whole range in one trip.  The return
	 value is nonzero (no iterations) exactly when the range is
	 empty, i.e. next == end.  */
      *pstart = ws->next;
      *pend = ws->end;
      thr->ts.static_trip = -1;
      return ws->next == ws->end;
    }

  /* We interpret chunk_size zero as "unspecified", which means that we
     should break up the iterations such that each thread makes only one
     trip through the outer loop.  */
  if (ws->chunk_size == 0)
    {
      unsigned long n, q, i;
      unsigned long s0, e0;
      long s, e;

      /* One trip per thread: a second call always reports done.  */
      if (thr->ts.static_trip > 0)
	return 1;

      /* Compute the total number of iterations.  s is incr adjusted by
	 one toward zero, so the division rounds the count up for a
	 partial final step.  */
      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
      i = thr->ts.team_id;

      /* Compute the "zero-based" start and end points.  That is, as
         if the loop began at zero and incremented by one.  Each thread
	 gets a block of q = ceil(n / nthreads) iterations.  */
      q = n / nthreads;
      q += (q * nthreads != n);
      s0 = q * i;
      e0 = s0 + q;
      if (e0 > n)
        e0 = n;

      /* Notice when no iterations allocated for this thread.  */
      if (s0 >= e0)
	{
	  thr->ts.static_trip = 1;
	  return 1;
	}

      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;

      *pstart = s;
      *pend = e;
      /* The thread that covers the tail of the range is the one that
	 "received the absolutely last iteration".  */
      thr->ts.static_trip = (e0 == n ? -1 : 1);
      return 0;
    }
  else
    {
      unsigned long n, s0, e0, i, c;
      long s, e;

      /* Otherwise, each thread gets exactly chunk_size iterations
	 (if available) each time through the loop.  */

      /* Total iteration count, rounded up as above.  */
      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
      i = thr->ts.team_id;
      c = ws->chunk_size;

      /* Initial guess is a C sized chunk positioned nthreads iterations
	 in, offset by our thread number.  static_trip counts how many
	 rounds of nthreads chunks have already been dealt.  */
      s0 = (thr->ts.static_trip * nthreads + i) * c;
      e0 = s0 + c;

      /* Detect overflow.  */
      if (s0 >= n)
	return 1;
      if (e0 > n)
	e0 = n;

      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;

      *pstart = s;
      *pend = e;

      /* Our chunk reached the end of the range: this was the last one.  */
      if (e0 == n)
	thr->ts.static_trip = -1;
      else
	thr->ts.static_trip++;
      return 0;
    }
}
00137 
00138 
00139 /* This function implements the DYNAMIC scheduling method.  Arguments are
00140    as for gomp_iter_static_next.  This function must be called with ws->lock
00141    held.  */
00142 
00143 bool
00144 gomp_iter_dynamic_next_locked (long *pstart, long *pend)
00145 {
00146   struct gomp_thread *thr = gomp_thread ();
00147   struct gomp_work_share *ws = thr->ts.work_share;
00148   long start, end, chunk, left;
00149 
00150   start = ws->next;
00151   if (start == ws->end)
00152     return false;
00153 
00154   chunk = ws->chunk_size;
00155   left = ws->end - start;
00156   if (ws->incr < 0)
00157     {
00158       if (chunk < left)
00159     chunk = left;
00160     }
00161   else
00162     {
00163       if (chunk > left)
00164     chunk = left;
00165     }
00166   end = start + chunk;
00167 
00168   ws->next = end;
00169   *pstart = start;
00170   *pend = end;
00171   return true;
00172 }
00173 
00174 
00175 #ifdef HAVE_SYNC_BUILTINS
00176 /* Similar, but doesn't require the lock held, and uses compare-and-swap
00177    instead.  Note that the only memory value that changes is ws->next.  */
00178 
bool
gomp_iter_dynamic_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  long start, end, nend, chunk, incr;

  /* Of these, only ws->next is updated concurrently; end, incr and
     chunk_size are fixed for the lifetime of the work share.  */
  end = ws->end;
  incr = ws->incr;
  chunk = ws->chunk_size;

  /* Fast path: when ws->mode is nonzero we claim a chunk with a single
     unconditional fetch-and-add.  This can push ws->next beyond
     ws->end; threads that observe a start at or past the end simply
     report no work.  NOTE(review): this presumably relies on the
     work-share setup guaranteeing the overshoot cannot overflow a
     long — confirm against where ws->mode is set.  */
  if (__builtin_expect (ws->mode, 1))
    {
      long tmp = __sync_fetch_and_add (&ws->next, chunk);
      if (incr > 0)
	{
	  /* Counting upward: done once start reaches end.  */
	  if (tmp >= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend > end)
	    nend = end;		/* Trim the final, partial chunk.  */
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
      else
	{
	  /* Counting downward: end is numerically below start, and
	     chunk is presumably stored negated (scaled by incr) —
	     the comparisons flip.  */
	  if (tmp <= end)
	    return false;
	  nend = tmp + chunk;
	  if (nend < end)
	    nend = end;
	  *pstart = tmp;
	  *pend = nend;
	  return true;
	}
    }

  /* Slow path: clamp the chunk to what remains, then try to install the
     new cursor with compare-and-swap, retrying from whatever value a
     competing thread installed if we lose the race.  */
  start = ws->next;
  while (1)
    {
      long left = end - start;
      long tmp;

      if (start == end)
	return false;

      /* With a negative increment both left and chunk are negative,
	 so the clamping comparison is reversed.  */
      if (incr < 0)
	{
	  if (chunk < left)
	    chunk = left;
	}
      else
	{
	  if (chunk > left)
	    chunk = left;
	}
      nend = start + chunk;

      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      /* Lost the race; tmp holds the current ws->next.  */
      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
00249 #endif /* HAVE_SYNC_BUILTINS */
00250 
00251 
00252 /* This function implements the GUIDED scheduling method.  Arguments are
00253    as for gomp_iter_static_next.  This function must be called with the
00254    work share lock held.  */
00255 
00256 bool
00257 gomp_iter_guided_next_locked (long *pstart, long *pend)
00258 {
00259   struct gomp_thread *thr = gomp_thread ();
00260   struct gomp_work_share *ws = thr->ts.work_share;
00261   struct gomp_team *team = thr->ts.team;
00262   unsigned long nthreads = team ? team->nthreads : 1;
00263   unsigned long n, q;
00264   long start, end;
00265 
00266   if (ws->next == ws->end)
00267     return false;
00268 
00269   start = ws->next;
00270   n = (ws->end - start) / ws->incr;
00271   q = (n + nthreads - 1) / nthreads;
00272 
00273   if (q < ws->chunk_size)
00274     q = ws->chunk_size;
00275   if (q <= n)
00276     end = start + q * ws->incr;
00277   else
00278     end = ws->end;
00279 
00280   ws->next = end;
00281   *pstart = start;
00282   *pend = end;
00283   return true;
00284 }
00285 
00286 #ifdef HAVE_SYNC_BUILTINS
00287 /* Similar, but doesn't require the lock held, and uses compare-and-swap
00288    instead.  Note that the only memory value that changes is ws->next.  */
00289 
bool
gomp_iter_guided_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  unsigned long nthreads = team ? team->nthreads : 1;
  long start, end, nend, incr;
  unsigned long chunk_size;

  /* Only ws->next changes concurrently (via the CAS below); the other
     fields are fixed for the lifetime of the work share.  */
  start = ws->next;
  end = ws->end;
  incr = ws->incr;
  chunk_size = ws->chunk_size;

  while (1)
    {
      unsigned long n, q;
      long tmp;

      if (start == end)
	return false;

      /* Guided schedule: grab a 1/nthreads share of the remaining
	 iterations, rounded up, but never less than chunk_size.  */
      n = (end - start) / incr;
      q = (n + nthreads - 1) / nthreads;

      if (q < chunk_size)
	q = chunk_size;
      if (__builtin_expect (q <= n, 1))
	nend = start + q * incr;
      else
	nend = end;	/* The minimum chunk exceeds what's left; take it all.  */

      /* Try to install the new cursor; on failure another thread moved
	 ws->next first, so retry from its value.  */
      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
00334 #endif /* HAVE_SYNC_BUILTINS */

Generated on Fri Apr 5 05:38:09 2013 for Libgomp by  doxygen 1.4.7