00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #include "libgomp.h"
00029 #include <stdlib.h>
00030
00031
00032
00033
00034
00035
00036
00037 int
00038 gomp_iter_static_next (long *pstart, long *pend)
00039 {
00040 struct gomp_thread *thr = gomp_thread ();
00041 struct gomp_team *team = thr->ts.team;
00042 struct gomp_work_share *ws = thr->ts.work_share;
00043 unsigned long nthreads = team ? team->nthreads : 1;
00044
00045 if (thr->ts.static_trip == -1)
00046 return -1;
00047
00048
00049 if (nthreads == 1)
00050 {
00051 *pstart = ws->next;
00052 *pend = ws->end;
00053 thr->ts.static_trip = -1;
00054 return ws->next == ws->end;
00055 }
00056
00057
00058
00059
00060 if (ws->chunk_size == 0)
00061 {
00062 unsigned long n, q, i;
00063 unsigned long s0, e0;
00064 long s, e;
00065
00066 if (thr->ts.static_trip > 0)
00067 return 1;
00068
00069
00070 s = ws->incr + (ws->incr > 0 ? -1 : 1);
00071 n = (ws->end - ws->next + s) / ws->incr;
00072 i = thr->ts.team_id;
00073
00074
00075
00076 q = n / nthreads;
00077 q += (q * nthreads != n);
00078 s0 = q * i;
00079 e0 = s0 + q;
00080 if (e0 > n)
00081 e0 = n;
00082
00083
00084 if (s0 >= e0)
00085 {
00086 thr->ts.static_trip = 1;
00087 return 1;
00088 }
00089
00090
00091 s = (long)s0 * ws->incr + ws->next;
00092 e = (long)e0 * ws->incr + ws->next;
00093
00094 *pstart = s;
00095 *pend = e;
00096 thr->ts.static_trip = (e0 == n ? -1 : 1);
00097 return 0;
00098 }
00099 else
00100 {
00101 unsigned long n, s0, e0, i, c;
00102 long s, e;
00103
00104
00105
00106
00107 s = ws->incr + (ws->incr > 0 ? -1 : 1);
00108 n = (ws->end - ws->next + s) / ws->incr;
00109 i = thr->ts.team_id;
00110 c = ws->chunk_size;
00111
00112
00113
00114 s0 = (thr->ts.static_trip * nthreads + i) * c;
00115 e0 = s0 + c;
00116
00117
00118 if (s0 >= n)
00119 return 1;
00120 if (e0 > n)
00121 e0 = n;
00122
00123
00124 s = (long)s0 * ws->incr + ws->next;
00125 e = (long)e0 * ws->incr + ws->next;
00126
00127 *pstart = s;
00128 *pend = e;
00129
00130 if (e0 == n)
00131 thr->ts.static_trip = -1;
00132 else
00133 thr->ts.static_trip++;
00134 return 0;
00135 }
00136 }
00137
00138
00139
00140
00141
00142
00143 bool
00144 gomp_iter_dynamic_next_locked (long *pstart, long *pend)
00145 {
00146 struct gomp_thread *thr = gomp_thread ();
00147 struct gomp_work_share *ws = thr->ts.work_share;
00148 long start, end, chunk, left;
00149
00150 start = ws->next;
00151 if (start == ws->end)
00152 return false;
00153
00154 chunk = ws->chunk_size;
00155 left = ws->end - start;
00156 if (ws->incr < 0)
00157 {
00158 if (chunk < left)
00159 chunk = left;
00160 }
00161 else
00162 {
00163 if (chunk > left)
00164 chunk = left;
00165 }
00166 end = start + chunk;
00167
00168 ws->next = end;
00169 *pstart = start;
00170 *pend = end;
00171 return true;
00172 }
00173
00174
00175 #ifdef HAVE_SYNC_BUILTINS
00176
00177
00178
00179 bool
00180 gomp_iter_dynamic_next (long *pstart, long *pend)
00181 {
00182 struct gomp_thread *thr = gomp_thread ();
00183 struct gomp_work_share *ws = thr->ts.work_share;
00184 long start, end, nend, chunk, incr;
00185
00186 end = ws->end;
00187 incr = ws->incr;
00188 chunk = ws->chunk_size;
00189
00190 if (__builtin_expect (ws->mode, 1))
00191 {
00192 long tmp = __sync_fetch_and_add (&ws->next, chunk);
00193 if (incr > 0)
00194 {
00195 if (tmp >= end)
00196 return false;
00197 nend = tmp + chunk;
00198 if (nend > end)
00199 nend = end;
00200 *pstart = tmp;
00201 *pend = nend;
00202 return true;
00203 }
00204 else
00205 {
00206 if (tmp <= end)
00207 return false;
00208 nend = tmp + chunk;
00209 if (nend < end)
00210 nend = end;
00211 *pstart = tmp;
00212 *pend = nend;
00213 return true;
00214 }
00215 }
00216
00217 start = ws->next;
00218 while (1)
00219 {
00220 long left = end - start;
00221 long tmp;
00222
00223 if (start == end)
00224 return false;
00225
00226 if (incr < 0)
00227 {
00228 if (chunk < left)
00229 chunk = left;
00230 }
00231 else
00232 {
00233 if (chunk > left)
00234 chunk = left;
00235 }
00236 nend = start + chunk;
00237
00238 tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
00239 if (__builtin_expect (tmp == start, 1))
00240 break;
00241
00242 start = tmp;
00243 }
00244
00245 *pstart = start;
00246 *pend = nend;
00247 return true;
00248 }
00249 #endif
00250
00251
00252
00253
00254
00255
00256 bool
00257 gomp_iter_guided_next_locked (long *pstart, long *pend)
00258 {
00259 struct gomp_thread *thr = gomp_thread ();
00260 struct gomp_work_share *ws = thr->ts.work_share;
00261 struct gomp_team *team = thr->ts.team;
00262 unsigned long nthreads = team ? team->nthreads : 1;
00263 unsigned long n, q;
00264 long start, end;
00265
00266 if (ws->next == ws->end)
00267 return false;
00268
00269 start = ws->next;
00270 n = (ws->end - start) / ws->incr;
00271 q = (n + nthreads - 1) / nthreads;
00272
00273 if (q < ws->chunk_size)
00274 q = ws->chunk_size;
00275 if (q <= n)
00276 end = start + q * ws->incr;
00277 else
00278 end = ws->end;
00279
00280 ws->next = end;
00281 *pstart = start;
00282 *pend = end;
00283 return true;
00284 }
00285
00286 #ifdef HAVE_SYNC_BUILTINS
00287
00288
00289
00290 bool
00291 gomp_iter_guided_next (long *pstart, long *pend)
00292 {
00293 struct gomp_thread *thr = gomp_thread ();
00294 struct gomp_work_share *ws = thr->ts.work_share;
00295 struct gomp_team *team = thr->ts.team;
00296 unsigned long nthreads = team ? team->nthreads : 1;
00297 long start, end, nend, incr;
00298 unsigned long chunk_size;
00299
00300 start = ws->next;
00301 end = ws->end;
00302 incr = ws->incr;
00303 chunk_size = ws->chunk_size;
00304
00305 while (1)
00306 {
00307 unsigned long n, q;
00308 long tmp;
00309
00310 if (start == end)
00311 return false;
00312
00313 n = (end - start) / incr;
00314 q = (n + nthreads - 1) / nthreads;
00315
00316 if (q < chunk_size)
00317 q = chunk_size;
00318 if (__builtin_expect (q <= n, 1))
00319 nend = start + q * incr;
00320 else
00321 nend = end;
00322
00323 tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
00324 if (__builtin_expect (tmp == start, 1))
00325 break;
00326
00327 start = tmp;
00328 }
00329
00330 *pstart = start;
00331 *pend = nend;
00332 return true;
00333 }
00334 #endif