00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #include "libgomp.h"
00029 #include <stdlib.h>
00030
00031 typedef unsigned long long gomp_ull;
00032
00033
00034
00035
00036
00037
00038 int
00039 gomp_iter_ull_static_next (gomp_ull *pstart, gomp_ull *pend)
00040 {
00041 struct gomp_thread *thr = gomp_thread ();
00042 struct gomp_team *team = thr->ts.team;
00043 struct gomp_work_share *ws = thr->ts.work_share;
00044 unsigned long nthreads = team ? team->nthreads : 1;
00045
00046 if (thr->ts.static_trip == -1)
00047 return -1;
00048
00049
00050 if (nthreads == 1)
00051 {
00052 *pstart = ws->next_ull;
00053 *pend = ws->end_ull;
00054 thr->ts.static_trip = -1;
00055 return ws->next_ull == ws->end_ull;
00056 }
00057
00058
00059
00060
00061 if (ws->chunk_size_ull == 0)
00062 {
00063 gomp_ull n, q, i, s0, e0, s, e;
00064
00065 if (thr->ts.static_trip > 0)
00066 return 1;
00067
00068
00069 if (__builtin_expect (ws->mode, 0) == 0)
00070 n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
00071 else
00072 n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
00073 i = thr->ts.team_id;
00074
00075
00076
00077 q = n / nthreads;
00078 q += (q * nthreads != n);
00079 s0 = q * i;
00080 e0 = s0 + q;
00081 if (e0 > n)
00082 e0 = n;
00083
00084
00085 if (s0 >= e0)
00086 {
00087 thr->ts.static_trip = 1;
00088 return 1;
00089 }
00090
00091
00092 s = s0 * ws->incr_ull + ws->next_ull;
00093 e = e0 * ws->incr_ull + ws->next_ull;
00094
00095 *pstart = s;
00096 *pend = e;
00097 thr->ts.static_trip = (e0 == n ? -1 : 1);
00098 return 0;
00099 }
00100 else
00101 {
00102 gomp_ull n, s0, e0, i, c, s, e;
00103
00104
00105
00106
00107 if (__builtin_expect (ws->mode, 0) == 0)
00108 n = (ws->end_ull - ws->next_ull + ws->incr_ull - 1) / ws->incr_ull;
00109 else
00110 n = (ws->next_ull - ws->end_ull - ws->incr_ull - 1) / -ws->incr_ull;
00111 i = thr->ts.team_id;
00112 c = ws->chunk_size_ull;
00113
00114
00115
00116 s0 = (thr->ts.static_trip * (gomp_ull) nthreads + i) * c;
00117 e0 = s0 + c;
00118
00119
00120 if (s0 >= n)
00121 return 1;
00122 if (e0 > n)
00123 e0 = n;
00124
00125
00126 s = s0 * ws->incr_ull + ws->next_ull;
00127 e = e0 * ws->incr_ull + ws->next_ull;
00128
00129 *pstart = s;
00130 *pend = e;
00131
00132 if (e0 == n)
00133 thr->ts.static_trip = -1;
00134 else
00135 thr->ts.static_trip++;
00136 return 0;
00137 }
00138 }
00139
00140
00141
00142
00143
00144
00145 bool
00146 gomp_iter_ull_dynamic_next_locked (gomp_ull *pstart, gomp_ull *pend)
00147 {
00148 struct gomp_thread *thr = gomp_thread ();
00149 struct gomp_work_share *ws = thr->ts.work_share;
00150 gomp_ull start, end, chunk, left;
00151
00152 start = ws->next_ull;
00153 if (start == ws->end_ull)
00154 return false;
00155
00156 chunk = ws->chunk_size_ull;
00157 left = ws->end_ull - start;
00158 if (__builtin_expect (ws->mode & 2, 0))
00159 {
00160 if (chunk < left)
00161 chunk = left;
00162 }
00163 else
00164 {
00165 if (chunk > left)
00166 chunk = left;
00167 }
00168 end = start + chunk;
00169
00170 ws->next_ull = end;
00171 *pstart = start;
00172 *pend = end;
00173 return true;
00174 }
00175
00176
00177 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
00178
00179
00180
00181 bool
00182 gomp_iter_ull_dynamic_next (gomp_ull *pstart, gomp_ull *pend)
00183 {
00184 struct gomp_thread *thr = gomp_thread ();
00185 struct gomp_work_share *ws = thr->ts.work_share;
00186 gomp_ull start, end, nend, chunk;
00187
00188 end = ws->end_ull;
00189 chunk = ws->chunk_size_ull;
00190
00191 if (__builtin_expect (ws->mode & 1, 1))
00192 {
00193 gomp_ull tmp = __sync_fetch_and_add (&ws->next_ull, chunk);
00194 if (__builtin_expect (ws->mode & 2, 0) == 0)
00195 {
00196 if (tmp >= end)
00197 return false;
00198 nend = tmp + chunk;
00199 if (nend > end)
00200 nend = end;
00201 *pstart = tmp;
00202 *pend = nend;
00203 return true;
00204 }
00205 else
00206 {
00207 if (tmp <= end)
00208 return false;
00209 nend = tmp + chunk;
00210 if (nend < end)
00211 nend = end;
00212 *pstart = tmp;
00213 *pend = nend;
00214 return true;
00215 }
00216 }
00217
00218 start = ws->next_ull;
00219 while (1)
00220 {
00221 gomp_ull left = end - start;
00222 gomp_ull tmp;
00223
00224 if (start == end)
00225 return false;
00226
00227 if (__builtin_expect (ws->mode & 2, 0))
00228 {
00229 if (chunk < left)
00230 chunk = left;
00231 }
00232 else
00233 {
00234 if (chunk > left)
00235 chunk = left;
00236 }
00237 nend = start + chunk;
00238
00239 tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
00240 if (__builtin_expect (tmp == start, 1))
00241 break;
00242
00243 start = tmp;
00244 }
00245
00246 *pstart = start;
00247 *pend = nend;
00248 return true;
00249 }
00250 #endif
00251
00252
00253
00254
00255
00256
00257 bool
00258 gomp_iter_ull_guided_next_locked (gomp_ull *pstart, gomp_ull *pend)
00259 {
00260 struct gomp_thread *thr = gomp_thread ();
00261 struct gomp_work_share *ws = thr->ts.work_share;
00262 struct gomp_team *team = thr->ts.team;
00263 gomp_ull nthreads = team ? team->nthreads : 1;
00264 gomp_ull n, q;
00265 gomp_ull start, end;
00266
00267 if (ws->next_ull == ws->end_ull)
00268 return false;
00269
00270 start = ws->next_ull;
00271 if (__builtin_expect (ws->mode, 0) == 0)
00272 n = (ws->end_ull - start) / ws->incr_ull;
00273 else
00274 n = (start - ws->end_ull) / -ws->incr_ull;
00275 q = (n + nthreads - 1) / nthreads;
00276
00277 if (q < ws->chunk_size_ull)
00278 q = ws->chunk_size_ull;
00279 if (q <= n)
00280 end = start + q * ws->incr_ull;
00281 else
00282 end = ws->end_ull;
00283
00284 ws->next_ull = end;
00285 *pstart = start;
00286 *pend = end;
00287 return true;
00288 }
00289
00290 #if defined HAVE_SYNC_BUILTINS && defined __LP64__
00291
00292
00293
00294 bool
00295 gomp_iter_ull_guided_next (gomp_ull *pstart, gomp_ull *pend)
00296 {
00297 struct gomp_thread *thr = gomp_thread ();
00298 struct gomp_work_share *ws = thr->ts.work_share;
00299 struct gomp_team *team = thr->ts.team;
00300 gomp_ull nthreads = team ? team->nthreads : 1;
00301 gomp_ull start, end, nend, incr;
00302 gomp_ull chunk_size;
00303
00304 start = ws->next_ull;
00305 end = ws->end_ull;
00306 incr = ws->incr_ull;
00307 chunk_size = ws->chunk_size_ull;
00308
00309 while (1)
00310 {
00311 gomp_ull n, q;
00312 gomp_ull tmp;
00313
00314 if (start == end)
00315 return false;
00316
00317 if (__builtin_expect (ws->mode, 0) == 0)
00318 n = (end - start) / incr;
00319 else
00320 n = (start - end) / -incr;
00321 q = (n + nthreads - 1) / nthreads;
00322
00323 if (q < chunk_size)
00324 q = chunk_size;
00325 if (__builtin_expect (q <= n, 1))
00326 nend = start + q * incr;
00327 else
00328 nend = end;
00329
00330 tmp = __sync_val_compare_and_swap (&ws->next_ull, start, nend);
00331 if (__builtin_expect (tmp == start, 1))
00332 break;
00333
00334 start = tmp;
00335 }
00336
00337 *pstart = start;
00338 *pend = nend;
00339 return true;
00340 }
00341 #endif