00059 #include "kmp.h"
00060 #include "kmp_i18n.h"
00061 #include "kmp_str.h"
00062 #include "kmp_error.h"
00063 #if KMP_OS_WINDOWS && KMP_ARCH_X86
00064 #include <float.h>
00065 #endif
00066
00067 #if OMPT_SUPPORT
00068 #include "ompt-internal.h"
00069 #endif
00070
00071
00072
00073
00074 #ifdef KMP_STATIC_STEAL_ENABLED
00075
00076
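// Per-thread dispatch bookkeeping used when static stealing is enabled.
// Note that 'count' and 'ub' are deliberately kept adjacent: the static_steal
// scheduler updates them together as a single 64-bit word (see the union_i4
// compare-and-swap in __kmp_dispatch_next below).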
00077 template< typename T >
00078 struct dispatch_private_infoXX_template {
00079 typedef typename traits_t< T >::unsigned_t UT;
00080 typedef typename traits_t< T >::signed_t ST;
00081 UT count;
00082 T ub;
00083
00084 T lb;
00085 ST st;
00086 UT tc;
00087 T static_steal_counter;
00088
00089
00090
00091
00092
00093
00094
00095
00096
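// parm1..parm4 hold schedule-specific state; the 32-byte alignment presumably
// keeps the four of them together within a single cache line.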
00097 struct KMP_ALIGN( 32 ) {
00098 T parm1;
00099 T parm2;
00100 T parm3;
00101 T parm4;
00102 };
00103
00104 UT ordered_lower;
00105 UT ordered_upper;
00106 #if KMP_OS_WINDOWS
00107 T last_upper;
00108 #endif
00109 };
00110
00111 #else
00112
00113
00114 template< typename T >
00115 struct dispatch_private_infoXX_template {
00116 typedef typename traits_t< T >::unsigned_t UT;
00117 typedef typename traits_t< T >::signed_t ST;
00118 T lb;
00119 T ub;
00120 ST st;
00121 UT tc;
00122
00123 T parm1;
00124 T parm2;
00125 T parm3;
00126 T parm4;
00127
00128 UT count;
00129
00130 UT ordered_lower;
00131 UT ordered_upper;
00132 #if KMP_OS_WINDOWS
00133 T last_upper;
00134 #endif
00135 };
00136
00137 #endif
00138
00139
00140 template< typename T >
00141 struct KMP_ALIGN_CACHE dispatch_private_info_template {
00142
00143 union KMP_ALIGN_CACHE private_info_tmpl {
00144 dispatch_private_infoXX_template< T > p;
00145 dispatch_private_info64_t p64;
00146 } u;
00147 enum sched_type schedule;
00148 kmp_uint32 ordered;
00149 kmp_uint32 ordered_bumped;
00150 kmp_int32 ordered_dummy[KMP_MAX_ORDERED-3];
00151 dispatch_private_info * next;
00152 kmp_uint32 nomerge;
00153 kmp_uint32 type_size;
00154 enum cons_type pushed_ws;
00155 };
00156
00157
00158
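// Loop state shared by the whole team: 'iteration' is the global chunk counter
// for the dynamic/guided schedules, 'num_done' counts threads that have
// finished the loop, and 'ordered_iteration' sequences the 'ordered' clause.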
00159 template< typename UT >
00160 struct dispatch_shared_infoXX_template {
00161
00162
00163 volatile UT iteration;
00164 volatile UT num_done;
00165 volatile UT ordered_iteration;
00166 UT ordered_dummy[KMP_MAX_ORDERED-1];
00167 };
00168
00169
00170 template< typename UT >
00171 struct dispatch_shared_info_template {
00172
00173 union shared_info_tmpl {
00174 dispatch_shared_infoXX_template< UT > s;
00175 dispatch_shared_info64_t s64;
00176 } u;
00177 volatile kmp_uint32 buffer_index;
00178 };
00179
00180
00181
00182
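// Small delay/yield hooks used inside the spin-wait loops below.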
00183 static void
00184 __kmp_static_delay( int arg )
00185 {
00186
00187 #if KMP_ARCH_X86_64 && KMP_OS_LINUX
00188 #else
00189 KMP_ASSERT( arg >= 0 );
00190 #endif
00191 }
00192
00193 static void
00194 __kmp_static_yield( int arg )
00195 {
00196 __kmp_yield( arg );
00197 }
00198
00199 #undef USE_TEST_LOCKS
00200
00201
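// Type-dispatched wrappers around the atomic primitives. The unspecialized
// templates are never meant to be called -- they only assert; the 32-bit and
// 64-bit specializations below are the ones actually used.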
00202 template< typename T >
00203 static __forceinline T
00204 test_then_add( volatile T *p, T d ) { KMP_ASSERT(0); };
00205
00206 template<>
00207 __forceinline kmp_int32
00208 test_then_add< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 d )
00209 {
00210 kmp_int32 r;
00211 r = KMP_TEST_THEN_ADD32( p, d );
00212 return r;
00213 }
00214
00215 template<>
00216 __forceinline kmp_int64
00217 test_then_add< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 d )
00218 {
00219 kmp_int64 r;
00220 r = KMP_TEST_THEN_ADD64( p, d );
00221 return r;
00222 }
00223
00224
00225 template< typename T >
00226 static __forceinline T
00227 test_then_inc_acq( volatile T *p ) { KMP_ASSERT(0); };
00228
00229 template<>
00230 __forceinline kmp_int32
00231 test_then_inc_acq< kmp_int32 >( volatile kmp_int32 *p )
00232 {
00233 kmp_int32 r;
00234 r = KMP_TEST_THEN_INC_ACQ32( p );
00235 return r;
00236 }
00237
00238 template<>
00239 __forceinline kmp_int64
00240 test_then_inc_acq< kmp_int64 >( volatile kmp_int64 *p )
00241 {
00242 kmp_int64 r;
00243 r = KMP_TEST_THEN_INC_ACQ64( p );
00244 return r;
00245 }
00246
00247
00248 template< typename T >
00249 static __forceinline T
00250 test_then_inc( volatile T *p ) { KMP_ASSERT(0); };
00251
00252 template<>
00253 __forceinline kmp_int32
00254 test_then_inc< kmp_int32 >( volatile kmp_int32 *p )
00255 {
00256 kmp_int32 r;
00257 r = KMP_TEST_THEN_INC32( p );
00258 return r;
00259 }
00260
00261 template<>
00262 __forceinline kmp_int64
00263 test_then_inc< kmp_int64 >( volatile kmp_int64 *p )
00264 {
00265 kmp_int64 r;
00266 r = KMP_TEST_THEN_INC64( p );
00267 return r;
00268 }
00269
00270
00271 template< typename T >
00272 static __forceinline kmp_int32
00273 compare_and_swap( volatile T *p, T c, T s ) { KMP_ASSERT(0); };
00274
00275 template<>
00276 __forceinline kmp_int32
00277 compare_and_swap< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 c, kmp_int32 s )
00278 {
00279 return KMP_COMPARE_AND_STORE_REL32( p, c, s );
00280 }
00281
00282 template<>
00283 __forceinline kmp_int32
00284 compare_and_swap< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 c, kmp_int64 s )
00285 {
00286 return KMP_COMPARE_AND_STORE_REL64( p, c, s );
00287 }
00288
00289
00290
00291
00292
00293
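// Spin until pred( *spinner, checker ) becomes true, yielding according to the
// library's yield heuristics. Returns the last value read from the spinner.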
00294 template< typename UT >
00295
00296 static UT
00297 __kmp_wait_yield( volatile UT * spinner,
00298 UT checker,
00299 kmp_uint32 (* pred)( UT, UT )
00300 )
00301 {
00302
00303 register volatile UT * spin = spinner;
00304 register UT check = checker;
00305 register kmp_uint32 spins;
00306 register kmp_uint32 (*f) ( UT, UT ) = pred;
00307 register UT r;
00308
00309 KMP_INIT_YIELD( spins );
00310
00311 while(!f(r = *spin, check)) {
00312
00313
00314
00315
00316
00317 __kmp_static_delay(TRUE);
00318
00319
00320
00321
00322 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
00323 KMP_YIELD_SPIN( spins );
00324 }
00325 return r;
00326 }
00327
00328 template< typename UT >
00329 static kmp_uint32 __kmp_eq( UT value, UT checker) {
00330 return value == checker;
00331 }
00332
00333 template< typename UT >
00334 static kmp_uint32 __kmp_neq( UT value, UT checker) {
00335 return value != checker;
00336 }
00337
00338 template< typename UT >
00339 static kmp_uint32 __kmp_lt( UT value, UT checker) {
00340 return value < checker;
00341 }
00342
00343 template< typename UT >
00344 static kmp_uint32 __kmp_ge( UT value, UT checker) {
00345 return value >= checker;
00346 }
00347
00348 template< typename UT >
00349 static kmp_uint32 __kmp_le( UT value, UT checker) {
00350 return value <= checker;
00351 }
00352
00353
00354
00355
00356
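// Ordered-clause support for dynamically scheduled loops: __kmp_dispatch_deo
// waits until it is the current chunk's turn to enter the ordered region and
// __kmp_dispatch_dxo marks it as left. The *_error variants are installed when
// the loop itself is not ordered, so consistency checking can diagnose a
// misplaced 'ordered' directive.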
00357 static void
00358 __kmp_dispatch_deo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
00359 {
00360 kmp_info_t *th;
00361
00362 KMP_DEBUG_ASSERT( gtid_ref );
00363
00364 if ( __kmp_env_consistency_check ) {
00365 th = __kmp_threads[*gtid_ref];
00366 if ( th -> th.th_root -> r.r_active
00367 && ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) ) {
00368 __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL );
00369 }
00370 }
00371 }
00372
00373 template< typename UT >
00374 static void
00375 __kmp_dispatch_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
00376 {
00377 typedef typename traits_t< UT >::signed_t ST;
00378 dispatch_private_info_template< UT > * pr;
00379
00380 int gtid = *gtid_ref;
00381
00382 kmp_info_t *th = __kmp_threads[ gtid ];
00383 KMP_DEBUG_ASSERT( th -> th.th_dispatch );
00384
00385 KD_TRACE(100, ("__kmp_dispatch_deo: T#%d called\n", gtid ) );
00386 if ( __kmp_env_consistency_check ) {
00387 pr = reinterpret_cast< dispatch_private_info_template< UT >* >
00388 ( th -> th.th_dispatch -> th_dispatch_pr_current );
00389 if ( pr -> pushed_ws != ct_none ) {
00390 __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL );
00391 }
00392 }
00393
00394 if ( ! th -> th.th_team -> t.t_serialized ) {
00395 dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
00396 ( th -> th.th_dispatch -> th_dispatch_sh_current );
00397 UT lower;
00398
00399 if ( ! __kmp_env_consistency_check ) {
00400 pr = reinterpret_cast< dispatch_private_info_template< UT >* >
00401 ( th -> th.th_dispatch -> th_dispatch_pr_current );
00402 }
00403 lower = pr->u.p.ordered_lower;
00404
00405 #if ! defined( KMP_GOMP_COMPAT )
00406 if ( __kmp_env_consistency_check ) {
00407 if ( pr->ordered_bumped ) {
00408 struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
00409 __kmp_error_construct2(
00410 kmp_i18n_msg_CnsMultipleNesting,
00411 ct_ordered_in_pdo, loc_ref,
00412 & p->stack_data[ p->w_top ]
00413 );
00414 }
00415 }
00416 #endif
00417
00418 KMP_MB();
00419 #ifdef KMP_DEBUG
00420 {
00421 const char * buff;
00422
00423 buff = __kmp_str_format(
00424 "__kmp_dispatch_deo: T#%%d before wait: ordered_iter:%%%s lower:%%%s\n",
00425 traits_t< UT >::spec, traits_t< UT >::spec );
00426 KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
00427 __kmp_str_free( &buff );
00428 }
00429 #endif
00430
00431 __kmp_wait_yield< UT >( &sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
00432 );
00433 KMP_MB();
00434 #ifdef KMP_DEBUG
00435 {
00436 const char * buff;
00437
00438 buff = __kmp_str_format(
00439 "__kmp_dispatch_deo: T#%%d after wait: ordered_iter:%%%s lower:%%%s\n",
00440 traits_t< UT >::spec, traits_t< UT >::spec );
00441 KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
00442 __kmp_str_free( &buff );
00443 }
00444 #endif
00445 }
00446 KD_TRACE(100, ("__kmp_dispatch_deo: T#%d returned\n", gtid ) );
00447 }
00448
00449 static void
00450 __kmp_dispatch_dxo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
00451 {
00452 kmp_info_t *th;
00453
00454 if ( __kmp_env_consistency_check ) {
00455 th = __kmp_threads[*gtid_ref];
00456 if ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) {
00457 __kmp_pop_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref );
00458 }
00459 }
00460 }
00461
00462 template< typename UT >
00463 static void
00464 __kmp_dispatch_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
00465 {
00466 typedef typename traits_t< UT >::signed_t ST;
00467 dispatch_private_info_template< UT > * pr;
00468
00469 int gtid = *gtid_ref;
00470
00471 kmp_info_t *th = __kmp_threads[ gtid ];
00472 KMP_DEBUG_ASSERT( th -> th.th_dispatch );
00473
00474 KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d called\n", gtid ) );
00475 if ( __kmp_env_consistency_check ) {
00476 pr = reinterpret_cast< dispatch_private_info_template< UT >* >
00477 ( th -> th.th_dispatch -> th_dispatch_pr_current );
00478 if ( pr -> pushed_ws != ct_none ) {
00479 __kmp_pop_sync( gtid, ct_ordered_in_pdo, loc_ref );
00480 }
00481 }
00482
00483 if ( ! th -> th.th_team -> t.t_serialized ) {
00484 dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
00485 ( th -> th.th_dispatch -> th_dispatch_sh_current );
00486
00487 if ( ! __kmp_env_consistency_check ) {
00488 pr = reinterpret_cast< dispatch_private_info_template< UT >* >
00489 ( th -> th.th_dispatch -> th_dispatch_pr_current );
00490 }
00491
00492 #if ! defined( KMP_GOMP_COMPAT )
00493 if ( __kmp_env_consistency_check ) {
00494 if ( pr->ordered_bumped != 0 ) {
00495 struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
00496
00497 __kmp_error_construct2(
00498 kmp_i18n_msg_CnsMultipleNesting,
00499 ct_ordered_in_pdo, loc_ref,
00500 & p->stack_data[ p->w_top ]
00501 );
00502 }
00503 }
00504 #endif
00505
00506 KMP_MB();
00507
00508 pr->ordered_bumped += 1;
00509
00510 KD_TRACE(1000, ("__kmp_dispatch_dxo: T#%d bumping ordered ordered_bumped=%d\n",
00511 gtid, pr->ordered_bumped ) );
00512
00513 KMP_MB();
00514
00515
00516 test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration );
00517
00518 KMP_MB();
00519 }
00520 KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d returned\n", gtid ) );
00521 }
00522
00523
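// x^y for 0 < x < 1, computed by square-and-multiply.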
00524 template< typename UT >
00525 static __forceinline long double
00526 __kmp_pow(long double x, UT y) {
00527 long double s=1.0L;
00528
00529 KMP_DEBUG_ASSERT(x > 0.0 && x < 1.0);
00530
00531 while(y) {
00532 if ( y & 1 )
00533 s *= x;
00534 x *= x;
00535 y >>= 1;
00536 }
00537 return s;
00538 }
00539
00540
00541
00542
00543
00544
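// Number of iterations still remaining after 'idx' chunks have been handed out
// under guided (analytical) scheduling, i.e. ceil( tc * base^idx ), where base
// is the factor (1 - 1/(2*nproc)) computed in __kmp_dispatch_init.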
00545 template< typename T >
00546 static __inline typename traits_t< T >::unsigned_t
00547 __kmp_dispatch_guided_remaining(
00548 T tc,
00549 typename traits_t< T >::floating_t base,
00550 typename traits_t< T >::unsigned_t idx
00551 ) {
00552
00553
00554
00555
00556
00557
00558
00559 typedef typename traits_t< T >::unsigned_t UT;
00560
00561 long double x = tc * __kmp_pow< UT >(base, idx);
00562 UT r = (UT) x;
00563 if ( x == r )
00564 return r;
00565 return r + 1;
00566 }
00567
00568
00569
00570
00571
00572
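// Tuning constants for the guided schedules (see the kmp_sch_guided_* cases in
// __kmp_dispatch_init).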
00573 static int guided_int_param = 2;
00574 static double guided_flt_param = 0.5;
00575
00576
00577
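// __kmp_dispatch_init: set up the per-thread (pr) and per-team (sh) dispatch
// buffers for a dynamically scheduled loop -- normalize the schedule kind,
// compute the trip count and precompute the schedule-specific parm1..parm4.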
00578 template< typename T >
00579 static void
00580 __kmp_dispatch_init(
00581 ident_t * loc,
00582 int gtid,
00583 enum sched_type schedule,
00584 T lb,
00585 T ub,
00586 typename traits_t< T >::signed_t st,
00587 typename traits_t< T >::signed_t chunk,
00588 int push_ws
00589 ) {
00590 typedef typename traits_t< T >::unsigned_t UT;
00591 typedef typename traits_t< T >::signed_t ST;
00592 typedef typename traits_t< T >::floating_t DBL;
00593 static const int ___kmp_size_type = sizeof( UT );
00594
00595 int active;
00596 T tc;
00597 kmp_info_t * th;
00598 kmp_team_t * team;
00599 kmp_uint32 my_buffer_index;
00600 dispatch_private_info_template< T > * pr;
00601 dispatch_shared_info_template< UT > volatile * sh;
00602
00603 KMP_BUILD_ASSERT( sizeof( dispatch_private_info_template< T > ) == sizeof( dispatch_private_info ) );
00604 KMP_BUILD_ASSERT( sizeof( dispatch_shared_info_template< UT > ) == sizeof( dispatch_shared_info ) );
00605
00606 if ( ! TCR_4( __kmp_init_parallel ) )
00607 __kmp_parallel_initialize();
00608
00609 #ifdef KMP_DEBUG
00610 {
00611 const char * buff;
00612
00613 buff = __kmp_str_format(
00614 "__kmp_dispatch_init: T#%%d called: schedule:%%d chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
00615 traits_t< ST >::spec, traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
00616 KD_TRACE(10, ( buff, gtid, schedule, chunk, lb, ub, st ) );
00617 __kmp_str_free( &buff );
00618 }
00619 #endif
00620
00621 th = __kmp_threads[ gtid ];
00622 team = th -> th.th_team;
00623 active = ! team -> t.t_serialized;
00624 th->th.th_ident = loc;
00625
00626 if ( ! active ) {
00627 pr = reinterpret_cast< dispatch_private_info_template< T >* >
00628 ( th -> th.th_dispatch -> th_disp_buffer );
00629 } else {
00630 KMP_DEBUG_ASSERT( th->th.th_dispatch ==
00631 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );
00632
00633 my_buffer_index = th->th.th_dispatch->th_disp_index ++;
00634
00635
00636 pr = reinterpret_cast< dispatch_private_info_template< T > * >
00637 ( &th -> th.th_dispatch -> th_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] );
00638 sh = reinterpret_cast< dispatch_shared_info_template< UT > volatile * >
00639 ( &team -> t.t_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] );
00640 }
00641
00642
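// Split the 'nomerge' and 'ordered' modifiers out of the schedule kind and
// record them separately; what remains is a plain kmp_sch_* value.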
00643 if ( (schedule >= kmp_nm_lower) && (schedule < kmp_nm_upper) ) {
00644 pr->nomerge = TRUE;
00645 schedule = (enum sched_type)(((int)schedule) - (kmp_nm_lower - kmp_sch_lower));
00646 } else {
00647 pr->nomerge = FALSE;
00648 }
00649 pr->type_size = ___kmp_size_type;
00650 if ( kmp_ord_lower & schedule ) {
00651 pr->ordered = TRUE;
00652 schedule = (enum sched_type)(((int)schedule) - (kmp_ord_lower - kmp_sch_lower));
00653 } else {
00654 pr->ordered = FALSE;
00655 }
00656 if ( schedule == kmp_sch_static ) {
00657 schedule = __kmp_static;
00658 } else {
00659 if ( schedule == kmp_sch_runtime ) {
00660 #if OMP_30_ENABLED
00661
00662 schedule = team -> t.t_sched.r_sched_type;
00663
00664 if ( schedule == kmp_sch_guided_chunked ) {
00665 schedule = __kmp_guided;
00666 } else if ( schedule == kmp_sch_static ) {
00667 schedule = __kmp_static;
00668 }
00669
00670 chunk = team -> t.t_sched.chunk;
00671 #else
00672 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
00673
00674 schedule = r_sched.r_sched_type;
00675 chunk = r_sched.chunk;
00676 #endif
00677
00678 #ifdef KMP_DEBUG
00679 {
00680 const char * buff;
00681
00682 buff = __kmp_str_format(
00683 "__kmp_dispatch_init: T#%%d new: schedule:%%d chunk:%%%s\n",
00684 traits_t< ST >::spec );
00685 KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
00686 __kmp_str_free( &buff );
00687 }
00688 #endif
00689 } else {
00690 if ( schedule == kmp_sch_guided_chunked ) {
00691 schedule = __kmp_guided;
00692 }
00693 if ( chunk <= 0 ) {
00694 chunk = KMP_DEFAULT_CHUNK;
00695 }
00696 }
00697
00698 #if OMP_30_ENABLED
00699 if ( schedule == kmp_sch_auto ) {
00700
00701 schedule = __kmp_auto;
00702 #ifdef KMP_DEBUG
00703 {
00704 const char * buff;
00705
00706 buff = __kmp_str_format(
00707 "__kmp_dispatch_init: kmp_sch_auto: T#%%d new: schedule:%%d chunk:%%%s\n",
00708 traits_t< ST >::spec );
00709 KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
00710 __kmp_str_free( &buff );
00711 }
00712 #endif
00713 }
00714 #endif // OMP_30_ENABLED
00715
00716
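// For very large teams the analytical guided schedule is replaced by the
// iterative one and a warning is issued.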
00717 if ( team->t.t_nproc > 1<<20 && schedule == kmp_sch_guided_analytical_chunked ) {
00718 schedule = kmp_sch_guided_iterative_chunked;
00719 KMP_WARNING( DispatchManyThreads );
00720 }
00721 pr->u.p.parm1 = chunk;
00722 }
00723 KMP_ASSERT2( (kmp_sch_lower < schedule && schedule < kmp_sch_upper),
00724 "unknown scheduling type" );
00725
00726 pr->u.p.count = 0;
00727
00728 if ( __kmp_env_consistency_check ) {
00729 if ( st == 0 ) {
00730 __kmp_error_construct(
00731 kmp_i18n_msg_CnsLoopIncrZeroProhibited,
00732 ( pr->ordered ? ct_pdo_ordered : ct_pdo ), loc
00733 );
00734 }
00735 }
00736
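// Trip count: tc = (ub - lb + st) / st, forced to 0 when the range is empty.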
00737 tc = ( ub - lb + st );
00738 if ( st != 1 ) {
00739 if ( st < 0 ) {
00740 if ( lb < ub ) {
00741 tc = 0;
00742 } else {
00743 tc = (ST)tc / st;
00744 }
00745 } else {
00746 if ( ub < lb ) {
00747 tc = 0;
00748 } else {
00749 tc /= st;
00750 }
00751 }
00752 } else if ( ub < lb ) {
00753 tc = 0;
00754 }
00755
00756 pr->u.p.lb = lb;
00757 pr->u.p.ub = ub;
00758 pr->u.p.st = st;
00759 pr->u.p.tc = tc;
00760
00761 #if KMP_OS_WINDOWS
00762 pr->u.p.last_upper = ub + st;
00763 #endif
00764
00765
00766
00767 if ( active ) {
00768 if ( pr->ordered == 0 ) {
00769 th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo_error;
00770 th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo_error;
00771 } else {
00772 pr->ordered_bumped = 0;
00773
00774 pr->u.p.ordered_lower = 1;
00775 pr->u.p.ordered_upper = 0;
00776
00777 th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo< UT >;
00778 th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo< UT >;
00779 }
00780 }
00781
00782 if ( __kmp_env_consistency_check ) {
00783 enum cons_type ws = pr->ordered ? ct_pdo_ordered : ct_pdo;
00784 if ( push_ws ) {
00785 __kmp_push_workshare( gtid, ws, loc );
00786 pr->pushed_ws = ws;
00787 } else {
00788 __kmp_check_workshare( gtid, ws, loc );
00789 pr->pushed_ws = ct_none;
00790 }
00791 }
00792
00793 switch ( schedule ) {
00794 #if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
00795 case kmp_sch_static_steal:
00796 {
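// Static stealing: pre-split the chunk indices evenly across the team. Each
// thread owns the index range [count, ub) and, once that is exhausted, tries
// to steal from another thread (see __kmp_dispatch_next).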
00797 T nproc = team->t.t_nproc;
00798 T ntc, init;
00799
00800 KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_steal case\n", gtid ) );
00801
00802 ntc = (tc % chunk ? 1 : 0) + tc / chunk;
00803 if ( nproc > 1 && ntc >= nproc ) {
00804 T id = __kmp_tid_from_gtid(gtid);
00805 T small_chunk, extras;
00806
00807 small_chunk = ntc / nproc;
00808 extras = ntc % nproc;
00809
00810 init = id * small_chunk + ( id < extras ? id : extras );
00811 pr->u.p.count = init;
00812 pr->u.p.ub = init + small_chunk + ( id < extras ? 1 : 0 );
00813
00814 pr->u.p.parm2 = lb;
00815
00816 pr->u.p.parm4 = id;
00817 pr->u.p.st = st;
00818 break;
00819 } else {
00820 KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_balanced\n",
00821 gtid ) );
00822 schedule = kmp_sch_static_balanced;
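// fall through into the kmp_sch_static_balanced case below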
00823
00824 }
00825
00826 }
00827 #endif
00828 case kmp_sch_static_balanced:
00829 {
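// Statically balanced: every thread gets at most one contiguous block of
// iterations, computed directly from its thread id.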
00830 T nproc = team->t.t_nproc;
00831 T init, limit;
00832
00833 KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_balanced case\n",
00834 gtid ) );
00835
00836 if ( nproc > 1 ) {
00837 T id = __kmp_tid_from_gtid(gtid);
00838
00839 if ( tc < nproc ) {
00840 if ( id < tc ) {
00841 init = id;
00842 limit = id;
00843 pr->u.p.parm1 = (id == tc - 1);
00844 } else {
00845 pr->u.p.count = 1;
00846 pr->u.p.parm1 = FALSE;
00847 break;
00848 }
00849 } else {
00850 T small_chunk = tc / nproc;
00851 T extras = tc % nproc;
00852 init = id * small_chunk + (id < extras ? id : extras);
00853 limit = init + small_chunk - (id < extras ? 0 : 1);
00854 pr->u.p.parm1 = (id == nproc - 1);
00855 }
00856 } else {
00857 if ( tc > 0 ) {
00858 init = 0;
00859 limit = tc - 1;
00860 pr->u.p.parm1 = TRUE;
00861 } else {
00862
00863 pr->u.p.count = 1;
00864 pr->u.p.parm1 = FALSE;
00865 break;
00866 }
00867 }
00868 if ( st == 1 ) {
00869 pr->u.p.lb = lb + init;
00870 pr->u.p.ub = lb + limit;
00871 } else {
00872 T ub_tmp = lb + limit * st;
00873 pr->u.p.lb = lb + init * st;
00874
00875 if ( st > 0 ) {
00876 pr->u.p.ub = ( ub_tmp + st > ub ? ub : ub_tmp );
00877 } else {
00878 pr->u.p.ub = ( ub_tmp + st < ub ? ub : ub_tmp );
00879 }
00880 }
00881 if ( pr->ordered ) {
00882 pr->u.p.ordered_lower = init;
00883 pr->u.p.ordered_upper = limit;
00884 }
00885 break;
00886 }
00887 case kmp_sch_guided_iterative_chunked :
00888 {
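// Guided, iterative flavour: each successful grab takes roughly a fixed
// fraction (parm3) of the iterations still remaining, but never less than the
// requested chunk; small loops fall back to plain dynamic chunking.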
00889 int nproc = team->t.t_nproc;
00890 KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_guided_iterative_chunked case\n",gtid));
00891
00892 if ( nproc > 1 ) {
00893 if ( (2UL * chunk + 1 ) * nproc >= tc ) {
00894
00895 schedule = kmp_sch_dynamic_chunked;
00896 } else {
00897
00898 pr->u.p.parm2 = guided_int_param * nproc * ( chunk + 1 );
00899 *(double*)&pr->u.p.parm3 = guided_flt_param / nproc;
00900 }
00901 } else {
00902 KD_TRACE(100,("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",gtid));
00903 schedule = kmp_sch_static_greedy;
00904
00905 KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));
00906 pr->u.p.parm1 = tc;
00907 }
00908 }
00909 break;
00910 case kmp_sch_guided_analytical_chunked:
00911 {
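// Guided, analytical flavour: chunk sizes follow a geometric series with
// ratio x = 1 - 1/(2*nproc). Here we precompute 'cross', the chunk index after
// which fixed chunks of the requested size are used instead.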
00912 int nproc = team->t.t_nproc;
00913 KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_guided_analytical_chunked case\n", gtid));
00914
00915 if ( nproc > 1 ) {
00916 if ( (2UL * chunk + 1 ) * nproc >= tc ) {
00917
00918 schedule = kmp_sch_dynamic_chunked;
00919 } else {
00920
00921 DBL x;
00922
00923 #if KMP_OS_WINDOWS && KMP_ARCH_X86
00924
00925
00926
00927
00928
00929
00930
00931
00932
00933
00934
00935
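// Temporarily set the x87 precision control to 64-bit mantissa so the
// long double arithmetic below behaves consistently; the old control word is
// restored at the end of this block.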
00936 unsigned int oldFpcw = _control87(0,0x30000);
00937 #endif
00938
00939 long double target = ((long double)chunk * 2 + 1) * nproc / tc;
00940
00941
00942
00943 UT cross;
00944
00945
00946 x = (long double)1.0 - (long double)0.5 / nproc;
00947
00948 #ifdef KMP_DEBUG
00949 {
00950 struct _test_a {
00951 char a;
00952 union {
00953 char b;
00954 DBL d;
00955 };
00956 } t;
00957 ptrdiff_t natural_alignment = (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
00958
00959 KMP_DEBUG_ASSERT( ( ( (ptrdiff_t)&pr->u.p.parm3 ) & ( natural_alignment ) ) == 0 );
00960 }
00961 #endif // KMP_DEBUG
00962
00963
00964 *(DBL*)&pr->u.p.parm3 = x;
00965
00966
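// Find 'cross', the smallest chunk index k with x^k <= target, by doubling an
// upper bound and then bisecting.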
00967 {
00968 UT left, right, mid;
00969 long double p;
00970
00971
00972
00973
00974
00975
00976 right = 229;
00977 p = __kmp_pow< UT >(x,right);
00978 if ( p > target ) {
00979 do{
00980 p *= p;
00981 right <<= 1;
00982 } while(p>target && right < (1<<27));
00983 left = right >> 1;
00984 } else {
00985 left = 0;
00986 }
00987
00988
00989 while ( left + 1 < right ) {
00990 mid = (left + right) / 2;
00991 if ( __kmp_pow< UT >(x,mid) > target ) {
00992 left = mid;
00993 } else {
00994 right = mid;
00995 }
00996 }
00997 cross = right;
00998 }
00999
01000 KMP_ASSERT(cross && __kmp_pow< UT >(x, cross - 1) > target && __kmp_pow< UT >(x, cross) <= target);
01001
01002
01003 pr->u.p.parm2 = cross;
01004
01005
01006 #if ( ( KMP_OS_LINUX || KMP_OS_WINDOWS ) && KMP_ARCH_X86 ) && ( ! defined( KMP_I8 ) )
01007 #define GUIDED_ANALYTICAL_WORKAROUND (*( DBL * )&pr->u.p.parm3)
01008 #else
01009 #define GUIDED_ANALYTICAL_WORKAROUND (x)
01010 #endif
01011
01012 pr->u.p.count = tc - __kmp_dispatch_guided_remaining(tc, GUIDED_ANALYTICAL_WORKAROUND, cross) - cross * chunk;
01013 #if KMP_OS_WINDOWS && KMP_ARCH_X86
01014
01015 _control87(oldFpcw,0x30000);
01016 #endif
01017 }
01018 } else {
01019 KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",
01020 gtid ) );
01021 schedule = kmp_sch_static_greedy;
01022
01023 pr->u.p.parm1 = tc;
01024 }
01025 }
01026 break;
01027 case kmp_sch_static_greedy:
01028 KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));
01029 pr->u.p.parm1 = ( team -> t.t_nproc > 1 ) ?
01030 ( tc + team->t.t_nproc - 1 ) / team->t.t_nproc :
01031 tc;
01032 break;
01033 case kmp_sch_static_chunked :
01034 case kmp_sch_dynamic_chunked :
01035 KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid));
01036 break;
01037 case kmp_sch_trapezoidal :
01038 {
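// Trapezoidal: chunk sizes shrink linearly from parm2 (the first chunk, about
// tc/(2*nproc)) down to parm1 (the requested minimum); parm3 is the resulting
// number of chunks and parm4 the per-chunk decrement.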
01039
01040
01041 T parm1, parm2, parm3, parm4;
01042 KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_trapezoidal case\n", gtid ) );
01043
01044 parm1 = chunk;
01045
01046
01047 parm2 = ( tc / (2 * team->t.t_nproc) );
01048
01049 if ( parm2 < 1 ) {
01050 parm2 = 1;
01051 }
01052
01053
01054
01055
01056 if ( parm1 < 1 ) {
01057 parm1 = 1;
01058 } else if ( parm1 > parm2 ) {
01059 parm1 = parm2;
01060 }
01061
01062
01063 parm3 = ( parm2 + parm1 );
01064 parm3 = ( 2 * tc + parm3 - 1) / parm3;
01065
01066 if ( parm3 < 2 ) {
01067 parm3 = 2;
01068 }
01069
01070
01071 parm4 = ( parm3 - 1 );
01072 parm4 = ( parm2 - parm1 ) / parm4;
01073
01074
01075
01076
01077
01078
01079 pr->u.p.parm1 = parm1;
01080 pr->u.p.parm2 = parm2;
01081 pr->u.p.parm3 = parm3;
01082 pr->u.p.parm4 = parm4;
01083 }
01084 break;
01085
01086 default:
01087 {
01088 __kmp_msg(
01089 kmp_ms_fatal,
01090 KMP_MSG( UnknownSchedTypeDetected ),
01091 KMP_HNT( GetNewerLibrary ),
01092 __kmp_msg_null
01093 );
01094 }
01095 break;
01096 }
01097 pr->schedule = schedule;
01098 if ( active ) {
01099
01100
01101 KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d sh->buffer_index:%d\n",
01102 gtid, my_buffer_index, sh->buffer_index) );
01103 __kmp_wait_yield< kmp_uint32 >( & sh->buffer_index, my_buffer_index, __kmp_eq< kmp_uint32 >
01104 );
01105
01106
01107 KMP_MB();
01108 KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d sh->buffer_index:%d\n",
01109 gtid, my_buffer_index, sh->buffer_index) );
01110
01111 th -> th.th_dispatch -> th_dispatch_pr_current = (dispatch_private_info_t*) pr;
01112 th -> th.th_dispatch -> th_dispatch_sh_current = (dispatch_shared_info_t*) sh;
}
01114 #ifdef KMP_DEBUG
01115 {
01116 const char * buff;
01117
01118 buff = __kmp_str_format(
01119 "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s lb:%%%s ub:%%%s" \
01120 " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s" \
01121 " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
01122 traits_t< UT >::spec, traits_t< T >::spec, traits_t< T >::spec,
01123 traits_t< ST >::spec, traits_t< UT >::spec, traits_t< UT >::spec,
01124 traits_t< UT >::spec, traits_t< UT >::spec, traits_t< T >::spec,
01125 traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec );
01126 KD_TRACE(10, ( buff,
01127 gtid, pr->schedule, pr->ordered, pr->u.p.lb, pr->u.p.ub,
01128 pr->u.p.st, pr->u.p.tc, pr->u.p.count,
01129 pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,
01130 pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4 ) );
01131 __kmp_str_free( &buff );
01132 }
01133 #endif
01134 #if ( KMP_STATIC_STEAL_ENABLED )
01135 if ( ___kmp_size_type < 8 ) {
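// For induction types smaller than 8 bytes the stealing code packs {count, ub}
// into a single 64-bit word; bump this loop's static_steal_counter so that
// __kmp_dispatch_next can tell whether a prospective victim is still working
// on the same loop instance.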
01136
01137
01138
01139
01140
01141 if( schedule == kmp_sch_static_steal ) {
01142
01143
01144 volatile T * p = &pr->u.p.static_steal_counter;
01145 *p = *p + 1;
01146 }
01147 }
01148 #endif // ( KMP_STATIC_STEAL_ENABLED && USE_STEALING )
01149 #if OMPT_SUPPORT
01150 int tid = __kmp_tid_from_gtid( gtid );
if (ompt_status == ompt_status_track_callback) {
01152 if (ompt_callbacks.ompt_callback(ompt_event_loop_begin)) {
01153 ompt_callbacks.ompt_callback(ompt_event_loop_begin)
01154 (team->t.ompt_team_info.parallel_id,
01155 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
01156 }
01157 }
01158 #endif
01159 }
01160
01161
01162
01163
01164
01165
01166
01167
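// __kmp_dispatch_finish: called after an ordered, dynamically scheduled chunk
// completes -- wait until the shared ordered_iteration reaches this chunk's
// lower bound and then advance it (or just clear ordered_bumped if the ordered
// region already accounted for it).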
01168 template< typename UT >
01169 static void
01170 __kmp_dispatch_finish( int gtid, ident_t *loc )
01171 {
01172 typedef typename traits_t< UT >::signed_t ST;
01173 kmp_info_t *th = __kmp_threads[ gtid ];
01174
01175 KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid ) );
01176 if ( ! th -> th.th_team -> t.t_serialized ) {
01177
01178 dispatch_private_info_template< UT > * pr =
01179 reinterpret_cast< dispatch_private_info_template< UT >* >
01180 ( th->th.th_dispatch->th_dispatch_pr_current );
01181 dispatch_shared_info_template< UT > volatile * sh =
01182 reinterpret_cast< dispatch_shared_info_template< UT >volatile* >
01183 ( th->th.th_dispatch->th_dispatch_sh_current );
01184 KMP_DEBUG_ASSERT( pr );
01185 KMP_DEBUG_ASSERT( sh );
01186 KMP_DEBUG_ASSERT( th->th.th_dispatch ==
01187 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );
01188
01189 if ( pr->ordered_bumped ) {
01190 KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
01191 gtid ) );
01192 pr->ordered_bumped = 0;
01193 } else {
01194 UT lower = pr->u.p.ordered_lower;
01195
01196 #ifdef KMP_DEBUG
01197 {
01198 const char * buff;
01199
01200 buff = __kmp_str_format(
01201 "__kmp_dispatch_finish: T#%%d before wait: ordered_iteration:%%%s lower:%%%s\n",
01202 traits_t< UT >::spec, traits_t< UT >::spec );
01203 KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
01204 __kmp_str_free( &buff );
01205 }
01206 #endif
01207
01208 __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
01209 );
01210 KMP_MB();
01211 #ifdef KMP_DEBUG
01212 {
01213 const char * buff;
01214
01215 buff = __kmp_str_format(
01216 "__kmp_dispatch_finish: T#%%d after wait: ordered_iteration:%%%s lower:%%%s\n",
01217 traits_t< UT >::spec, traits_t< UT >::spec );
01218 KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
01219 __kmp_str_free( &buff );
01220 }
01221 #endif
01222
01223 test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration );
01224 }
01225 }
01226 KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid ) );
01227 #if OMPT_SUPPORT
01228 kmp_info_t *this_thr = __kmp_threads[ gtid ];
01229 kmp_team_t *team = this_thr -> th.th_team;
01230 int tid = __kmp_tid_from_gtid( gtid );
if (ompt_status == ompt_status_track_callback) {
01232 if (ompt_callbacks.ompt_callback(ompt_event_loop_end)) {
01233 ompt_callbacks.ompt_callback(ompt_event_loop_end)
01234 (team->t.ompt_team_info.parallel_id,
01235 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
01236 }
01237 }
01238 #endif
01239 }
01240
01241 #ifdef KMP_GOMP_COMPAT
01242
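// GOMP-compatibility variant of the above: retires a whole chunk's worth of
// ordered iterations at once.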
01243 template< typename UT >
01244 static void
01245 __kmp_dispatch_finish_chunk( int gtid, ident_t *loc )
01246 {
01247 typedef typename traits_t< UT >::signed_t ST;
01248 kmp_info_t *th = __kmp_threads[ gtid ];
01249
01250 KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid ) );
01251 if ( ! th -> th.th_team -> t.t_serialized ) {
01252
01253 dispatch_private_info_template< UT > * pr =
01254 reinterpret_cast< dispatch_private_info_template< UT >* >
01255 ( th->th.th_dispatch->th_dispatch_pr_current );
01256 dispatch_shared_info_template< UT > volatile * sh =
01257 reinterpret_cast< dispatch_shared_info_template< UT >volatile* >
01258 ( th->th.th_dispatch->th_dispatch_sh_current );
01259 KMP_DEBUG_ASSERT( pr );
01260 KMP_DEBUG_ASSERT( sh );
01261 KMP_DEBUG_ASSERT( th->th.th_dispatch ==
01262 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );
01263
01264
01265 UT lower = pr->u.p.ordered_lower;
01266 UT upper = pr->u.p.ordered_upper;
01267 UT inc = upper - lower + 1;
01268
01269 if ( pr->ordered_bumped == inc ) {
01270 KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
01271 gtid ) );
01272 pr->ordered_bumped = 0;
01273 } else {
01274 inc -= pr->ordered_bumped;
01275
01276 #ifdef KMP_DEBUG
01277 {
01278 const char * buff;
01279
01280 buff = __kmp_str_format(
01281 "__kmp_dispatch_finish_chunk: T#%%d before wait: " \
01282 "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
01283 traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
01284 KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower, upper ) );
01285 __kmp_str_free( &buff );
01286 }
01287 #endif
01288
01289 __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
01290 );
01291
01292 KMP_MB();
01293 KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting ordered_bumped to zero\n",
01294 gtid ) );
01295 pr->ordered_bumped = 0;
01297 #ifdef KMP_DEBUG
01298 {
01299 const char * buff;
01300
01301 buff = __kmp_str_format(
01302 "__kmp_dispatch_finish_chunk: T#%%d after wait: " \
01303 "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
01304 traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
01305 KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper ) );
01306 __kmp_str_free( &buff );
01307 }
01308 #endif
01309
01310 test_then_add< ST >( (volatile ST *) & sh->u.s.ordered_iteration, inc);
01311 }
01312
01313 }
01314 KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid ) );
01315 }
01316
01317 #endif
01318
01319
01320
01321
01322 #if OMPT_SUPPORT
01323 #define OMPT_LOOP_END \
01324 if (status == 0) { \
01325 kmp_info_t *this_thr = __kmp_threads[ gtid ]; \
01326 kmp_team_t *team = this_thr -> th.th_team; \
01327 int tid = __kmp_tid_from_gtid( gtid ); \
if (ompt_status == ompt_status_track_callback) { \
01329 if (ompt_callbacks.ompt_callback(ompt_event_loop_end)) { \
01330 ompt_callbacks.ompt_callback(ompt_event_loop_end) \
01331 (team->t.ompt_team_info.parallel_id, \
01332 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); \
01333 } \
01334 }\
01335 }
01336 #else
01337 #define OMPT_LOOP_END // no-op
01338 #endif
01339
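// __kmp_dispatch_next: hand the calling thread its next chunk [*p_lb, *p_ub]
// (stride *p_st) of the current dynamically scheduled loop. Returns 1 while
// work remains and 0 once this thread is finished; *p_last is set when the
// returned chunk contains the sequentially last iteration.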
01340 template< typename T >
01341 static int
01342 __kmp_dispatch_next(
01343 ident_t *loc, int gtid, kmp_int32 *p_last, T *p_lb, T *p_ub, typename traits_t< T >::signed_t *p_st
01344 ) {
01345
01346 typedef typename traits_t< T >::unsigned_t UT;
01347 typedef typename traits_t< T >::signed_t ST;
01348 typedef typename traits_t< T >::floating_t DBL;
01349 static const int ___kmp_size_type = sizeof( UT );
01350
01351 int status;
01352 dispatch_private_info_template< T > * pr;
01353 kmp_info_t * th = __kmp_threads[ gtid ];
01354 kmp_team_t * team = th -> th.th_team;
01355
01356 #ifdef KMP_DEBUG
01357 {
01358 const char * buff;
01359
01360 buff = __kmp_str_format(
01361 "__kmp_dispatch_next: T#%%d called p_lb:%%%s p_ub:%%%s p_st:%%%s p_last: %%p\n",
01362 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
01363 KD_TRACE(1000, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last ) );
01364 __kmp_str_free( &buff );
01365 }
01366 #endif
01367
01368 if ( team -> t.t_serialized ) {
01369
01370 pr = reinterpret_cast< dispatch_private_info_template< T >* >
01371 ( th -> th.th_dispatch -> th_disp_buffer );
01372 KMP_DEBUG_ASSERT( pr );
01373
01374 if ( (status = (pr->u.p.tc != 0)) == 0 ) {
01375 *p_lb = 0;
01376 *p_ub = 0;
01377 if ( p_st != 0 ) {
01378 *p_st = 0;
01379 }
01380 if ( __kmp_env_consistency_check ) {
01381 if ( pr->pushed_ws != ct_none ) {
01382 pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
01383 }
01384 }
01385 } else if ( pr->nomerge ) {
01386 kmp_int32 last;
01387 T start;
01388 UT limit, trip, init;
01389 ST incr;
01390 T chunk = pr->u.p.parm1;
01391
01392 KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", gtid ) );
01393
01394 init = chunk * pr->u.p.count++;
01395 trip = pr->u.p.tc - 1;
01396
01397 if ( (status = (init <= trip)) == 0 ) {
01398 *p_lb = 0;
01399 *p_ub = 0;
01400 if ( p_st != 0 ) *p_st = 0;
01401 if ( __kmp_env_consistency_check ) {
01402 if ( pr->pushed_ws != ct_none ) {
01403 pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
01404 }
01405 }
01406 } else {
01407 start = pr->u.p.lb;
01408 limit = chunk + init - 1;
01409 incr = pr->u.p.st;
01410
01411 if ( (last = (limit >= trip)) != 0 ) {
01412 limit = trip;
01413 #if KMP_OS_WINDOWS
01414 pr->u.p.last_upper = pr->u.p.ub;
01415 #endif
01416 }
01417 if ( p_last ) {
01418 *p_last = last;
01419 }
01420 if ( p_st != 0 ) {
01421 *p_st = incr;
01422 }
01423 if ( incr == 1 ) {
01424 *p_lb = start + init;
01425 *p_ub = start + limit;
01426 } else {
01427 *p_lb = start + init * incr;
01428 *p_ub = start + limit * incr;
01429 }
01430
01431 if ( pr->ordered ) {
01432 pr->u.p.ordered_lower = init;
01433 pr->u.p.ordered_upper = limit;
01434 #ifdef KMP_DEBUG
01435 {
01436 const char * buff;
01437
01438 buff = __kmp_str_format(
01439 "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
01440 traits_t< UT >::spec, traits_t< UT >::spec );
01441 KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
01442 __kmp_str_free( &buff );
01443 }
01444 #endif
01445 }
01446 }
01447 } else {
01448 pr->u.p.tc = 0;
01449
01450 *p_lb = pr->u.p.lb;
01451 *p_ub = pr->u.p.ub;
01452 #if KMP_OS_WINDOWS
01453 pr->u.p.last_upper = *p_ub;
01454 #endif
01455
01456 if ( p_st != 0 ) {
01457 *p_st = pr->u.p.st;
01458 }
01459 if ( p_last ) {
01460 *p_last = TRUE;
01461 }
01462 }
01463 #ifdef KMP_DEBUG
01464 {
01465 const char * buff;
01466
01467 buff = __kmp_str_format(
01468 "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s " \
01469 "p_ub:%%%s p_st:%%%s p_last:%%p returning:%%d\n",
01470 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
01471 KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, *p_st, p_last, status) );
01472 __kmp_str_free( &buff );
01473 }
01474 #endif
01475 OMPT_LOOP_END;
01476 return status;
01477 } else {
01478 kmp_int32 last = 0;
01479 dispatch_shared_info_template< UT > *sh;
01480 T start;
01481 ST incr;
01482 UT limit, trip, init;
01483
01484 KMP_DEBUG_ASSERT( th->th.th_dispatch ==
01485 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );
01486
01487 pr = reinterpret_cast< dispatch_private_info_template< T >* >
01488 ( th->th.th_dispatch->th_dispatch_pr_current );
01489 KMP_DEBUG_ASSERT( pr );
01490 sh = reinterpret_cast< dispatch_shared_info_template< UT >* >
01491 ( th->th.th_dispatch->th_dispatch_sh_current );
01492 KMP_DEBUG_ASSERT( sh );
01493
01494 if ( pr->u.p.tc == 0 ) {
01495
01496 status = 0;
01497 } else {
01498 switch (pr->schedule) {
01499 #if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
01500 case kmp_sch_static_steal:
01501 {
01502 T chunk = pr->u.p.parm1;
01503
01504 KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_steal case\n", gtid) );
01505
01506 trip = pr->u.p.tc - 1;
01507
01508 if ( ___kmp_size_type > 4 ) {
01509
01510
01511 init = ( pr->u.p.count )++;
01512 status = ( init < pr->u.p.ub );
01513 } else {
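// 4-byte case: {count, ub} share one 64-bit word, so claim the next chunk
// index with a 64-bit compare-and-swap.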
01514 typedef union {
01515 struct {
01516 UT count;
01517 T ub;
01518 } p;
01519 kmp_int64 b;
01520 } union_i4;
01521
01522
01523 {
01524 union_i4 vold, vnew;
01525 vold.b = *( volatile kmp_int64 * )(&pr->u.p.count);
01526 vnew = vold;
01527 vnew.p.count++;
01528 while( ! KMP_COMPARE_AND_STORE_ACQ64(
01529 ( volatile kmp_int64* )&pr->u.p.count,
01530 *VOLATILE_CAST(kmp_int64 *)&vold.b,
01531 *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {
01532 KMP_CPU_PAUSE();
01533 vold.b = *( volatile kmp_int64 * )(&pr->u.p.count);
01534 vnew = vold;
01535 vnew.p.count++;
01536 }
01537 vnew = vold;
01538 init = vnew.p.count;
01539 status = ( init < vnew.p.ub ) ;
01540 }
01541
01542 if( !status ) {
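// Nothing left in our own range: try a bounded number of times to steal
// roughly the upper quarter of another thread's remaining chunks.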
01543 kmp_info_t **other_threads = team->t.t_threads;
01544 int while_limit = 10;
01545 int while_index = 0;
01546
01547
01548
01549 while ( ( !status ) && ( while_limit != ++while_index ) ) {
01550 union_i4 vold, vnew;
01551 kmp_int32 remaining;
01552 T victimIdx = pr->u.p.parm4;
01553 T oldVictimIdx = victimIdx;
01554 dispatch_private_info_template< T > * victim;
01555
01556 do {
01557 if( !victimIdx ) {
01558 victimIdx = team->t.t_nproc - 1;
01559 } else {
01560 --victimIdx;
01561 }
01562 victim = reinterpret_cast< dispatch_private_info_template< T >* >
01563 ( other_threads[victimIdx]->th.th_dispatch->th_dispatch_pr_current );
01564 } while ( (victim == NULL || victim == pr) && oldVictimIdx != victimIdx );
01565
01566 if ( ( !victim ) ||
01567 ( (*( volatile T * )&victim->u.p.static_steal_counter) !=
01568 (*( volatile T * )&pr->u.p.static_steal_counter) ) ) {
01569
01570 continue;
01571
01572
01573 }
01574 if ( oldVictimIdx == victimIdx ) {
01575 break;
01576 }
01577 pr->u.p.parm4 = victimIdx;
01578
01579 while( 1 ) {
01580 vold.b = *( volatile kmp_int64 * )( &victim->u.p.count );
01581 vnew = vold;
01582
01583 KMP_DEBUG_ASSERT( (vnew.p.ub - 1) * chunk <= trip );
01584 if ( vnew.p.count >= vnew.p.ub || (remaining = vnew.p.ub - vnew.p.count) < 4 ) {
01585 break;
01586 }
01587 vnew.p.ub -= (remaining >> 2);
01588 KMP_DEBUG_ASSERT((vnew.p.ub - 1) * chunk <= trip);
01589 #pragma warning( push )
01590
01591 #pragma warning( disable: 186 )
01592 KMP_DEBUG_ASSERT(vnew.p.ub >= 0);
01593 #pragma warning( pop )
01594
01595 if ( KMP_COMPARE_AND_STORE_ACQ64(
01596 ( volatile kmp_int64 * )&victim->u.p.count,
01597 *VOLATILE_CAST(kmp_int64 *)&vold.b,
01598 *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {
01599 status = 1;
01600 while_index = 0;
01601
01602 #if KMP_ARCH_X86
01603
01604
01605
01606
01607 init = vold.p.count;
01608 pr->u.p.ub = 0;
01609 pr->u.p.count = init + 1;
01610 pr->u.p.ub = vnew.p.count;
01611 #else
01612 init = vnew.p.ub;
01613 vold.p.count = init + 1;
01614
01615 *( volatile kmp_int64 * )(&pr->u.p.count) = vold.b;
01616 #endif // KMP_ARCH_X86
01617 break;
01618 }
01619 KMP_CPU_PAUSE();
01620 }
01621 }
01622 }
01623 }
01624 if ( !status ) {
01625 *p_lb = 0;
01626 *p_ub = 0;
01627 if ( p_st != 0 ) *p_st = 0;
01628 } else {
01629 start = pr->u.p.parm2;
01630 init *= chunk;
01631 limit = chunk + init - 1;
01632 incr = pr->u.p.st;
01633
01634 KMP_DEBUG_ASSERT(init <= trip);
01635 if ( (last = (limit >= trip)) != 0 )
01636 limit = trip;
01637 if ( p_last ) {
01638 *p_last = last;
01639 }
01640 if ( p_st != 0 ) *p_st = incr;
01641
01642 if ( incr == 1 ) {
01643 *p_lb = start + init;
01644 *p_ub = start + limit;
01645 } else {
01646 *p_lb = start + init * incr;
01647 *p_ub = start + limit * incr;
01648 }
01649
01650 if ( pr->ordered ) {
01651 pr->u.p.ordered_lower = init;
01652 pr->u.p.ordered_upper = limit;
01653 #ifdef KMP_DEBUG
01654 {
01655 const char * buff;
01656
01657 buff = __kmp_str_format(
01658 "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
01659 traits_t< UT >::spec, traits_t< UT >::spec );
01660 KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
01661 __kmp_str_free( &buff );
01662 }
01663 #endif
01664 }
01665 }
01666 break;
01667 }
01668 #endif // ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
01669 case kmp_sch_static_balanced:
01670 {
01671 KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_balanced case\n", gtid) );
01672 if ( (status = !pr->u.p.count) != 0 ) {
01673 pr->u.p.count = 1;
01674 *p_lb = pr->u.p.lb;
01675 *p_ub = pr->u.p.ub;
01676 last = pr->u.p.parm1;
01677 if ( p_last ) {
01678 *p_last = last;
01679 }
01680 if ( p_st )
01681 *p_st = pr->u.p.st;
01682 } else {
01683 pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
01684 }
01685 if ( pr->ordered ) {
01686 #ifdef KMP_DEBUG
01687 {
01688 const char * buff;
01689
01690 buff = __kmp_str_format(
01691 "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
01692 traits_t< UT >::spec, traits_t< UT >::spec );
01693 KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
01694 __kmp_str_free( &buff );
01695 }
01696 #endif
01697 }
01698 }
01699 break;
01700 case kmp_sch_static_greedy:
01701 case kmp_sch_static_chunked:
01702 {
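// Static chunked / greedy: chunks are assigned round-robin by thread id
// (stride = nproc), so no shared counter is needed.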
01703 T parm1;
01704
01705 KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_[affinity|chunked] case\n",
01706 gtid ) );
01707 parm1 = pr->u.p.parm1;
01708
01709 trip = pr->u.p.tc - 1;
01710 init = parm1 * (pr->u.p.count + __kmp_tid_from_gtid(gtid));
01711
01712 if ( (status = (init <= trip)) != 0 ) {
01713 start = pr->u.p.lb;
01714 incr = pr->u.p.st;
01715 limit = parm1 + init - 1;
01716
01717 if ( (last = (limit >= trip)) != 0 )
01718 limit = trip;
01719
01720 if ( p_last ) {
01721 *p_last = last;
01722 }
01723 if ( p_st != 0 ) *p_st = incr;
01724
01725 pr->u.p.count += team->t.t_nproc;
01726
01727 if ( incr == 1 ) {
01728 *p_lb = start + init;
01729 *p_ub = start + limit;
01730 }
01731 else {
01732 *p_lb = start + init * incr;
01733 *p_ub = start + limit * incr;
01734 }
01735
01736 if ( pr->ordered ) {
01737 pr->u.p.ordered_lower = init;
01738 pr->u.p.ordered_upper = limit;
01739 #ifdef KMP_DEBUG
01740 {
01741 const char * buff;
01742
01743 buff = __kmp_str_format(
01744 "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
01745 traits_t< UT >::spec, traits_t< UT >::spec );
01746 KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
01747 __kmp_str_free( &buff );
01748 }
01749 #endif
01750 }
01751 }
01752 }
01753 break;
01754
01755 case kmp_sch_dynamic_chunked:
01756 {
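// Plain dynamic: atomically bump the shared iteration counter to claim the
// next chunk.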
01757 T chunk = pr->u.p.parm1;
01758
01759 KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",
01760 gtid ) );
01761
01762 init = chunk * test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration );
01763 trip = pr->u.p.tc - 1;
01764
01765 if ( (status = (init <= trip)) == 0 ) {
01766 *p_lb = 0;
01767 *p_ub = 0;
01768 if ( p_st != 0 ) *p_st = 0;
01769 } else {
01770 start = pr->u.p.lb;
01771 limit = chunk + init - 1;
01772 incr = pr->u.p.st;
01773
01774 if ( (last = (limit >= trip)) != 0 )
01775 limit = trip;
01776 if ( p_last ) {
01777 *p_last = last;
01778 }
01779 if ( p_st != 0 ) *p_st = incr;
01780
01781 if ( incr == 1 ) {
01782 *p_lb = start + init;
01783 *p_ub = start + limit;
01784 } else {
01785 *p_lb = start + init * incr;
01786 *p_ub = start + limit * incr;
01787 }
01788
01789 if ( pr->ordered ) {
01790 pr->u.p.ordered_lower = init;
01791 pr->u.p.ordered_upper = limit;
01792 #ifdef KMP_DEBUG
01793 {
01794 const char * buff;
01795
01796 buff = __kmp_str_format(
01797 "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
01798 traits_t< UT >::spec, traits_t< UT >::spec );
01799 KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
01800 __kmp_str_free( &buff );
01801 }
01802 #endif
01803 }
01804 }
01805 }
01806 break;
01807
01808 case kmp_sch_guided_iterative_chunked:
01809 {
01810 T chunkspec = pr->u.p.parm1;
01811 KD_TRACE(100,
01812 ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked iterative case\n",gtid));
01813 trip = pr->u.p.tc;
01814
01815 while(1) {
01816 ST remaining;
01817 init = sh->u.s.iteration;
01818 remaining = trip - init;
01819 if ( remaining <= 0 ) {
01820
01821 status = 0;
01822 break;
01823 }
01824 if ( remaining < pr->u.p.parm2 ) {
01825
01826
01827 init = test_then_add<ST>( (ST*)&sh->u.s.iteration, (ST)chunkspec );
01828 remaining = trip - init;
01829 if (remaining <= 0) {
01830 status = 0;
01831 } else {
01832
01833 status = 1;
01834 if ( remaining > chunkspec ) {
01835 limit = init + chunkspec - 1;
01836 } else {
01837 last = 1;
01838 limit = init + remaining - 1;
01839 }
01840 }
01841 break;
01842 }
01843 limit = init + (UT)( remaining * *(double*)&pr->u.p.parm3 );
01844 if ( compare_and_swap<ST>( (ST*)&sh->u.s.iteration, (ST)init, (ST)limit ) ) {
01845
01846 status = 1;
01847 --limit;
01848 break;
01849 }
01850 }
01851 if ( status != 0 ) {
01852 start = pr->u.p.lb;
01853 incr = pr->u.p.st;
01854 if ( p_st != NULL )
01855 *p_st = incr;
01856 if ( p_last != NULL )
01857 *p_last = last;
01858 *p_lb = start + init * incr;
01859 *p_ub = start + limit * incr;
01860 if ( pr->ordered ) {
01861 pr->u.p.ordered_lower = init;
01862 pr->u.p.ordered_upper = limit;
01863 #ifdef KMP_DEBUG
01864 {
01865 const char * buff;
01866
01867 buff = __kmp_str_format(
01868 "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
01869 traits_t< UT >::spec, traits_t< UT >::spec );
01870 KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
01871 __kmp_str_free( &buff );
01872 }
01873 #endif
01874 }
01875 } else {
01876 *p_lb = 0;
01877 *p_ub = 0;
01878 if ( p_st != NULL )
01879 *p_st = 0;
01880 }
01881 }
01882 break;
01883
01884 case kmp_sch_guided_analytical_chunked:
01885 {
01886 T chunkspec = pr->u.p.parm1;
01887 UT chunkIdx;
01888 #if KMP_OS_WINDOWS && KMP_ARCH_X86
01889
01890
01891 unsigned int oldFpcw;
01892 int fpcwSet = 0;
01893 #endif
01894 KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked analytical case\n",
01895 gtid ) );
01896
01897 trip = pr->u.p.tc;
01898
01899 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
01900 KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * team->t.t_nproc < trip);
01901
01902 while(1) {
01903 chunkIdx = test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration );
01904 if ( chunkIdx >= pr->u.p.parm2 ) {
01905 --trip;
01906
01907 init = chunkIdx * chunkspec + pr->u.p.count;
01908
01909 if ( (status = (init > 0 && init <= trip)) != 0 ) {
01910 limit = init + chunkspec -1;
01911
01912 if ( (last = (limit >= trip)) != 0 )
01913 limit = trip;
01914 }
01915 break;
01916 } else {
01917
01918
01919
01920
01921 #if KMP_OS_WINDOWS && KMP_ARCH_X86
01922
01923
01924
01925 if ( !fpcwSet ) {
01926 oldFpcw = _control87(0,0x30000);
01927 fpcwSet = 0x30000;
01928 }
01929 #endif
01930 if ( chunkIdx ) {
01931 init = __kmp_dispatch_guided_remaining< T >(
01932 trip, *( DBL * )&pr->u.p.parm3, chunkIdx );
01933 KMP_DEBUG_ASSERT(init);
01934 init = trip - init;
01935 } else
01936 init = 0;
01937 limit = trip - __kmp_dispatch_guided_remaining< T >(
01938 trip, *( DBL * )&pr->u.p.parm3, chunkIdx + 1 );
01939 KMP_ASSERT(init <= limit);
01940 if ( init < limit ) {
01941 KMP_DEBUG_ASSERT(limit <= trip);
01942 --limit;
01943 status = 1;
01944 break;
01945 }
01946 }
01947 }
01948 #if KMP_OS_WINDOWS && KMP_ARCH_X86
01949
// restore the FPU control word only if it was actually changed above
if ( fpcwSet && ( oldFpcw & fpcwSet ) != 0 )
01951 _control87(oldFpcw,0x30000);
01952 #endif
01953 if ( status != 0 ) {
01954 start = pr->u.p.lb;
01955 incr = pr->u.p.st;
01956 if ( p_st != NULL )
01957 *p_st = incr;
01958 if ( p_last != NULL )
01959 *p_last = last;
01960 *p_lb = start + init * incr;
01961 *p_ub = start + limit * incr;
01962 if ( pr->ordered ) {
01963 pr->u.p.ordered_lower = init;
01964 pr->u.p.ordered_upper = limit;
01965 #ifdef KMP_DEBUG
01966 {
01967 const char * buff;
01968
01969 buff = __kmp_str_format(
01970 "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
01971 traits_t< UT >::spec, traits_t< UT >::spec );
01972 KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
01973 __kmp_str_free( &buff );
01974 }
01975 #endif
01976 }
01977 } else {
01978 *p_lb = 0;
01979 *p_ub = 0;
01980 if ( p_st != NULL )
01981 *p_st = 0;
01982 }
01983 }
01984 break;
01985
01986 case kmp_sch_trapezoidal:
01987 {
01988 UT index;
01989 T parm2 = pr->u.p.parm2;
01990 T parm3 = pr->u.p.parm3;
01991 T parm4 = pr->u.p.parm4;
01992 KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_trapezoidal case\n",
01993 gtid ) );
01994
01995 index = test_then_inc< ST >( (volatile ST *) & sh->u.s.iteration );
01996
01997 init = ( index * ( (2*parm2) - (index-1)*parm4 ) ) / 2;
01998 trip = pr->u.p.tc - 1;
01999
02000 if ( (status = (index < parm3 && init <= trip)) == 0 ) {
02001 *p_lb = 0;
02002 *p_ub = 0;
02003 if ( p_st != 0 ) *p_st = 0;
02004 } else {
02005 start = pr->u.p.lb;
02006 limit = ( (index+1) * ( 2*parm2 - index*parm4 ) ) / 2 - 1;
02007 incr = pr->u.p.st;
02008
02009 if ( (last = (limit >= trip)) != 0 )
02010 limit = trip;
02011
02012 if ( p_last != 0 ) {
02013 *p_last = last;
02014 }
02015 if ( p_st != 0 ) *p_st = incr;
02016
02017 if ( incr == 1 ) {
02018 *p_lb = start + init;
02019 *p_ub = start + limit;
02020 } else {
02021 *p_lb = start + init * incr;
02022 *p_ub = start + limit * incr;
02023 }
02024
02025 if ( pr->ordered ) {
02026 pr->u.p.ordered_lower = init;
02027 pr->u.p.ordered_upper = limit;
02028 #ifdef KMP_DEBUG
02029 {
02030 const char * buff;
02031
02032 buff = __kmp_str_format(
02033 "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
02034 traits_t< UT >::spec, traits_t< UT >::spec );
02035 KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
02036 __kmp_str_free( &buff );
02037 }
02038 #endif
02039 }
02040 }
02041 }
02042 break;
02043 }
02044 }
02045
02046 if ( status == 0 ) {
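// No chunk was obtained, so this thread is done with the loop: register its
// completion; the last thread to finish resets the shared buffer and releases
// it for reuse by advancing buffer_index.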
02047 UT num_done;
02048
02049 num_done = test_then_inc< ST >( (volatile ST *) & sh->u.s.num_done );
02050 #ifdef KMP_DEBUG
02051 {
02052 const char * buff;
02053
02054 buff = __kmp_str_format(
02055 "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
02056 traits_t< UT >::spec );
02057 KD_TRACE(100, ( buff, gtid, sh->u.s.num_done ) );
02058 __kmp_str_free( &buff );
02059 }
02060 #endif
02061
02062 if ( num_done == team->t.t_nproc-1 ) {
02063
02064
02065 KMP_MB();
02066
02067 sh->u.s.num_done = 0;
02068 sh->u.s.iteration = 0;
02069
02070
02071 if ( pr->ordered ) {
02072 sh->u.s.ordered_iteration = 0;
02073 }
02074
02075 KMP_MB();
02076
02077 sh -> buffer_index += KMP_MAX_DISP_BUF;
02078 KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n",
02079 gtid, sh->buffer_index) );
02080
02081 KMP_MB();
02082
02083 }
02084 if ( __kmp_env_consistency_check ) {
02085 if ( pr->pushed_ws != ct_none ) {
02086 pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
02087 }
02088 }
02089
02090 th -> th.th_dispatch -> th_deo_fcn = NULL;
02091 th -> th.th_dispatch -> th_dxo_fcn = NULL;
02092 th -> th.th_dispatch -> th_dispatch_sh_current = NULL;
02093 th -> th.th_dispatch -> th_dispatch_pr_current = NULL;
02094 }
02095 #if KMP_OS_WINDOWS
02096 else if ( last ) {
02097 pr->u.p.last_upper = pr->u.p.ub;
02098 }
02099 #endif
02100 }
02101
02102 #ifdef KMP_DEBUG
02103 {
02104 const char * buff;
02105
02106 buff = __kmp_str_format(
02107 "__kmp_dispatch_next: T#%%d normal case: " \
02108 "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p returning:%%d\n",
02109 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
02110 KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last, status ) );
02111 __kmp_str_free( &buff );
02112 }
02113 #endif
02114 OMPT_LOOP_END;
02115 return status;
02116 }
02117
02118
02119
02120
02121
02122
02123 extern "C" {
02124
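// Public entry points. __kmpc_dispatch_init_{4,4u,8,8u} start a dynamically
// scheduled loop for the given induction-variable width and signedness,
// __kmpc_dispatch_next_* fetch the next chunk, and __kmpc_dispatch_fini_* mark
// the end of the current chunk (used for ordered loops).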
02140 void
02141 __kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
02142 kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
02143 {
02144 KMP_DEBUG_ASSERT( __kmp_init_serial );
02145 __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
02146 }
02150 void
02151 __kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
02152 kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
02153 {
02154 KMP_DEBUG_ASSERT( __kmp_init_serial );
02155 __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
02156 }
02157
02161 void
02162 __kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
02163 kmp_int64 lb, kmp_int64 ub,
02164 kmp_int64 st, kmp_int64 chunk )
02165 {
02166 KMP_DEBUG_ASSERT( __kmp_init_serial );
02167 __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
02168 }
02169
02173 void
02174 __kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
02175 kmp_uint64 lb, kmp_uint64 ub,
02176 kmp_int64 st, kmp_int64 chunk )
02177 {
02178 KMP_DEBUG_ASSERT( __kmp_init_serial );
02179 __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
02180 }
02181
02194 int
02195 __kmpc_dispatch_next_4( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
02196 kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st )
02197 {
02198 return __kmp_dispatch_next< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st );
02199 }
02200
02204 int
02205 __kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
02206 kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st )
02207 {
02208 return __kmp_dispatch_next< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st );
02209 }
02210
02214 int
02215 __kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
02216 kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st )
02217 {
02218 return __kmp_dispatch_next< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st );
02219 }
02220
02224 int
02225 __kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
02226 kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st )
02227 {
02228 return __kmp_dispatch_next< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st );
02229 }
02230
02237 void
02238 __kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid )
02239 {
02240 __kmp_dispatch_finish< kmp_uint32 >( gtid, loc );
02241 }
02242
02246 void
02247 __kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid )
02248 {
02249 __kmp_dispatch_finish< kmp_uint64 >( gtid, loc );
02250 }
02251
02255 void
02256 __kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid )
02257 {
02258 __kmp_dispatch_finish< kmp_uint32 >( gtid, loc );
02259 }
02260
02264 void
02265 __kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid )
02266 {
02267 __kmp_dispatch_finish< kmp_uint64 >( gtid, loc );
02268 }
02271
02272
02273
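// Non-templated predicate and wait helpers, exported for use elsewhere in the
// runtime.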
02274 kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker) {
02275 return value == checker;
02276 }
02277
02278 kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker) {
02279 return value != checker;
02280 }
02281
02282 kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker) {
02283 return value < checker;
02284 }
02285
02286 kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker) {
02287 return value >= checker;
02288 }
02289
02290 kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker) {
02291 return value <= checker;
02292 }
02293 kmp_uint32 __kmp_eq_8( kmp_uint64 value, kmp_uint64 checker) {
02294 return value == checker;
02295 }
02296
02297 kmp_uint32 __kmp_neq_8( kmp_uint64 value, kmp_uint64 checker) {
02298 return value != checker;
02299 }
02300
02301 kmp_uint32 __kmp_lt_8( kmp_uint64 value, kmp_uint64 checker) {
02302 return value < checker;
02303 }
02304
02305 kmp_uint32 __kmp_ge_8( kmp_uint64 value, kmp_uint64 checker) {
02306 return value >= checker;
02307 }
02308
02309 kmp_uint32 __kmp_le_8( kmp_uint64 value, kmp_uint64 checker) {
02310 return value <= checker;
02311 }
02312
02313 kmp_uint32
02314 __kmp_wait_yield_4(volatile kmp_uint32 * spinner,
02315 kmp_uint32 checker,
02316 kmp_uint32 (* pred)( kmp_uint32, kmp_uint32 )
02317 , void * obj
02318 )
02319 {
02320
02321 register volatile kmp_uint32 * spin = spinner;
02322 register kmp_uint32 check = checker;
02323 register kmp_uint32 spins;
02324 register kmp_uint32 (*f) ( kmp_uint32, kmp_uint32 ) = pred;
02325 register kmp_uint32 r;
02326
02327 KMP_INIT_YIELD( spins );
02328
02329 while(!f(r = TCR_4(*spin), check)) {
02330
02331
02332
02333
02334
02335 __kmp_static_delay(TRUE);
02336
02337
02338
02339 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
02340 KMP_YIELD_SPIN( spins );
02341 }
02342
02343 return r;
02344 }
02345
02346 kmp_uint64
02347 __kmp_wait_yield_8( volatile kmp_uint64 * spinner,
02348 kmp_uint64 checker,
02349 kmp_uint32 (* pred)( kmp_uint64, kmp_uint64 )
02350 , void * obj
02351 )
02352 {
02353
02354 register volatile kmp_uint64 * spin = spinner;
02355 register kmp_uint64 check = checker;
02356 register kmp_uint32 spins;
02357 register kmp_uint32 (*f) ( kmp_uint64, kmp_uint64 ) = pred;
02358 register kmp_uint64 r;
02359
02360 KMP_INIT_YIELD( spins );
02361
02362 while(!f(r = *spin, check))
02363 {
02364
02365
02366
02367
02368 __kmp_static_delay(TRUE);
02369
02370
02371
02372
02373 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
02374 KMP_YIELD_SPIN( spins );
02375 }
02376
02377 return r;
02378 }
02379
02380 }
02381
02382 #ifdef KMP_GOMP_COMPAT
02383
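// Entry points used by the GNU-compatibility (libgomp interface) layer.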
02384 void
02385 __kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
02386 kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
02387 kmp_int32 chunk, int push_ws )
02388 {
02389 __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk,
02390 push_ws );
02391 }
02392
02393 void
02394 __kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
02395 kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
02396 kmp_int32 chunk, int push_ws )
02397 {
02398 __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk,
02399 push_ws );
02400 }
02401
02402 void
02403 __kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
02404 kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
02405 kmp_int64 chunk, int push_ws )
02406 {
02407 __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk,
02408 push_ws );
02409 }
02410
02411 void
02412 __kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule,
02413 kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
02414 kmp_int64 chunk, int push_ws )
02415 {
02416 __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk,
02417 push_ws );
02418 }
02419
02420 void
02421 __kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid )
02422 {
02423 __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );
02424 }
02425
02426 void
02427 __kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid )
02428 {
02429 __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );
02430 }
02431
02432 void
02433 __kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid )
02434 {
02435 __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );
02436 }
02437
02438 void
02439 __kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid )
02440 {
02441 __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );
02442 }
02443
02444 #endif
02445
02446
02447
02448