00047 #include "kmp.h"
00048 #include "kmp_atomic.h"
00049 #include "kmp_wrapper_getpid.h"
00050 #include "kmp_environment.h"
00051 #include "kmp_str.h"
00052 #include "kmp_settings.h"
00053 #include "kmp_i18n.h"
00054 #include "kmp_io.h"
00055 #include "kmp_error.h"
00056
00057 #if OMPT_SUPPORT
00058 #include "ompt-specific.h"
00059 #endif
00060
00061
00062 #define KMP_USE_PRCTL 0
00063 #define KMP_USE_POOLED_ALLOC 0
00064
00065 #if KMP_OS_WINDOWS
00066 #include <process.h>
00067 #endif
00068
00069
00070 #if defined(KMP_GOMP_COMPAT)
00071 char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
00072 #endif
00073
00074 char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
00075 #if OMP_30_ENABLED
00076 "3.1 (201107)";
00077 #else
00078 "2.5 (200505)";
00079 #endif
00080
00081 #ifdef KMP_DEBUG
00082
00083 char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";
00084
00085 char const __kmp_version_perf_v19[] = KMP_VERSION_PREFIX "perf v19: "
00086 #if KMP_PERF_V19 == KMP_ON
00087 "on";
00088 #elif KMP_PERF_V19 == KMP_OFF
00089 "off";
00090 #else
00091 #error "Must specify KMP_PERF_V19 option"
00092 #endif
00093
00094 char const __kmp_version_perf_v106[] = KMP_VERSION_PREFIX "perf v106: "
00095 #if KMP_PERF_V106 == KMP_ON
00096 "on";
00097 #elif KMP_PERF_V106 == KMP_OFF
00098 "off";
00099 #else
00100 #error "Must specify KMP_PERF_V106 option"
00101 #endif
00102
00103 #endif
00104
00105
00106
00107
00108
00109
00110 kmp_info_t __kmp_monitor;
00111
00112
00113
00114
00115
00116
00117
00118 void __kmp_cleanup( void );
00119
00120 static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
00121 static void __kmp_initialize_team(
00122 kmp_team_t * team,
00123 int new_nproc,
00124 #if OMP_30_ENABLED
00125 kmp_internal_control_t * new_icvs,
00126 ident_t * loc
00127 #else
00128 int new_set_nproc, int new_set_dynamic, int new_set_nested,
00129 int new_set_blocktime, int new_bt_intervals, int new_bt_set
00130 #endif
00131 );
00132 static void __kmp_partition_places( kmp_team_t *team );
00133 static void __kmp_do_serial_initialize( void );
00134
00135
00136 #ifdef USE_LOAD_BALANCE
00137 static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );
00138 #endif
00139
00140 static int __kmp_expand_threads(int nWish, int nNeed);
00141 static int __kmp_unregister_root_other_thread( int gtid );
00142 static void __kmp_unregister_library( void );
00143 static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
00144 static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
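/*
 * __kmp_get_global_thread_id
 *
 * Determine the global thread id (gtid) of the calling thread without using
 * thread-local storage, by scanning the registered threads and checking
 * whether the address of a local variable falls within a registered thread's
 * recorded stack extent.  Falls back to the TLS value if the scan fails, and
 * grows the recorded stack extent when the address lies outside it.
 * (Descriptive comment added; behavior inferred from the code below.)
 */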
00154 int
00155 __kmp_get_global_thread_id( )
00156 {
00157 int i;
00158 kmp_info_t **other_threads;
00159 size_t stack_data;
00160 char *stack_addr;
00161 size_t stack_size;
00162 char *stack_base;
00163
00164 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
00165 __kmp_nth, __kmp_all_nth ));
00166
00167
00168
00169
00170
00171
00172 if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;
00173
00174 #ifdef KMP_TDATA_GTID
00175 if ( TCR_4(__kmp_gtid_mode) >= 3) {
00176 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
00177 return __kmp_gtid;
00178 }
00179 #endif
00180 if ( TCR_4(__kmp_gtid_mode) >= 2) {
00181 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
00182 return __kmp_gtid_get_specific();
00183 }
00184 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));
00185
00186 stack_addr = (char*) & stack_data;
00187 other_threads = __kmp_threads;
00202 for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {
00203
00204 kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
00205 if( !thr ) continue;
00206
00207 stack_size = (size_t)TCR_PTR(thr -> th.th_info.ds.ds_stacksize);
00208 stack_base = (char *)TCR_PTR(thr -> th.th_info.ds.ds_stackbase);
00209
00210
00211
00212 if( stack_addr <= stack_base ) {
00213 size_t stack_diff = stack_base - stack_addr;
00214
00215 if( stack_diff <= stack_size ) {
00216
00217
00218 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
00219 return i;
00220 }
00221 }
00222 }
00223
00224
00225 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
00226 "thread, using TLS\n" ));
00227 i = __kmp_gtid_get_specific();
00228
00229
00230
00231
00232 if( i<0 ) return i;
00233
00234
00235 if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
00236 KMP_FATAL( StackOverflow, i );
00237 }
00238
00239 stack_base = (char *) other_threads[i] -> th.th_info.ds.ds_stackbase;
00240 if( stack_addr > stack_base ) {
00241 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
00242 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
00243 other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
00244 } else {
00245 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
00246 }
00247
00248
00249 if ( __kmp_storage_map ) {
00250 char *stack_end = (char *) other_threads[i] -> th.th_info.ds.ds_stackbase;
00251 char *stack_beg = stack_end - other_threads[i] -> th.th_info.ds.ds_stacksize;
00252 __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
00253 other_threads[i] -> th.th_info.ds.ds_stacksize,
00254 "th_%d stack (refinement)", i );
00255 }
00256 return i;
00257 }
00258
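/*
 * __kmp_get_global_thread_id_reg
 *
 * Like __kmp_get_global_thread_id(), but if no gtid is found the calling
 * thread is treated as a new root: serial initialization is performed if
 * needed and a fresh gtid is registered via __kmp_register_root().
 * (Descriptive comment added; inferred from the code below.)
 */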
00259 int
00260 __kmp_get_global_thread_id_reg( )
00261 {
00262 int gtid;
00263
00264 if ( !__kmp_init_serial ) {
00265 gtid = KMP_GTID_DNE;
00266 } else
00267 #ifdef KMP_TDATA_GTID
00268 if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
00269 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
00270 gtid = __kmp_gtid;
00271 } else
00272 #endif
00273 if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
00274 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
00275 gtid = __kmp_gtid_get_specific();
00276 } else {
00277 KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
00278 gtid = __kmp_get_global_thread_id();
00279 }
00280
00281
00282 if( gtid == KMP_GTID_DNE ) {
00283 KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
00284 "Registering a new gtid.\n" ));
00285 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
00286 if( !__kmp_init_serial ) {
00287 __kmp_do_serial_initialize();
00288 gtid = __kmp_gtid_get_specific();
00289 } else {
00290 gtid = __kmp_register_root(FALSE);
00291 }
00292 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
00293
00294 }
00295
00296 KMP_DEBUG_ASSERT( gtid >=0 );
00297
00298 return gtid;
00299 }
00300
00301
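/*
 * __kmp_check_stack_overlap
 *
 * Optionally print this thread's stack extent (when storage-map output is
 * enabled) and, when extensive environment checks are on, verify that the
 * stack does not overlap the stack of any other registered thread.
 * Overlap is reported as a fatal error.
 * (Descriptive comment added; inferred from the code below.)
 */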
00302 void
00303 __kmp_check_stack_overlap( kmp_info_t *th )
00304 {
00305 int f;
00306 char *stack_beg = NULL;
00307 char *stack_end = NULL;
00308 int gtid;
00309
00310 KA_TRACE(10,("__kmp_check_stack_overlap: called\n"));
00311 if ( __kmp_storage_map ) {
00312 stack_end = (char *) th -> th.th_info.ds.ds_stackbase;
00313 stack_beg = stack_end - th -> th.th_info.ds.ds_stacksize;
00314
00315 gtid = __kmp_gtid_from_thread( th );
00316
00317 if (gtid == KMP_GTID_MONITOR) {
00318 __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
00319 "th_%s stack (%s)", "mon",
00320 ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
00321 } else {
00322 __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
00323 "th_%d stack (%s)", gtid,
00324 ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
00325 }
00326 }
00327
00328
00329 if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid = __kmp_gtid_from_thread( th )))
00330 {
00331 KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n"));
00332 if ( stack_beg == NULL ) {
00333 stack_end = (char *) th -> th.th_info.ds.ds_stackbase;
00334 stack_beg = stack_end - th -> th.th_info.ds.ds_stacksize;
00335 }
00336
00337 for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
00338 kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);
00339
00340 if( f_th && f_th != th ) {
00341 char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
00342 char *other_stack_beg = other_stack_end -
00343 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
00344 if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
00345 (stack_end > other_stack_beg && stack_end < other_stack_end)) {
00346
00347
00348 if ( __kmp_storage_map )
00349 __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
00350 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
00351 "th_%d stack (overlapped)",
00352 __kmp_gtid_from_thread( f_th ) );
00353
00354 __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
00355 }
00356 }
00357 }
00358 }
00359 KA_TRACE(10,("__kmp_check_stack_overlap: returning\n"));
00360 }
00361
00362
00363
00364
00365 #ifndef KMP_DEBUG
00366 # define __kmp_static_delay( arg )
00367 #else
00368
00369 static void
00370 __kmp_static_delay( int arg )
00371 {
00372
00373 # if KMP_ARCH_X86_64 && KMP_OS_LINUX
00374 KMP_ASSERT( arg != 0 );
00375 # else
00376 KMP_ASSERT( arg >= 0 );
00377 # endif
00378 }
00379 #endif
00380
00381 static void
00382 __kmp_static_yield( int arg )
00383 {
00384 __kmp_yield( arg );
00385 }
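/*
 * __kmp_wait_sleep
 *
 * Spin-wait until the value pointed to by 'spinner' equals 'checker'.
 * While spinning the thread may execute pending tasks (OMP 3.0 tasking),
 * yields according to the yield heuristics, and, once the blocktime interval
 * has expired (when KMP_BLOCKTIME is not "infinite"), suspends itself via
 * __kmp_suspend() until it is woken by __kmp_release().
 * (Descriptive comment added; inferred from the code below.)
 */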
00395 void
00396 __kmp_wait_sleep( kmp_info_t *this_thr,
00397 volatile kmp_uint *spinner,
00398 kmp_uint checker,
00399 int final_spin
00400 )
00401 {
00402
00403 register volatile kmp_uint *spin = spinner;
00404 register kmp_uint check = checker;
00405 register kmp_uint32 spins;
00406 register int hibernate;
00407 int th_gtid, th_tid;
00408 #if OMP_30_ENABLED
00409 int flag = FALSE;
00410 #endif
00411
00412 th_gtid = this_thr->th.th_info.ds.ds_gtid;
00413
00414 if( TCR_4(*spin) == check ) {
00415 return;
00416 }
00417
00418 #if OMPT_SUPPORT
00419 if ((ompt_status == ompt_status_track_callback) &&
00420 (ompt_callbacks.ompt_callback(ompt_event_idle_begin))) {
00421 ompt_callbacks.ompt_callback(ompt_event_idle_begin)();
00422 }
00423 #endif
00424
00425 KA_TRACE( 20, ("__kmp_wait_sleep: T#%d waiting for spin(%p) == %d\n",
00426 th_gtid,
00427 spin, check ) );
00428
00429
00430 KMP_INIT_YIELD( spins );
00431
00432 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
00433
00434
00435
00436
00437 #ifdef KMP_ADJUST_BLOCKTIME
00438 if ( __kmp_zero_bt && ! this_thr->th.th_team_bt_set ) {
00439
00440 hibernate = 0;
00441 } else {
00442 hibernate = this_thr->th.th_team_bt_intervals;
00443 }
00444 #else
00445 hibernate = this_thr->th.th_team_bt_intervals;
00446 #endif
/* If the blocktime is nonzero, spin-wait for the whole of the specified
   number of intervals (plus up to one more) so the thread does not go to
   sleep too soon. */
if ( hibernate != 0 ) {
hibernate++;
}
00450 hibernate += TCR_4( __kmp_global.g.g_time.dt.t_value );
00451
00452 KF_TRACE( 20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
00453 th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
00454 hibernate - __kmp_global.g.g_time.dt.t_value ));
00455 }
00456
00457 KMP_MB();
00458
00459
00460 while( TCR_4(*spin) != check ) {
00461 int in_pool;
00462
00463 #if OMP_30_ENABLED
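/* If tasking is enabled, execute pending tasks while waiting instead of just
   spinning; if the task team has been deactivated, drop our reference to it.
   (Comment added; inferred from the code below.) */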
00476 kmp_task_team_t * task_team = NULL;
00477 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
00478 task_team = this_thr->th.th_task_team;
00479 if ( task_team != NULL ) {
00480 if ( ! TCR_SYNC_4( task_team->tt.tt_active ) ) {
00481 KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( this_thr->th.th_info.ds.ds_tid ) );
00482 __kmp_unref_task_team( task_team, this_thr );
00483 } else if ( KMP_TASKING_ENABLED( task_team, this_thr->th.th_task_state ) ) {
00484 __kmp_execute_tasks( this_thr, th_gtid, spin, check, final_spin, &flag
00485 );
00486 }
00487 };
00488 };
00489 #endif
00490
00491 if( TCR_4(__kmp_global.g.g_done) ) {
00492 if( __kmp_global.g.g_abort )
00493 __kmp_abort_thread( );
00494 break;
00495 }
00496
00497 __kmp_static_delay( 1 );
00498
00499
00500
00501 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
00502
00503
00504
00505 KMP_YIELD_SPIN( spins );
00506
00507
00508
00509
00510
00511 in_pool = !!TCR_4(this_thr->th.th_in_pool);
00512 if ( in_pool != !!this_thr->th.th_active_in_pool ) {
00513 if ( in_pool ) {
00514
00515
00516
00517 KMP_TEST_THEN_INC32(
00518 (kmp_int32 *) &__kmp_thread_pool_active_nth );
00519 this_thr->th.th_active_in_pool = TRUE;
00533 }
00534 else {
00535
00536
00537
00538 KMP_TEST_THEN_DEC32(
00539 (kmp_int32 *) &__kmp_thread_pool_active_nth );
00540 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
00541 this_thr->th.th_active_in_pool = FALSE;
00542 }
00543 }
00544
00545 #if OMP_30_ENABLED
00546
00547 if ( ( task_team != NULL ) && TCR_4(task_team->tt.tt_found_tasks) ) {
00548 continue;
00549 }
00550 #endif
00551
00552
00553 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
00554 continue;
00555 }
00556
00557
00558 if( TCR_4( __kmp_global.g.g_time.dt.t_value ) <= hibernate ) {
00559 continue;
00560 }
00561
00562 KF_TRACE( 50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid ) );
00563
00564 __kmp_suspend( th_gtid, spin, check );
00565
00566 if( TCR_4( __kmp_global.g.g_done ) && __kmp_global.g.g_abort ) {
00567 __kmp_abort_thread( );
00568 }
00569
00570
00571
00572 }
00573
00574 #if OMPT_SUPPORT
00575 if ((ompt_status == ompt_status_track_callback) &&
00576 (ompt_callbacks.ompt_callback(ompt_event_idle_end))) {
00577 ompt_callbacks.ompt_callback(ompt_event_idle_end)();
00578 }
00579 #endif
00580
00581 }
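/*
 * __kmp_release
 *
 * Atomically add 4 to the spin variable a waiting thread is watching, using
 * an acquire or release fetch-and-add as requested by 'fetchadd_fence', and,
 * if the old value shows the target thread had gone to sleep, wake it with
 * __kmp_resume().
 * (Descriptive comment added; inferred from the code below.)
 */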
00592 void
00593 __kmp_release( kmp_info_t *target_thr, volatile kmp_uint *spin,
00594 enum kmp_mem_fence_type fetchadd_fence )
00595 {
00596 kmp_uint old_spin;
00597 #ifdef KMP_DEBUG
00598 int target_gtid = target_thr->th.th_info.ds.ds_gtid;
00599 int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
00600 #endif
00601
00602 KF_TRACE( 20, ( "__kmp_release: T#%d releasing T#%d spin(%p) fence_type(%d)\n",
00603 gtid, target_gtid, spin, fetchadd_fence ));
00604
00605 KMP_DEBUG_ASSERT( spin );
00606
00607 KMP_DEBUG_ASSERT( fetchadd_fence == kmp_acquire_fence ||
00608 fetchadd_fence == kmp_release_fence );
00609
00610 old_spin = ( fetchadd_fence == kmp_acquire_fence )
00611 ? KMP_TEST_THEN_ADD4_ACQ32( (volatile kmp_int32 *) spin )
00612 : KMP_TEST_THEN_ADD4_32( (volatile kmp_int32 *) spin );
00613
00614 KF_TRACE( 100, ( "__kmp_release: T#%d old spin(%p)=%d, set new spin=%d\n",
00615 gtid, spin, old_spin, *spin ) );
00616
00617 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
00618
00619 if ( old_spin & KMP_BARRIER_SLEEP_STATE ) {
00620 #ifndef KMP_DEBUG
00621 int target_gtid = target_thr->th.th_info.ds.ds_gtid;
00622 #endif
00623
00624 KF_TRACE( 50, ( "__kmp_release: T#%d waking up thread T#%d since sleep spin(%p) set\n",
00625 gtid, target_gtid, spin ));
00626 __kmp_resume( target_gtid, spin );
00627 } else {
00628 KF_TRACE( 50, ( "__kmp_release: T#%d don't wake up thread T#%d since sleep spin(%p) not set\n",
00629 gtid, target_gtid, spin ));
00630 }
00631 }
00632
00633 }
00634
00635
00636
00637 void
00638 __kmp_infinite_loop( void )
00639 {
00640 static int done = FALSE;
00641
00642 while (! done) {
00643 KMP_YIELD( 1 );
00644 }
00645 }
00646
00647 #define MAX_MESSAGE 512
00648
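/*
 * __kmp_print_storage_map_gtid
 *
 * Print one "OMP storage map" line describing the memory range [p1, p2) of
 * 'size' bytes and, when data-placement printing is enabled, the memory node
 * each page of the range resides on.
 * Example call (hypothetical values):
 *     __kmp_print_storage_map_gtid( gtid, base, base + size, size,
 *                                   "th_%d stack", gtid );
 * (Descriptive comment added; inferred from the code below.)
 */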
00649 void
00650 __kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
00651 char buffer[MAX_MESSAGE];
00652 int node;
00653 va_list ap;
00654
00655 va_start( ap, format);
snprintf( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
00657 __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
00658 __kmp_vprintf( kmp_err, buffer, ap );
00659 #if KMP_PRINT_DATA_PLACEMENT
00660 if(gtid >= 0) {
00661 if(p1 <= p2 && (char*)p2 - (char*)p1 == size) {
00662 if( __kmp_storage_map_verbose ) {
00663 node = __kmp_get_host_node(p1);
00664 if(node < 0)
00665 __kmp_storage_map_verbose = FALSE;
00666 else {
00667 char *last;
00668 int lastNode;
00669 int localProc = __kmp_get_cpu_from_gtid(gtid);
00670
00671 p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) );
00672 p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)PAGE_SIZE - 1) );
00673 if(localProc >= 0)
00674 __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, localProc>>1);
00675 else
00676 __kmp_printf_no_lock(" GTID %d\n", gtid);
00677 # if KMP_USE_PRCTL
00678
00679 do {
00680 last = p1;
00681 lastNode = node;
00682
00683 do {
p1 = (char *)p1 + PAGE_SIZE;
00685 } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
00686 __kmp_printf_no_lock(" %p-%p memNode %d\n", last,
00687 (char*)p1 - 1, lastNode);
00688 } while(p1 <= p2);
00689 # else
00690 __kmp_printf_no_lock(" %p-%p memNode %d\n", p1,
00691 (char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1));
00692 if(p1 < p2) {
00693 __kmp_printf_no_lock(" %p-%p memNode %d\n", p2,
00694 (char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2));
00695 }
00696 # endif
00697 }
00698 }
00699 } else
00700 __kmp_printf_no_lock(" %s\n", KMP_I18N_STR( StorageMapWarning ) );
00701 }
00702 #endif
__kmp_release_bootstrap_lock( & __kmp_stdio_lock );
va_end( ap );
}
00705
00706 void
00707 __kmp_warn( char const * format, ... )
00708 {
00709 char buffer[MAX_MESSAGE];
00710 va_list ap;
00711
00712 if ( __kmp_generate_warnings == kmp_warnings_off ) {
00713 return;
00714 }
00715
00716 va_start( ap, format );
00717
00718 snprintf( buffer, sizeof(buffer) , "OMP warning: %s\n", format );
00719 __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
00720 __kmp_vprintf( kmp_err, buffer, ap );
00721 __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
00722
00723 va_end( ap );
00724 }
00725
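/*
 * __kmp_abort_process
 *
 * Terminate the whole process after dumping the debug buffer (if enabled).
 * On Windows the abort flag is published first so other threads can notice
 * the abnormal termination before the process is killed.
 * (Descriptive comment added; inferred from the code below.)
 */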
00726 void
00727 __kmp_abort_process()
00728 {
00729
00730
00731 __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );
00732
00733 if ( __kmp_debug_buf ) {
00734 __kmp_dump_debug_buffer();
00735 };
00736
00737 if ( KMP_OS_WINDOWS ) {
00738
00739
00740 __kmp_global.g.g_abort = SIGABRT;
00752 raise( SIGABRT );
00753 _exit( 3 );
00754 } else {
00755 abort();
00756 };
00757
00758 __kmp_infinite_loop();
00759 __kmp_release_bootstrap_lock( & __kmp_exit_lock );
00760
00761 }
00762
00763 void
00764 __kmp_abort_thread( void )
00765 {
00766
00767
00768 __kmp_infinite_loop();
00769 }
00770
00771
00772
00773
00774
00775
00776
00777
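/*
 * __kmp_print_thread_storage_map / __kmp_print_team_storage_map
 *
 * Dump the layout of a kmp_info_t (per-thread) or kmp_team_t (per-team)
 * structure and its interesting sub-ranges via __kmp_print_storage_map_gtid,
 * for storage-map diagnostics.
 * (Descriptive comment added; inferred from the code below.)
 */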
00778 static void
00779 __kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
00780 {
00781 __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );
00782
00783 __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
00784 "th_%d.th_info", gtid );
00785
00786 __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
00787 "th_%d.th_local", gtid );
00788
00789 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
00790 sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );
00791
00792 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
00793 &thr->th.th_bar[bs_plain_barrier+1],
00794 sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);
00795
00796 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
00797 &thr->th.th_bar[bs_forkjoin_barrier+1],
00798 sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);
00799
00800 #if KMP_FAST_REDUCTION_BARRIER
00801 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
00802 &thr->th.th_bar[bs_reduction_barrier+1],
00803 sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
00804 #endif // KMP_FAST_REDUCTION_BARRIER
00805 }
00806
00807
00808
00809
00810
00811
00812 static void
00813 __kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
00814 {
00815 int num_disp_buff = team->t.t_max_nproc > 1 ? KMP_MAX_DISP_BUF : 2;
00816 __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
00817 header, team_id );
00818
00819 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
00820 sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );
00821
00822
00823 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
00824 sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );
00825
00826 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
00827 sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );
00828
00829 #if KMP_FAST_REDUCTION_BARRIER
00830 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
00831 sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
00832 #endif // KMP_FAST_REDUCTION_BARRIER
00833
00834 __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
00835 sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );
00836
00837 __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
00838 sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );
00839
00840 __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
00841 sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
00842 header, team_id );
00876 __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
00877 sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
00878 }
00879
00880 static void __kmp_init_allocator() {}
00881 static void __kmp_fini_allocator() {}
00882 static void __kmp_fini_allocator_thread() {}
00883
00884
00885
00886 #ifdef GUIDEDLL_EXPORTS
00887 # if KMP_OS_WINDOWS
00888
00889
00890 static void
00891 __kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
00892
00893 __kmp_init_bootstrap_lock( lck );
00894 }
00895
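/*
 * __kmp_reset_locks_on_process_detach
 *
 * Called from DllMain(PROCESS_DETACH) when the process is terminating.
 * Waits until no registered thread other than the calling one is still
 * alive (the OS may have killed them while holding runtime locks), then
 * re-initializes the bootstrap locks so shutdown can proceed.
 * (Descriptive comment added; inferred from the code below.)
 */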
00896 static void
00897 __kmp_reset_locks_on_process_detach( int gtid_req ) {
00898 int i;
00899 int thread_count;
00912 while( 1 ) {
00913 thread_count = 0;
00914 for( i = 0; i < __kmp_threads_capacity; ++i ) {
00915 if( !__kmp_threads ) continue;
00916 kmp_info_t* th = __kmp_threads[ i ];
00917 if( th == NULL ) continue;
00918 int gtid = th->th.th_info.ds.ds_gtid;
00919 if( gtid == gtid_req ) continue;
00920 if( gtid < 0 ) continue;
00921 DWORD exit_val;
00922 int alive = __kmp_is_thread_alive( th, &exit_val );
00923 if( alive ) {
00924 ++thread_count;
00925 }
00926 }
00927 if( thread_count == 0 ) break;
00928 }
00929
00930
00931
00932
00933
00934 __kmp_reset_lock( &__kmp_forkjoin_lock );
00935 #ifdef KMP_DEBUG
00936 __kmp_reset_lock( &__kmp_stdio_lock );
00937 #endif // KMP_DEBUG
00938
00939
00940 }
00941
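/*
 * DllMain: when the runtime is built as a Windows DLL, hook process and
 * thread attach/detach so per-thread and library-wide cleanup runs even when
 * the library is unloaded or the process terminates.
 * (Descriptive comment added; inferred from the code below.)
 */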
00942 BOOL WINAPI
00943 DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {
00944
00945
00946 switch( fdwReason ) {
00947
00948 case DLL_PROCESS_ATTACH:
00949 KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" ));
00950
00951 return TRUE;
00952
00953 case DLL_PROCESS_DETACH:
00954 KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n",
00955 __kmp_gtid_get_specific() ));
00956
00957 if( lpReserved != NULL )
00958 {
00984 __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );
00985 }
00986
00987 __kmp_internal_end_library( __kmp_gtid_get_specific() );
00988
00989 return TRUE;
00990
00991 case DLL_THREAD_ATTACH:
00992 KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" ));
00993
00994
00995
00996 return TRUE;
00997
00998 case DLL_THREAD_DETACH:
00999 KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n",
01000 __kmp_gtid_get_specific() ));
01001
01002 __kmp_internal_end_thread( __kmp_gtid_get_specific() );
01003 return TRUE;
01004 }
01005
01006 return TRUE;
01007 }
01008
01009 # endif
01010 #endif
01011
01012
01013
01014
01015
01016
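/*
 * __kmp_change_library
 *
 * Set or clear the low bit of __kmp_yield_init according to 'status' and
 * return the previous setting.  (Descriptive comment added; only what the
 * code below does is stated, not the library-mode semantics of the bit.)
 */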
01017 int
01018 __kmp_change_library( int status )
01019 {
01020 int old_status;
01021
01022 old_status = __kmp_yield_init & 1;
01023
01024 if (status) {
01025 __kmp_yield_init |= 1;
01026 }
01027 else {
01028 __kmp_yield_init &= ~1;
01029 }
01030
01031 return old_status;
01032 }
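/*
 * __kmp_parallel_deo / __kmp_parallel_dxo
 *
 * Enter / exit an "ordered" construct.  On entry the thread waits until the
 * team's ordered ticket (t_ordered.dt.t_value) equals its own tid; on exit
 * the ticket is advanced to the next tid so the following thread may proceed.
 * Consistency checks are pushed/popped when enabled.
 * (Descriptive comment added; inferred from the code below.)
 */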
01040 void
01041 __kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
01042 {
01043 int gtid = *gtid_ref;
01044 #ifdef BUILD_PARALLEL_ORDERED
01045 kmp_team_t *team = __kmp_team_from_gtid( gtid );
01046 #endif
01047
01048 if( __kmp_env_consistency_check ) {
01049 if( __kmp_threads[gtid] -> th.th_root -> r.r_active )
01050 __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
01051 }
01052 #ifdef BUILD_PARALLEL_ORDERED
01053 if( !team -> t.t_serialized ) {
01054 kmp_uint32 spins;
01055
01056 KMP_MB();
01057 KMP_WAIT_YIELD(&team -> t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
01058 KMP_MB();
01059 }
01060 #endif
01061 }
01062
01063
01064
01065
01066
01067 void
01068 __kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
01069 {
01070 int gtid = *gtid_ref;
01071 #ifdef BUILD_PARALLEL_ORDERED
01072 int tid = __kmp_tid_from_gtid( gtid );
01073 kmp_team_t *team = __kmp_team_from_gtid( gtid );
01074 #endif
01075
01076 if( __kmp_env_consistency_check ) {
01077 if( __kmp_threads[gtid] -> th.th_root -> r.r_active )
01078 __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
01079 }
01080 #ifdef BUILD_PARALLEL_ORDERED
01081 if ( ! team -> t.t_serialized ) {
01082 KMP_MB();
01083
01084
01085
01086 team -> t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );
01087
01088 #if OMPT_SUPPORT
01089 if ((ompt_status == ompt_status_track_callback) &&
01090 (ompt_callbacks.ompt_callback(ompt_event_release_ordered))) {
01091
01092 kmp_info_t *this_thread = __kmp_threads[gtid];
01093 ompt_callbacks.ompt_callback(ompt_event_release_ordered)
01094 (this_thread->th.ompt_thread_info.wait_id);
01095 }
01096 #endif
01097
01098 KMP_MB();
01099 }
01100 #endif
01101 }
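/*
 * __kmp_enter_single / __kmp_exit_single
 *
 * Decide whether the calling thread executes a SINGLE region.  In a
 * serialized team the answer is always yes; otherwise the threads race with
 * a compare-and-swap on team->t.t_construct and exactly one of them wins.
 * Returns nonzero for the winner.
 * (Descriptive comment added; inferred from the code below.)
 */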
01111 int
01112 __kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
01113 {
01114 int status;
01115 kmp_info_t *th;
01116 kmp_team_t *team;
01117
01118 if( ! TCR_4(__kmp_init_parallel) )
01119 __kmp_parallel_initialize();
01120
01121 th = __kmp_threads[ gtid ];
01122 team = th -> th.th_team;
01123 status = 0;
01124
01125 th->th.th_ident = id_ref;
01126
01127 if ( team -> t.t_serialized ) {
01128 status = 1;
01129 } else {
01130 kmp_int32 old_this = th->th.th_local.this_construct;
01131
01132 ++th->th.th_local.this_construct;
01133
01134
01135
01136
01137 status = KMP_COMPARE_AND_STORE_ACQ32(&team -> t.t_construct, old_this,
01138 th->th.th_local.this_construct);
01139 }
01140
01141 if( __kmp_env_consistency_check ) {
01142 if (status && push_ws) {
01143 __kmp_push_workshare( gtid, ct_psingle, id_ref );
01144 } else {
01145 __kmp_check_workshare( gtid, ct_psingle, id_ref );
01146 }
01147 }
01148 return status;
01149 }
01150
01151 void
01152 __kmp_exit_single( int gtid )
01153 {
01154 if( __kmp_env_consistency_check )
01155 __kmp_pop_workshare( gtid, ct_psingle, NULL );
01156 }
01157
01158
01159
01160
01161
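/*
 * __kmp_linear_barrier_gather
 *
 * Gather phase of the linear (centralized) barrier: each worker bumps its own
 * b_arrived flag to notify the master, while the master waits for every
 * worker's flag in turn and applies the optional reduction callback.
 * (Descriptive comment added; inferred from the code below.)
 */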
01162 static void
01163 __kmp_linear_barrier_gather( enum barrier_type bt,
01164 kmp_info_t *this_thr,
01165 int gtid,
01166 int tid,
01167 void (*reduce)(void *, void *)
01168 )
01169 {
01170 register kmp_team_t *team = this_thr -> th.th_team;
01171 register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
01172 register kmp_info_t **other_threads = team -> t.t_threads;
01173
01174 KA_TRACE( 20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
01175 gtid, team->t.t_id, tid, bt ) );
01176
01177 KMP_DEBUG_ASSERT( this_thr == other_threads[this_thr->th.th_info.ds.ds_tid] );
01178
01179
01180
01181
01182
01183
01184
01185 if ( ! KMP_MASTER_TID( tid )) {
01186
01187 KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"
01188 "arrived(%p): %u => %u\n",
01189 gtid, team->t.t_id, tid,
01190 __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
01191 &thr_bar -> b_arrived, thr_bar -> b_arrived,
01192 thr_bar -> b_arrived + KMP_BARRIER_STATE_BUMP
01193 ) );
01194
01195
01196
01197
01198
01199
01200
01201 __kmp_release( other_threads[0], &thr_bar -> b_arrived, kmp_release_fence );
01202
01203 } else {
01204 register kmp_balign_team_t *team_bar = & team -> t.t_bar[ bt ];
01205 register int nproc = this_thr -> th.th_team_nproc;
01206 register int i;
01207 register kmp_uint new_state;
01208
01209
01210 new_state = team_bar -> b_arrived + KMP_BARRIER_STATE_BUMP;
01211
01212
01213 for (i = 1; i < nproc; i++) {
01214 #if KMP_CACHE_MANAGE
01215
01216 if ( i+1 < nproc )
01217 KMP_CACHE_PREFETCH( &other_threads[ i+1 ] -> th.th_bar[ bt ].bb.b_arrived );
01218 #endif
01219 KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
01220 "arrived(%p) == %u\n",
01221 gtid, team->t.t_id, tid,
01222 __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
01223 &other_threads[i] -> th.th_bar[ bt ].bb.b_arrived,
01224 new_state ) );
01225
01226
01227 __kmp_wait_sleep( this_thr,
01228 & other_threads[ i ] -> th.th_bar[ bt ].bb.b_arrived,
01229 new_state, FALSE
01230 );
01231
01232 if (reduce) {
01233
01234 KA_TRACE( 100, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
01235 gtid, team->t.t_id, tid,
01236 __kmp_gtid_from_tid( i, team ), team->t.t_id, i ) );
01237
01238 (*reduce)( this_thr -> th.th_local.reduce_data,
01239 other_threads[ i ] -> th.th_local.reduce_data );
01240
01241 }
01242
01243 }
01244
01245
01246 team_bar -> b_arrived = new_state;
01247 KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d "
01248 "arrived(%p) = %u\n",
01249 gtid, team->t.t_id, tid, team->t.t_id,
01250 &team_bar -> b_arrived, new_state ) );
01251 }
01252
01253 KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
01254 gtid, team->t.t_id, tid, bt ) );
01255 }
01256
01257
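/*
 * __kmp_tree_barrier_gather
 *
 * Gather phase of the tree barrier: each thread first waits for (and
 * optionally reduces with) its children, then signals its parent.  The tree
 * fan-in is 1 << __kmp_barrier_gather_branch_bits[bt].
 * (Descriptive comment added; inferred from the code below.)
 */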
01258 static void
01259 __kmp_tree_barrier_gather( enum barrier_type bt,
01260 kmp_info_t *this_thr,
01261 int gtid,
01262 int tid,
01263 void (*reduce) (void *, void *)
01264 )
01265 {
01266 register kmp_team_t *team = this_thr -> th.th_team;
01267 register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
01268 register kmp_info_t **other_threads = team -> t.t_threads;
01269 register kmp_uint32 nproc = this_thr -> th.th_team_nproc;
01270 register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[ bt ];
01271 register kmp_uint32 branch_factor = 1 << branch_bits ;
01272 register kmp_uint32 child;
01273 register kmp_int32 child_tid;
01274 register kmp_uint new_state;
01275
01276 KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
01277 gtid, team->t.t_id, tid, bt ) );
01278
01279 KMP_DEBUG_ASSERT( this_thr == other_threads[this_thr->th.th_info.ds.ds_tid] );
01280
01281
01282
01283
01284
01285
01286
01287 child_tid = (tid << branch_bits) + 1;
01288
01289 if ( child_tid < nproc ) {
01290
01291
01292 new_state = team -> t.t_bar[ bt ].b_arrived + KMP_BARRIER_STATE_BUMP;
01293 child = 1;
01294
01295 do {
01296 register kmp_info_t *child_thr = other_threads[ child_tid ];
01297 register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
01298 #if KMP_CACHE_MANAGE
01299
01300 if ( child+1 <= branch_factor && child_tid+1 < nproc )
01301 KMP_CACHE_PREFETCH( &other_threads[ child_tid+1 ] -> th.th_bar[ bt ].bb.b_arrived );
01302 #endif
01303 KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
01304 "arrived(%p) == %u\n",
01305 gtid, team->t.t_id, tid,
01306 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id, child_tid,
01307 &child_bar -> b_arrived, new_state ) );
01308
01309
01310 __kmp_wait_sleep( this_thr, &child_bar -> b_arrived, new_state, FALSE
01311 );
01312
01313 if (reduce) {
01314
01315 KA_TRACE( 100, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
01316 gtid, team->t.t_id, tid,
01317 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
01318 child_tid ) );
01319
01320 (*reduce)( this_thr -> th.th_local.reduce_data,
01321 child_thr -> th.th_local.reduce_data );
01322
01323 }
01324
01325 child++;
01326 child_tid++;
01327 }
01328 while ( child <= branch_factor && child_tid < nproc );
01329 }
01330
01331 if ( !KMP_MASTER_TID(tid) ) {
01332
01333 register kmp_int32 parent_tid = (tid - 1) >> branch_bits;
01334
01335 KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
01336 "arrived(%p): %u => %u\n",
01337 gtid, team->t.t_id, tid,
01338 __kmp_gtid_from_tid( parent_tid, team ), team->t.t_id, parent_tid,
01339 &thr_bar -> b_arrived, thr_bar -> b_arrived,
01340 thr_bar -> b_arrived + KMP_BARRIER_STATE_BUMP
01341 ) );
01342
01343
01344
01345
01346
01347
01348
01349 __kmp_release( other_threads[parent_tid], &thr_bar -> b_arrived, kmp_release_fence );
01350
01351 } else {
01352
01353
01354 if ( nproc > 1 )
01355
01356 team -> t.t_bar[ bt ].b_arrived = new_state;
01357 else
01358 team -> t.t_bar[ bt ].b_arrived += KMP_BARRIER_STATE_BUMP;
01359
01360 KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %u\n",
01361 gtid, team->t.t_id, tid, team->t.t_id,
01362 &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived ) );
01363 }
01364
01365 KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
01366 gtid, team->t.t_id, tid, bt ) );
01367 }
01368
01369
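/*
 * __kmp_hyper_barrier_gather
 *
 * Gather phase of the hypercube-embedded tree barrier: at each level a thread
 * either signals its parent and stops, or collects (and optionally reduces
 * with) the children that map to it at that level.
 * (Descriptive comment added; inferred from the code below.)
 */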
01370 static void
01371 __kmp_hyper_barrier_gather( enum barrier_type bt,
01372 kmp_info_t *this_thr,
01373 int gtid,
01374 int tid,
01375 void (*reduce) (void *, void *)
01376 )
01377 {
01378 register kmp_team_t *team = this_thr -> th.th_team;
01379 register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
01380 register kmp_info_t **other_threads = team -> t.t_threads;
01381 register kmp_uint new_state = KMP_BARRIER_UNUSED_STATE;
01382 register kmp_uint32 num_threads = this_thr -> th.th_team_nproc;
01383 register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[ bt ];
01384 register kmp_uint32 branch_factor = 1 << branch_bits ;
01385 register kmp_uint32 offset;
01386 register kmp_uint32 level;
01387
01388 KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
01389 gtid, team->t.t_id, tid, bt ) );
01390
01391 KMP_DEBUG_ASSERT( this_thr == other_threads[this_thr->th.th_info.ds.ds_tid] );
01392
01393
01394
01395
01396
01397
01398
01399 for ( level=0, offset =1;
01400 offset < num_threads;
01401 level += branch_bits, offset <<= branch_bits )
01402 {
01403 register kmp_uint32 child;
01404 register kmp_int32 child_tid;
01405
01406 if ( ((tid >> level) & (branch_factor - 1)) != 0 ) {
01407 register kmp_int32 parent_tid = tid & ~( (1 << (level + branch_bits)) -1 );
01408
01409 KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
01410 "arrived(%p): %u => %u\n",
01411 gtid, team->t.t_id, tid,
01412 __kmp_gtid_from_tid( parent_tid, team ), team->t.t_id, parent_tid,
01413 &thr_bar -> b_arrived, thr_bar -> b_arrived,
01414 thr_bar -> b_arrived + KMP_BARRIER_STATE_BUMP
01415 ) );
01416
01417
01418
01419
01420
01421
01422
01423
01424 __kmp_release( other_threads[parent_tid], &thr_bar -> b_arrived, kmp_release_fence );
01425 break;
01426 }
01427
01428
01429
01430 for ( child = 1, child_tid = tid + (1 << level);
01431 child < branch_factor && child_tid < num_threads;
01432 child++, child_tid += (1 << level) )
01433 {
01434 register kmp_info_t *child_thr = other_threads[ child_tid ];
01435 register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
01436 #if KMP_CACHE_MANAGE
01437 register kmp_uint32 next_child_tid = child_tid + (1 << level);
01438
01439 if ( child+1 < branch_factor && next_child_tid < num_threads )
01440 KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ] -> th.th_bar[ bt ].bb.b_arrived );
01441 #endif
01442
01443 if (new_state == KMP_BARRIER_UNUSED_STATE)
01444 new_state = team -> t.t_bar[ bt ].b_arrived + KMP_BARRIER_STATE_BUMP;
01445
01446 KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
01447 "arrived(%p) == %u\n",
01448 gtid, team->t.t_id, tid,
01449 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id, child_tid,
01450 &child_bar -> b_arrived, new_state ) );
01451
01452
01453 __kmp_wait_sleep( this_thr, &child_bar -> b_arrived, new_state, FALSE
01454 );
01455
01456 if (reduce) {
01457
01458 KA_TRACE( 100, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
01459 gtid, team->t.t_id, tid,
01460 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
01461 child_tid ) );
01462
01463 (*reduce)( this_thr -> th.th_local.reduce_data,
01464 child_thr -> th.th_local.reduce_data );
01465
01466 }
01467 }
01468 }
01469
01470
01471 if ( KMP_MASTER_TID(tid) ) {
01472
01473
01474 if (new_state == KMP_BARRIER_UNUSED_STATE)
01475 team -> t.t_bar[ bt ].b_arrived += KMP_BARRIER_STATE_BUMP;
01476 else
01477 team -> t.t_bar[ bt ].b_arrived = new_state;
01478
01479 KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %u\n",
01480 gtid, team->t.t_id, tid, team->t.t_id,
01481 &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived ) );
01482 }
01483
01484 KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
01485 gtid, team->t.t_id, tid, bt ) );
01486
01487 }
01488
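/*
 * __kmp_linear_barrier_release
 *
 * Release phase of the linear barrier: the master (optionally pushing ICVs
 * into the implicit tasks) bumps every worker's b_go flag, while workers
 * sleep-wait on their own b_go flag and reset it once released.
 * (Descriptive comment added; inferred from the code below.)
 */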
01489 static void
01490 __kmp_linear_barrier_release( enum barrier_type bt,
01491 kmp_info_t *this_thr,
01492 int gtid,
01493 int tid,
01494 int propagate_icvs
01495 )
01496 {
01497 register kmp_bstate_t *thr_bar = &this_thr -> th.th_bar[ bt ].bb;
01498 register kmp_team_t *team;
01499
01500 if (KMP_MASTER_TID( tid )) {
01501 register int i;
01502 register kmp_uint32 nproc = this_thr -> th.th_team_nproc;
01503 register kmp_info_t **other_threads;
01504
01505 team = __kmp_threads[ gtid ]-> th.th_team;
01506 KMP_DEBUG_ASSERT( team != NULL );
01507 other_threads = team -> t.t_threads;
01508
01509 KA_TRACE( 20, ( "__kmp_linear_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
01510 gtid, team->t.t_id, tid, bt ) );
01511
01512
01513 if (nproc > 1) {
01514 for (i = 1; i < nproc; i++) {
01515 #if KMP_CACHE_MANAGE
01516
01517 if( i+1 < nproc )
01518 KMP_CACHE_PREFETCH( &other_threads[ i+1 ]-> th.th_bar[ bt ].bb.b_go );
01519 #endif
01520
01521 #if KMP_BARRIER_ICV_PUSH
01522 if ( propagate_icvs ) {
01523 __kmp_init_implicit_task( team->t.t_ident,
01524 team->t.t_threads[i], team, i, FALSE );
01525 copy_icvs( &team->t.t_implicit_task_taskdata[i].td_icvs,
01526 &team->t.t_implicit_task_taskdata[0].td_icvs );
01527 }
01528 #endif // KMP_BARRIER_ICV_PUSH
01529
01530 KA_TRACE( 20, ( "__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
01531 "go(%p): %u => %u\n",
01532 gtid, team->t.t_id, tid,
01533 other_threads[i]->th.th_info.ds.ds_gtid, team->t.t_id, i,
01534 &other_threads[i]->th.th_bar[bt].bb.b_go,
01535 other_threads[i]->th.th_bar[bt].bb.b_go,
01536 other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP
01537 ) );
01538
01539 __kmp_release( other_threads[ i ],
01540 &other_threads[ i ]-> th.th_bar[ bt ].bb.b_go, kmp_acquire_fence );
01541 }
01542 }
01543 } else {
01544
01545
01546 KA_TRACE( 20, ( "__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
01547 gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
01548
01549 __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE
01550 );
01551
01552
01553
01554
01555 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
01556 return;
01557
01558
01559
01560
01561 #ifdef KMP_DEBUG
01562 tid = __kmp_tid_from_gtid( gtid );
01563 team = __kmp_threads[ gtid ]-> th.th_team;
01564 #endif
01565 KMP_DEBUG_ASSERT( team != NULL );
01566
01567 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
01568 KA_TRACE( 20, ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
01569 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
01570
01571 KMP_MB();
01572 }
01573
01574 KA_TRACE( 20, ( "__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
01575 gtid, team->t.t_id, tid, bt ) );
01576 }
01577
01578
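/*
 * __kmp_tree_barrier_release
 *
 * Release phase of the tree barrier: workers sleep-wait on their own b_go
 * flag, then each thread (master included) releases its children, optionally
 * pushing the master's ICVs into the children's implicit tasks first.
 * (Descriptive comment added; inferred from the code below.)
 */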
01579 static void
01580 __kmp_tree_barrier_release( enum barrier_type bt,
01581 kmp_info_t *this_thr,
01582 int gtid,
01583 int tid,
01584 int propagate_icvs
01585 )
01586 {
01587
01588 register kmp_team_t *team;
01589 register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
01590 register kmp_uint32 nproc;
01591 register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[ bt ];
01592 register kmp_uint32 branch_factor = 1 << branch_bits ;
01593 register kmp_uint32 child;
01594 register kmp_int32 child_tid;
01595
01596
01597
01598
01599
01600
01601 if ( ! KMP_MASTER_TID( tid )) {
01602
01603
01604 KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n",
01605 gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
01606
01607
01608 __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE
01609 );
01610
01611
01612
01613
01614 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
01615 return;
01616
01617
01618
01619
01620 team = __kmp_threads[ gtid ]-> th.th_team;
01621 KMP_DEBUG_ASSERT( team != NULL );
01622 tid = __kmp_tid_from_gtid( gtid );
01623
01624 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
01625 KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
01626 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
01627
01628 KMP_MB();
01629
01630 } else {
01631 team = __kmp_threads[ gtid ]-> th.th_team;
01632 KMP_DEBUG_ASSERT( team != NULL );
01633
01634 KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
01635 gtid, team->t.t_id, tid, bt ) );
01636 }
01637
01638
01639 nproc = this_thr -> th.th_team_nproc;
01640 child_tid = ( tid << branch_bits ) + 1;
01641
01642 if ( child_tid < nproc ) {
01643 register kmp_info_t **other_threads = team -> t.t_threads;
01644 child = 1;
01645
01646
01647 do {
01648 register kmp_info_t *child_thr = other_threads[ child_tid ];
01649 register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
01650 #if KMP_CACHE_MANAGE
01651
01652 if ( child+1 <= branch_factor && child_tid+1 < nproc )
01653 KMP_CACHE_PREFETCH( &other_threads[ child_tid+1 ] -> th.th_bar[ bt ].bb.b_go );
01654 #endif
01655
01656 #if KMP_BARRIER_ICV_PUSH
01657 if ( propagate_icvs ) {
01658 __kmp_init_implicit_task( team->t.t_ident,
01659 team->t.t_threads[child_tid], team, child_tid, FALSE );
01660 copy_icvs( &team->t.t_implicit_task_taskdata[child_tid].td_icvs,
01661 &team->t.t_implicit_task_taskdata[0].td_icvs );
01662 }
01663 #endif // KMP_BARRIER_ICV_PUSH
01664
01665 KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)"
01666 "go(%p): %u => %u\n",
01667 gtid, team->t.t_id, tid,
01668 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
01669 child_tid, &child_bar -> b_go, child_bar -> b_go,
01670 child_bar -> b_go + KMP_BARRIER_STATE_BUMP ) );
01671
01672
01673 __kmp_release( child_thr, &child_bar -> b_go, kmp_acquire_fence );
01674
01675 child++;
01676 child_tid++;
01677 }
01678 while ( child <= branch_factor && child_tid < nproc );
01679 }
01680
01681 KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
01682 gtid, team->t.t_id, tid, bt ) );
01683 }
01684
01685
01686 #define KMP_REVERSE_HYPER_BAR
01687 #ifdef KMP_REVERSE_HYPER_BAR
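/*
 * __kmp_hyper_barrier_release (KMP_REVERSE_HYPER_BAR variant)
 *
 * Release phase of the hypercube barrier.  With KMP_REVERSE_HYPER_BAR defined
 * the children are released in the reverse of the order used by the gather
 * phase, waking the most distant threads first.
 * (Descriptive comment added; inferred from the code below.)
 */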
01688 static void
01689 __kmp_hyper_barrier_release( enum barrier_type bt,
01690 kmp_info_t *this_thr,
01691 int gtid,
01692 int tid,
01693 int propagate_icvs
01694 )
01695 {
01696
01697 register kmp_team_t *team;
01698 register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
01699 register kmp_info_t **other_threads;
01700 register kmp_uint32 num_threads;
01701 register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[ bt ];
01702 register kmp_uint32 branch_factor = 1 << branch_bits;
01703 register kmp_uint32 child;
01704 register kmp_int32 child_tid;
01705 register kmp_uint32 offset;
01706 register kmp_uint32 level;
01707
01708
01709
01710
01711
01712
01713
01714 if ( ! KMP_MASTER_TID( tid )) {
01715
01716
01717 KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n",
01718 gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
01719
01720
01721 __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE
01722 );
01723
01724
01725
01726
01727 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
01728 return;
01729
01730
01731
01732
01733 team = __kmp_threads[ gtid ]-> th.th_team;
01734 KMP_DEBUG_ASSERT( team != NULL );
01735 tid = __kmp_tid_from_gtid( gtid );
01736
01737 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
01738 KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
01739 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
01740
01741 KMP_MB();
01742
01743 } else {
01744 team = __kmp_threads[ gtid ]-> th.th_team;
01745 KMP_DEBUG_ASSERT( team != NULL );
01746
01747 KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
01748 gtid, team->t.t_id, tid, bt ) );
01749 }
01750
01751 num_threads = this_thr -> th.th_team_nproc;
01752 other_threads = team -> t.t_threads;
01753
01754
01755 for ( level = 0, offset = 1;
01756 offset < num_threads && (((tid >> level) & (branch_factor-1)) == 0);
01757 level += branch_bits, offset <<= branch_bits );
01758
01759
01760 for ( level -= branch_bits, offset >>= branch_bits;
01761 offset != 0;
01762 level -= branch_bits, offset >>= branch_bits )
01763 {
01764 register kmp_uint32 child;
01765 register kmp_int32 child_tid;
01766
01767
01768
01769 child = num_threads >> ((level==0)?level:level-1);
01770 for ( child = (child < branch_factor-1) ? child : branch_factor-1,
01771 child_tid = tid + (child << level);
01772 child >= 1;
01773 child--, child_tid -= (1 << level) )
01774 {
01775
01776 if ( child_tid >= num_threads ) continue;
01777 else {
01778 register kmp_info_t *child_thr = other_threads[ child_tid ];
01779 register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
01780 #if KMP_CACHE_MANAGE
01781 register kmp_uint32 next_child_tid = child_tid - (1 << level);
01782
01783 if ( child-1 >= 1 && next_child_tid < num_threads )
01784 KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ]->th.th_bar[ bt ].bb.b_go );
01785 #endif
01786
01787 #if KMP_BARRIER_ICV_PUSH
01788 if ( propagate_icvs ) {
01789 KMP_DEBUG_ASSERT( team != NULL );
01790 __kmp_init_implicit_task( team->t.t_ident,
01791 team->t.t_threads[child_tid], team, child_tid, FALSE );
01792 copy_icvs( &team->t.t_implicit_task_taskdata[child_tid].td_icvs,
01793 &team->t.t_implicit_task_taskdata[0].td_icvs );
01794 }
01795 #endif // KMP_BARRIER_ICV_PUSH
01796 KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)"
01797 "go(%p): %u => %u\n",
01798 gtid, team->t.t_id, tid,
01799 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
01800 child_tid, &child_bar -> b_go, child_bar -> b_go,
01801 child_bar -> b_go + KMP_BARRIER_STATE_BUMP ) );
01802
01803
01804 __kmp_release( child_thr, &child_bar -> b_go, kmp_acquire_fence );
01805 }
01806 }
01807 }
01808
01809 KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
01810 gtid, team->t.t_id, tid, bt ) );
01811 }
01812
01813 #else
01814
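/*
 * __kmp_hyper_barrier_release (forward-order variant)
 *
 * Used when KMP_REVERSE_HYPER_BAR is not defined: children are released in
 * the same order in which the gather phase visited them.
 * (Descriptive comment added; inferred from the code below.)
 */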
01815 static void
01816 __kmp_hyper_barrier_release( enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, int propagate_icvs )
01817 {
01818
01819 register kmp_team_t *team;
01820 register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
01821 register kmp_info_t **other_threads;
01822 register kmp_uint32 num_threads;
01823 register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[ bt ];
01824 register kmp_uint32 branch_factor = 1 << branch_bits;
01825 register kmp_uint32 child;
01826 register kmp_int32 child_tid;
01827 register kmp_uint32 offset;
01828 register kmp_uint32 level;
01829
01830
01831
01832
01833
01834
01835
01836 if ( ! KMP_MASTER_TID( tid )) {
01837
01838
01839 KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n",
01840 gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
01841
01842
__kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE );
01844
01845
01846
01847
01848 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
01849 return;
01850
01851
01852
01853
01854 team = __kmp_threads[ gtid ]-> th.th_team;
01855 KMP_DEBUG_ASSERT( team != NULL );
01856 tid = __kmp_tid_from_gtid( gtid );
01857
01858 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
01859 KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
01860 gtid, ( team != NULL ) ? team->t.t_id : -1, tid,
01861 &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
01862
01863 KMP_MB();
01864
01865 } else {
01866 team = __kmp_threads[ gtid ]-> th.th_team;
01867 KMP_DEBUG_ASSERT( team != NULL );
01868
01869 KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) enter for barrier type %d\n",
01870 gtid, team->t.t_id, tid, bt ) );
01871 }
01872
01873
01874 if ( team == NULL ) {
01875
01876 tid = __kmp_tid_from_gtid( gtid );
01877 team = __kmp_threads[ gtid ]-> th.th_team;
01878 }
01879 num_threads = this_thr -> th.th_team_nproc;
01880 other_threads = team -> t.t_threads;
01881
01882
01883 for ( level = 0, offset = 1;
01884 offset < num_threads;
01885 level += branch_bits, offset <<= branch_bits )
01886 {
01887 register kmp_uint32 child;
01888 register kmp_int32 child_tid;
01889
01890 if (((tid >> level) & (branch_factor - 1)) != 0)
01891
01892
01893 break;
01894
01895
01896 for ( child = 1, child_tid = tid + (1 << level);
01897 child < branch_factor && child_tid < num_threads;
01898 child++, child_tid += (1 << level) )
01899 {
01900 register kmp_info_t *child_thr = other_threads[ child_tid ];
01901 register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
01902 #if KMP_CACHE_MANAGE
01903 {
01904 register kmp_uint32 next_child_tid = child_tid + (1 << level);
01905
01906 if ( child+1 < branch_factor && next_child_tid < num_threads )
01907 KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ]->th.th_bar[ bt ].bb.b_go );
01908 }
01909 #endif
01910
01911 #if KMP_BARRIER_ICV_PUSH
01912 if ( propagate_icvs ) {
01913 KMP_DEBUG_ASSERT( team != NULL );
01914 __kmp_init_implicit_task( team->t.t_ident,
01915 team->t.t_threads[child_tid], team, child_tid, FALSE );
01916 copy_icvs( &team->t.t_implicit_task_taskdata[child_tid].td_icvs,
01917 &team->t.t_implicit_task_taskdata[0].td_icvs );
01918 }
01919 #endif // KMP_BARRIER_ICV_PUSH
01920
01921 KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) releasing "
01922 "T#%d(%d:%d) go(%p): %u => %u\n",
01923 gtid, team->t.t_id, tid,
01924 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
01925 child_tid, &child_bar -> b_go, child_bar -> b_go,
01926 child_bar -> b_go + KMP_BARRIER_STATE_BUMP ) );
01927
01928
01929 __kmp_release( child_thr, &child_bar -> b_go, kmp_acquire_fence );
01930 }
01931 }
01932
01933 KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
01934 gtid, team->t.t_id, tid, bt ) );
01935 }
01936 #endif
01937
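/*
 * __kmp_barrier
 *
 * Internal barrier entry point used for both split and full barriers.
 * Performs the gather phase using the configured pattern (linear, tree, or
 * hyper), lets the master do tasking bookkeeping, then performs the release
 * phase unless this is the master side of a split barrier.  Returns 0 for
 * the master thread (and for serialized teams) and 1 for the other threads.
 * (Descriptive comment added; inferred from the code below.)
 */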
01945 int
01946 __kmp_barrier( enum barrier_type bt, int gtid, int is_split,
01947 size_t reduce_size, void *reduce_data, void (*reduce)(void *, void *) )
01948 {
01949 register int tid = __kmp_tid_from_gtid( gtid );
01950 register kmp_info_t *this_thr = __kmp_threads[ gtid ];
01951 register kmp_team_t *team = this_thr -> th.th_team;
01952 register int status = 0;
#if OMPT_SUPPORT
ompt_task_id_t my_task_id;
ompt_parallel_id_t my_parallel_id;
#endif
01955
01956 KA_TRACE( 15, ( "__kmp_barrier: T#%d(%d:%d) has arrived\n",
01957 gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid) ) );
01958 #if OMPT_SUPPORT
01959 if ((ompt_status & ompt_status_track)) {
01960 my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
01961 my_parallel_id = team->t.ompt_team_info.parallel_id;
01962
/* Report the end of a preceding SINGLE region (if any) before recording
   that this thread is now waiting at a barrier. */
if (this_thr->th.ompt_thread_info.state == ompt_state_wait_single) {
if ((ompt_status == ompt_status_track_callback)) {
if (ompt_callbacks.ompt_callback(ompt_event_single_others_end)) {
ompt_callbacks.ompt_callback(ompt_event_single_others_end)
(my_parallel_id, my_task_id);
}
}
}
this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
01972 if ((ompt_status == ompt_status_track_callback) &&
01973 ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
01974 ompt_callbacks.ompt_callback(ompt_event_barrier_begin)
01975 (my_parallel_id, my_task_id);
01976 }
01977 }
01978 #endif
01979
01980 if ( ! team->t.t_serialized ) {
01981 #if OMP_30_ENABLED
01982 if ( __kmp_tasking_mode == tskm_extra_barrier ) {
01983 __kmp_tasking_barrier( team, this_thr, gtid );
01984 KA_TRACE( 15, ( "__kmp_barrier: T#%d(%d:%d) past tasking barrier\n",
01985 gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid) ) );
01986 }
01987 #endif
01988
01989
01990
01991
01992
01993
01994
01995
01996 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
01997 #if OMP_30_ENABLED
01998 this_thr -> th.th_team_bt_intervals = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
01999 this_thr -> th.th_team_bt_set = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
02000 #else
02001 this_thr -> th.th_team_bt_intervals = team -> t.t_set_bt_intervals[tid];
02002 this_thr -> th.th_team_bt_set= team -> t.t_set_bt_set[tid];
02003 #endif // OMP_30_ENABLED
02004 }
02005
02006
02007 if ( reduce != NULL ) {
02008
02009 this_thr -> th.th_local.reduce_data = reduce_data;
02010 }
02011 if ( __kmp_barrier_gather_pattern[ bt ] == bp_linear_bar || __kmp_barrier_gather_branch_bits[ bt ] == 0 ) {
02012 __kmp_linear_barrier_gather( bt, this_thr, gtid, tid, reduce
02013 );
02014 } else if ( __kmp_barrier_gather_pattern[ bt ] == bp_tree_bar ) {
02015 __kmp_tree_barrier_gather( bt, this_thr, gtid, tid, reduce
02016 );
02017 } else {
02018 __kmp_hyper_barrier_gather( bt, this_thr, gtid, tid, reduce
02019 );
02020 };
02021
02022
02023 KMP_MB();
02024
02025 if ( KMP_MASTER_TID( tid ) ) {
02026 status = 0;
02027
02028 #if OMP_30_ENABLED
02029 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
02030 __kmp_task_team_wait( this_thr, team
02031 );
02032 __kmp_task_team_setup( this_thr, team );
02033 }
02034 #endif
02035
02036 } else {
02037 status = 1;
02038 }
02039 if ( status == 1 || ! is_split ) {
02040 if ( __kmp_barrier_release_pattern[ bt ] == bp_linear_bar || __kmp_barrier_release_branch_bits[ bt ] == 0 ) {
02041 __kmp_linear_barrier_release( bt, this_thr, gtid, tid, FALSE
02042 );
02043 } else if ( __kmp_barrier_release_pattern[ bt ] == bp_tree_bar ) {
02044 __kmp_tree_barrier_release( bt, this_thr, gtid, tid, FALSE
02045 );
02046 } else {
02047 __kmp_hyper_barrier_release( bt, this_thr, gtid, tid, FALSE
02048 );
02049 }
02050 #if OMP_30_ENABLED
02051 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
02052 __kmp_task_team_sync( this_thr, team );
02053 }
02054 #endif
02055 }
02056
02057
02058 } else {
02059
02060 status = 0;
02061
02062 #if OMP_30_ENABLED
02063 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
02064
02065
02066
02067
02068 KMP_DEBUG_ASSERT( team->t.t_task_team == NULL );
02069 KMP_DEBUG_ASSERT( this_thr->th.th_task_team == NULL );
02070 }
02071 #endif
02072 }
02073
02074 KA_TRACE( 15, ( "__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
02075 gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid),
02076 status ) );
02077 #if OMPT_SUPPORT
02078 if (ompt_status & ompt_status_track) {
02079 if ((ompt_status == ompt_status_track_callback) &&
02080 ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
02081 ompt_callbacks.ompt_callback(ompt_event_barrier_end)
02082 (my_parallel_id, my_task_id);
02083 }
02084 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
02085 }
02086 #endif
02087 return status;
02088 }
02089
02090
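/*
 * __kmp_end_split_barrier
 *
 * Second half of a split barrier: the master performs the release phase that
 * __kmp_barrier() skipped when is_split was true.
 * (Descriptive comment added; inferred from the code below.)
 */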
02091 void
02092 __kmp_end_split_barrier( enum barrier_type bt, int gtid )
02093 {
02094 int tid = __kmp_tid_from_gtid( gtid );
02095 kmp_info_t *this_thr = __kmp_threads[ gtid ];
02096 kmp_team_t *team = this_thr -> th.th_team;
02097
02098 if( ! team -> t.t_serialized ) {
02099 if( KMP_MASTER_GTID( gtid ) ) {
02100 if ( __kmp_barrier_release_pattern[ bt ] == bp_linear_bar || __kmp_barrier_release_branch_bits[ bt ] == 0 ) {
02101 __kmp_linear_barrier_release( bt, this_thr, gtid, tid, FALSE
02102 );
02103 } else if ( __kmp_barrier_release_pattern[ bt ] == bp_tree_bar ) {
02104 __kmp_tree_barrier_release( bt, this_thr, gtid, tid, FALSE
02105 );
02106 } else {
02107 __kmp_hyper_barrier_release( bt, this_thr, gtid, tid, FALSE
02108 );
02109 };
02110 #if OMP_30_ENABLED
02111 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
02112 __kmp_task_team_sync( this_thr, team );
02113 };
02114 #endif
02115 }
02116 }
02117 }
02118
02119
02120
02121
02122
02123
02124
02125
02126
02127
02128
02129
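/*
 * Decide how many threads the forking master may actually use for the new team.
 * Serializes (returns 1) when nesting is disabled inside an active parallel region
 * or the library is serial, applies the dynamic adjustment mode (load balance,
 * thread limit, or random), honors KMP_ALL_THREADS, and finally clips to the
 * capacity of the threads array, expanding it when possible.  The return value is
 * the team size to use.
 */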
02130 static int
02131 __kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
02132 int master_tid, int set_nthreads
02133 )
02134 {
02135 int capacity;
02136 int new_nthreads;
02137 int use_rml_to_adjust_nth;
02138 KMP_DEBUG_ASSERT( __kmp_init_serial );
02139 KMP_DEBUG_ASSERT( root && parent_team );
02140
02141
02142
02143
02144 if ( set_nthreads == 1 ) {
02145 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d reserving 1 thread; requested %d threads\n",
02146 __kmp_get_gtid(), set_nthreads ));
02147 return 1;
02148 }
02149 if ( ( !get__nested_2(parent_team,master_tid) && root->r.r_in_parallel )
02150 || ( __kmp_library == library_serial ) ) {
02151 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team; requested %d threads\n",
02152 __kmp_get_gtid(), set_nthreads ));
02153 return 1;
02154 }
02155
02156
02157
02158
02159
02160 new_nthreads = set_nthreads;
02161 use_rml_to_adjust_nth = FALSE;
02162 if ( ! get__dynamic_2( parent_team, master_tid ) ) {
02163 ; /* dynamic adjustment is off: keep the number of threads the user requested */
02164 }
02165 #ifdef USE_LOAD_BALANCE
02166 else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
02167 new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
02168 if ( new_nthreads == 1 ) {
02169 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
02170 master_tid ));
02171 return 1;
02172 }
02173 if ( new_nthreads < set_nthreads ) {
02174 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
02175 master_tid, new_nthreads ));
02176 }
02177 }
02178 #endif
02179 else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
02180 new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
02181 : root->r.r_hot_team->t.t_nproc);
02182 if ( new_nthreads <= 1 ) {
02183 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
02184 master_tid ));
02185 return 1;
02186 }
02187 if ( new_nthreads < set_nthreads ) {
02188 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
02189 master_tid, new_nthreads ));
02190 }
02191 else {
02192 new_nthreads = set_nthreads;
02193 }
02194 }
02195 else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
02196 if ( set_nthreads > 2 ) {
02197 new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
02198 new_nthreads = ( new_nthreads % set_nthreads ) + 1;
02199 if ( new_nthreads == 1 ) {
02200 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
02201 master_tid ));
02202 return 1;
02203 }
02204 if ( new_nthreads < set_nthreads ) {
02205 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
02206 master_tid, new_nthreads ));
02207 }
02208 }
02209 }
02210 else {
02211 KMP_ASSERT( 0 );
02212 }
02213
02214
02215
02216
02217 if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
02218 root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
02219 int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
02220 root->r.r_hot_team->t.t_nproc );
02221 if ( tl_nthreads <= 0 ) {
02222 tl_nthreads = 1;
02223 }
02224
02225
02226
02227
02228 if ( ! get__dynamic_2( parent_team, master_tid )
02229 && ( ! __kmp_reserve_warn ) ) {
02230 __kmp_reserve_warn = 1;
02231 __kmp_msg(
02232 kmp_ms_warning,
02233 KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
02234 KMP_HNT( Unset_ALL_THREADS ),
02235 __kmp_msg_null
02236 );
02237 }
02238 if ( tl_nthreads == 1 ) {
02239 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
02240 master_tid ));
02241 return 1;
02242 }
02243 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
02244 master_tid, tl_nthreads ));
02245 new_nthreads = tl_nthreads;
02246 }
02247
02248
02249
02250
02251
02252
02253
02254
02255 capacity = __kmp_threads_capacity;
02256 if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
02257 --capacity;
02258 }
02259 if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
02260 root->r.r_hot_team->t.t_nproc ) > capacity ) {
02261
02262
02263
02264 int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
02265 root->r.r_hot_team->t.t_nproc ) - capacity;
02266 int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
02267 if ( slotsAdded < slotsRequired ) {
02268
02269
02270
02271 new_nthreads -= ( slotsRequired - slotsAdded );
02272 KMP_ASSERT( new_nthreads >= 1 );
02273
02274
02275
02276
02277 if ( ! get__dynamic_2( parent_team, master_tid )
02278 && ( ! __kmp_reserve_warn ) ) {
02279 __kmp_reserve_warn = 1;
02280 if ( __kmp_tp_cached ) {
02281 __kmp_msg(
02282 kmp_ms_warning,
02283 KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
02284 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
02285 KMP_HNT( PossibleSystemLimitOnThreads ),
02286 __kmp_msg_null
02287 );
02288 }
02289 else {
02290 __kmp_msg(
02291 kmp_ms_warning,
02292 KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
02293 KMP_HNT( SystemLimitOnThreads ),
02294 __kmp_msg_null
02295 );
02296 }
02297 }
02298 }
02299 }
02300
02301 if ( new_nthreads == 1 ) {
02302 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
02303 __kmp_get_gtid(), set_nthreads ) );
02304 return 1;
02305 }
02306
02307 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
02308 __kmp_get_gtid(), new_nthreads, set_nthreads ));
02309 return new_nthreads;
02310 }
02311
02312
02313
02314
02315
02316
02317
02318
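/*
 * Bind the master to slot 0 of the new team and, unless this is the hot team,
 * allocate and initialize the remaining workers, seed their per-barrier arrived
 * counters from the team, and (where supported) partition the places among them.
 */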
02319 static void
02320 __kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
02321 kmp_info_t *master_th, int master_gtid )
02322 {
02323 int i;
02324
02325 KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
02326 KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );
02327 KMP_MB();
02328
02329
02330 master_th -> th.th_info.ds.ds_tid = 0;
02331 master_th -> th.th_team = team;
02332 master_th -> th.th_team_nproc = team -> t.t_nproc;
02333 master_th -> th.th_team_master = master_th;
02334 master_th -> th.th_team_serialized = FALSE;
02335 master_th -> th.th_dispatch = & team -> t.t_dispatch[ 0 ];
02336
02337
02338 if ( team != root->r.r_hot_team ) {
02339
02340
02341 team -> t.t_threads[ 0 ] = master_th;
02342 __kmp_initialize_info( master_th, team, 0, master_gtid );
02343
02344
02345 for ( i=1 ; i < team->t.t_nproc ; i++ ) {
02346
02347
02348 team -> t.t_threads[ i ] = __kmp_allocate_thread( root, team, i );
02349 KMP_DEBUG_ASSERT( team->t.t_threads[i] );
02350 KMP_DEBUG_ASSERT( team->t.t_threads[i]->th.th_team == team );
02351
02352 KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%u, plain=%u\n",
02353 __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
02354 __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
02355 team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
02356 team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
02357
02358 {
02359 int b;
02360 kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
02361 for ( b = 0; b < bs_last_barrier; ++ b ) {
02362 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
02363 };
02364 }
02365 }
02366
02367 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
02368 __kmp_partition_places( team );
02369 #endif
02370
02371 }
02372
02373 KMP_MB();
02374 }
02375
02376
02377
02378
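/*
 * Start a parallel region: pick the team size via __kmp_reserve_threads, serialize
 * when only one thread is available, otherwise allocate a team, copy the microtask
 * arguments, and fork the workers.  With exec_master == 0 the caller invokes the
 * microtask on the master; with exec_master == 1 it is invoked here.
 */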
02379 int
02380 __kmp_fork_call(
02381 ident_t * loc,
02382 int gtid,
02383 int exec_master,
02384
02385
02386 kmp_int32 argc,
02387 microtask_t microtask,
02388 launch_t invoker,
02389
02390 #if KMP_ARCH_X86_64 && KMP_OS_LINUX
02391 va_list * ap
02392 #else
02393 va_list ap
02394 #endif
02395 )
02396 {
02397 void **argv;
02398 int i;
02399 int master_tid;
02400 int master_this_cons;
02401 int master_last_cons;
02402 kmp_team_t *team;
02403 kmp_team_t *parent_team;
02404 kmp_info_t *master_th;
02405 kmp_root_t *root;
02406 int nthreads;
02407 int master_active;
02408 int master_set_numthreads;
02409 int level;
02410
02411 #if OMPT_SUPPORT
02412 ompt_state_t prev_state;
02413 #endif
02414
02415 KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
02416
02417
02418 KMP_DEBUG_ASSERT( __kmp_init_serial );
02419 if( ! TCR_4(__kmp_init_parallel) )
02420 __kmp_parallel_initialize();
02421
02422
02423 master_th = __kmp_threads[ gtid ];
02424
02425 parent_team = master_th -> th.th_team;
02426 master_tid = master_th -> th.th_info.ds.ds_tid;
02427
02428 #if OMPT_SUPPORT
02429 ompt_parallel_id_t ompt_parallel_id =
02430 __ompt_parallel_id_new(master_th, master_tid);
02431 ompt_task_id_t ompt_task_id = 0;
02432 ompt_frame_t *ompt_frame = 0;
02433 if (ompt_status & ompt_status_track) {
02434 prev_state = master_th->th.ompt_thread_info.state; /* remember the state so it can be restored after the region */
master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
02435 if ((ompt_status == ompt_status_track_callback) &&
02436 ompt_callbacks.ompt_callback(ompt_event_parallel_create)) {
02437 ompt_callbacks.ompt_callback(ompt_event_parallel_create)
02438 (ompt_task_id, ompt_frame,
02439 ompt_parallel_id, (void *) microtask);
02440 }
02441 }
02442 #endif
02443
02444 master_this_cons = master_th -> th.th_local.this_construct;
02445 master_last_cons = master_th -> th.th_local.last_construct;
02446 root = master_th -> th.th_root;
02447 master_active = root -> r.r_active;
02448 master_set_numthreads = master_th -> th.th_set_nproc;
02449 #if OMP_30_ENABLED
02450
02451 level = parent_team->t.t_level;
02452 #endif // OMP_30_ENABLED
02453
02454
02455 master_th->th.th_ident = loc;
02456
02457 #if OMP_30_ENABLED && KMP_DEBUG
02458 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
02459 KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team );
02460 }
02461 #endif // OMP_30_ENABLED
02462
02463
02464 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
02465
02466 #if OMP_30_ENABLED
02467 if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
02468 nthreads = 1;
02469 }
02470 else
02471 #endif // OMP_30_ENABLED
02472
02473 {
02474 nthreads = master_set_numthreads ?
02475 master_set_numthreads : get__nproc_2( parent_team, master_tid );
02476 nthreads = __kmp_reserve_threads( root, parent_team, master_tid, nthreads
02477 );
02478 }
02479 KMP_DEBUG_ASSERT( nthreads > 0 );
02480
02481
02482 master_th -> th.th_set_nproc = 0;
02483
02484
02485
02486 if ( nthreads == 1 ) {
02487
02488 #if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
02489 void * args[ argc ];
02490 #else
02491 void * * args = (void**) alloca( argc * sizeof( void * ) );
02492 #endif
02493
02494 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
02495 KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
02496
02497 __kmpc_serialized_parallel(loc, gtid);
02498
02499 if ( exec_master == 0 ) {
02500
02501 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
02502 return FALSE;
02503 } else if ( exec_master == 1 ) {
02504
02505 argv = args;
02506 for( i=argc-1; i >= 0; --i )
02507
02508 #if KMP_ARCH_X86_64 && KMP_OS_LINUX
02509 *argv++ = va_arg( *ap, void * );
02510 #else
02511 *argv++ = va_arg( ap, void * );
02512 #endif
02513 master_th -> th.th_serial_team -> t.t_ident = loc;
02514 KMP_MB();
02515
02516 #if OMPT_SUPPORT
02517 ompt_lw_taskteam_t lw_taskteam;
02518 void **exit_runtime_p =
02519 &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
02520 if (ompt_status & ompt_status_track) {
02521 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, microtask,
02522 ompt_parallel_id);
02523
02524
02525 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
02526
02527 #if 0
02528
02529 if ((ompt_status & ompt_status_track_callback) &&
02530 ompt_callbacks.ompt_callback(ompt_event_parallel_create)) {
02531 ompt_callbacks.ompt_callback(ompt_event_parallel_create)
02532 (lw_taskteam.ompt_task_info.task_id,
02533 &lw_taskteam.ompt_task_info.frame,
02534 lw_taskteam.ompt_team_info.parallel_id,
02535 (void *) microtask
02536 );
02537 }
02538 #endif
02539 __ompt_lw_taskteam_link(&lw_taskteam, master_th);
02540 }
02541 #else
02542 void *dummy;
02543 void **exit_runtime_p = &dummy;
02544 #endif
02545
02546 __kmp_invoke_microtask( microtask, gtid, 0, argc, args, exit_runtime_p );
02547
02548 #if OMPT_SUPPORT
02549 if (ompt_status & ompt_status_track) {
02550 lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
02551 __ompt_lw_taskteam_unlink(&lw_taskteam, master_th);
02552 if ((ompt_status & ompt_status_track_callback) &&
02553 ompt_callbacks.ompt_callback(ompt_event_parallel_exit)) {
02554 ompt_callbacks.ompt_callback(ompt_event_parallel_exit)
02555 (lw_taskteam.ompt_task_info.task_id,
02556 &lw_taskteam.ompt_task_info.frame,
02557 lw_taskteam.ompt_team_info.parallel_id,
02558 (void *) microtask
02559 );
02560 }
02561 master_th->th.ompt_thread_info.state = prev_state;
02562 }
02563 #endif
02564 }
02565 else {
02566 KMP_ASSERT2( exec_master <= 1, "__kmp_fork_call: unknown parameter exec_master" );
02567 }
02568
02569 KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
02570
02571 KMP_MB();
02572 return FALSE;
02573 }
02574
02575 #if OMP_30_ENABLED
02576
02577
02578 KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
02579 parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
02580 master_th->th.th_current_task->td_icvs.max_active_levels ) );
02581
02582 /* The parent's implicit task stops executing while the region runs; __kmp_join_call sets it back. */
02583 master_th->th.th_current_task->td_flags.executing = 0;
02584 #endif
02585
02586
02587 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
02588
02589 #if OMP_30_ENABLED
02590
02591
02592
02593 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
02594 if ( ( level + 1 < __kmp_nested_nth.used ) &&
02595 ( __kmp_nested_nth.nth[level + 1] != nthreads_icv ) ) {
02596 nthreads_icv = __kmp_nested_nth.nth[level + 1];
02597 }
02598 else {
02599 nthreads_icv = 0;
02600 }
02601
02602 #if OMP_40_ENABLED
02603
02604
02605
02606 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
02607 kmp_proc_bind_t proc_bind_icv;
02608
02609 if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
02610 proc_bind = proc_bind_false;
02611 proc_bind_icv = proc_bind_default;
02612 }
02613 else {
02614 proc_bind_icv = master_th->th.th_current_task->td_icvs.proc_bind;
02615 if ( proc_bind == proc_bind_default ) {
02616
02617
02618
02619
02620 proc_bind = proc_bind_icv;
02621 }
02622 else {
02623
02624
02625
02626
02627
02628 }
02629
02630
02631
02632
02633 if ( ( level + 1 < __kmp_nested_proc_bind.used )
02634 && ( __kmp_nested_proc_bind.bind_types[level + 1] != proc_bind_icv ) ) {
02635 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
02636 }
02637 else {
02638 proc_bind_icv = proc_bind_default;
02639 }
02640 }
02641
02642
02643
02644
02645 master_th->th.th_set_proc_bind = proc_bind_default;
02646 #endif
02647
02648 if ( ( nthreads_icv > 0 )
02649 #if OMP_40_ENABLED
02650 || ( proc_bind_icv != proc_bind_default )
02651 #endif
02652 )
02653 {
02654 kmp_internal_control_t new_icvs;
02655 copy_icvs( & new_icvs, & master_th->th.th_current_task->td_icvs );
02656 new_icvs.next = NULL;
02657
02658 if ( nthreads_icv > 0 ) {
02659 new_icvs.nproc = nthreads_icv;
02660 }
02661
02662 #if OMP_40_ENABLED
02663 if ( proc_bind_icv != proc_bind_default ) {
02664 new_icvs.proc_bind = proc_bind_icv;
02665 }
02666 #endif
02667
02668
02669 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
02670 team = __kmp_allocate_team(root, nthreads, nthreads,
02671 #if OMPT_SUPPORT
02672 ompt_parallel_id,
02673 #endif
02674 #if OMP_40_ENABLED
02675 proc_bind,
02676 #endif
02677 &new_icvs, argc );
02678 } else
02679 #endif
02680 {
02681
02682 KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
02683 team = __kmp_allocate_team(root, nthreads, nthreads,
02684 #if OMPT_SUPPORT
02685 ompt_parallel_id,
02686 #endif
02687 #if OMP_40_ENABLED
02688 proc_bind,
02689 #endif
02690 #if OMP_30_ENABLED
02691 &master_th->th.th_current_task->td_icvs,
02692 #else
02693 parent_team->t.t_set_nproc[master_tid],
02694 parent_team->t.t_set_dynamic[master_tid],
02695 parent_team->t.t_set_nested[master_tid],
02696 parent_team->t.t_set_blocktime[master_tid],
02697 parent_team->t.t_set_bt_intervals[master_tid],
02698 parent_team->t.t_set_bt_set[master_tid],
02699 #endif
02700 argc );
02701 }
02702
02703 KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n",
02704 team ) );
02705
02706
02707 team->t.t_master_tid = master_tid;
02708 team->t.t_master_this_cons = master_this_cons;
02709 team->t.t_master_last_cons = master_last_cons;
02710
02711 team->t.t_parent = parent_team;
02712 TCW_SYNC_PTR(team->t.t_pkfn, microtask);
02713 team->t.t_invoke = invoker;
02714 team->t.t_ident = loc;
02715
02716 #if OMP_30_ENABLED
02717
02718 team->t.t_level = parent_team->t.t_level + 1;
02719 team->t.t_active_level = parent_team->t.t_active_level + 1;
02720 team->t.t_sched = get__sched_2( parent_team, master_tid );
02721
02722 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
02723 if ( __kmp_inherit_fp_control ) {
02724 __kmp_store_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
02725 __kmp_store_mxcsr( &team->t.t_mxcsr );
02726 team->t.t_mxcsr &= KMP_X86_MXCSR_MASK;
02727 team->t.t_fp_control_saved = TRUE;
02728 }
02729 else {
02730 team->t.t_fp_control_saved = FALSE;
02731 }
02732 #endif
02733
02734 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
02735
02736
02737
02738
02739 KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team );
02740 KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
02741 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
02742 parent_team, team->t.t_task_team, team ) );
02743 master_th->th.th_task_team = team->t.t_task_team;
02744 KMP_DEBUG_ASSERT( ( master_th->th.th_task_team == NULL ) || ( team == root->r.r_hot_team ) ) ;
02745 }
02746 #endif // OMP_30_ENABLED
02747
02748 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
02749 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
02750 KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
02751 ( team->t.t_master_tid == 0 &&
02752 ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
02753 KMP_MB();
02754
02755
02756 argv = (void**) team -> t.t_argv;
02757 for( i=argc-1; i >= 0; --i )
02758
02759 #if KMP_ARCH_X86_64 && KMP_OS_LINUX
02760 *argv++ = va_arg( *ap, void * );
02761 #else
02762 *argv++ = va_arg( ap, void * );
02763 #endif
02764
02765
02766
02767 team->t.t_master_active = master_active;
02768 if (!root -> r.r_active)
02769 root -> r.r_active = TRUE;
02770
02771 __kmp_fork_team_threads( root, team, master_th, gtid );
02772
02773 #if 0
02774 #if OMPT_SUPPORT
02775 if (ompt_status & ompt_status_track) {
02776 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
02777 if ((ompt_status == ompt_status_track_callback) &&
02778 ompt_callbacks.ompt_callback(ompt_event_parallel_create)) {
02779 int tid = __kmp_tid_from_gtid( gtid );
02780 ompt_callbacks.ompt_callback(ompt_event_parallel_create)
02781 (team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
02782 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame),
02783 team->t.ompt_team_info.parallel_id,
02784 (void *) microtask);
02785 }
02786 }
02787 #endif
02788 #endif
02789
02790 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
02791
02792
02793
02794
02795 KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
02796 KMP_MB();
02797
02798 KF_TRACE( 10, ( "__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n", root, team, master_th, gtid ) );
02799
02800
02801 __kmp_internal_fork( loc, gtid, team );
02802 KF_TRACE( 10, ( "__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n", root, team, master_th, gtid ) );
02803
02804 if (! exec_master) {
02805 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
02806 return TRUE;
02807 }
02808
02809
02810 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
02811 gtid, team->t.t_id, team->t.t_pkfn ) );
02812
02813 if (! team->t.t_invoke( gtid )) {
02814 KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
02815 }
02816 KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
02817 gtid, team->t.t_id, team->t.t_pkfn ) );
02818 KMP_MB();
02819
02820 KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
02821
02822 #if OMPT_SUPPORT
02823 if (ompt_status & ompt_status_track) {
02824 master_th->th.ompt_thread_info.state = prev_state;
02825 }
02826 #endif
02827
02828 return TRUE;
02829 }
02830
02831
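/*
 * End of a parallel region: wait for the workers, restore the master's tid,
 * dispatch buffer, task team and FP control state from the parent team, free the
 * team, and report the parallel exit to OMPT if a callback is registered.
 */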
02832 void
02833 __kmp_join_call(ident_t *loc, int gtid)
02834 {
02835 kmp_team_t *team;
02836 kmp_team_t *parent_team;
02837 kmp_info_t *master_th;
02838 kmp_root_t *root;
02839 int master_active;
02840 int i;
02841
02842 KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid ));
02843
02844
02845 master_th = __kmp_threads[ gtid ];
02846
02847
02848 #if OMPT_SUPPORT
02849 if (ompt_status & ompt_status_track) {
02850 master_th->th.ompt_thread_info.state = ompt_state_overhead;
02851 }
02852 #endif
02853
02854 root = master_th -> th.th_root;
02855 team = master_th -> th.th_team;
02856 parent_team = team->t.t_parent;
02857
02858 master_th->th.th_ident = loc;
02859
02860 #if OMP_30_ENABLED && KMP_DEBUG
02861 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
02862 KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
02863 __kmp_gtid_from_thread( master_th ), team,
02864 team -> t.t_task_team, master_th->th.th_task_team) );
02865 KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team );
02866 }
02867 #endif // OMP_30_ENABLED
02868
02869 if( team->t.t_serialized ) {
02870 __kmpc_end_serialized_parallel( loc, gtid );
02871 return;
02872 }
02873
02874 master_active = team->t.t_master_active;
02875
02876 __kmp_internal_join( loc, gtid, team );
02877 KMP_MB();
02878
02879 #if 0
02880 #if OMPT_SUPPORT
02881 if ((ompt_status & ompt_status_track_callback) &&
02882 ompt_callbacks.ompt_callback(ompt_event_parallel_exit)) {
02883 int tid = __kmp_tid_from_gtid( gtid );
02884 ompt_callbacks.ompt_callback(ompt_event_parallel_exit)
02885 (team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
02886 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame),
02887 team->t.ompt_team_info.parallel_id,
02888 (void *) team->t.t_pkfn);
02889 }
02890 #endif
02891 #endif
02892
02893 #if OMPT_SUPPORT
02894 ompt_parallel_info_t parallel_info;
02895 if ((ompt_status & ompt_status_track_callback) &&
02896 ompt_callbacks.ompt_callback(ompt_event_parallel_exit)) {
02897 int tid = __kmp_tid_from_gtid( gtid );
02898 parallel_info = (ompt_parallel_info_t)
02899 { .parent_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
02900 .parent_task_frame = &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame),
02901 .parallel_id = team->t.ompt_team_info.parallel_id,
02902 .parallel_function = (void *) team->t.t_pkfn
02903 };
02904 }
02905 #endif
02906
02907
02908 master_th -> th.th_info.ds.ds_tid = team -> t.t_master_tid;
02909 master_th -> th.th_local.this_construct = team -> t.t_master_this_cons;
02910 master_th -> th.th_local.last_construct = team -> t.t_master_last_cons;
02911
02912 master_th -> th.th_dispatch =
02913 & parent_team -> t.t_dispatch[ team -> t.t_master_tid ];
02914
02915
02916
02917
02918
02919 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
02920
02921
02922 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
02923 KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );
02924
02925 #if OMP_30_ENABLED
02926 KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
02927 0, master_th, team ) );
02928 __kmp_pop_current_task_from_thread( master_th );
02929 #endif // OMP_30_ENABLED
02930
02931 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
02932
02933
02934
02935 master_th -> th.th_first_place = team -> t.t_first_place;
02936 master_th -> th.th_last_place = team -> t.t_last_place;
02937 #endif
02938
02939 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
02940 if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
02941 __kmp_clear_x87_fpu_status_word();
02942 __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
02943 __kmp_load_mxcsr( &team->t.t_mxcsr );
02944 }
02945 #endif
02946
02947 if ( root -> r.r_active != master_active )
02948 root -> r.r_active = master_active;
02949
02950 __kmp_free_team( root, team );
02951
02952
02953
02954
02955
02956
02957
02958 master_th -> th.th_team = parent_team;
02959 master_th -> th.th_team_nproc = parent_team -> t.t_nproc;
02960 master_th -> th.th_team_master = parent_team -> t.t_threads[0];
02961 master_th -> th.th_team_serialized = parent_team -> t.t_serialized;
02962
02963
02964 if( parent_team -> t.t_serialized &&
02965 parent_team != master_th->th.th_serial_team &&
02966 parent_team != root->r.r_root_team ) {
02967 __kmp_free_team( root, master_th -> th.th_serial_team );
02968 master_th -> th.th_serial_team = parent_team;
02969 }
02970
02971 #if OMP_30_ENABLED
02972 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
02973
02974
02975
02976
02977 if ( ( master_th -> th.th_task_team = parent_team -> t.t_task_team ) != NULL ) {
02978 master_th -> th.th_task_state = master_th -> th.th_task_team -> tt.tt_state;
02979 }
02980 KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
02981 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
02982 parent_team ) );
02983 }
02984 #endif
02985
02986 #if OMP_30_ENABLED
02987
02988 /* the master's implicit task resumes execution now that the region has ended */
02989 master_th->th.th_current_task->td_flags.executing = 1;
02990 #endif // OMP_30_ENABLED
02991
02992 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
02993
02994 #if OMPT_SUPPORT
02995 if ((ompt_status & ompt_status_track_callback) &&
02996 ompt_callbacks.ompt_callback(ompt_event_parallel_exit)) {
02997 ompt_callbacks.ompt_callback(ompt_event_parallel_exit)
02998 (parallel_info.parent_task_id, parallel_info.parent_task_frame,
02999 parallel_info.parallel_id, parallel_info.parallel_function);
03000 }
03001 #endif
03002
03003 KMP_MB();
03004 KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid ));
03005 }
03006
03007
03008
03009
03010
03011
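/*
 * Push a copy of the current internal control variables onto the serial team's
 * control stack, one entry per serialized nesting level, so changes made inside a
 * nested serialized region can be undone when it ends.
 */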
03012 void
03013 __kmp_save_internal_controls ( kmp_info_t * thread )
03014 {
03015
03016 if ( thread -> th.th_team != thread -> th.th_serial_team ) {
03017 return;
03018 }
03019 if (thread -> th.th_team -> t.t_serialized > 1) {
03020 int push = 0;
03021
03022 if (thread -> th.th_team -> t.t_control_stack_top == NULL) {
03023 push = 1;
03024 } else {
03025 if ( thread -> th.th_team -> t.t_control_stack_top -> serial_nesting_level !=
03026 thread -> th.th_team -> t.t_serialized ) {
03027 push = 1;
03028 }
03029 }
03030 if (push) {
03031 kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t));
03032
03033 #if OMP_30_ENABLED
03034 copy_icvs( control, & thread->th.th_current_task->td_icvs );
03035 #else
03036 control->nproc = thread->th.th_team->t.t_set_nproc[0];
03037 control->dynamic = thread->th.th_team->t.t_set_dynamic[0];
03038 control->nested = thread->th.th_team->t.t_set_nested[0];
03039 control->blocktime = thread->th.th_team->t.t_set_blocktime[0];
03040 control->bt_intervals = thread->th.th_team->t.t_set_bt_intervals[0];
03041 control->bt_set = thread->th.th_team->t.t_set_bt_set[0];
03042 #endif // OMP_30_ENABLED
03043
03044 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
03045
03046 control->next = thread -> th.th_team -> t.t_control_stack_top;
03047 thread -> th.th_team -> t.t_control_stack_top = control;
03048 }
03049 }
03050 }
03051
03052
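/*
 * Set the number of threads to use for subsequent parallel regions of this thread:
 * clamp the request to [1, __kmp_max_nth], record it in the nproc ICV, and, when
 * the root is idle, shrink the hot team by releasing the surplus workers.
 */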
03053 void
03054 __kmp_set_num_threads( int new_nth, int gtid )
03055 {
03056 kmp_info_t *thread;
03057 kmp_root_t *root;
03058
03059 KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
03060 KMP_DEBUG_ASSERT( __kmp_init_serial );
03061
03062 if (new_nth < 1)
03063 new_nth = 1;
03064 else if (new_nth > __kmp_max_nth)
03065 new_nth = __kmp_max_nth;
03066
03067 thread = __kmp_threads[gtid];
03068
03069 __kmp_save_internal_controls( thread );
03070
03071 set__nproc( thread, new_nth );
03072
03073
03074
03075
03076
03077
03078 root = thread->th.th_root;
03079 if ( __kmp_init_parallel && ( ! root->r.r_active )
03080 && ( root->r.r_hot_team->t.t_nproc > new_nth ) ) {
03081 kmp_team_t *hot_team = root->r.r_hot_team;
03082 int f;
03083
03084 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
03085
03086
03087 #if OMP_30_ENABLED
03088 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
03089 kmp_task_team_t *task_team = hot_team->t.t_task_team;
03090 if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
03091
03092
03093
03094
03095
03096
03097 KMP_DEBUG_ASSERT( hot_team->t.t_nproc > 1 );
03098 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
03099 KMP_MB();
03100
03101 KA_TRACE( 20, ( "__kmp_set_num_threads: setting task_team %p to NULL\n",
03102 &hot_team->t.t_task_team ) );
03103 hot_team->t.t_task_team = NULL;
03104 }
03105 else {
03106 KMP_DEBUG_ASSERT( task_team == NULL );
03107 }
03108 }
03109 #endif // OMP_30_ENABLED
03110
03111
03112
03113
03114 for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
03115 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
03116 __kmp_free_thread( hot_team->t.t_threads[f] );
03117 hot_team->t.t_threads[f] = NULL;
03118 }
03119 hot_team->t.t_nproc = new_nth;
03120
03121
03122 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
03123
03124
03125
03126
03127 for( f=0 ; f < new_nth; f++ ) {
03128 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
03129 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
03130 }
03131 #if KMP_MIC
03132
03133 hot_team -> t.t_size_changed = -1;
03134 #endif
03135 }
03136
03137 }
03138
03139 #if OMP_30_ENABLED
03140
03141 void
03142 __kmp_set_max_active_levels( int gtid, int max_active_levels )
03143 {
03144 kmp_info_t *thread;
03145
03146 KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
03147 KMP_DEBUG_ASSERT( __kmp_init_serial );
03148
03149
03150 if( max_active_levels < 0 ) {
03151 KMP_WARNING( ActiveLevelsNegative, max_active_levels );
03152
03153
03154
03155 KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
03156 return;
03157 }
03158 if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
03159 /* value is in the allowed range [0, KMP_MAX_ACTIVE_LEVELS_LIMIT]: nothing to correct */
03160
03161 } else {
03162 KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
03163 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
03164
03165
03166
03167 }
03168 KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
03169
03170 thread = __kmp_threads[ gtid ];
03171
03172 __kmp_save_internal_controls( thread );
03173
03174 set__max_active_levels( thread, max_active_levels );
03175
03176 }
03177
03178
03179 int
03180 __kmp_get_max_active_levels( int gtid )
03181 {
03182 kmp_info_t *thread;
03183
03184 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
03185 KMP_DEBUG_ASSERT( __kmp_init_serial );
03186
03187 thread = __kmp_threads[ gtid ];
03188 KMP_DEBUG_ASSERT( thread -> th.th_current_task );
03189 KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
03190 gtid, thread -> th.th_current_task, thread -> th.th_current_task -> td_icvs.max_active_levels ) );
03191 return thread -> th.th_current_task -> td_icvs.max_active_levels;
03192 }
03193
03194
03195 void
03196 __kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk )
03197 {
03198 kmp_info_t *thread;
03199
03200
03201 KF_TRACE( 10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk ));
03202 KMP_DEBUG_ASSERT( __kmp_init_serial );
03203
03204
03205
03206
03207
03208 if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
03209 ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
03210 {
03211
03212 __kmp_msg(
03213 kmp_ms_warning,
03214 KMP_MSG( ScheduleKindOutOfRange, kind ),
03215 KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),
03216 __kmp_msg_null
03217 );
03218 kind = kmp_sched_default;
03219 chunk = 0;
03220 }
03221
03222 thread = __kmp_threads[ gtid ];
03223
03224 __kmp_save_internal_controls( thread );
03225
03226 if ( kind < kmp_sched_upper_std ) {
03227 if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
03228 /* an invalid chunk (below KMP_DEFAULT_CHUNK) means the user did not specify one: use the unchunked static schedule */
03229
03230 thread -> th.th_current_task -> td_icvs.sched.r_sched_type = kmp_sch_static;
03231 } else {
03232 thread -> th.th_current_task -> td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
03233 }
03234 } else {
03235
03236 thread -> th.th_current_task -> td_icvs.sched.r_sched_type =
03237 __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
03238 }
03239 if ( kind == kmp_sched_auto ) {
03240
03241 thread -> th.th_current_task -> td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
03242 } else {
03243 thread -> th.th_current_task -> td_icvs.sched.chunk = chunk;
03244 }
03245 }
03246
03247
03248 void
03249 __kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk )
03250 {
03251 kmp_info_t *thread;
03252 enum sched_type th_type;
03253 int i;
03254
03255 KF_TRACE( 10, ("__kmp_get_schedule: thread %d\n", gtid ));
03256 KMP_DEBUG_ASSERT( __kmp_init_serial );
03257
03258 thread = __kmp_threads[ gtid ];
03259
03260
03261 th_type = thread -> th.th_current_task -> td_icvs.sched.r_sched_type;
03262
03263 switch ( th_type ) {
03264 case kmp_sch_static:
03265 case kmp_sch_static_greedy:
03266 case kmp_sch_static_balanced:
03267 *kind = kmp_sched_static;
03268 *chunk = 0;
03269 return;
03270 case kmp_sch_static_chunked:
03271 *kind = kmp_sched_static;
03272 break;
03273 case kmp_sch_dynamic_chunked:
03274 *kind = kmp_sched_dynamic;
03275 break;
03276 case kmp_sch_guided_chunked:
03277 case kmp_sch_guided_iterative_chunked:
03278 case kmp_sch_guided_analytical_chunked:
03279 *kind = kmp_sched_guided;
03280 break;
03281 case kmp_sch_auto:
03282 *kind = kmp_sched_auto;
03283 break;
03284 case kmp_sch_trapezoidal:
03285 *kind = kmp_sched_trapezoidal;
03286 break;
03287
03288
03289
03290
03291
03292 default:
03293 KMP_FATAL( UnknownSchedulingType, th_type );
03294 }
03295
03296
03297 *chunk = thread -> th.th_current_task -> td_icvs.sched.chunk;
03298 }
03299
03300 int
03301 __kmp_get_ancestor_thread_num( int gtid, int level ) {
03302
03303 int ii, dd;
03304 kmp_team_t *team;
03305
03306 KF_TRACE( 10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
03307 KMP_DEBUG_ASSERT( __kmp_init_serial );
03308
03309
03310 if( level == 0 ) return 0;
03311 if( level < 0 ) return -1;
03312 team = __kmp_threads[ gtid ] -> th.th_team;
03313 ii = team -> t.t_level;
03314 if( level > ii ) return -1;
03315 if( ii == level ) return __kmp_tid_from_gtid( gtid );
03316
03317 dd = team -> t.t_serialized;
03318 level++;
03319 while( ii > level )
03320 {
03321 for( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
03322 {
03323 }
03324 if( ( team -> t.t_serialized ) && ( !dd ) ) {
03325 team = team->t.t_parent;
03326 continue;
03327 }
03328 if( ii > level ) {
03329 team = team->t.t_parent;
03330 dd = team -> t.t_serialized;
03331 ii--;
03332 }
03333 }
03334
03335 return ( dd > 1 ) ? ( 0 ) : ( team -> t.t_master_tid );
03336 }
03337
03338 int
03339 __kmp_get_team_size( int gtid, int level ) {
03340
03341 int ii, dd;
03342 kmp_team_t *team;
03343
03344 KF_TRACE( 10, ("__kmp_get_team_size: thread %d %d\n", gtid, level ));
03345 KMP_DEBUG_ASSERT( __kmp_init_serial );
03346
03347
03348 if( level == 0 ) return 1;
03349 if( level < 0 ) return -1;
03350 team = __kmp_threads[ gtid ] -> th.th_team;
03351 ii = team -> t.t_level;
03352 if( level > ii ) return -1;
03353
03354 while( ii > level )
03355 {
03356 for( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
03357 {
03358 }
03359 if( team -> t.t_serialized && ( !dd ) ) {
03360 team = team->t.t_parent;
03361 continue;
03362 }
03363 if( ii > level ) {
03364 team = team->t.t_parent;
03365 ii--;
03366 }
03367 }
03368
03369 return team -> t.t_nproc;
03370 }
03371
03372 #endif // OMP_30_ENABLED
03373
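/*
 * Build the process-wide default schedule from __kmp_sched / __kmp_chunk, mapping
 * plain static and guided to the tuned variants (__kmp_static, __kmp_guided) and
 * substituting KMP_DEFAULT_CHUNK when no valid chunk was given.
 */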
03374 kmp_r_sched_t
03375 __kmp_get_schedule_global() {
03376
03377
03378
03379 kmp_r_sched_t r_sched;
03380
03381
03382
03383
03384 if ( __kmp_sched == kmp_sch_static ) {
03385 r_sched.r_sched_type = __kmp_static;
03386 } else if ( __kmp_sched == kmp_sch_guided_chunked ) {
03387 r_sched.r_sched_type = __kmp_guided;
03388 } else {
03389 r_sched.r_sched_type = __kmp_sched;
03390 }
03391
03392 if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) {
03393 r_sched.chunk = KMP_DEFAULT_CHUNK;
03394 } else {
03395 r_sched.chunk = __kmp_chunk;
03396 }
03397
03398 return r_sched;
03399 }
03400
03401
03402
03403
03404
03405
03406
03407
03408
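/*
 * Make sure team->t.t_argv can hold argc entries: small argument lists use the
 * team's inline array, larger ones get a page-allocated buffer sized to at least
 * twice argc; any previously allocated buffer is freed first on realloc.
 */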
03409 static void
03410 __kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc )
03411 {
03412
03413 KMP_DEBUG_ASSERT( team );
03414 if( !realloc || argc > team -> t.t_max_argc ) {
03415
03416 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
03417 team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
03418 #if (KMP_PERF_V106 == KMP_ON)
03419
03420 if ( realloc && team -> t.t_argv != &team -> t.t_inline_argv[0] )
03421 __kmp_free( (void *) team -> t.t_argv );
03422
03423 if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
03424
03425 team -> t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
03426 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
03427 team->t.t_id, team->t.t_max_argc ));
03428 team -> t.t_argv = &team -> t.t_inline_argv[0];
03429 if ( __kmp_storage_map ) {
03430 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
03431 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
03432 (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES),
03433 "team_%d.t_inline_argv",
03434 team->t.t_id );
03435 }
03436 } else {
03437
03438 team -> t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
03439 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
03440 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
03441 team->t.t_id, team->t.t_max_argc ));
03442 team -> t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
03443 if ( __kmp_storage_map ) {
03444 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
03445 sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv",
03446 team->t.t_id );
03447 }
03448 }
03449 #else
03450 if ( realloc )
03451 __kmp_free( (void*) team -> t.t_argv );
03452 team -> t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
03453 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
03454 KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
03455 team->t.t_id, team->t.t_max_argc ));
03456 team -> t.t_argv = __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
03457 if ( __kmp_storage_map ) {
03458 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
03459 sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv", team->t.t_id );
03460 }
03461 #endif
03462
03463 }
03464 }
03465
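/*
 * Allocate the per-team arrays sized for max_nth threads: the thread pointer
 * array, the shared dispatch buffers, the per-thread dispatch structures and
 * either the implicit task data (OMP 3.0) or the per-thread ICV arrays.
 */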
03466 static void
03467 __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth)
03468 {
03469 int i;
03470 int num_disp_buff = max_nth > 1 ? KMP_MAX_DISP_BUF : 2;
03471 #if KMP_USE_POOLED_ALLOC
03472 char *ptr = __kmp_allocate(max_nth *
03473 ( sizeof(kmp_info_t*) + sizeof(dispatch_shared_info_t)*2
03474 + sizeof(kmp_disp_t) + sizeof(int)*6
03475 # if OMP_30_ENABLED
03476
03477 + sizeof(kmp_r_sched_t)
03478 + sizeof(kmp_taskdata_t)
03479 # endif
03480 ) );
03481
03482 team -> t.t_threads = (kmp_info_t**) ptr; ptr += sizeof(kmp_info_t*) * max_nth;
03483 team -> t.t_disp_buffer = (dispatch_shared_info_t*) ptr;
03484 ptr += sizeof(dispatch_shared_info_t) * num_disp_buff;
03485 team -> t.t_dispatch = (kmp_disp_t*) ptr; ptr += sizeof(kmp_disp_t) * max_nth;
03486 team -> t.t_set_nproc = (int*) ptr; ptr += sizeof(int) * max_nth;
03487 team -> t.t_set_dynamic = (int*) ptr; ptr += sizeof(int) * max_nth;
03488 team -> t.t_set_nested = (int*) ptr; ptr += sizeof(int) * max_nth;
03489 team -> t.t_set_blocktime = (int*) ptr; ptr += sizeof(int) * max_nth;
03490 team -> t.t_set_bt_intervals = (int*) ptr; ptr += sizeof(int) * max_nth;
03491 team -> t.t_set_bt_set = (int*) ptr;
03492 # if OMP_30_ENABLED
03493 ptr += sizeof(int) * max_nth;
03494
03495 team -> t.t_set_sched = (kmp_r_sched_t*) ptr;
03496 ptr += sizeof(kmp_r_sched_t) * max_nth;
03497 team -> t.t_implicit_task_taskdata = (kmp_taskdata_t*) ptr;
03498 ptr += sizeof(kmp_taskdata_t) * max_nth;
03499 # endif // OMP_30_ENABLED
03500 #else
03501
03502 team -> t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth );
03503 team -> t.t_disp_buffer = (dispatch_shared_info_t*)
03504 __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff );
03505 team -> t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth );
03506 #if OMP_30_ENABLED
03507
03508
03509 team -> t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth );
03510 #else
03511 team -> t.t_set_nproc = (int*) __kmp_allocate( sizeof(int) * max_nth );
03512 team -> t.t_set_dynamic = (int*) __kmp_allocate( sizeof(int) * max_nth );
03513 team -> t.t_set_nested = (int*) __kmp_allocate( sizeof(int) * max_nth );
03514 team -> t.t_set_blocktime = (int*) __kmp_allocate( sizeof(int) * max_nth );
03515 team -> t.t_set_bt_intervals = (int*) __kmp_allocate( sizeof(int) * max_nth );
03516 team -> t.t_set_bt_set = (int*) __kmp_allocate( sizeof(int) * max_nth );
03517 # endif // OMP_30_ENABLED
03518 #endif
03519 team->t.t_max_nproc = max_nth;
03520
03521
03522 for(i = 0 ; i < num_disp_buff; ++i)
03523 team -> t.t_disp_buffer[i].buffer_index = i;
03524 }
03525
03526 static void
03527 __kmp_free_team_arrays(kmp_team_t *team) {
03528
03529 int i;
03530 for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
03531 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
03532 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
03533 team->t.t_dispatch[ i ].th_disp_buffer = NULL;
03534 };
03535 };
03536 __kmp_free(team->t.t_threads);
03537 #if !KMP_USE_POOLED_ALLOC
03538 __kmp_free(team->t.t_disp_buffer);
03539 __kmp_free(team->t.t_dispatch);
03540 #if OMP_30_ENABLED
03541
03542
03543 __kmp_free(team->t.t_implicit_task_taskdata);
03544 #else
03545 __kmp_free(team->t.t_set_nproc);
03546 __kmp_free(team->t.t_set_dynamic);
03547 __kmp_free(team->t.t_set_nested);
03548 __kmp_free(team->t.t_set_blocktime);
03549 __kmp_free(team->t.t_set_bt_intervals);
03550 __kmp_free(team->t.t_set_bt_set);
03551 # endif // OMP_30_ENABLED
03552 #endif
03553 team->t.t_threads = NULL;
03554 team->t.t_disp_buffer = NULL;
03555 team->t.t_dispatch = NULL;
03556 #if OMP_30_ENABLED
03557
03558
03559 team->t.t_implicit_task_taskdata = 0;
03560 #else
03561 team->t.t_set_nproc = 0;
03562 team->t.t_set_dynamic = 0;
03563 team->t.t_set_nested = 0;
03564 team->t.t_set_blocktime = 0;
03565 team->t.t_set_bt_intervals = 0;
03566 team->t.t_set_bt_set = 0;
03567 #endif // OMP_30_ENABLED
03568 }
03569
03570 static void
03571 __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
03572 kmp_info_t **oldThreads = team->t.t_threads;
03573
03574 #if !KMP_USE_POOLED_ALLOC
03575 __kmp_free(team->t.t_disp_buffer);
03576 __kmp_free(team->t.t_dispatch);
03577 #if OMP_30_ENABLED
03578
03579
03580 __kmp_free(team->t.t_implicit_task_taskdata);
03581 #else
03582 __kmp_free(team->t.t_set_nproc);
03583 __kmp_free(team->t.t_set_dynamic);
03584 __kmp_free(team->t.t_set_nested);
03585 __kmp_free(team->t.t_set_blocktime);
03586 __kmp_free(team->t.t_set_bt_intervals);
03587 __kmp_free(team->t.t_set_bt_set);
03588 # endif // OMP_30_ENABLED
03589 #endif
03590 __kmp_allocate_team_arrays(team, max_nth);
03591
03592 memcpy(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
03593
03594 __kmp_free(oldThreads);
03595 }
03596
03597 static kmp_internal_control_t
03598 __kmp_get_global_icvs( void ) {
03599
03600 #if OMP_30_ENABLED
03601 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
03602 #endif
03603
03604 #if OMP_40_ENABLED
03605 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
03606 #endif
03607
03608 kmp_internal_control_t g_icvs = {
03609 0,
03610 __kmp_dflt_nested,
03611 __kmp_global.g.g_dynamic,
03612 __kmp_dflt_team_nth,
03613
03614
03615 __kmp_dflt_blocktime,
03616 __kmp_bt_intervals,
03617 __kmp_env_blocktime,
03618 #if OMP_30_ENABLED
03619 __kmp_dflt_max_active_levels,
03620 r_sched,
03621 #endif
03622 #if OMP_40_ENABLED
03623 __kmp_nested_proc_bind.bind_types[0],
03624 #endif
03625 NULL
03626 };
03627
03628 return g_icvs;
03629 }
03630
03631 static kmp_internal_control_t
03632 __kmp_get_x_global_icvs( const kmp_team_t *team ) {
03633
03634 #if OMP_30_ENABLED
03635 kmp_internal_control_t gx_icvs;
03636 gx_icvs.serial_nesting_level = 0;
03637 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
03638 gx_icvs.next = NULL;
03639 #else
03640 kmp_internal_control_t gx_icvs =
03641 {
03642 0,
03643 team->t.t_set_nested[0],
03644 team->t.t_set_dynamic[0],
03645 team->t.t_set_nproc[0],
03646 team->t.t_set_blocktime[0],
03647 team->t.t_set_bt_intervals[0],
03648 team->t.t_set_bt_set[0],
03649 NULL
03650 };
03651 #endif // OMP_30_ENABLED
03652
03653 return gx_icvs;
03654 }
03655
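/*
 * Initialize a root: reset its flags, then create the single-threaded root team
 * and the (growable) hot team, seeding both with the global ICVs and the default
 * schedule.
 */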
03656 static void
03657 __kmp_initialize_root( kmp_root_t *root )
03658 {
03659 int f;
03660 kmp_team_t *root_team;
03661 kmp_team_t *hot_team;
03662 size_t disp_size, dispatch_size, bar_size;
03663 int hot_team_max_nth;
03664 #if OMP_30_ENABLED
03665 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
03666 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
03667 #endif // OMP_30_ENABLED
03668 KMP_DEBUG_ASSERT( root );
03669 KMP_ASSERT( ! root->r.r_begin );
03670
03671
03672 __kmp_init_lock( &root->r.r_begin_lock );
03673 root -> r.r_begin = FALSE;
03674 root -> r.r_active = FALSE;
03675 root -> r.r_in_parallel = 0;
03676 root -> r.r_blocktime = __kmp_dflt_blocktime;
03677 root -> r.r_nested = __kmp_dflt_nested;
03678
03679
03680
03681 KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );
03682
#if OMPT_SUPPORT
03683 int gtid = __kmp_gtid_get_specific();
03684 kmp_info_t *ti = ompt_get_thread_gtid(gtid);
03685 ompt_parallel_id_t ompt_parallel_id_1 = __ompt_parallel_id_new(ti, gtid);
#endif
03686
03687 root_team =
03688 __kmp_allocate_team(
03689 root,
03690 1,
03691 1,
03692 #if OMPT_SUPPORT
03693 ompt_parallel_id_1,
03694 #endif
03695 #if OMP_40_ENABLED
03696 __kmp_nested_proc_bind.bind_types[0],
03697 #endif
03698 #if OMP_30_ENABLED
03699 &r_icvs,
03700 #else
03701 __kmp_dflt_team_nth_ub,
03702 __kmp_global.g.g_dynamic,
03703 __kmp_dflt_nested,
03704 __kmp_dflt_blocktime,
03705 __kmp_bt_intervals,
03706 __kmp_env_blocktime,
03707 #endif
03708 0
03709 );
03710
03711 KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) );
03712
03713 root -> r.r_root_team = root_team;
03714 root_team -> t.t_control_stack_top = NULL;
03715
03716
03717 root_team -> t.t_threads[0] = NULL;
03718 root_team -> t.t_nproc = 1;
03719 root_team -> t.t_serialized = 1;
03720 #if OMP_30_ENABLED
03721
03722 root_team -> t.t_sched.r_sched_type = r_sched.r_sched_type;
03723 root_team -> t.t_sched.chunk = r_sched.chunk;
03724 #endif // OMP_30_ENABLED
03725 KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
03726 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
03727
03728
03729
03730 KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );
03731
#if OMPT_SUPPORT
03732 ompt_parallel_id_t ompt_parallel_id_2 = __ompt_parallel_id_new(ti, gtid);
#endif
03733
03734 hot_team =
03735 __kmp_allocate_team(
03736 root,
03737 1,
03738 __kmp_dflt_team_nth_ub * 2,
03739 #if OMPT_SUPPORT
03740 ompt_parallel_id_2,
03741 #endif
03742 #if OMP_40_ENABLED
03743 __kmp_nested_proc_bind.bind_types[0],
03744 #endif
03745 #if OMP_30_ENABLED
03746 &r_icvs,
03747 #else
03748 __kmp_dflt_team_nth_ub,
03749 __kmp_global.g.g_dynamic,
03750 __kmp_dflt_nested,
03751 __kmp_dflt_blocktime,
03752 __kmp_bt_intervals,
03753 __kmp_env_blocktime,
03754 #endif
03755 0
03756 );
03757 KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
03758
03759 root -> r.r_hot_team = hot_team;
03760 root_team -> t.t_control_stack_top = NULL;
03761
03762
03763 hot_team -> t.t_parent = root_team;
03764
03765
03766 hot_team_max_nth = hot_team->t.t_max_nproc;
03767 for ( f = 0; f < hot_team_max_nth; ++ f ) {
03768 hot_team -> t.t_threads[ f ] = NULL;
03769 };
03770 hot_team -> t.t_nproc = 1;
03771 #if OMP_30_ENABLED
03772
03773 hot_team -> t.t_sched.r_sched_type = r_sched.r_sched_type;
03774 hot_team -> t.t_sched.chunk = r_sched.chunk;
03775 #endif // OMP_30_ENABLED
03776 #if KMP_MIC
03777 hot_team -> t.t_size_changed = 0;
03778 #endif
03779
03780 }
03781
03782 #ifdef KMP_DEBUG
03783
03784
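/*
 * Debug-only dump support: teams reachable from threads and roots are collected
 * into a sorted singly linked list (terminated by a sentinel item) so
 * __kmp_print_structure can print each team exactly once.
 */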
03785 typedef struct kmp_team_list_item {
03786 kmp_team_p const * entry;
03787 struct kmp_team_list_item * next;
03788 } kmp_team_list_item_t;
03789 typedef kmp_team_list_item_t * kmp_team_list_t;
03790
03791
03792 static void
03793 __kmp_print_structure_team_accum(
03794 kmp_team_list_t list,
03795 kmp_team_p const * team
03796 ) {
03797
03798
03799
03800
03801
03802
03803 kmp_team_list_t l;
03804
03805 KMP_DEBUG_ASSERT( list != NULL );
03806 if ( team == NULL ) {
03807 return;
03808 };
03809
03810 __kmp_print_structure_team_accum( list, team->t.t_parent );
03811 __kmp_print_structure_team_accum( list, team->t.t_next_pool );
03812
03813
03814 l = list;
03815 while ( l->next != NULL && l->entry != team ) {
03816 l = l->next;
03817 };
03818 if ( l->next != NULL ) {
03819 return;
03820 };
03821
03822
03823 l = list;
03824 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
03825 l = l->next;
03826 };
03827
03828
03829 {
03830 kmp_team_list_item_t * item =
03831 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
03832 * item = * l;
03833 l->entry = team;
03834 l->next = item;
03835 }
03836
03837 }
03838
03839 static void
03840 __kmp_print_structure_team(
03841 char const * title,
03842 kmp_team_p const * team
03843
03844 ) {
03845 __kmp_printf( "%s", title );
03846 if ( team != NULL ) {
03847 __kmp_printf( "%2x %p\n", team->t.t_id, team );
03848 } else {
03849 __kmp_printf( " - (nil)\n" );
03850 };
03851 }
03852
03853 static void
03854 __kmp_print_structure_thread(
03855 char const * title,
03856 kmp_info_p const * thread
03857
03858 ) {
03859 __kmp_printf( "%s", title );
03860 if ( thread != NULL ) {
03861 __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
03862 } else {
03863 __kmp_printf( " - (nil)\n" );
03864 };
03865 }
03866
03867 static void
03868 __kmp_print_structure(
03869 void
03870 ) {
03871
03872 kmp_team_list_t list;
03873
03874
03875 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
03876 list->entry = NULL;
03877 list->next = NULL;
03878
03879 __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
03880 {
03881 int gtid;
03882 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
03883 __kmp_printf( "%2d", gtid );
03884 if ( __kmp_threads != NULL ) {
03885 __kmp_printf( " %p", __kmp_threads[ gtid ] );
03886 };
03887 if ( __kmp_root != NULL ) {
03888 __kmp_printf( " %p", __kmp_root[ gtid ] );
03889 };
03890 __kmp_printf( "\n" );
03891 };
03892 }
03893
03894
03895 __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" );
03896 if ( __kmp_threads != NULL ) {
03897 int gtid;
03898 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
03899 kmp_info_t const * thread = __kmp_threads[ gtid ];
03900 if ( thread != NULL ) {
03901 __kmp_printf( "GTID %2d %p:\n", gtid, thread );
03902 __kmp_printf( " Our Root: %p\n", thread->th.th_root );
03903 __kmp_print_structure_team( " Our Team: ", thread->th.th_team );
03904 __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team );
03905 __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc );
03906 __kmp_print_structure_thread( " Master: ", thread->th.th_team_master );
03907 __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized );
03908 __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc );
03909 #if OMP_40_ENABLED
03910 __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
03911 #endif
03912 __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool );
03913 __kmp_printf( "\n" );
03914 __kmp_print_structure_team_accum( list, thread->th.th_team );
03915 __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
03916 };
03917 };
03918 } else {
03919 __kmp_printf( "Threads array is not allocated.\n" );
03920 };
03921
03922
03923 __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" );
03924 if ( __kmp_root != NULL ) {
03925 int gtid;
03926 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
03927 kmp_root_t const * root = __kmp_root[ gtid ];
03928 if ( root != NULL ) {
03929 __kmp_printf( "GTID %2d %p:\n", gtid, root );
03930 __kmp_print_structure_team( " Root Team: ", root->r.r_root_team );
03931 __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team );
03932 __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread );
03933 __kmp_printf( " Active?: %2d\n", root->r.r_active );
03934 __kmp_printf( " Nested?: %2d\n", root->r.r_nested );
03935 __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel );
03936 __kmp_printf( "\n" );
03937 __kmp_print_structure_team_accum( list, root->r.r_root_team );
03938 __kmp_print_structure_team_accum( list, root->r.r_hot_team );
03939 };
03940 };
03941 } else {
03942 __kmp_printf( "Ubers array is not allocated.\n" );
03943 };
03944
03945 __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" );
03946 while ( list->next != NULL ) {
03947 kmp_team_p const * team = list->entry;
03948 int i;
03949 __kmp_printf( "Team %2x %p:\n", team->t.t_id, team );
03950 __kmp_print_structure_team( " Parent Team: ", team->t.t_parent );
03951 __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid );
03952 __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc );
03953 __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized );
03954 __kmp_printf( " Number threads: %2d\n", team->t.t_nproc );
03955 for ( i = 0; i < team->t.t_nproc; ++ i ) {
03956 __kmp_printf( " Thread %2d: ", i );
03957 __kmp_print_structure_thread( "", team->t.t_threads[ i ] );
03958 };
03959 __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool );
03960 __kmp_printf( "\n" );
03961 list = list->next;
03962 };
03963
03964
03965 __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" );
03966 __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
03967 __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool );
03968 __kmp_printf( "\n" );
03969
03970
03971 while ( list != NULL ) {
03972 kmp_team_list_item_t * item = list;
03973 list = list->next;
03974 KMP_INTERNAL_FREE( item );
03975 };
03976
03977 }
03978
03979 #endif
03980
03981
03982
03983
03984
03985
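/*
 * Multipliers for the per-thread linear congruential random number generator used
 * by __kmp_get_random: each thread picks one, keyed by its tid, and advances its
 * state as x = a*x + 1, returning the high 16 bits.
 */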
03986 static const unsigned __kmp_primes[] = {
03987 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
03988 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
03989 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
03990 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
03991 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
03992 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
03993 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
03994 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
03995 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
03996 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
03997 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
03998 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
03999 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
04000 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
04001 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
04002 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
04003 };
04004
04005
04006
04007
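/* Per-thread linear congruential random number generator.  Returns the upper
   16 bits of the current state and advances the state with x = a*x + 1, where
   the per-thread multiplier a was chosen in __kmp_init_random(). */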
04008 unsigned short
04009 __kmp_get_random( kmp_info_t * thread )
04010 {
04011 unsigned x = thread -> th.th_x;
04012 unsigned short r = x>>16;
04013
04014 thread -> th.th_x = x*thread->th.th_a+1;
04015
04016 KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
04017 thread->th.th_info.ds.ds_tid, r) );
04018
04019 return r;
04020 }
04021
04022
04023
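/* Seed the per-thread generator: pick the multiplier from __kmp_primes[] based
   on the thread id and derive the initial state from it. */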
04024 void
04025 __kmp_init_random( kmp_info_t * thread )
04026 {
04027 unsigned seed = thread->th.th_info.ds.ds_tid;
04028
04029 thread -> th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))];
04030 thread -> th.th_x = (seed+1)*thread->th.th_a+1;
04031 KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread -> th.th_a) );
04032 }
04033
04034
04035 #if KMP_OS_WINDOWS
04036
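/* Windows only: scan the threads array for uber (root) threads whose underlying
   OS thread has exited without unregistering and whose root is inactive, and
   reclaim their slots.  Returns the number of roots reclaimed. */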
04037 static int
04038 __kmp_reclaim_dead_roots(void) {
04039 int i, r = 0;
04040
04041 for(i = 0; i < __kmp_threads_capacity; ++i) {
04042 if( KMP_UBER_GTID( i ) &&
04043 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
04044 !__kmp_root[i]->r.r_active ) {
04045 r += __kmp_unregister_root_other_thread(i);
04046 }
04047 }
04048 return r;
04049 }
04050 #endif
04051
04052
04053
04054
04055
04056
04057
04058
04059
04060
04061
04062
04063
04064
04065
04066
04067
04068
04069
04070
04071
04072
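/* Try to grow the __kmp_threads and __kmp_root arrays so that at least nWish
   more slots become available, or at minimum nNeed more.  On Windows static
   builds, dead roots are reclaimed first.  The capacity is doubled until it
   covers the request, bounded by __kmp_sys_max_nth (or by __kmp_tp_capacity
   once a threadprivate cache has been created).  Returns the number of slots
   actually added, which may be zero. */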
04073 static int
04074 __kmp_expand_threads(int nWish, int nNeed) {
04075 int added = 0;
04076 int old_tp_cached;
04077 int __kmp_actual_max_nth;
04078
04079 if(nNeed > nWish)
04080 nWish = nNeed;
04081 #if KMP_OS_WINDOWS && !defined GUIDEDLL_EXPORTS
04082
04083
04084 added = __kmp_reclaim_dead_roots();
04085
04086 if(nNeed) {
04087 nNeed -= added;
04088 if(nNeed < 0)
04089 nNeed = 0;
04090 }
04091 if(nWish) {
04092 nWish -= added;
04093 if(nWish < 0)
04094 nWish = 0;
04095 }
04096 #endif
04097 if(nWish <= 0)
04098 return added;
04099
04100 while(1) {
04101 int nTarget;
04102 int minimumRequiredCapacity;
04103 int newCapacity;
04104 kmp_info_t **newThreads;
04105 kmp_root_t **newRoot;
04106
04107
04108
04109
04110
04111
04112
04113
04114
04115
04116
04117
04118
04119
04120
04121
04122
04123
04124
04125
04126
04127 old_tp_cached = __kmp_tp_cached;
04128 __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
04129 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
04130
04131
04132 nTarget = nWish;
04133 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
04134
04135 if(nNeed) {
04136 nTarget = nNeed;
04137 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
04138
04139 break;
04140 }
04141 } else {
04142
04143 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
04144 if(!nTarget) {
04145
04146 break;
04147 }
04148 }
04149 }
04150 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
04151
04152 newCapacity = __kmp_threads_capacity;
04153 do{
04154 newCapacity =
04155 newCapacity <= (__kmp_actual_max_nth >> 1) ?
04156 (newCapacity << 1) :
04157 __kmp_actual_max_nth;
04158 } while(newCapacity < minimumRequiredCapacity);
04159 newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
04160 newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
04161 memcpy(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
04162 memcpy(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
04163 memset(newThreads + __kmp_threads_capacity, 0,
04164 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
04165 memset(newRoot + __kmp_threads_capacity, 0,
04166 (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
04167
04168 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
04169
04170
04171
04172
04173
04174 __kmp_free(newThreads);
04175 continue;
04176 }
04177 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
04178 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
04179
04180 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
04181 __kmp_free(newThreads);
04182 continue;
04183 } else {
04184
04185
04186
04187
04188
04189
04190 TC_IGNORE({ *(kmp_info_t**volatile*)&__kmp_threads = newThreads; });
04191 TC_IGNORE({ *(kmp_root_t**volatile*)&__kmp_root = newRoot; });
04192 added += newCapacity - __kmp_threads_capacity;
04193 TC_IGNORE({ *(volatile int*)&__kmp_threads_capacity = newCapacity; });
04194 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
04195 break;
04196 }
04197 }
04198 return added;
04199 }
04200
04201
04202
04203
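/* Register the calling thread as a new OpenMP root (uber) thread.  Grows the
   threads array if necessary, claims a free gtid slot, sets up the root's
   root team, hot team, and serial team, publishes the gtid via TLS, and
   returns the gtid.  The fork/join bootstrap lock is held for the duration. */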
04204 int
04205 __kmp_register_root( int initial_thread )
04206 {
04207 kmp_info_t *root_thread;
04208 kmp_root_t *root;
04209 int gtid;
04210 int capacity;
04211 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
04212 KA_TRACE( 20, ("__kmp_register_root: entered\n"));
04213 KMP_MB();
04214
04215
04216
04217
04218
04219
04220
04221
04222
04223
04224
04225
04226
04227
04228
04229
04230 capacity = __kmp_threads_capacity;
04231 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
04232 -- capacity;
04233 };
04234
04235
04236 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
04237 if ( __kmp_tp_cached ) {
04238 __kmp_msg(
04239 kmp_ms_fatal,
04240 KMP_MSG( CantRegisterNewThread ),
04241 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
04242 KMP_HNT( PossibleSystemLimitOnThreads ),
04243 __kmp_msg_null
04244 );
04245 }
04246 else {
04247 __kmp_msg(
04248 kmp_ms_fatal,
04249 KMP_MSG( CantRegisterNewThread ),
04250 KMP_HNT( SystemLimitOnThreads ),
04251 __kmp_msg_null
04252 );
04253 }
04254 };
04255
04256
04257
04258
04259 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ );
04260 KA_TRACE( 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
04261 KMP_ASSERT( gtid < __kmp_threads_capacity );
04262
04263
04264 __kmp_all_nth ++;
04265 TCW_4(__kmp_nth, __kmp_nth + 1);
04266
04267
04268
04269
04270
04271
04272 if ( __kmp_adjust_gtid_mode ) {
04273 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
04274 if ( TCR_4(__kmp_gtid_mode) != 2) {
04275 TCW_4(__kmp_gtid_mode, 2);
04276 }
04277 }
04278 else {
04279 if (TCR_4(__kmp_gtid_mode) != 1 ) {
04280 TCW_4(__kmp_gtid_mode, 1);
04281 }
04282 }
04283 }
04284
04285 #ifdef KMP_ADJUST_BLOCKTIME
04286
04287
04288 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
04289 if ( __kmp_nth > __kmp_avail_proc ) {
04290 __kmp_zero_bt = TRUE;
04291 }
04292 }
04293 #endif
04294
04295
04296 if( ! ( root = __kmp_root[gtid] )) {
04297 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) );
04298 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
04299 }
04300
04301 __kmp_initialize_root( root );
04302
04303
04304 if( root -> r.r_uber_thread ) {
04305 root_thread = root -> r.r_uber_thread;
04306 } else {
04307 root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
04308 if ( __kmp_storage_map ) {
04309 __kmp_print_thread_storage_map( root_thread, gtid );
04310 }
04311 root_thread -> th.th_info .ds.ds_gtid = gtid;
04312 root_thread -> th.th_root = root;
04313 if( __kmp_env_consistency_check ) {
04314 root_thread -> th.th_cons = __kmp_allocate_cons_stack( gtid );
04315 }
04316 #if USE_FAST_MEMORY
04317 __kmp_initialize_fast_memory( root_thread );
04318 #endif
04319
04320 #if KMP_USE_BGET
04321 KMP_DEBUG_ASSERT( root_thread -> th.th_local.bget_data == NULL );
04322 __kmp_initialize_bget( root_thread );
04323 #endif
04324 __kmp_init_random( root_thread );
04325 }
04326
04327
04328 if( ! root_thread -> th.th_serial_team ) {
04329 #if OMP_30_ENABLED
04330 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
04331 #endif // OMP_30_ENABLED
04332 KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) );
04333
#if OMPT_SUPPORT
04334 kmp_info_t *ti = ompt_get_thread_gtid(gtid);
04335 ompt_parallel_id_t ompt_parallel_id_3 = __ompt_parallel_id_new(ti, gtid);
#endif
04336
04337 root_thread -> th.th_serial_team = __kmp_allocate_team( root, 1, 1,
04338 #if OMPT_SUPPORT
04339 ompt_parallel_id_3,
04340 #endif
04341 #if OMP_40_ENABLED
04342 proc_bind_default,
04343 #endif
04344 #if OMP_30_ENABLED
04345 &r_icvs,
04346 #else
04347 __kmp_dflt_team_nth_ub,
04348 __kmp_global.g.g_dynamic,
04349 __kmp_dflt_nested,
04350 __kmp_dflt_blocktime,
04351 __kmp_bt_intervals,
04352 __kmp_env_blocktime,
04353 #endif
04354 0 );
04355 }
04356 KMP_ASSERT( root_thread -> th.th_serial_team );
04357 KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n",
04358 root_thread -> th.th_serial_team ) );
04359
04360
04361 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
04362
04363 root -> r.r_root_team -> t.t_threads[0] = root_thread;
04364 root -> r.r_hot_team -> t.t_threads[0] = root_thread;
04365 root_thread -> th.th_serial_team -> t.t_threads[0] = root_thread;
04366 root -> r.r_uber_thread = root_thread;
04367
04368
04369 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
04370
04371
04372 __kmp_gtid_set_specific( gtid );
04373 #ifdef KMP_TDATA_GTID
04374 __kmp_gtid = gtid;
04375 #endif
04376 __kmp_create_worker( gtid, root_thread, __kmp_stksize );
04377 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
04378 TCW_4(__kmp_init_gtid, TRUE);
04379
04380 KA_TRACE( 20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
04381 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
04382 root -> r.r_hot_team -> t.t_id, 0, KMP_INIT_BARRIER_STATE,
04383 KMP_INIT_BARRIER_STATE ) );
04384 {
04385 int b;
04386 for ( b = 0; b < bs_last_barrier; ++ b ) {
04387 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
04388 };
04389 }
04390 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
04391
04392
04393 #if KMP_OS_WINDOWS || KMP_OS_LINUX
04394 if ( TCR_4(__kmp_init_middle) ) {
04395 __kmp_affinity_set_init_mask( gtid, TRUE );
04396 }
04397 #endif
04398
04399 __kmp_root_counter ++;
04400
04401 KMP_MB();
04402 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
04403
04404 return gtid;
04405 }
04406
04407
04408
04409
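/* Tear down a root: free its root team and hot team, release the uber thread's
   OS resources, and decrement the live-thread count.  Returns the number of
   threads that were in the hot team. */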
04410 static int
04411 __kmp_reset_root(int gtid, kmp_root_t *root)
04412 {
04413 kmp_team_t * root_team = root->r.r_root_team;
04414 kmp_team_t * hot_team = root->r.r_hot_team;
04415 int n = hot_team->t.t_nproc;
04416 int i;
04417
04418 KMP_DEBUG_ASSERT( ! root->r.r_active );
04419
04420 root->r.r_root_team = NULL;
04421 root->r.r_hot_team = NULL;
04422
04423
04424 __kmp_free_team( root, root_team );
04425 __kmp_free_team( root, hot_team );
04426
04427 #if OMP_30_ENABLED
04428
04429
04430
04431
04432 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
04433 __kmp_wait_to_unref_task_teams();
04434 }
04435 #endif
04436
04437 #if KMP_OS_WINDOWS
04438
04439 KA_TRACE( 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
04440 (LPVOID)&(root->r.r_uber_thread->th),
04441 root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
04442 __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
04443 #endif
04444
04445 TCW_4(__kmp_nth, __kmp_nth - 1);
04446 __kmp_reap_thread( root->r.r_uber_thread, 1 );
04447
04448
04449 root->r.r_uber_thread = NULL;
04450
04451 root -> r.r_begin = FALSE;
04452
04453 return n;
04454 }
04455
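/* Unregister the calling root thread: reset its root under the fork/join
   bootstrap lock and clear the gtid from thread-local storage. */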
04456 void
04457 __kmp_unregister_root_current_thread( int gtid )
04458 {
04459 kmp_root_t *root = __kmp_root[gtid];
04460
04461 KA_TRACE( 1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
04462 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
04463 KMP_ASSERT( KMP_UBER_GTID( gtid ));
04464 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
04465 KMP_ASSERT( root->r.r_active == FALSE );
04466
04467
04468
04469
04470
04471 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
04472
04473 KMP_MB();
04474
04475 __kmp_reset_root(gtid, root);
04476
04477
04478 __kmp_gtid_set_specific( KMP_GTID_DNE );
04479 #ifdef KMP_TDATA_GTID
04480 __kmp_gtid = KMP_GTID_DNE;
04481 #endif
04482
04483 KMP_MB();
04484 KC_TRACE( 10, ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
04485
04486 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
04487 }
04488
04489
04490
04491
04492
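/* Unregister a root on behalf of another (already dead) thread; used when
   reclaiming roots whose OS thread has exited.  Returns the number of hot-team
   threads that were freed. */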
04493 static int
04494 __kmp_unregister_root_other_thread( int gtid )
04495 {
04496 kmp_root_t *root = __kmp_root[gtid];
04497 int r;
04498
04499 KA_TRACE( 1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
04500 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
04501 KMP_ASSERT( KMP_UBER_GTID( gtid ));
04502 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
04503 KMP_ASSERT( root->r.r_active == FALSE );
04504
04505 r = __kmp_reset_root(gtid, root);
04506 KC_TRACE( 10, ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
04507 return r;
04508 }
04509
04510 #if OMP_30_ENABLED
04511
04512 #if KMP_DEBUG
04513 void __kmp_task_info() {
04514
04515 kmp_int32 gtid = __kmp_entry_gtid();
04516 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
04517 kmp_info_t *this_thr = __kmp_threads[ gtid ];
04518 kmp_team_t *steam = this_thr -> th.th_serial_team;
04519 kmp_team_t *team = this_thr -> th.th_team;
04520
04521 __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
04522 gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
04523 }
04524 #endif // KMP_DEBUG
04525
04526 #endif // OMP_30_ENABLED
04527
04528
04529
04530
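/* (Re)initialize a thread descriptor for use as thread 'tid' of 'team':
   cache team-wide values in the thread, set up its implicit task, its private
   dispatch buffers, and its private common block. */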
04531 static void
04532 __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid )
04533 {
04534
04535
04536
04537 KMP_DEBUG_ASSERT( this_thr != NULL );
04538 KMP_DEBUG_ASSERT( this_thr -> th.th_serial_team );
04539 KMP_DEBUG_ASSERT( team );
04540 KMP_DEBUG_ASSERT( team -> t.t_threads );
04541 KMP_DEBUG_ASSERT( team -> t.t_dispatch );
04542 KMP_DEBUG_ASSERT( team -> t.t_threads[0] );
04543 KMP_DEBUG_ASSERT( team -> t.t_threads[0] -> th.th_root );
04544
04545 KMP_MB();
04546
04547 TCW_SYNC_PTR(this_thr->th.th_team, team);
04548
04549 this_thr->th.th_info.ds.ds_tid = tid;
04550 this_thr->th.th_set_nproc = 0;
04551 #if OMP_40_ENABLED
04552 this_thr->th.th_set_proc_bind = proc_bind_default;
04553 # if (KMP_OS_WINDOWS || KMP_OS_LINUX)
04554 this_thr->th.th_new_place = this_thr->th.th_current_place;
04555 # endif
04556 #endif
04557 this_thr->th.th_root = team -> t.t_threads[0] -> th.th_root;
04558
04559
04560 this_thr->th.th_team_nproc = team -> t.t_nproc;
04561 this_thr->th.th_team_master = team -> t.t_threads[0];
04562 this_thr->th.th_team_serialized = team -> t.t_serialized;
04563 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
04564
04565 #if OMP_30_ENABLED
04566 KMP_DEBUG_ASSERT( team -> t.t_implicit_task_taskdata );
04567 this_thr->th.th_task_state = 0;
04568
04569 KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
04570 tid, gtid, this_thr, this_thr->th.th_current_task ) );
04571
04572 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
04573
04574 KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
04575 tid, gtid, this_thr, this_thr->th.th_current_task ) );
04576
04577 #endif // OMP_30_ENABLED
04578
04579
04580 this_thr -> th.th_dispatch = &team -> t.t_dispatch[ tid ];
04581
04582 this_thr->th.th_local.this_construct = 0;
04583 this_thr->th.th_local.last_construct = 0;
04584
04585 #ifdef BUILD_TV
04586 this_thr->th.th_local.tv_data = 0;
04587 #endif
04588
04589 if ( ! this_thr->th.th_pri_common ) {
04590 this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
04591 if ( __kmp_storage_map ) {
04592 __kmp_print_storage_map_gtid(
04593 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
04594 sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid
04595 );
04596 };
04597 this_thr->th.th_pri_head = NULL;
04598 };
04599
04600
04601 {
04602 volatile kmp_disp_t *dispatch = this_thr -> th.th_dispatch;
04603
04604
04605
04606 size_t disp_size = sizeof( dispatch_private_info_t ) *
04607 ( team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF );
04608 KD_TRACE( 10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
04609 KMP_ASSERT( dispatch );
04610 KMP_DEBUG_ASSERT( team -> t.t_dispatch );
04611 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
04612
04613 dispatch->th_disp_index = 0;
04614
04615 if( ! dispatch -> th_disp_buffer ) {
04616 dispatch -> th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
04617
04618 if ( __kmp_storage_map ) {
04619 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
04620 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF ],
04621 disp_size, "th_%d.th_dispatch.th_disp_buffer "
04622 "(team_%d.t_dispatch[%d].th_disp_buffer)",
04623 gtid, team->t.t_id, gtid );
04624 }
04625 } else {
04626 memset( & dispatch -> th_disp_buffer[0], '\0', disp_size );
04627 }
04628
04629 dispatch -> th_dispatch_pr_current = 0;
04630 dispatch -> th_dispatch_sh_current = 0;
04631
04632 dispatch -> th_deo_fcn = 0;
04633 dispatch -> th_dxo_fcn = 0;
04634 }
04635
04636 this_thr->th.th_next_pool = NULL;
04637
04638 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
04639 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
04640
04641 #if OMPT_SUPPORT
04642 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
04643 this_thr->th.ompt_thread_info.wait_id = 0;
04644 this_thr->th.ompt_thread_info.next_task_id = 1;
04645 this_thr->th.ompt_thread_info.next_parallel_id = tid+1;
04646
04647 this_thr->th.ompt_thread_info.lw_taskteam = NULL;
04648 #endif
04649
04650 KMP_MB();
04651 }
04652
04653
04654
04655
04656
04657
04658
04659
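/* Obtain a worker thread for slot 'new_tid' of 'team': reuse a thread from the
   thread pool if one is available, otherwise allocate a new descriptor, create
   its serial team, start the monitor thread on first use, and launch a new OS
   worker via __kmp_create_worker(). */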
04660 kmp_info_t *
04661 __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
04662 {
04663 kmp_team_t *serial_team;
04664 kmp_info_t *new_thr;
04665 int new_gtid;
04666
04667 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
04668 KMP_DEBUG_ASSERT( root && team );
04669 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
04670 KMP_MB();
04671
04672
04673 if ( __kmp_thread_pool ) {
04674
04675 new_thr = (kmp_info_t*)__kmp_thread_pool;
04676 __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
04677 if ( new_thr == __kmp_thread_pool_insert_pt ) {
04678 __kmp_thread_pool_insert_pt = NULL;
04679 }
04680 TCW_4(new_thr->th.th_in_pool, FALSE);
04681
04682
04683
04684
04685
04686 __kmp_thread_pool_nth--;
04687
04688 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
04689 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
04690 KMP_ASSERT( ! new_thr -> th.th_team );
04691 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
04692 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
04693
04694
04695 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
04696 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
04697
04698 TCW_4(__kmp_nth, __kmp_nth + 1);
04699
04700 #ifdef KMP_ADJUST_BLOCKTIME
04701
04702
04703 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
04704 if ( __kmp_nth > __kmp_avail_proc ) {
04705 __kmp_zero_bt = TRUE;
04706 }
04707 }
04708 #endif
04709
04710 KF_TRACE( 10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
04711 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
04712
04713 KMP_MB();
04714 return new_thr;
04715 }
04716
04717
04718
04719 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
04720 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
04721
04722
04723
04724
04725
04726 if ( ! TCR_4( __kmp_init_monitor ) ) {
04727 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
04728 if ( ! TCR_4( __kmp_init_monitor ) ) {
04729 KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) );
04730 TCW_4( __kmp_init_monitor, 1 );
04731 __kmp_create_monitor( & __kmp_monitor );
04732 KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
04733 }
04734 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
04735 }
04736
04737 KMP_MB();
04738 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
04739 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
04740 }
04741
04742
04743 new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
04744
04745 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
04746
04747 if ( __kmp_storage_map ) {
04748 __kmp_print_thread_storage_map( new_thr, new_gtid );
04749 }
04750
04751
04752 {
04753 #if OMP_30_ENABLED
04754 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
04755 #endif // OMP_30_ENABLED
04756 KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) );
04757
#if OMPT_SUPPORT
04758 kmp_info_t *ti = ompt_get_thread_gtid(new_gtid);
04759 ompt_parallel_id_t ompt_parallel_id_4 = __ompt_parallel_id_new(ti, new_gtid);
#endif
04760
04761 new_thr -> th.th_serial_team = serial_team =
04762 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
04763 #if OMPT_SUPPORT
04764 ompt_parallel_id_4,
04765 #endif
04766 #if OMP_40_ENABLED
04767 proc_bind_default,
04768 #endif
04769 #if OMP_30_ENABLED
04770 &r_icvs,
04771 #else
04772 team->t.t_set_nproc[0],
04773 team->t.t_set_dynamic[0],
04774 team->t.t_set_nested[0],
04775 team->t.t_set_blocktime[0],
04776 team->t.t_set_bt_intervals[0],
04777 team->t.t_set_bt_set[0],
04778 #endif
04779 0 );
04780 }
04781 KMP_ASSERT ( serial_team );
04782 serial_team -> t.t_threads[0] = new_thr;
04783 KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
04784 new_thr ) );
04785
04786
04787 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
04788
04789 #if USE_FAST_MEMORY
04790 __kmp_initialize_fast_memory( new_thr );
04791 #endif
04792
04793 #if KMP_USE_BGET
04794 KMP_DEBUG_ASSERT( new_thr -> th.th_local.bget_data == NULL );
04795 __kmp_initialize_bget( new_thr );
04796 #endif
04797
04798 __kmp_init_random( new_thr );
04799
04800
04801 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
04802 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
04803
04804 new_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_go = KMP_INIT_BARRIER_STATE;
04805 new_thr->th.th_bar[ bs_plain_barrier ].bb.b_go = KMP_INIT_BARRIER_STATE;
04806 #if KMP_FAST_REDUCTION_BARRIER
04807 new_thr->th.th_bar[ bs_reduction_barrier ].bb.b_go = KMP_INIT_BARRIER_STATE;
04808 #endif // KMP_FAST_REDUCTION_BARRIER
04809
04810 new_thr->th.th_spin_here = FALSE;
04811 new_thr->th.th_next_waiting = 0;
04812
04813 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
04814 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
04815 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
04816 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
04817 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
04818 #endif
04819
04820 TCW_4(new_thr->th.th_in_pool, FALSE);
04821 new_thr->th.th_active_in_pool = FALSE;
04822 TCW_4(new_thr->th.th_active, TRUE);
04823
04824
04825 __kmp_all_nth ++;
04826 __kmp_nth ++;
04827
04828
04829
04830
04831
04832
04833 if ( __kmp_adjust_gtid_mode ) {
04834 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
04835 if ( TCR_4(__kmp_gtid_mode) != 2) {
04836 TCW_4(__kmp_gtid_mode, 2);
04837 }
04838 }
04839 else {
04840 if (TCR_4(__kmp_gtid_mode) != 1 ) {
04841 TCW_4(__kmp_gtid_mode, 1);
04842 }
04843 }
04844 }
04845
04846 #ifdef KMP_ADJUST_BLOCKTIME
04847
04848
04849 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
04850 if ( __kmp_nth > __kmp_avail_proc ) {
04851 __kmp_zero_bt = TRUE;
04852 }
04853 }
04854 #endif
04855
04856
04857 KF_TRACE( 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
04858 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
04859 KF_TRACE( 10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
04860
04861
04862 KA_TRACE( 20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
04863 KMP_MB();
04864 return new_thr;
04865 }
04866
04867
04868
04869
04870
04871
04872
04873
04874
04875
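/* Reinstall the ICVs (or, pre-OMP 3.0, the per-thread set_* values) of an
   existing team for a new parallel region and assign it a fresh team id.
   Depending on the barrier ICV propagation scheme, the ICVs are copied either
   to every thread's implicit task or only to the master's. */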
04876 static void
04877 __kmp_reinitialize_team(
04878 kmp_team_t * team,
04879 int new_nproc,
04880 #if OMP_30_ENABLED
04881 kmp_internal_control_t * new_icvs,
04882 ident_t * loc
04883 #else
04884 int new_set_nproc, int new_set_dynamic, int new_set_nested,
04885 int new_set_blocktime, int new_bt_intervals, int new_bt_set
04886 #endif
04887 ) {
04888 int f;
04889 #if OMP_30_ENABLED
04890 KMP_DEBUG_ASSERT( team && new_nproc && new_icvs );
04891 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
04892 team->t.t_ident = loc;
04893 #else
04894 KMP_DEBUG_ASSERT( team && new_nproc && new_set_nproc );
04895 #endif // OMP_30_ENABLED
04896
04897 team->t.t_id = KMP_GEN_TEAM_ID();
04898
04899 #if KMP_BARRIER_ICV_PULL
04900
04901
04902
04903
04904 copy_icvs( &team->t.t_initial_icvs, new_icvs );
04905
04906
04907
04908
04909
04910
04911
04912 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
04913 copy_icvs( &team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs );
04914 KF_TRACE( 10, ( "__kmp_reinitialize_team2: T#%d this_thread=%p team=%p\n",
04915 0, team->t.t_threads[0], team ) );
04916
04917 #elif KMP_BARRIER_ICV_PUSH
04918
04919
04920
04921
04922 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
04923 copy_icvs( &team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs );
04924 KF_TRACE( 10, ( "__kmp_reinitialize_team2: T#%d this_thread=%p team=%p\n",
04925 0, team->t.t_threads[0], team ) );
04926
04927 #else
04928
04929
04930
04931 for( f=0 ; f<new_nproc ; f++) {
04932 # if OMP_30_ENABLED
04933
04934 KF_TRACE( 10, ( "__kmp_reinitialize_team1: T#%d this_thread=%p team=%p\n",
04935 f, team->t.t_threads[f], team ) );
04936 __kmp_init_implicit_task( loc, team->t.t_threads[f], team, f, FALSE );
04937 copy_icvs( &team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs );
04938 KF_TRACE( 10, ( "__kmp_reinitialize_team2: T#%d this_thread=%p team=%p\n",
04939 f, team->t.t_threads[f], team ) );
04940 # else
04941 team -> t.t_set_nproc[f] = new_set_nproc;
04942 team -> t.t_set_dynamic[f] = new_set_dynamic;
04943 team -> t.t_set_nested[f] = new_set_nested;
04944 team -> t.t_set_blocktime[f] = new_set_blocktime;
04945 team -> t.t_set_bt_intervals[f] = new_bt_intervals;
04946 team -> t.t_set_bt_set[f] = new_bt_set;
04947 # endif // OMP_30_ENABLED
04948 }
04949
04950 #endif // KMP_BARRIER_ICV_PUSH || KMP_BARRIER_ICV_PULL
04951
04952 }
04953
04954
04955
04956
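/* Initialize the team structure itself (thread count, scheduling, single and
   ordered constructs, copyin counter) and then delegate ICV setup to
   __kmp_reinitialize_team(). */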
04957 static void
04958 __kmp_initialize_team(
04959 kmp_team_t * team,
04960 int new_nproc,
04961 #if OMP_30_ENABLED
04962 kmp_internal_control_t * new_icvs,
04963 ident_t * loc
04964 #else
04965 int new_set_nproc, int new_set_dynamic, int new_set_nested,
04966 int new_set_blocktime, int new_bt_intervals, int new_bt_set
04967 #endif
04968 ) {
04969
04970 KMP_DEBUG_ASSERT( team );
04971 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
04972 KMP_DEBUG_ASSERT( team->t.t_threads );
04973 KMP_MB();
04974
04975 team -> t.t_master_tid = 0;
04976
04977 team -> t.t_serialized = 0;
04978 team -> t.t_nproc = new_nproc;
04979
04980
04981 team -> t.t_next_pool = NULL;
04982
04983
04984 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
04985 team -> t.t_invoke = NULL;
04986
04987 #if OMP_30_ENABLED
04988
04989 team -> t.t_sched = new_icvs->sched;
04990 #endif // OMP_30_ENABLED
04991
04992 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
04993 team -> t.t_fp_control_saved = FALSE;
04994 team -> t.t_x87_fpu_control_word = 0;
04995 team -> t.t_mxcsr = 0;
04996 #endif
04997
04998 team -> t.t_construct = 0;
04999 __kmp_init_lock( & team -> t.t_single_lock );
05000
05001 team -> t.t_ordered .dt.t_value = 0;
05002 team -> t.t_master_active = FALSE;
05003
05004 memset( & team -> t.t_taskq, '\0', sizeof( kmp_taskq_t ));
05005
05006 #ifdef KMP_DEBUG
05007 team -> t.t_copypriv_data = NULL;
05008 #endif
05009 team -> t.t_copyin_counter = 0;
05010
05011 team -> t.t_control_stack_top = NULL;
05012
05013 __kmp_reinitialize_team(
05014 team, new_nproc,
05015 #if OMP_30_ENABLED
05016 new_icvs,
05017 loc
05018 #else
05019 new_set_nproc, new_set_dynamic, new_set_nested,
05020 new_set_blocktime, new_bt_intervals, new_bt_set
05021 #endif
05022 );
05023
05024 KMP_MB();
05025 }
05026
05027 #if KMP_OS_LINUX
05028
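/* Linux only: temporarily bind the calling thread to the full affinity mask,
   saving the current mask in *old_mask (if non-NULL) so it can be restored
   after new workers have been created. */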
05029 static void
05030 __kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
05031 {
05032 if ( KMP_AFFINITY_CAPABLE() ) {
05033 int status;
05034 if ( old_mask != NULL ) {
05035 status = __kmp_get_system_affinity( old_mask, TRUE );
05036 int error = errno;
05037 if ( status != 0 ) {
05038 __kmp_msg(
05039 kmp_ms_fatal,
05040 KMP_MSG( ChangeThreadAffMaskError ),
05041 KMP_ERR( error ),
05042 __kmp_msg_null
05043 );
05044 }
05045 }
05046 __kmp_set_system_affinity( __kmp_affinity_get_fullMask(), TRUE );
05047 }
05048 }
05049 #endif
05050
05051 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
05052
05053
05054
05055
05056
05057
05058
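/* OMP 4.0 affinity: partition the master's place interval among the team's
   threads according to the team's proc_bind policy (master, close, or spread),
   recording each thread's first/last/new place. */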
05059 static void
05060 __kmp_partition_places( kmp_team_t *team )
05061 {
05062
05063
05064
05065 kmp_info_t *master_th = team->t.t_threads[0];
05066 KMP_DEBUG_ASSERT( master_th != NULL );
05067 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
05068 int first_place = master_th->th.th_first_place;
05069 int last_place = master_th->th.th_last_place;
05070 int masters_place = master_th->th.th_current_place;
05071 team->t.t_first_place = first_place;
05072 team->t.t_last_place = last_place;
05073
05074 KA_TRACE( 20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
05075 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
05076 masters_place, first_place, last_place ) );
05077
05078 switch ( proc_bind ) {
05079
05080 case proc_bind_default:
05081
05082
05083
05084
05085
05086 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
05087 break;
05088
05089 case proc_bind_master:
05090 {
05091 int f;
05092 int n_th = team->t.t_nproc;
05093 for ( f = 1; f < n_th; f++ ) {
05094 kmp_info_t *th = team->t.t_threads[f];
05095 KMP_DEBUG_ASSERT( th != NULL );
05096 th->th.th_first_place = first_place;
05097 th->th.th_last_place = last_place;
05098 th->th.th_new_place = masters_place;
05099
05100 KA_TRACE( 100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
05101 __kmp_gtid_from_thread( team->t.t_threads[f] ),
05102 team->t.t_id, f, masters_place, first_place, last_place ) );
05103 }
05104 }
05105 break;
05106
05107 case proc_bind_close:
05108 {
05109 int f;
05110 int n_th = team->t.t_nproc;
05111 int n_places;
05112 if ( first_place <= last_place ) {
05113 n_places = last_place - first_place + 1;
05114 }
05115 else {
05116 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
05117 }
05118 if ( n_th <= n_places ) {
05119 int place = masters_place;
05120 for ( f = 1; f < n_th; f++ ) {
05121 kmp_info_t *th = team->t.t_threads[f];
05122 KMP_DEBUG_ASSERT( th != NULL );
05123
05124 if ( place == last_place ) {
05125 place = first_place;
05126 }
05127 else if ( place == __kmp_affinity_num_masks - 1) {
05128 place = 0;
05129 }
05130 else {
05131 place++;
05132 }
05133 th->th.th_first_place = first_place;
05134 th->th.th_last_place = last_place;
05135 th->th.th_new_place = place;
05136
05137 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
05138 __kmp_gtid_from_thread( team->t.t_threads[f] ),
05139 team->t.t_id, f, place, first_place, last_place ) );
05140 }
05141 }
05142 else {
05143 int S, rem, gap, s_count;
05144 S = n_th / n_places;
05145 s_count = 0;
05146 rem = n_th - ( S * n_places );
05147 gap = rem > 0 ? n_places/rem : n_places;
05148 int place = masters_place;
05149 int gap_ct = gap;
05150 for ( f = 0; f < n_th; f++ ) {
05151 kmp_info_t *th = team->t.t_threads[f];
05152 KMP_DEBUG_ASSERT( th != NULL );
05153
05154 th->th.th_first_place = first_place;
05155 th->th.th_last_place = last_place;
05156 th->th.th_new_place = place;
05157 s_count++;
05158
05159 if ( (s_count == S) && rem && (gap_ct == gap) ) {
05160
05161 }
05162 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
05163
05164 if ( place == last_place ) {
05165 place = first_place;
05166 }
05167 else if ( place == __kmp_affinity_num_masks - 1) {
05168 place = 0;
05169 }
05170 else {
05171 place++;
05172 }
05173 s_count = 0;
05174 gap_ct = 1;
05175 rem--;
05176 }
05177 else if (s_count == S) {
05178 if ( place == last_place ) {
05179 place = first_place;
05180 }
05181 else if ( place == __kmp_affinity_num_masks - 1) {
05182 place = 0;
05183 }
05184 else {
05185 place++;
05186 }
05187 gap_ct++;
05188 s_count = 0;
05189 }
05190
05191 KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
05192 __kmp_gtid_from_thread( team->t.t_threads[f] ),
05193 team->t.t_id, f, th->th.th_new_place, first_place,
05194 last_place ) );
05195 }
05196 KMP_DEBUG_ASSERT( place == masters_place );
05197 }
05198 }
05199 break;
05200
05201 case proc_bind_spread:
05202 {
05203 int f;
05204 int n_th = team->t.t_nproc;
05205 int n_places;
05206 if ( first_place <= last_place ) {
05207 n_places = last_place - first_place + 1;
05208 }
05209 else {
05210 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
05211 }
05212 if ( n_th <= n_places ) {
05213 int place = masters_place;
05214 int S = n_places/n_th;
05215 int s_count, rem, gap, gap_ct;
05216 rem = n_places - n_th*S;
05217 gap = rem ? n_th/rem : 1;
05218 gap_ct = gap;
05219 for ( f = 0; f < n_th; f++ ) {
05220 kmp_info_t *th = team->t.t_threads[f];
05221 KMP_DEBUG_ASSERT( th != NULL );
05222
05223 th->th.th_first_place = place;
05224 th->th.th_new_place = place;
05225 s_count = 1;
05226 while (s_count < S) {
05227 if ( place == last_place ) {
05228 place = first_place;
05229 }
05230 else if ( place == __kmp_affinity_num_masks - 1) {
05231 place = 0;
05232 }
05233 else {
05234 place++;
05235 }
05236 s_count++;
05237 }
05238 if (rem && (gap_ct == gap)) {
05239 if ( place == last_place ) {
05240 place = first_place;
05241 }
05242 else if ( place == __kmp_affinity_num_masks - 1) {
05243 place = 0;
05244 }
05245 else {
05246 place++;
05247 }
05248 rem--;
05249 gap_ct = 0;
05250 }
05251 th->th.th_last_place = place;
05252 gap_ct++;
05253
05254 if ( place == last_place ) {
05255 place = first_place;
05256 }
05257 else if ( place == __kmp_affinity_num_masks - 1) {
05258 place = 0;
05259 }
05260 else {
05261 place++;
05262 }
05263
05264 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
05265 __kmp_gtid_from_thread( team->t.t_threads[f] ),
05266 team->t.t_id, f, th->th.th_new_place,
05267 th->th.th_first_place, th->th.th_last_place ) );
05268 }
05269 KMP_DEBUG_ASSERT( place == masters_place );
05270 }
05271 else {
05272 int S, rem, gap, s_count;
05273 S = n_th / n_places;
05274 s_count = 0;
05275 rem = n_th - ( S * n_places );
05276 gap = rem > 0 ? n_places/rem : n_places;
05277 int place = masters_place;
05278 int gap_ct = gap;
05279 for ( f = 0; f < n_th; f++ ) {
05280 kmp_info_t *th = team->t.t_threads[f];
05281 KMP_DEBUG_ASSERT( th != NULL );
05282
05283 th->th.th_first_place = place;
05284 th->th.th_last_place = place;
05285 th->th.th_new_place = place;
05286 s_count++;
05287
05288 if ( (s_count == S) && rem && (gap_ct == gap) ) {
05289
05290 }
05291 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
05292
05293 if ( place == last_place ) {
05294 place = first_place;
05295 }
05296 else if ( place == __kmp_affinity_num_masks - 1) {
05297 place = 0;
05298 }
05299 else {
05300 place++;
05301 }
05302 s_count = 0;
05303 gap_ct = 1;
05304 rem--;
05305 }
05306 else if (s_count == S) {
05307 if ( place == last_place ) {
05308 place = first_place;
05309 }
05310 else if ( place == __kmp_affinity_num_masks - 1) {
05311 place = 0;
05312 }
05313 else {
05314 place++;
05315 }
05316 gap_ct++;
05317 s_count = 0;
05318 }
05319
05320 KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
05321 __kmp_gtid_from_thread( team->t.t_threads[f] ),
05322 team->t.t_id, f, th->th.th_new_place,
05323 th->th.th_first_place, th->th.th_last_place) );
05324 }
05325 KMP_DEBUG_ASSERT( place == masters_place );
05326 }
05327 }
05328 break;
05329
05330 default:
05331 break;
05332 }
05333
05334 KA_TRACE( 20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
05335 }
05336
05337 #endif
05338
05339
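/* Allocate a team of new_nproc threads with capacity for max_nproc.  For a
   top-level parallel region (root not yet active) the root's hot team is
   reused and resized as needed; otherwise a team of sufficient capacity is
   taken from the team pool, or a new team is allocated and initialized. */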
05340 kmp_team_t *
05341 __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
05342 #if OMPT_SUPPORT
05343 ompt_parallel_id_t ompt_parallel_id,
05344 #endif
05345 #if OMP_40_ENABLED
05346 kmp_proc_bind_t new_proc_bind,
05347 #endif
05348 #if OMP_30_ENABLED
05349 kmp_internal_control_t *new_icvs,
05350 #else
05351 int new_set_nproc, int new_set_dynamic, int new_set_nested,
05352 int new_set_blocktime, int new_bt_intervals, int new_bt_set,
05353 #endif
05354 int argc )
05355 {
05356 int f;
05357 kmp_team_t *team;
05358 char *ptr;
05359 size_t size;
05360
05361 KA_TRACE( 20, ("__kmp_allocate_team: called\n"));
05362 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
05363 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
05364 KMP_MB();
05365
05366
05367
05368
05369
05370 if ( ! root->r.r_active && new_nproc > 1 ) {
05371
05372 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
05373
05374 team = root -> r.r_hot_team;
05375
05376 #if OMP_30_ENABLED && KMP_DEBUG
05377 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
05378 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team = %p before reinit\n",
05379 team -> t.t_task_team ));
05380 }
05381 #endif
05382
05383
05384 if( team -> t.t_nproc > new_nproc ) {
05385 KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
05386
05387 #if KMP_MIC
05388 team -> t.t_size_changed = 1;
05389 #endif
05390 #if OMP_30_ENABLED
05391 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
05392 kmp_task_team_t *task_team = team->t.t_task_team;
05393 if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
05394
05395
05396
05397
05398
05399
05400 KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
05401 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
05402 KMP_MB();
05403
05404 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
05405 &team->t.t_task_team ) );
05406 team->t.t_task_team = NULL;
05407 }
05408 else {
05409 KMP_DEBUG_ASSERT( task_team == NULL );
05410 }
05411 }
05412 #endif // OMP_30_ENABLED
05413
05414
05415 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
05416 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
05417 __kmp_free_thread( team->t.t_threads[ f ] );
05418 team -> t.t_threads[ f ] = NULL;
05419 }
05420
05421 team -> t.t_nproc = new_nproc;
05422 #if OMP_30_ENABLED
05423
05424 team -> t.t_sched = new_icvs->sched;
05425 #endif
05426 __kmp_reinitialize_team( team, new_nproc,
05427 #if OMP_30_ENABLED
05428 new_icvs,
05429 root->r.r_uber_thread->th.th_ident
05430 #else
05431 new_set_nproc, new_set_dynamic, new_set_nested,
05432 new_set_blocktime, new_bt_intervals, new_bt_set
05433 #endif
05434 );
05435
05436 #if OMP_30_ENABLED
05437 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
05438 kmp_task_team_t *task_team = team->t.t_task_team;
05439 if ( task_team != NULL ) {
05440 KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
05441 task_team->tt.tt_nproc = new_nproc;
05442 task_team->tt.tt_unfinished_threads = new_nproc;
05443 task_team->tt.tt_ref_ct = new_nproc - 1;
05444 }
05445 }
05446 #endif
05447
05448
05449 for( f = 0 ; f < new_nproc ; f++ ) {
05450 team -> t.t_threads[ f ] -> th.th_team_nproc = team->t.t_nproc;
05451 }
05452
05453 #if OMP_30_ENABLED
05454
05455 KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
05456 0, team->t.t_threads[0], team ) );
05457
05458 __kmp_push_current_task_to_thread( team -> t.t_threads[ 0 ], team, 0 );
05459 #endif
05460
05461 #ifdef KMP_DEBUG
05462 for ( f = 0; f < team->t.t_nproc; f++ ) {
05463 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
05464 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
05465 }
05466 #endif
05467
05468 #if OMP_40_ENABLED
05469 team->t.t_proc_bind = new_proc_bind;
05470 # if KMP_OS_WINDOWS || KMP_OS_LINUX
05471 __kmp_partition_places( team );
05472 # endif
05473 #endif
05474
05475 }
05476 else if ( team -> t.t_nproc < new_nproc ) {
05477 #if KMP_OS_LINUX
05478 kmp_affin_mask_t *old_mask;
05479 if ( KMP_AFFINITY_CAPABLE() ) {
05480 KMP_CPU_ALLOC(old_mask);
05481 }
05482 #endif
05483
05484 KA_TRACE( 20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
05485
05486 #if KMP_MIC
05487 team -> t.t_size_changed = 1;
05488 #endif
05489
05490
05491 if(team -> t.t_max_nproc < new_nproc) {
05492
05493 __kmp_reallocate_team_arrays(team, new_nproc);
05494 __kmp_reinitialize_team( team, new_nproc,
05495 #if OMP_30_ENABLED
05496 new_icvs,
05497 NULL
05498 #else
05499 new_set_nproc, new_set_dynamic, new_set_nested,
05500 new_set_blocktime, new_bt_intervals, new_bt_set
05501 #endif
05502 );
05503 }
05504
05505 #if KMP_OS_LINUX
05506
05507
05508
05509
05510
05511
05512 __kmp_set_thread_affinity_mask_full_tmp( old_mask );
05513 #endif
05514
05515
05516 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
05517 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
05518 KMP_DEBUG_ASSERT( new_worker );
05519 team->t.t_threads[ f ] = new_worker;
05520 new_worker->th.th_team_nproc = team->t.t_nproc;
05521
05522 KA_TRACE( 20, ("__kmp_allocate_team: team %d init T#%d arrived: join=%u, plain=%u\n",
05523 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
05524 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
05525 team->t.t_bar[bs_plain_barrier].b_arrived ) );
05526
05527 {
05528 int b;
05529 kmp_balign_t * balign = new_worker->th.th_bar;
05530 for ( b = 0; b < bs_last_barrier; ++ b ) {
05531 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
05532 }
05533 }
05534 }
05535
05536 #if KMP_OS_LINUX
05537 if ( KMP_AFFINITY_CAPABLE() ) {
05538
05539 __kmp_set_system_affinity( old_mask, TRUE );
05540 KMP_CPU_FREE(old_mask);
05541 }
05542 #endif
05543
05544
05545 __kmp_initialize_team( team, new_nproc,
05546 #if OMP_30_ENABLED
05547 new_icvs,
05548 root->r.r_uber_thread->th.th_ident
05549 #else
05550 new_set_nproc, new_set_dynamic, new_set_nested,
05551 new_set_blocktime, new_bt_intervals, new_bt_set
05552 #endif
05553 );
05554
05555 #if OMP_30_ENABLED
05556 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
05557 kmp_task_team_t *task_team = team->t.t_task_team;
05558 if ( task_team != NULL ) {
05559 KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
05560 task_team->tt.tt_nproc = new_nproc;
05561 task_team->tt.tt_unfinished_threads = new_nproc;
05562 task_team->tt.tt_ref_ct = new_nproc - 1;
05563 }
05564 }
05565 #endif
05566
05567
05568 for( f = 0 ; f < team->t.t_nproc ; f++ )
05569 __kmp_initialize_info( team->t.t_threads[ f ], team, f,
05570 __kmp_gtid_from_tid( f, team ) );
05571 #ifdef KMP_DEBUG
05572 for ( f = 0; f < team->t.t_nproc; ++ f ) {
05573 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
05574 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
05575 }
05576 #endif
05577
05578 #if OMP_40_ENABLED
05579 team->t.t_proc_bind = new_proc_bind;
05580 # if KMP_OS_WINDOWS || KMP_OS_LINUX
05581 __kmp_partition_places( team );
05582 # endif
05583 #endif
05584
05585 }
05586 else {
05587 KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" ));
05588 #if KMP_MIC
05589
05590
05591 if ( team -> t.t_size_changed == -1 ) {
05592 team -> t.t_size_changed = 1;
05593 } else {
05594 team -> t.t_size_changed = 0;
05595 }
05596 #endif
05597
05598 #if OMP_30_ENABLED
05599
05600 team -> t.t_sched = new_icvs->sched;
05601 #endif
05602
05603 __kmp_reinitialize_team( team, new_nproc,
05604 #if OMP_30_ENABLED
05605 new_icvs,
05606 root->r.r_uber_thread->th.th_ident
05607 #else
05608 new_set_nproc, new_set_dynamic, new_set_nested,
05609 new_set_blocktime, new_bt_intervals, new_bt_set
05610 #endif
05611 );
05612
05613 #if OMP_30_ENABLED
05614 KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
05615 0, team->t.t_threads[0], team ) );
05616 __kmp_push_current_task_to_thread( team -> t.t_threads[ 0 ], team, 0 );
05617 #endif
05618
05619 #if OMP_40_ENABLED
05620 # if (KMP_OS_WINDOWS || KMP_OS_LINUX)
05621 if ( team->t.t_proc_bind == new_proc_bind ) {
05622 KA_TRACE( 200, ("__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
05623 team->t.t_id, new_proc_bind, team->t.t_first_place,
05624 team->t.t_last_place ) );
05625 }
05626 else {
05627 team->t.t_proc_bind = new_proc_bind;
05628 __kmp_partition_places( team );
05629 }
05630 # else
05631 if ( team->t.t_proc_bind != new_proc_bind ) {
05632 team->t.t_proc_bind = new_proc_bind;
05633 }
05634 # endif
05635 #endif
05636 }
05637
05638
05639 __kmp_alloc_argv_entries( argc, team, TRUE );
05640 team -> t.t_argc = argc;
05641
05642
05643
05644
05645
05646 KF_TRACE( 10, ( " hot_team = %p\n", team ) );
05647
05648 #if OMP_30_ENABLED && KMP_DEBUG
05649 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
05650 KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team = %p after reinit\n",
05651 team -> t.t_task_team ));
05652 }
05653 #endif
05654
05655 #if OMPT_SUPPORT
05656 __ompt_team_assign_id(team, ompt_parallel_id);
05657 #endif
05658
05659 KMP_MB();
05660
05661 return team;
05662 }
05663
05664
05665 KMP_MB();
05666 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
05667 {
05668
05669 if ( team->t.t_max_nproc >= max_nproc ) {
05670
05671 __kmp_team_pool = team->t.t_next_pool;
05672
05673
05674 __kmp_initialize_team( team, new_nproc,
05675 #if OMP_30_ENABLED
05676 new_icvs,
05677 NULL
05678 #else
05679 new_set_nproc, new_set_dynamic, new_set_nested,
05680 new_set_blocktime, new_bt_intervals, new_bt_set
05681 #endif
05682 );
05683
05684 #if OMP_30_ENABLED
05685 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
05686 &team->t.t_task_team ) );
05687 team -> t.t_task_team = NULL;
05688 #endif
05689
05690
05691 __kmp_alloc_argv_entries( argc, team, TRUE );
05692 team -> t.t_argc = argc;
05693
05694 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
05695 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
05696 {
05697 int b;
05698 for ( b = 0; b < bs_last_barrier; ++ b) {
05699 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
05700 }
05701 }
05702
05703 #if OMP_40_ENABLED
05704 team->t.t_proc_bind = new_proc_bind;
05705 #endif
05706
05707 KA_TRACE( 20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
05708
05709 #if OMPT_SUPPORT
05710 __ompt_team_assign_id(team, ompt_parallel_id);
05711 #endif
05712
05713 KMP_MB();
05714
05715 return team;
05716 }
05717
05718
05719
05720
05721 team = __kmp_reap_team( team );
05722 __kmp_team_pool = team;
05723 }
05724
05725
05726 KMP_MB();
05727 team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
05728
05729
05730 team -> t.t_max_nproc = max_nproc;
05731
05732
05733
05734 __kmp_allocate_team_arrays( team, max_nproc );
05735 __kmp_initialize_team( team, new_nproc,
05736 #if OMP_30_ENABLED
05737 new_icvs,
05738 NULL
05739 #else
05740 new_set_nproc, new_set_dynamic, new_set_nested,
05741 new_set_blocktime, new_bt_intervals, new_bt_set
05742 #endif
05743 );
05744
05745 #if OMP_30_ENABLED
05746 KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
05747 &team->t.t_task_team ) );
05748 team -> t.t_task_team = NULL;
05749 #endif
05750
05751 if ( __kmp_storage_map ) {
05752 __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
05753 }
05754
05755
05756 __kmp_alloc_argv_entries( argc, team, FALSE );
05757 team -> t.t_argc = argc;
05758
05759 KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
05760 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
05761 {
05762 int b;
05763 for ( b = 0; b < bs_last_barrier; ++ b ) {
05764 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
05765 }
05766 }
05767
05768 #if OMP_40_ENABLED
05769 team->t.t_proc_bind = new_proc_bind;
05770 #endif
05771
05772 #if OMPT_SUPPORT
05773 __ompt_team_assign_id(team, ompt_parallel_id);
05774 #endif
05775
05776 KMP_MB();
05777
05778 KA_TRACE( 20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
05779
05780 return team;
05781 }
05782
05783
05784
05785
05786
05787
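/* Return a team to the team pool.  The root's hot team is never pooled; for
   other teams the task team is deactivated, the worker threads (all but the
   master slot) are released to the thread pool, and the team is pushed onto
   __kmp_team_pool. */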
05788 void
05789 __kmp_free_team( kmp_root_t *root, kmp_team_t *team )
05790 {
05791 int f;
05792 KA_TRACE( 20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
05793
05794
05795 KMP_DEBUG_ASSERT( root );
05796 KMP_DEBUG_ASSERT( team );
05797 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
05798 KMP_DEBUG_ASSERT( team->t.t_threads );
05799
05800
05801 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
05802 team -> t.t_copyin_counter = 0;
05803
05804
05805
05806 if( team != root->r.r_hot_team ) {
05807
05808 #if OMP_30_ENABLED
05809 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
05810 kmp_task_team_t *task_team = team->t.t_task_team;
05811 if ( task_team != NULL ) {
05812
05813
05814
05815
05816
05817
05818 KA_TRACE( 20, ( "__kmp_free_team: deactivating task_team %p\n",
05819 task_team ) );
05820 KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
05821 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
05822 KMP_MB();
05823 team->t.t_task_team = NULL;
05824 }
05825 }
05826 #endif
05827
05828
05829 team -> t.t_parent = NULL;
05830
05831
05832
05833 for ( f = 1; f < team->t.t_nproc; ++ f ) {
05834 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
05835 __kmp_free_thread( team->t.t_threads[ f ] );
05836 team->t.t_threads[ f ] = NULL;
05837 }
05838
05839
05840
05841
05842 team -> t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
05843 __kmp_team_pool = (volatile kmp_team_t*) team;
05844 }
05845
05846 KMP_MB();
05847 }
05848
05849
05850
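/* Free all memory owned by a pooled team (arrays, argv, and the team structure
   itself) and return the next team in the pool. */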
05851 kmp_team_t *
05852 __kmp_reap_team( kmp_team_t *team )
05853 {
05854 kmp_team_t *next_pool = team -> t.t_next_pool;
05855
05856 KMP_DEBUG_ASSERT( team );
05857 KMP_DEBUG_ASSERT( team -> t.t_dispatch );
05858 KMP_DEBUG_ASSERT( team -> t.t_disp_buffer );
05859 KMP_DEBUG_ASSERT( team -> t.t_threads );
05860 #if OMP_30_ENABLED
05861 #else
05862 KMP_DEBUG_ASSERT( team -> t.t_set_nproc );
05863 #endif
05864 KMP_DEBUG_ASSERT( team -> t.t_argv );
05865
05866
05867
05868
05869
05870 __kmp_free_team_arrays( team );
05871 #if (KMP_PERF_V106 == KMP_ON)
05872 if ( team -> t.t_argv != &team -> t.t_inline_argv[0] )
05873 __kmp_free( (void*) team -> t.t_argv );
05874 #else
05875 __kmp_free( (void*) team -> t.t_argv );
05876 #endif
05877 __kmp_free( team );
05878
05879 KMP_MB();
05880 return next_pool;
05881 }
05882
05883
05884
05885
05886
05887
05888
05889
05890
05891
05892
05893
05894
05895
05896
05897
05898
05899
05900
05901
05902
05903
05904
05905
05906
05907
05908
05909
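/* Return a worker thread to the thread pool.  The pool is kept sorted by gtid
   so that low gtids are reused first; __kmp_thread_pool_insert_pt caches the
   last insertion point to avoid rescanning the list from the head. */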
05910 void
05911 __kmp_free_thread( kmp_info_t *this_th )
05912 {
05913 int gtid;
05914 kmp_info_t **scan;
05915
05916 KA_TRACE( 20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
05917 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
05918
05919 KMP_DEBUG_ASSERT( this_th );
05920
05921
05922
05923 TCW_PTR(this_th->th.th_team, NULL);
05924 TCW_PTR(this_th->th.th_root, NULL);
05925 TCW_PTR(this_th->th.th_dispatch, NULL);
05926
05927
05928
05929
05930
05931 gtid = this_th->th.th_info.ds.ds_gtid;
05932 if ( __kmp_thread_pool_insert_pt != NULL ) {
05933 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
05934 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
05935 __kmp_thread_pool_insert_pt = NULL;
05936 }
05937 }
05938
05939
05940
05941
05942
05943
05944
05945
05946 if ( __kmp_thread_pool_insert_pt != NULL ) {
05947 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
05948 }
05949 else {
05950 scan = (kmp_info_t **)&__kmp_thread_pool;
05951 }
05952 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
05953 scan = &( (*scan)->th.th_next_pool ) );
05954
05955
05956
05957
05958
05959 TCW_PTR(this_th->th.th_next_pool, *scan);
05960 __kmp_thread_pool_insert_pt = *scan = this_th;
05961 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
05962 || ( this_th->th.th_info.ds.ds_gtid
05963 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
05964 TCW_4(this_th->th.th_in_pool, TRUE);
05965 __kmp_thread_pool_nth++;
05966
05967 TCW_4(__kmp_nth, __kmp_nth - 1);
05968
05969 #ifdef KMP_ADJUST_BLOCKTIME
05970
05971
05972 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
05973 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
05974 if ( __kmp_nth <= __kmp_avail_proc ) {
05975 __kmp_zero_bt = FALSE;
05976 }
05977 }
05978 #endif
05979
05980 KMP_MB();
05981 }
05982
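/* Barrier executed by every thread at the end of a parallel region.  All
   threads gather at the fork/join barrier; the master additionally waits for
   the task team to drain before the team is disbanded. */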
05983 void
05984 __kmp_join_barrier( int gtid )
05985 {
05986 register kmp_info_t *this_thr = __kmp_threads[ gtid ];
05987 register kmp_team_t *team;
05988 register kmp_uint count;
05989 register kmp_uint nproc;
05990 kmp_info_t *master_thread;
05991 int tid;
05992 #ifdef KMP_DEBUG
05993 int team_id;
05994 #endif
05995
05996 KMP_MB();
05997
05998
05999 team = this_thr -> th.th_team;
06000
06001 nproc = this_thr -> th.th_team_nproc;
06002 KMP_DEBUG_ASSERT( nproc == team->t.t_nproc );
06003 tid = __kmp_tid_from_gtid(gtid);
06004 #ifdef KMP_DEBUG
06005 team_id = team -> t.t_id;
06006 #endif
06007
06008 master_thread = this_thr -> th.th_team_master;
06009 #ifdef KMP_DEBUG
06010 if ( master_thread != team->t.t_threads[0] ) {
06011 __kmp_print_structure();
06012 }
06013 #endif
06014 KMP_DEBUG_ASSERT( master_thread == team->t.t_threads[0] );
06015 KMP_MB();
06016
06017
06018 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
06019 KMP_DEBUG_ASSERT( TCR_PTR(this_thr->th.th_team) );
06020 KMP_DEBUG_ASSERT( TCR_PTR(this_thr->th.th_root) );
06021 KMP_DEBUG_ASSERT( this_thr == team -> t.t_threads[tid] );
06022
06023 KA_TRACE( 10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",
06024 gtid, team_id, tid ));
06025 #if OMPT_SUPPORT
06026 if ((ompt_status == ompt_status_track_callback) &&
06027 ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
06028 int tid = __kmp_tid_from_gtid( gtid );
06029 ompt_callbacks.ompt_callback(ompt_event_barrier_begin)
06030 (team->t.ompt_team_info.parallel_id,
06031 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
06032 }
06033 this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
06034 #endif
06035
06036 #if OMP_30_ENABLED
06037 if ( __kmp_tasking_mode == tskm_extra_barrier ) {
06038 __kmp_tasking_barrier( team, this_thr, gtid );
06039
06040 KA_TRACE( 10, ("__kmp_join_barrier: T#%d(%d:%d) past tasking barrier\n",
06041 gtid, team_id, tid ));
06042 }
06043 #ifdef KMP_DEBUG
06044 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
06045 KA_TRACE( 20, ( "__kmp_join_barrier: T#%d, old team = %d, old task_team = %p, th_task_team = %p\n",
06046 __kmp_gtid_from_thread( this_thr ), team_id, team -> t.t_task_team,
06047 this_thr->th.th_task_team ) );
06048 KMP_DEBUG_ASSERT( this_thr->th.th_task_team == team->t.t_task_team );
06049 }
06050 #endif
06051 #endif
06052
06053
06054
06055
06056
06057
06058
06059
06060
06061 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
06062 #if OMP_30_ENABLED
06063 this_thr -> th.th_team_bt_intervals = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
06064 this_thr -> th.th_team_bt_set = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
06065 #else
06066 this_thr -> th.th_team_bt_intervals = team -> t.t_set_bt_intervals[tid];
06067 this_thr -> th.th_team_bt_set = team -> t.t_set_bt_set[tid];
06068 #endif // OMP_30_ENABLED
06069 }
06070
06071 #if KMP_OS_WINDOWS
06072
06073
06074
06075
06076
06077
06078
06079
06080
06081
06082
06083 if( KMP_MASTER_TID( tid ) && TCR_4(__kmp_init_monitor) < 2 ) {
06084 __kmp_wait_sleep( this_thr, (volatile kmp_uint32*)&__kmp_init_monitor, 2, 0
06085 );
06086 }
06087 #endif
06088
06089
06090 if ( __kmp_barrier_gather_pattern[ bs_forkjoin_barrier ] == bp_linear_bar || __kmp_barrier_gather_branch_bits[ bs_forkjoin_barrier ] == 0 ) {
06091 __kmp_linear_barrier_gather( bs_forkjoin_barrier, this_thr, gtid, tid, NULL
06092 );
06093 } else if ( __kmp_barrier_gather_pattern[ bs_forkjoin_barrier ] == bp_tree_bar ) {
06094 __kmp_tree_barrier_gather( bs_forkjoin_barrier, this_thr, gtid, tid, NULL
06095 );
06096 } else {
06097 __kmp_hyper_barrier_gather( bs_forkjoin_barrier, this_thr, gtid, tid, NULL
06098 );
06099 };
06100
06101
06102
06103
06104
06105
06106
06107
06108
06109
06110
06111 #if OMP_30_ENABLED
06112 if ( KMP_MASTER_TID( tid ) ) {
06113 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
06114
06115
06116 __kmp_task_team_wait( this_thr, team
06117 );
06118 }
06119 }
06120 #endif
06121
06122 #if KMP_DEBUG
06123 if( KMP_MASTER_TID( tid )) {
06124 KA_TRACE( 15, ( "__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
06125 gtid, team_id, tid, nproc ));
06126 }
06127 #endif
06128
06129
06130
06131 KMP_MB();
06132 KA_TRACE( 10, ("__kmp_join_barrier: T#%d(%d:%d) leaving\n",
06133 gtid, team_id, tid ));
06134 #if OMPT_SUPPORT
06135 if ((ompt_status == ompt_status_track_callback) &&
06136 ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
06137 int tid = __kmp_tid_from_gtid( gtid );
06138 ompt_callbacks.ompt_callback(ompt_event_barrier_end)
06139 (team->t.ompt_team_info.parallel_id,
06140 team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
06141 }
06142
06143 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
06144 #endif
06145
06146 }
06147
06148
06149
06150
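/* __kmp_fork_barrier: the release half of the fork/join barrier.  The master
   (tid == 0) arrives with a valid team pointer, optionally verifies the
   workers' barrier state in debug builds, sets up the task team, and then
   releases the workers; workers arrive with team == NULL and pick up their
   team pointer only after being released. */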
06151 void
06152 __kmp_fork_barrier( int gtid, int tid )
06153 {
06154 kmp_info_t *this_thr = __kmp_threads[ gtid ];
06155 kmp_team_t *team = ( tid == 0 ) ? this_thr -> th.th_team : NULL;
06156
06157 KA_TRACE( 10, ( "__kmp_fork_barrier: T#%d(%d:%d) has arrived\n",
06158 gtid, ( team != NULL ) ? team->t.t_id : -1, tid ));
06159
06160
06161 if ( KMP_MASTER_TID( tid ) ) {
06162
06163
06164 #ifdef KMP_DEBUG
06165
06166 register kmp_info_t **other_threads = team -> t.t_threads;
06167 register int i;
06168
06169
06170 KMP_MB();
06171
06172 for( i = 1; i < team -> t.t_nproc ; i++ ) {
06173 KA_TRACE( 500, ( "__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork "
06174 "go == %u.\n",
06175 gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
06176 team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
06177 other_threads[i]->th.th_bar[ bs_forkjoin_barrier ].bb.b_go ) );
06178
06179 KMP_DEBUG_ASSERT( ( TCR_4( other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go )
06180 & ~(KMP_BARRIER_SLEEP_STATE) )
06181 == KMP_INIT_BARRIER_STATE );
06182 KMP_DEBUG_ASSERT( other_threads[i]->th.th_team == team );
06183
06184 }
06185 #endif
06186
06187 #if OMP_30_ENABLED
06188 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
06189 __kmp_task_team_setup( this_thr, team );
06190 }
06191 #endif
06192
06193
06194
06195
06196
06197
06198
06199
06200
06201
06202
06203 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
06204 #if OMP_30_ENABLED
06205 this_thr -> th.th_team_bt_intervals = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
06206 this_thr -> th.th_team_bt_set = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
06207 #else
06208 this_thr -> th.th_team_bt_intervals = team -> t.t_set_bt_intervals[tid];
06209 this_thr -> th.th_team_bt_set = team -> t.t_set_bt_set[tid];
06210 #endif // OMP_30_ENABLED
06211 }
06212 }
06213
06214 if ( __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] == bp_linear_bar || __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] == 0 ) {
06215 __kmp_linear_barrier_release( bs_forkjoin_barrier, this_thr, gtid, tid, TRUE );
06217 } else if ( __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] == bp_tree_bar ) {
06218 __kmp_tree_barrier_release( bs_forkjoin_barrier, this_thr, gtid, tid, TRUE );
06220 } else {
06221 __kmp_hyper_barrier_release( bs_forkjoin_barrier, this_thr, gtid, tid, TRUE );
06223 }
06224
06225
06226
06227
06228 if ( TCR_4(__kmp_global.g.g_done) ) {
06229
06230 #if OMP_30_ENABLED
06231 if ( this_thr->th.th_task_team != NULL ) {
06232 if ( KMP_MASTER_TID( tid ) ) {
06233 TCW_PTR(this_thr->th.th_task_team, NULL);
06234 }
06235 else {
06236 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
06237 }
06238 }
06239 #endif
06240
06241 KA_TRACE( 10, ( "__kmp_fork_barrier: T#%d is leaving early\n", gtid ));
06242 return;
06243 }
06244
06245
06246
06247
06248
06249
06250
06251
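/* A worker reaching this point has been released into a (possibly new)
   parallel region, so re-read th_team and recompute tid: the master may have
   moved this thread to a different team while it was waiting. */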
06252 team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
06253 KMP_DEBUG_ASSERT( team != NULL );
06254 tid = __kmp_tid_from_gtid( gtid );
06255
06256 #if OMP_30_ENABLED
06257
06258 # if KMP_BARRIER_ICV_PULL
06259
06260
06261
06262
06263
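/* KMP_BARRIER_ICV_PULL: instead of the master pushing ICVs to each worker,
   each worker initializes its own implicit task here and copies the ICVs from
   the team's t_initial_icvs after being released. */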
06264 if (! KMP_MASTER_TID( tid ) ) {
06265
06266
06267
06268
06269 __kmp_init_implicit_task( team->t.t_ident, team->t.t_threads[tid],
06270 team, tid, FALSE );
06271 copy_icvs( &team->t.t_implicit_task_taskdata[tid].td_icvs,
06272 &team->t.t_initial_icvs );
06273 }
06274 # endif // KMP_BARRIER_ICV_PULL
06275
06276 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
06277 __kmp_task_team_sync( this_thr, team );
06278 }
06279
06280 #endif
06281
06282 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
06283 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
06284 if ( proc_bind == proc_bind_intel ) {
06285 #endif
06286 #if KMP_MIC
06287
06288
06289
06290 if( __kmp_affinity_type == affinity_balanced && team->t.t_size_changed ) {
06291 __kmp_balanced_affinity( tid, team->t.t_nproc );
06292 }
06293 #endif
06294 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
06295 }
06296 else if ( ( proc_bind != proc_bind_false )
06297 && ( proc_bind != proc_bind_disabled )) {
06298 if ( this_thr->th.th_new_place == this_thr->th.th_current_place ) {
06299 KA_TRACE( 100, ( "__kmp_fork_barrier: T#%d already in correct place %d\n",
06300 __kmp_gtid_from_thread( this_thr ), this_thr->th.th_current_place ) );
06301 }
06302 else {
06303 __kmp_affinity_set_place( gtid );
06304 }
06305 }
06306 #endif
06307
06308 KA_TRACE( 10, ( "__kmp_fork_barrier: T#%d(%d:%d) is leaving\n",
06309 gtid, team->t.t_id, tid ));
06310 }
06311
06312
06313
06314
06315
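/* __kmp_launch_thread: main loop of an OpenMP worker.  The thread repeatedly
   waits at the fork barrier for work, invokes the team's microtask via
   t_invoke, and then joins; the loop exits when __kmp_global.g.g_done is set
   during library shutdown. */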
06316 void *
06317 __kmp_launch_thread( kmp_info_t *this_thr )
06318 {
06319 int gtid = this_thr->th.th_info.ds.ds_gtid;
06320
06321 kmp_team_t *(*volatile pteam);
06322
06323 KMP_MB();
06324 KA_TRACE( 10, ("__kmp_launch_thread: T#%d start\n", gtid ) );
06325
06326 if( __kmp_env_consistency_check ) {
06327 this_thr -> th.th_cons = __kmp_allocate_cons_stack( gtid );
06328 }
06329
06330 #if OMPT_SUPPORT
06331 if (ompt_status & ompt_status_track) {
06332 this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
06333 if ((ompt_status == ompt_status_track_callback) &&
06334 ompt_callbacks.ompt_callback(ompt_event_thread_create)) {
06335 ompt_callbacks.ompt_callback(ompt_event_thread_create)();
06336 }
06337 }
06338 #endif
06339
06340
06341 while( ! TCR_4(__kmp_global.g.g_done) ) {
06342 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
06343 KMP_MB();
06344
06345
06346 KA_TRACE( 20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid ));
06347
06348
06349 #if OMPT_SUPPORT
06350 if (ompt_status & ompt_status_track) {
06351 this_thr->th.ompt_thread_info.state = ompt_state_idle;
06352 if ((ompt_status == ompt_status_track_callback) &&
06353 ompt_callbacks.ompt_callback(ompt_event_idle_begin)) {
06354 ompt_callbacks.ompt_callback(ompt_event_idle_begin)();
06355 }
06356 }
06357 #endif
06358
06359
06360 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
06361
06362 #if OMPT_SUPPORT
06363 if (ompt_status & ompt_status_track) {
06364 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
06365 if ((ompt_status == ompt_status_track_callback) &&
06366 ompt_callbacks.ompt_callback(ompt_event_idle_end)) {
06367 ompt_callbacks.ompt_callback(ompt_event_idle_end)();
06368 }
06369 }
06370 #endif
06371
06372 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
06373
06374
06375 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
06376
06377 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
06378 int rc;
06379 KA_TRACE( 20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
06380 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn ));
06381
06382 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
06383 if ( __kmp_inherit_fp_control && (*pteam)->t.t_fp_control_saved ) {
06384 __kmp_clear_x87_fpu_status_word();
06385 __kmp_load_x87_fpu_control_word( &(*pteam)->t.t_x87_fpu_control_word );
06386 __kmp_load_mxcsr( &(*pteam)->t.t_mxcsr );
06387 }
06388 #endif
06389
06390 #if OMPT_SUPPORT
06391 if (ompt_status & ompt_status_track) {
06392 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
06393 }
06394 #endif
06395
06396 rc = (*pteam) -> t.t_invoke( gtid );
06397 KMP_ASSERT( rc );
06398
06399 #if OMPT_SUPPORT
06400 if (ompt_status & ompt_status_track) {
06401
06402 int tid = __kmp_tid_from_gtid(gtid);
06403 (*pteam)->t.t_implicit_task_taskdata[tid].
06404 ompt_task_info.frame.exit_runtime_frame = 0;
06405
06406 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
06407 }
06408 #endif
06409 KMP_MB();
06410 KA_TRACE( 20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
06411 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn ));
06412 }
06413
06414
06415 __kmp_join_barrier( gtid );
06416 }
06417 }
06418 TCR_SYNC_PTR(__kmp_global.g.g_done);
06419
06420 #if OMPT_SUPPORT
06421 if ((ompt_status == ompt_status_track_callback) &&
06422 ompt_callbacks.ompt_callback(ompt_event_thread_exit)) {
06423 ompt_callbacks.ompt_callback(ompt_event_thread_exit)();
06424 }
06425 #endif
06426
06427 #if OMP_30_ENABLED
06428 if ( TCR_PTR( this_thr->th.th_task_team ) != NULL ) {
06429 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
06430 }
06431 #endif
06432
06433
06434 __kmp_common_destroy_gtid( gtid );
06435
06436 KA_TRACE( 10, ("__kmp_launch_thread: T#%d done\n", gtid ) );
06437 KMP_MB();
06438 return this_thr;
06439 }
06440
06441
06442
06443
06444
06445
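/* __kmp_internal_end_dest: thread-specific-data destructor.  It recovers the
   gtid that was stored (biased by +1) in the TLS slot and funnels into
   __kmp_internal_end_thread. */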
06446 void
06447 __kmp_internal_end_dest( void *specific_gtid )
06448 {
06449 #ifdef __INTEL_COMPILER
06450 #pragma warning( push )
06451 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
06452 #endif
06453
06454 int gtid = (kmp_intptr_t)specific_gtid - 1;
06455 #ifdef __INTEL_COMPILER
06456 #pragma warning( pop )
06457 #endif
06458
06459 KA_TRACE( 30, ("__kmp_internal_end_dest: T#%d\n", gtid));
06460
06461
06462
06463
06464
06465
06466
06467
06468
06469
06470
06471
06472
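/* For uber (root) threads, restore the gtid into the thread-specific slot and,
   when available, the TLS variable before shutting down; this appears to guard
   against the gtid having already been cleared by the time the destructor
   runs. */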
06473 if(gtid >= 0 && KMP_UBER_GTID(gtid))
06474 __kmp_gtid_set_specific( gtid );
06475 #ifdef KMP_TDATA_GTID
06476 __kmp_gtid = gtid;
06477 #endif
06478 __kmp_internal_end_thread( gtid );
06479 }
06480
06481 #if KMP_OS_UNIX && GUIDEDLL_EXPORTS
06482
06483
06484
06485
06486
06487 __attribute__(( destructor ))
06488 void
06489 __kmp_internal_end_dtor( void )
06490 {
06491 __kmp_internal_end_atexit();
06492 }
06493
06494 void
06495 __kmp_internal_end_fini( void )
06496 {
06497 __kmp_internal_end_atexit();
06498 }
06499
06500 #endif
06501
06502
06503 void
06504 __kmp_internal_end_atexit( void )
06505 {
06506 KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
06507
06508
06509
06510
06511
06512
06513
06514
06515
06516
06517
06518
06519
06520
06521
06522
06523
06524
06525
06526
06527
06528 __kmp_internal_end_library( -1 );
06529 #if KMP_OS_WINDOWS
06530 __kmp_close_console();
06531 #endif
06532 }
06533
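/* __kmp_reap_thread: for non-root threads, release the worker from its
   fork-barrier sleep (when blocktime is finite), join the underlying OS
   thread, and update the pool accounting; in all cases, free the per-thread
   resources and clear the __kmp_threads[] entry. */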
06534 static void
06535 __kmp_reap_thread(
06536 kmp_info_t * thread,
06537 int is_root
06538 ) {
06539
06540
06541
06542 int gtid;
06543
06544 KMP_DEBUG_ASSERT( thread != NULL );
06545
06546 gtid = thread->th.th_info.ds.ds_gtid;
06547
06548 if ( ! is_root ) {
06549
06550 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
06551
06552 KA_TRACE( 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
06553
06554 __kmp_release(
06555 thread,
06556 &thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go,
06557 kmp_release_fence
06558 );
06559 };
06560
06561
06562
06563 __kmp_reap_worker( thread );
06564
06565
06566
06567
06568
06569
06570
06571
06572
06573
06574
06575
06576
06577
06578 if ( thread->th.th_active_in_pool ) {
06579 thread->th.th_active_in_pool = FALSE;
06580 KMP_TEST_THEN_DEC32(
06581 (kmp_int32 *) &__kmp_thread_pool_active_nth );
06582 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
06583 }
06584
06585
06586 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
06587 --__kmp_thread_pool_nth;
06588 };
06589
06590
06591 #if USE_FAST_MEMORY
06592 __kmp_free_fast_memory( thread );
06593 #endif
06594
06595 __kmp_suspend_uninitialize_thread( thread );
06596
06597 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
06598 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
06599
06600 -- __kmp_all_nth;
06601
06602
06603 #ifdef KMP_ADJUST_BLOCKTIME
06604
06605
06606 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
06607 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
06608 if ( __kmp_nth <= __kmp_avail_proc ) {
06609 __kmp_zero_bt = FALSE;
06610 }
06611 }
06612 #endif
06613
06614
06615 if( __kmp_env_consistency_check ) {
06616 if ( thread->th.th_cons ) {
06617 __kmp_free_cons_stack( thread->th.th_cons );
06618 thread->th.th_cons = NULL;
06619 };
06620 }
06621
06622 if ( thread->th.th_pri_common != NULL ) {
06623 __kmp_free( thread->th.th_pri_common );
06624 thread->th.th_pri_common = NULL;
06625 };
06626
06627 #if KMP_USE_BGET
06628 if ( thread->th.th_local.bget_data != NULL ) {
06629 __kmp_finalize_bget( thread );
06630 };
06631 #endif
06632
06633 #if (KMP_OS_WINDOWS || KMP_OS_LINUX)
06634 if ( thread->th.th_affin_mask != NULL ) {
06635 KMP_CPU_FREE( thread->th.th_affin_mask );
06636 thread->th.th_affin_mask = NULL;
06637 };
06638 #endif
06639
06640 __kmp_reap_team( thread->th.th_serial_team );
06641 thread->th.th_serial_team = NULL;
06642 __kmp_free( thread );
06643
06644 KMP_MB();
06645
06646 }
06647
06648 static void
06649 __kmp_internal_end(void)
06650 {
06651 int i;
06652
06653
06654 __kmp_unregister_library();
06655
06656 #if KMP_OS_WINDOWS
06657
06658
06659
06660
06661
06662 __kmp_reclaim_dead_roots();
06663 #endif
06664
06665 for( i=0 ; i<__kmp_threads_capacity ; i++ )
06666 if( __kmp_root[i] )
06667 if( __kmp_root[i] -> r.r_active )
06668 break;
06669 KMP_MB();
06670 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
06671
06672 if ( i < __kmp_threads_capacity ) {
06673
06674 KMP_MB();
06675
06676
06677
06678
06679
06680
06681
06682
06683
06684
06685
06686
06687
06688
06689
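/* At least one root is still active: a full teardown is not possible yet, so
   only the monitor thread is reaped on this path; worker threads and teams
   are left untouched. */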
06690 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
06691 if ( TCR_4( __kmp_init_monitor ) ) {
06692 __kmp_reap_monitor( & __kmp_monitor );
06693 TCW_4( __kmp_init_monitor, 0 );
06694 }
06695 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
06696 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
06697 } else {
06698
06699 #ifdef KMP_DEBUG
06700
06701 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
06702 if( __kmp_root[i] ) {
06703 KMP_ASSERT( ! KMP_UBER_GTID( i ) );
06704 KMP_ASSERT( ! __kmp_root[i] -> r.r_active );
06705 }
06706 }
06707 #endif
06708
06709 KMP_MB();
06710
06711
06712
06713 while ( __kmp_thread_pool != NULL ) {
06714
06715 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
06716 __kmp_thread_pool = thread->th.th_next_pool;
06717
06718 thread->th.th_next_pool = NULL;
06719 thread->th.th_in_pool = FALSE;
06720 __kmp_reap_thread( thread, 0 );
06721 };
06722 __kmp_thread_pool_insert_pt = NULL;
06723
06724
06725 while ( __kmp_team_pool != NULL ) {
06726
06727 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
06728 __kmp_team_pool = team->t.t_next_pool;
06729
06730 team->t.t_next_pool = NULL;
06731 __kmp_reap_team( team );
06732 };
06733
06734 #if OMP_30_ENABLED
06735 __kmp_reap_task_teams( );
06736 #endif
06737
06738 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
06739
06740
06741 }
06742
06743
06744
06745 TCW_SYNC_4(__kmp_init_common, FALSE);
06746
06747 KA_TRACE( 10, ("__kmp_internal_end: all workers reaped\n" ) );
06748 KMP_MB();
06749
06750
06751
06752
06753
06754
06755
06756 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
06757 if ( TCR_4( __kmp_init_monitor ) ) {
06758 __kmp_reap_monitor( & __kmp_monitor );
06759 TCW_4( __kmp_init_monitor, 0 );
06760 }
06761 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
06762 KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
06763
06764 }
06765 TCW_4(__kmp_init_gtid, FALSE);
06766 KMP_MB();
06767
06768
06769 __kmp_cleanup();
06770 #if OMPT_SUPPORT
06771 ompt_fini();
06772 #endif
06773 }
06774
06775 void
06776 __kmp_internal_end_library( int gtid_req )
06777 {
06778 int i;
06779
06780
06781
06782
06783
06784
06785
06786 if( __kmp_global.g.g_abort ) {
06787 KA_TRACE( 11, ("__kmp_internal_end_library: abort, exiting\n" ));
06788
06789 return;
06790 }
06791 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
06792 KA_TRACE( 10, ("__kmp_internal_end_library: already finished\n" ));
06793 return;
06794 }
06795
06796
06797 KMP_MB();
06798
06799
06800 {
06801 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
06802 KA_TRACE( 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
06803 if( gtid == KMP_GTID_SHUTDOWN ) {
06804 KA_TRACE( 10, ("__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
06805 return;
06806 } else if( gtid == KMP_GTID_MONITOR ) {
06807 KA_TRACE( 10, ("__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
06808 return;
06809 } else if( gtid == KMP_GTID_DNE ) {
06810 KA_TRACE( 10, ("__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
06811
06812 } else if( KMP_UBER_GTID( gtid )) {
06813
06814 if( __kmp_root[gtid] -> r.r_active ) {
06815 __kmp_global.g.g_abort = -1;
06816 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
06817 KA_TRACE( 10, ("__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
06818 return;
06819 } else {
06820 KA_TRACE( 10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
06821 __kmp_unregister_root_current_thread( gtid );
06822 }
06823 } else {
06824
06825
06826
06827
06828 #ifdef DUMP_DEBUG_ON_EXIT
06829 if ( __kmp_debug_buf )
06830 __kmp_dump_debug_buffer( );
06831 #endif
06832 return;
06833 }
06834 }
06835
06836 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
06837
06838
06839 if( __kmp_global.g.g_abort ) {
06840 KA_TRACE( 10, ("__kmp_internal_end_library: abort, exiting\n" ));
06841
06842 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
06843 return;
06844 }
06845 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
06846 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
06847 return;
06848 }
06849
06850
06851
06852
06853
06854
06855
06856 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
06857
06858
06859 __kmp_internal_end();
06860
06861 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
06862 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
06863
06864 KA_TRACE( 10, ("__kmp_internal_end_library: exit\n" ) );
06865
06866 #ifdef DUMP_DEBUG_ON_EXIT
06867 if ( __kmp_debug_buf )
06868 __kmp_dump_debug_buffer();
06869 #endif
06870
06871 #if KMP_OS_WINDOWS
06872 __kmp_close_console();
06873 #endif
06874
06875 __kmp_fini_allocator();
06876
06877 }
06878
06879 void
06880 __kmp_internal_end_thread( int gtid_req )
06881 {
06882 int i;
06883
06884
06885
06886
06887
06888
06889
06890 if( __kmp_global.g.g_abort ) {
06891 KA_TRACE( 11, ("__kmp_internal_end_thread: abort, exiting\n" ));
06892
06893 return;
06894 }
06895 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
06896 KA_TRACE( 10, ("__kmp_internal_end_thread: already finished\n" ));
06897 return;
06898 }
06899
06900 KMP_MB();
06901
06902
06903 {
06904 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
06905 KA_TRACE( 10, ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
06906 if( gtid == KMP_GTID_SHUTDOWN ) {
06907 KA_TRACE( 10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
06908 return;
06909 } else if( gtid == KMP_GTID_MONITOR ) {
06910 KA_TRACE( 10, ("__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
06911 return;
06912 } else if( gtid == KMP_GTID_DNE ) {
06913 KA_TRACE( 10, ("__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
06914 return;
06915
06916 } else if( KMP_UBER_GTID( gtid )) {
06917
06918 if( __kmp_root[gtid] -> r.r_active ) {
06919 __kmp_global.g.g_abort = -1;
06920 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
06921 KA_TRACE( 10, ("__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
06922 return;
06923 } else {
06924 KA_TRACE( 10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
06925 __kmp_unregister_root_current_thread( gtid );
06926 }
06927 } else {
06928
06929 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
06930
06931 #if OMP_30_ENABLED
06932 if ( gtid >= 0 ) {
06933 kmp_info_t *this_thr = __kmp_threads[ gtid ];
06934 if (TCR_PTR(this_thr->th.th_task_team) != NULL) {
06935 __kmp_unref_task_team(this_thr->th.th_task_team, this_thr);
06936 }
06937 }
06938 #endif
06939
06940 KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
06941 return;
06942 }
06943 }
06944 #if defined GUIDEDLL_EXPORTS
06945
06946
06947
06948
06949
06950
06951
06952 KA_TRACE( 10, ("__kmp_internal_end_thread: exiting\n") );
06953 return;
06954 #endif
06955
06956 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
06957
06958
06959 if( __kmp_global.g.g_abort ) {
06960 KA_TRACE( 10, ("__kmp_internal_end_thread: abort, exiting\n" ));
06961
06962 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
06963 return;
06964 }
06965 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
06966 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
06967 return;
06968 }
06969
06970
06971
06972
06973
06974
06975
06976
06977
06978 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
06979
06980 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
06981 if ( KMP_UBER_GTID( i ) ) {
06982 KA_TRACE( 10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
06983 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
06984 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
06985 return;
06986 };
06987 }
06988
06989
06990
06991 __kmp_internal_end();
06992
06993 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
06994 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
06995
06996 KA_TRACE( 10, ("__kmp_internal_end_thread: exit\n" ) );
06997
06998 #ifdef DUMP_DEBUG_ON_EXIT
06999 if ( __kmp_debug_buf )
07000 __kmp_dump_debug_buffer();
07001 #endif
07002 }
07003
07004
07005
07006
07007 static long __kmp_registration_flag = 0;
07008
07009 static char * __kmp_registration_str = NULL;
07010
07011
07012
07013 static inline
07014 char *
07015 __kmp_reg_status_name() {
07016
07017
07018
07019
07020
07021 return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
07022 }
07023
07024
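/* Library registration: to detect two copies of the runtime loaded into one
   process, startup publishes an environment variable named
   __KMP_REGISTERED_LIB_<pid> whose value encodes the address of
   __kmp_registration_flag, the flag's value, and the library file name, in
   the "%p-%lx-%s" format built below.  A purely illustrative value might be:

       __KMP_REGISTERED_LIB_12345=0x7f3a2c001234-cafe5678-libiomp5.so

   (the pid, pointer, flag value, and file name above are hypothetical, and
   the exact rendering of %p is platform dependent).  A second runtime that
   finds a live registration treats it as a duplicate-library condition unless
   KMP_DUPLICATE_LIB_OK is set. */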
07025 void
07026 __kmp_register_library_startup(
07027 void
07028 ) {
07029
07030 char * name = __kmp_reg_status_name();
07031 int done = 0;
07032 union {
07033 double dtime;
07034 long ltime;
07035 } time;
07036 #if KMP_OS_WINDOWS
07037 __kmp_initialize_system_tick();
07038 #endif
07039 __kmp_read_system_time( & time.dtime );
07040 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
07041 __kmp_registration_str =
07042 __kmp_str_format(
07043 "%p-%lx-%s",
07044 & __kmp_registration_flag,
07045 __kmp_registration_flag,
07046 KMP_LIBRARY_FILE
07047 );
07048
07049 KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
07050
07051 while ( ! done ) {
07052
07053 char * value = NULL;
07054
07055
07056 __kmp_env_set( name, __kmp_registration_str, 0 );
07057
07058 value = __kmp_env_get( name );
07059 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
07060
07061 done = 1;
07062
07063 } else {
07064
07065
07066
07067 int neighbor = 0;
07068 char * tail = value;
07069 char * flag_addr_str = NULL;
07070 char * flag_val_str = NULL;
07071 char const * file_name = NULL;
07072 __kmp_str_split( tail, '-', & flag_addr_str, & tail );
07073 __kmp_str_split( tail, '-', & flag_val_str, & tail );
07074 file_name = tail;
07075 if ( tail != NULL ) {
07076 long * flag_addr = 0;
07077 long flag_val = 0;
07078 sscanf( flag_addr_str, "%p", & flag_addr );
07079 sscanf( flag_val_str, "%lx", & flag_val );
07080 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
07081
07082
07083
07084 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
07085 neighbor = 1;
07086 } else {
07087
07088 neighbor = 2;
07089 };
07090 };
07091 };
07092 switch ( neighbor ) {
07093 case 0 :
07094
07095
07096
07097 file_name = "unknown library";
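/* No break: an unparseable value falls through and is treated like a live
   neighbor, so the duplicate-library check below still applies. */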
07098
07099 case 1 : {
07100
07101 char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
07102 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
07103
07104 __kmp_msg(
07105 kmp_ms_fatal,
07106 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
07107 KMP_HNT( DuplicateLibrary ),
07108 __kmp_msg_null
07109 );
07110 };
07111 KMP_INTERNAL_FREE( duplicate_ok );
07112 __kmp_duplicate_library_ok = 1;
07113 done = 1;
07114 } break;
07115 case 2 : {
07116
07117 __kmp_env_unset( name );
07118 } break;
07119 default : {
07120 KMP_DEBUG_ASSERT( 0 );
07121 } break;
07122 };
07123
07124 };
07125 KMP_INTERNAL_FREE( (void *) value );
07126
07127 };
07128 KMP_INTERNAL_FREE( (void *) name );
07129
07130 }
07131
07132
07133 void
07134 __kmp_unregister_library( void ) {
07135
07136 char * name = __kmp_reg_status_name();
07137 char * value = __kmp_env_get( name );
07138
07139 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
07140 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
07141 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
07142
07143 __kmp_env_unset( name );
07144 };
07145
07146 KMP_INTERNAL_FREE( __kmp_registration_str );
07147 KMP_INTERNAL_FREE( value );
07148 KMP_INTERNAL_FREE( name );
07149
07150 __kmp_registration_flag = 0;
07151 __kmp_registration_str = NULL;
07152
07153 }
07154
07155
07156
07157
07158
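/* __kmp_do_serial_initialize: one-time process-wide initialization performed
   under __kmp_initz_lock -- type-size sanity checks, allocator and lock setup,
   runtime and environment initialization, allocation of the __kmp_threads /
   __kmp_root tables, and registration of the initial (uber) root thread. */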
07159 static void
07160 __kmp_do_serial_initialize( void )
07161 {
07162 int i, gtid;
07163 int size;
07164
07165 KA_TRACE( 10, ("__kmp_do_serial_initialize: enter\n" ) );
07166
07167 KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
07168 KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
07169 KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
07170 KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
07171 KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
07172
07173 __kmp_validate_locks();
07174
07175
07176 __kmp_init_allocator();
07177
07178
07179
07180
07181
07182 __kmp_register_library_startup( );
07183
07184
07185 if( TCR_4(__kmp_global.g.g_done) ) {
07186 KA_TRACE( 10, ("__kmp_do_serial_initialize: reinitialization of library\n" ) );
07187 }
07188
07189 __kmp_global.g.g_abort = 0;
07190 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
07191
07192
07193 __kmp_init_lock( & __kmp_global_lock );
07194 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
07195 __kmp_init_lock( & __kmp_debug_lock );
07196 __kmp_init_atomic_lock( & __kmp_atomic_lock );
07197 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
07198 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
07199 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
07200 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
07201 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
07202 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
07203 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
07204 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
07205 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
07206 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
07207 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
07208 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
07209 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
07210 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
07211 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
07212 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
07213
07214
07215
07216 __kmp_runtime_initialize();
07217
07218
07219 #ifdef KMP_DEBUG
07220 kmp_diag = 0;
07221 #endif
07222 __kmp_abort_delay = 0;
07223
07224
07225
07226 __kmp_dflt_team_nth_ub = __kmp_xproc;
07227 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
07228 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
07229 }
07230 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
07231 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
07232 }
07233 __kmp_max_nth = __kmp_sys_max_nth;
07234 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
07235
07236
07237 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
07238 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
07239 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
07240
07241 __kmp_library = library_throughput;
07242
07243 __kmp_static = kmp_sch_static_balanced;
07244
07245
07246 #if OMP_30_ENABLED
07247
07248 #endif // OMP_30_ENABLED
07249
07250
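/* Barrier defaults: the "branch bits" presumably give the log2 branching
   factor used by the tree/hyper barrier algorithms (a value of 0 selects the
   linear barrier, as checked in the gather/release dispatch above), and the
   pattern chooses the gather/release algorithm.  The reduction barrier, when
   enabled, gets its own tuned settings below. */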
07251 #if KMP_FAST_REDUCTION_BARRIER
07252 #define kmp_reduction_barrier_gather_bb ((int)1)
07253 #define kmp_reduction_barrier_release_bb ((int)1)
07254 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
07255 #define kmp_reduction_barrier_release_pat bp_hyper_bar
07256 #endif // KMP_FAST_REDUCTION_BARRIER
07257 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
07258 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
07259 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
07260 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
07261 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
07262 #if KMP_FAST_REDUCTION_BARRIER
07263 if( i == bs_reduction_barrier ) {
07264 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
07265 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
07266 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
07267 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
07268 }
07269 #endif // KMP_FAST_REDUCTION_BARRIER
07270 }
07271 #if KMP_FAST_REDUCTION_BARRIER
07272 #undef kmp_reduction_barrier_release_pat
07273 #undef kmp_reduction_barrier_gather_pat
07274 #undef kmp_reduction_barrier_release_bb
07275 #undef kmp_reduction_barrier_gather_bb
07276 #endif // KMP_FAST_REDUCTION_BARRIER
07277 #if KMP_MIC
07278
07279 __kmp_barrier_gather_branch_bits [ 0 ] = 3;
07280 __kmp_barrier_release_branch_bits[ 1 ] = 1;
07281 #endif
07282
07283
07284 #ifdef KMP_DEBUG
07285 __kmp_env_checks = TRUE;
07286 #else
07287 __kmp_env_checks = FALSE;
07288 #endif
07289
07290
07291 __kmp_foreign_tp = TRUE;
07292
07293 __kmp_global.g.g_dynamic = FALSE;
07294 __kmp_global.g.g_dynamic_mode = dynamic_default;
07295
07296 __kmp_env_initialize( NULL );
07297
07298 #ifdef KMP_DEBUG
07299 char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
07300 if ( __kmp_str_match_true( val ) ) {
07301 kmp_str_buf_t buffer;
07302 __kmp_str_buf_init( & buffer );
07303 __kmp_i18n_dump_catalog( buffer );
07304 __kmp_printf( "%s", buffer.str );
07305 __kmp_str_buf_free( & buffer );
07306 };
07307 __kmp_env_free( & val );
07308 #endif
07309
07310
07311 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
07312
07313
07314
07315
07316
07317
07318 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
07319 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
07320 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
07321 __kmp_thread_pool = NULL;
07322 __kmp_thread_pool_insert_pt = NULL;
07323 __kmp_team_pool = NULL;
07324
07325
07326
07327
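/* __kmp_threads and __kmp_root are carved out of a single allocation:
   __kmp_root points just past the __kmp_threads portion of the block. */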
07328 size = (sizeof(kmp_info_t*) + sizeof(kmp_root_t*))*__kmp_threads_capacity + CACHE_LINE;
07329 __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
07330 __kmp_root = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
07331
07332
07333 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 );
07334 KMP_DEBUG_ASSERT( __kmp_nth == 0 );
07335 __kmp_all_nth = 0;
07336 __kmp_nth = 0;
07337
07338
07339 gtid = __kmp_register_root( TRUE );
07340 KA_TRACE( 10, ("__kmp_do_serial_initialize T#%d\n", gtid ));
07341 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
07342 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
07343
07344 KMP_MB();
07345
07346 __kmp_common_initialize();
07347
07348 #if KMP_OS_UNIX
07349
07350 __kmp_register_atfork();
07351 #endif
07352
07353 #if ! defined GUIDEDLL_EXPORTS
07354 {
07355
07356
07357
07358 int rc = atexit( __kmp_internal_end_atexit );
07359 if ( rc != 0 ) {
07360 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
07361 };
07362 }
07363 #endif
07364
07365 #if KMP_HANDLE_SIGNALS
07366 #if KMP_OS_UNIX
07367
07368
07369
07370
07371
07372 __kmp_install_signals( FALSE );
07373 #endif
07374 #if KMP_OS_WINDOWS
07375 __kmp_install_signals( TRUE );
07376 #endif
07377 #endif
07378
07379
07380 __kmp_init_counter ++;
07381
07382 __kmp_init_serial = TRUE;
07383
07384 if (__kmp_settings) {
07385 __kmp_env_print();
07386 }
07387
07388 KMP_MB();
07389
07390 KA_TRACE( 10, ("__kmp_do_serial_initialize: exit\n" ) );
07391 }
07392
07393 void
07394 __kmp_serial_initialize( void )
07395 {
07396 if ( __kmp_init_serial ) {
07397 return;
07398 }
07399 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
07400 if ( __kmp_init_serial ) {
07401 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
07402 return;
07403 }
07404 __kmp_do_serial_initialize();
07405 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
07406 }
07407
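/* __kmp_do_middle_initialize: second-stage initialization -- sets up affinity
   (where supported), determines __kmp_avail_proc, and derives the default
   team size (__kmp_dflt_team_nth) when the user has not specified one. */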
07408 static void
07409 __kmp_do_middle_initialize( void )
07410 {
07411 int i, j;
07412 int prev_dflt_team_nth;
07413
07414 if( !__kmp_init_serial ) {
07415 __kmp_do_serial_initialize();
07416 }
07417
07418 KA_TRACE( 10, ("__kmp_do_middle_initialize: enter\n" ) );
07419
07420
07421
07422
07423
07424 prev_dflt_team_nth = __kmp_dflt_team_nth;
07425
07426 #if KMP_OS_WINDOWS || KMP_OS_LINUX
07427
07428
07429
07430
07431 __kmp_affinity_initialize();
07432
07433
07434
07435
07436
07437 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
07438 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
07439 __kmp_affinity_set_init_mask( i, TRUE );
07440 }
07441 }
07442 #endif
07443
07444 KMP_ASSERT( __kmp_xproc > 0 );
07445 if ( __kmp_avail_proc == 0 ) {
07446 __kmp_avail_proc = __kmp_xproc;
07447 }
07448
07449
07450 j = 0;
07451 while ( __kmp_nested_nth.used && ! __kmp_nested_nth.nth[ j ] ) {
07452 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
07453 j++;
07454 }
07455
07456 if ( __kmp_dflt_team_nth == 0 ) {
07457 #ifdef KMP_DFLT_NTH_CORES
07458
07459
07460
07461 __kmp_dflt_team_nth = __kmp_ncores;
07462 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
07463 __kmp_dflt_team_nth ) );
07464 #else
07465
07466
07467
07468 __kmp_dflt_team_nth = __kmp_avail_proc;
07469 KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
07470 __kmp_dflt_team_nth ) );
07471 #endif
07472 }
07473
07474 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
07475 __kmp_dflt_team_nth = KMP_MIN_NTH;
07476 }
07477 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
07478 __kmp_dflt_team_nth = __kmp_sys_max_nth;
07479 }
07480
07481
07482
07483
07484
07485 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
07486
07487 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
07488
07489
07490
07491
07492
07493
07494 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
07495 kmp_info_t *thread = __kmp_threads[ i ];
07496 if ( thread == NULL ) continue;
07497 #if OMP_30_ENABLED
07498 if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
07499 #else
07500 if ( thread->th.th_team->t.t_set_nproc[ thread->th.th_info.ds.ds_tid ] != 0 ) continue;
07501 #endif
07502
07503 set__nproc_p( __kmp_threads[ i ], __kmp_dflt_team_nth );
07504 }
07505 }
07506 KA_TRACE( 20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
07507 __kmp_dflt_team_nth) );
07508
07509 #ifdef KMP_ADJUST_BLOCKTIME
07510
07511
07512 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
07513 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
07514 if ( __kmp_nth > __kmp_avail_proc ) {
07515 __kmp_zero_bt = TRUE;
07516 }
07517 }
07518 #endif
07519
07520
07521 TCW_SYNC_4(__kmp_init_middle, TRUE);
07522
07523 KA_TRACE( 10, ("__kmp_do_middle_initialize: exit\n" ) );
07524 }
07525
07526 void
07527 __kmp_middle_initialize( void )
07528 {
07529 if ( __kmp_init_middle ) {
07530 return;
07531 }
07532 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
07533 if ( __kmp_init_middle ) {
07534 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
07535 return;
07536 }
07537 __kmp_do_middle_initialize();
07538 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
07539 }
07540
07541 void
07542 __kmp_parallel_initialize( void )
07543 {
07544 int gtid = __kmp_entry_gtid();
07545
07546
07547 if( TCR_4(__kmp_init_parallel) ) return;
07548 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
07549 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
07550
07551
07552 if( TCR_4(__kmp_global.g.g_done) ) {
07553 KA_TRACE( 10, ("__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
07554 __kmp_infinite_loop();
07555 }
07556
07557
07558
07559
07560 if( !__kmp_init_middle ) {
07561 __kmp_do_middle_initialize();
07562 }
07563
07564
07565 KA_TRACE( 10, ("__kmp_parallel_initialize: enter\n" ) );
07566 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
07567
07568 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
07569
07570
07571
07572
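/* Capture the initial x87 control word and MXCSR so that, when
   __kmp_inherit_fp_control is in effect, worker threads can be given the same
   floating-point environment as the initial thread. */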
07573 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
07574 __kmp_store_mxcsr( &__kmp_init_mxcsr );
07575 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
07576 #endif
07577
07578 #if KMP_OS_UNIX
07579 # if KMP_HANDLE_SIGNALS
07580
07581 __kmp_install_signals( TRUE );
07582 # endif
07583 #endif
07584
07585 __kmp_suspend_initialize();
07586
07587 # if defined(USE_LOAD_BALANCE)
07588 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
07589 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
07590 }
07591 #else
07592 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
07593 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
07594 }
07595 #endif
07596
07597 if ( __kmp_version ) {
07598 __kmp_print_version_2();
07599 }
07600
07601
07602 TCW_SYNC_4(__kmp_init_parallel, TRUE);
07603
07604 KMP_MB();
07605 KA_TRACE( 10, ("__kmp_parallel_initialize: exit\n" ) );
07606
07607 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
07608 }
07609
07610
07611
07612
07613 void
07614 __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
07615 kmp_team_t *team )
07616 {
07617 kmp_disp_t *dispatch;
07618
07619 KMP_MB();
07620
07621
07622 this_thr->th.th_local.this_construct = 0;
07623 this_thr->th.th_local.last_construct = 0;
07624 #if KMP_CACHE_MANAGE
07625 KMP_CACHE_PREFETCH( &this_thr -> th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
07626 #endif
07627 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
07628 KMP_DEBUG_ASSERT( dispatch );
07629 KMP_DEBUG_ASSERT( team -> t.t_dispatch );
07630
07631
07632 dispatch -> th_disp_index = 0;
07633
07634 if( __kmp_env_consistency_check )
07635 __kmp_push_parallel( gtid, team->t.t_ident );
07636
07637 KMP_MB();
07638 }
07639
07640 void
07641 __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
07642 kmp_team_t *team )
07643 {
07644 if( __kmp_env_consistency_check )
07645 __kmp_pop_parallel( gtid, team->t.t_ident );
07646 }
07647
07648 int
07649 __kmp_invoke_task_func( int gtid )
07650 {
07651 int rc;
07652 int tid = __kmp_tid_from_gtid( gtid );
07653 kmp_info_t *this_thr = __kmp_threads[ gtid ];
07654 kmp_team_t *team = this_thr -> th.th_team;
07655
07656
07657 #if OMPT_SUPPORT
07658 void **exit_runtime_p =
07659 &(team->t.t_implicit_task_taskdata[tid].
07660 ompt_task_info.frame.exit_runtime_frame);
07661 #else
07662 void *dummy;
07663 void **exit_runtime_p = &dummy;
07664 #endif
07665
07666 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
07667 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
07668 gtid, tid, (int) team->t.t_argc,
07669 (void **) team->t.t_argv, exit_runtime_p );
07670
07671 #if OMPT_SUPPORT
07672 team->t.t_implicit_task_taskdata[tid].
07673 ompt_task_info.frame.exit_runtime_frame = 0;
07674 #endif
07675
07676 __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
07677
07678 return rc;
07679 }
07680
07681
07682
07683
07684
07685
07686 void
07687 __kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
07688 {
07689 kmp_info_t *thr = __kmp_threads[gtid];
07690
07691 if( num_threads > 0 )
07692 thr -> th.th_set_nproc = num_threads;
07693 }
07694
07695 #if OMP_40_ENABLED
07696
07697
07698
07699
07700 void
07701 __kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
07702 {
07703 kmp_info_t *thr = __kmp_threads[gtid];
07704 thr -> th.th_set_proc_bind = proc_bind;
07705 }
07706
07707 #endif
07708
07709
07710
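/* __kmp_internal_fork: called by the master to start a parallel region --
   resets the team's construct counters and dispatch buffers, then releases
   the workers through the fork barrier. */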
07711 void
07712 __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
07713 {
07714 kmp_info_t *this_thr = __kmp_threads[gtid];
07715
07716 #ifdef KMP_DEBUG
07717 int f;
07718 #endif
07719
07720 KMP_DEBUG_ASSERT( team );
07721 KMP_DEBUG_ASSERT( this_thr -> th.th_team == team );
07722 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
07723 KMP_MB();
07724
07725 team -> t.t_construct = 0;
07726 team -> t.t_ordered.dt.t_value = 0;
07727
07728
07729 KMP_DEBUG_ASSERT( team -> t.t_disp_buffer );
07730 if ( team->t.t_max_nproc > 1 ) {
07731 int i;
07732 for (i = 0; i < KMP_MAX_DISP_BUF; ++i)
07733 team -> t.t_disp_buffer[ i ].buffer_index = i;
07734 } else {
07735 team -> t.t_disp_buffer[ 0 ].buffer_index = 0;
07736 }
07737
07738 KMP_MB();
07739 KMP_ASSERT( this_thr -> th.th_team == team );
07740
07741 #ifdef KMP_DEBUG
07742 for( f=0 ; f<team->t.t_nproc ; f++ ) {
07743 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
07744 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
07745 }
07746 #endif
07747
07748
07749 __kmp_fork_barrier( gtid, 0 );
07750 }
07751
07752
07753 void
07754 __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
07755 {
07756 kmp_info_t *this_thr = __kmp_threads[gtid];
07757
07758 KMP_DEBUG_ASSERT( team );
07759 KMP_DEBUG_ASSERT( this_thr -> th.th_team == team );
07760 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
07761 KMP_MB();
07762
07763
07764
07765 #ifdef KMP_DEBUG
07766 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
07767 __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]);
07768 __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
07769 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
07770 __kmp_print_structure();
07771 }
07772 KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
07773 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
07774 #endif
07775
07776 __kmp_join_barrier( gtid );
07777
07778 KMP_MB();
07779 KMP_ASSERT( this_thr -> th.th_team == team );
07780 }
07781
07782
07783
07784
07785
07786 #ifdef USE_LOAD_BALANCE
07787
07788
07789
07790
07791
07792 static int
07793 __kmp_active_hot_team_nproc( kmp_root_t *root )
07794 {
07795 int i;
07796 int retval;
07797 kmp_team_t *hot_team;
07798
07799 if ( root->r.r_active ) {
07800 return 0;
07801 }
07802 hot_team = root->r.r_hot_team;
07803 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
07804 return hot_team->t.t_nproc - 1;
07805 }
07806
07807
07808
07809
07810 retval = 0;
07811 for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
07812 if ( hot_team->t.t_threads[i]->th.th_active ) {
07813 retval++;
07814 }
07815 }
07816 return retval;
07817 }
07818
07819
07820
07821
07822
07823 static int
07824 __kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
07825 {
07826 int retval;
07827 int pool_active;
07828 int hot_team_active;
07829 int team_curr_active;
07830 int system_active;
07831
07832 KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
07833 root, set_nproc ) );
07834 KMP_DEBUG_ASSERT( root );
07835 #if OMP_30_ENABLED
07836 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
07837 #else
07838 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_set_dynamic[0] == TRUE );
07839 #endif
07840 KMP_DEBUG_ASSERT( set_nproc > 1 );
07841
07842 if ( set_nproc == 1) {
07843 KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) );
07844 return 1;
07845 }
07846
07847
07848
07849
07850
07851
07852
07853
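/* Estimate how many threads can run without oversubscribing.  The intent
   appears to be: available procs minus the threads busy elsewhere, where
   "busy elsewhere" is system_active less this team's own contribution
   (pool + hot team + master).  As a hypothetical worked example: with
   __kmp_avail_proc = 8, system_active = 6 and team_curr_active = 3, the code
   below yields 8 - 6 + 3 = 5 threads, then clamps to set_nproc and
   KMP_MIN_NTH.  The authoritative logic is the code itself. */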
07854 pool_active = TCR_4(__kmp_thread_pool_active_nth);
07855 hot_team_active = __kmp_active_hot_team_nproc( root );
07856 team_curr_active = pool_active + hot_team_active + 1;
07857
07858
07859
07860
07861 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
07862 KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
07863 system_active, pool_active, hot_team_active ) );
07864
07865 if ( system_active < 0 ) {
07866
07867
07868
07869
07870
07871
07872 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
07873 KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );
07874
07875
07876
07877
07878 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
07879 : root->r.r_hot_team->t.t_nproc);
07880 if ( retval > set_nproc ) {
07881 retval = set_nproc;
07882 }
07883 if ( retval < KMP_MIN_NTH ) {
07884 retval = KMP_MIN_NTH;
07885 }
07886
07887 KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
07888 return retval;
07889 }
07890
07891
07892
07893
07894
07895
07896
07897 if ( system_active < team_curr_active ) {
07898 system_active = team_curr_active;
07899 }
07900 retval = __kmp_avail_proc - system_active + team_curr_active;
07901 if ( retval > set_nproc ) {
07902 retval = set_nproc;
07903 }
07904 if ( retval < KMP_MIN_NTH ) {
07905 retval = KMP_MIN_NTH;
07906 }
07907
07908 KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
07909 return retval;
07910 }
07911
07912 #endif
07913
07914
07915
07916
07917
07918
07919 void
07920 __kmp_cleanup( void )
07921 {
07922 int f;
07923
07924 KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) );
07925
07926 if (TCR_4(__kmp_init_parallel)) {
07927 #if KMP_HANDLE_SIGNALS
07928 __kmp_remove_signals();
07929 #endif
07930 TCW_4(__kmp_init_parallel, FALSE);
07931 }
07932
07933 if (TCR_4(__kmp_init_middle)) {
07934 #if KMP_OS_WINDOWS || KMP_OS_LINUX
07935 __kmp_affinity_uninitialize();
07936 #endif
07937 TCW_4(__kmp_init_middle, FALSE);
07938 }
07939
07940 KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) );
07941
07942 if (__kmp_init_serial) {
07943
07944 __kmp_runtime_destroy();
07945
07946 __kmp_init_serial = FALSE;
07947 }
07948
07949 for ( f = 0; f < __kmp_threads_capacity; f++ ) {
07950 if ( __kmp_root[ f ] != NULL ) {
07951 __kmp_free( __kmp_root[ f ] );
07952 __kmp_root[ f ] = NULL;
07953 }
07954 }
07955 __kmp_free( __kmp_threads );
07956
07957
07958 __kmp_threads = NULL;
07959 __kmp_root = NULL;
07960 __kmp_threads_capacity = 0;
07961
07962 __kmp_cleanup_user_locks();
07963
07964 #if KMP_OS_LINUX || KMP_OS_WINDOWS
07965 KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
07966 __kmp_cpuinfo_file = NULL;
07967 #endif
07968
07969 KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
07970 __kmp_nested_nth.nth = NULL;
07971 __kmp_nested_nth.size = 0;
07972 __kmp_nested_nth.used = 0;
07973
07974 __kmp_i18n_catclose();
07975
07976 KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );
07977 }
07978
07979
07980
07981
07982 int
07983 __kmp_ignore_mppbeg( void )
07984 {
07985 char *env;
07986
07987 if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
07988 if (__kmp_str_match_false( env ))
07989 return FALSE;
07990 }
07991
07992 return TRUE;
07993 }
07994
07995 int
07996 __kmp_ignore_mppend( void )
07997 {
07998 char *env;
07999
08000 if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
08001 if (__kmp_str_match_false( env ))
08002 return FALSE;
08003 }
08004
08005 return TRUE;
08006 }
08007
08008 void
08009 __kmp_internal_begin( void )
08010 {
08011 int gtid;
08012 kmp_root_t *root;
08013
08014
08015
08016 gtid = __kmp_entry_gtid();
08017 root = __kmp_threads[ gtid ] -> th.th_root;
08018 KMP_ASSERT( KMP_UBER_GTID( gtid ));
08019
08020 if( root->r.r_begin ) return;
08021 __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
08022 if( root->r.r_begin ) {
08023 __kmp_release_lock( & root->r.r_begin_lock, gtid );
08024 return;
08025 }
08026
08027 root -> r.r_begin = TRUE;
08028
08029 __kmp_release_lock( & root->r.r_begin_lock, gtid );
08030 }
08031
08032
08033
08034
08035
08036 void
08037 __kmp_user_set_library (enum library_type arg)
08038 {
08039 int gtid;
08040 kmp_root_t *root;
08041 kmp_info_t *thread;
08042
08043
08044
08045 gtid = __kmp_entry_gtid();
08046 thread = __kmp_threads[ gtid ];
08047
08048 root = thread -> th.th_root;
08049
08050 KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
08051 if (root->r.r_in_parallel) {
08052 KMP_WARNING( SetLibraryIncorrectCall );
08053 return;
08054 }
08055
08056 switch ( arg ) {
08057 case library_serial :
08058 thread -> th.th_set_nproc = 0;
08059 set__nproc_p( thread, 1 );
08060 break;
08061 case library_turnaround :
08062 thread -> th.th_set_nproc = 0;
08063 set__nproc_p( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
08064 break;
08065 case library_throughput :
08066 thread -> th.th_set_nproc = 0;
08067 set__nproc_p( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
08068 break;
08069 default:
08070 KMP_FATAL( UnknownLibraryType, arg );
08071 }
08072
08073 __kmp_aux_set_library ( arg );
08074 }
08075
08076 void
08077 __kmp_aux_set_stacksize( size_t arg )
08078 {
08079 if (! __kmp_init_serial)
08080 __kmp_serial_initialize();
08081
08082 #if KMP_OS_DARWIN
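/* On Darwin, round the requested stack size up to a 4 KB boundary; the
   (arg + 0x1000) test appears to guard against wrapping past the top of the
   address range when rounding up. */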
08083 if (arg & (0x1000 - 1)) {
08084 arg &= ~(0x1000 - 1);
08085 if(arg + 0x1000)
08086 arg += 0x1000;
08087 }
08088 #endif
08089 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
08090
08091
08092 if (! TCR_4(__kmp_init_parallel)) {
08093 size_t value = arg;
08094
08095 if (value < __kmp_sys_min_stksize )
08096 value = __kmp_sys_min_stksize ;
08097 else if (value > KMP_MAX_STKSIZE)
08098 value = KMP_MAX_STKSIZE;
08099
08100 __kmp_stksize = value;
08101
08102 __kmp_env_stksize = TRUE;
08103 }
08104
08105 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
08106 }
08107
08108
08109
08110 void
08111 __kmp_aux_set_library (enum library_type arg)
08112 {
08113 __kmp_library = arg;
08114
08115 switch ( __kmp_library ) {
08116 case library_serial :
08117 {
08118 KMP_INFORM( LibraryIsSerial );
08119 (void) __kmp_change_library( TRUE );
08120 }
08121 break;
08122 case library_turnaround :
08123 (void) __kmp_change_library( TRUE );
08124 break;
08125 case library_throughput :
08126 (void) __kmp_change_library( FALSE );
08127 break;
08128 default:
08129 KMP_FATAL( UnknownLibraryType, arg );
08130 }
08131 }
08132
08133
08134
08135
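/* __kmp_aux_set_blocktime: clamp the requested blocktime (milliseconds) to
   [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME], store it in the team ICVs, and
   convert it to a number of monitor wakeup intervals (bt_intervals) used by
   the spin/sleep logic. */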
08136 void
08137 __kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid)
08138 {
08139 int blocktime = arg;
08140 int bt_intervals;
08141 int bt_set;
08142
08143 __kmp_save_internal_controls( thread );
08144
08145
08146 if (blocktime < KMP_MIN_BLOCKTIME)
08147 blocktime = KMP_MIN_BLOCKTIME;
08148 else if (blocktime > KMP_MAX_BLOCKTIME)
08149 blocktime = KMP_MAX_BLOCKTIME;
08150
08151 set__blocktime_team( thread -> th.th_team, tid, blocktime );
08152 set__blocktime_team( thread -> th.th_serial_team, 0, blocktime );
08153
08154
08155 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
08156
08157 set__bt_intervals_team( thread -> th.th_team, tid, bt_intervals );
08158 set__bt_intervals_team( thread -> th.th_serial_team, 0, bt_intervals );
08159
08160
08161 bt_set = TRUE;
08162
08163 set__bt_set_team( thread -> th.th_team, tid, bt_set );
08164 set__bt_set_team( thread -> th.th_serial_team, 0, bt_set );
08165 KF_TRACE(10, ( "kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n",
08166 __kmp_gtid_from_tid(tid, thread->th.th_team),
08167 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups ) );
08168 }
08169
08170 void
08171 __kmp_aux_set_defaults(
08172 char const * str,
08173 int len
08174 ) {
08175 if ( ! __kmp_init_serial ) {
08176 __kmp_serial_initialize();
08177 };
08178 __kmp_env_initialize( str );
08179
08180 if (__kmp_settings) {
08181 __kmp_env_print();
08182 }
08183 }
08184
08185
08186
08187
08188
08189
08190
08191
08192
08193
08194
08195
08196
08197
08198
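/* __kmp_determine_reduction_method: picks how a reduction is finalized -- an
   empty block for a team of one, atomic operations, a tree reduction, or the
   default critical-section block -- based on architecture, OS, team size, the
   number of reduction variables, and hints encoded in the ident_t flags.
   __kmp_force_reduction_method, when set, overrides the heuristic. */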
08199 PACKED_REDUCTION_METHOD_T
08200 __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
08201 kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
08202 kmp_critical_name *lck )
08203 {
08204
08205
08206
08207
08208
08209
08210 PACKED_REDUCTION_METHOD_T retval;
08211
08212 int team_size;
08213
08214 KMP_DEBUG_ASSERT( loc );
08215 KMP_DEBUG_ASSERT( lck );
08216
08217 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
08218 #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
08219
08220 retval = critical_reduce_block;
08221
08222 team_size = __kmp_get_team_num_threads( global_tid );
08223
08224 if( team_size == 1 ) {
08225
08226 retval = empty_reduce_block;
08227
08228 } else {
08229
08230 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
08231 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
08232
08233 #if KMP_ARCH_X86_64
08234
08235 #if KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_DARWIN
08236 #if KMP_MIC
08237 #define REDUCTION_TEAMSIZE_CUTOFF 8
08238 #else // KMP_MIC
08239 #define REDUCTION_TEAMSIZE_CUTOFF 4
08240 #endif // KMP_MIC
08241 if( tree_available ) {
08242 if( team_size <= REDUCTION_TEAMSIZE_CUTOFF ) {
08243 if ( atomic_available ) {
08244 retval = atomic_reduce_block;
08245 }
08246 } else {
08247 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
08248 }
08249 } else if ( atomic_available ) {
08250 retval = atomic_reduce_block;
08251 }
08252 #else
08253 #error "Unknown or unsupported OS"
08254 #endif // KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_DARWIN
08255
08256 #elif KMP_ARCH_X86
08257
08258 #if KMP_OS_LINUX || KMP_OS_WINDOWS
08259
08260
08261
08262
08263
08264
08265
08266
08267
08268
08269
08270
08271
08272
08273
08274 if( atomic_available ) {
08275 if( num_vars <= 2 ) {
08276 retval = atomic_reduce_block;
08277 }
08278 }
08279
08280 #elif KMP_OS_DARWIN
08281
08282
08283 if( atomic_available && ( num_vars <= 3 ) ) {
08284 retval = atomic_reduce_block;
08285 } else if( tree_available ) {
08286 if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
08287 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
08288 }
08289 }
08290
08291 #else
08292 #error "Unknown or unsupported OS"
08293 #endif
08294
08295 #else
08296 #error "Unknown or unsupported architecture"
08297 #endif
08298
08299 }
08300
08301
08302
08303
08304
08305
08306
08307
08308
08309
08310
08311
08312
08313
08314
08315 if( __kmp_force_reduction_method != reduction_method_not_defined ) {
08316
08317 PACKED_REDUCTION_METHOD_T forced_retval;
08318
08319 int atomic_available, tree_available;
08320
08321 switch( ( forced_retval = __kmp_force_reduction_method ) )
08322 {
08323 case critical_reduce_block:
08324 KMP_ASSERT( lck );
08325 if( team_size <= 1 ) {
08326 forced_retval = empty_reduce_block;
08327 }
08328 break;
08329
08330 case atomic_reduce_block:
08331 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
08332 KMP_ASSERT( atomic_available );
08333 break;
08334
08335 case tree_reduce_block:
08336 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
08337 KMP_ASSERT( tree_available );
08338 #if KMP_FAST_REDUCTION_BARRIER
08339 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
08340 #endif
08341 break;
08342
08343 default:
08344 KMP_ASSERT( 0 );
08345 }
08346
08347 retval = forced_retval;
08348 }
08349
08350 KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) );
08351
08352 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
08353 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
08354
08355 return ( retval );
08356 }
08357
08358
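/* The packed reduction method appears to store the selected method in the
   upper bits; shifting right by 8 recovers the method number recorded for the
   current thread's most recent reduction. */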
08359 kmp_int32
08360 __kmp_get_reduce_method( void ) {
08361 return ( ( __kmp_entry_thread() -> th.th_local.packed_reduction_method ) >> 8 );
08362 }
08363
08364