kmp_runtime.c

Go to the documentation of this file.
00001 /*
00002  * kmp_runtime.c -- KPTS runtime support library
00003  * $Revision: 42248 $
00004  * $Date: 2013-04-03 07:08:13 -0500 (Wed, 03 Apr 2013) $
00005  */
00006 
00007 /* <copyright>
00008     Copyright (c) 1997-2013 Intel Corporation.  All Rights Reserved.
00009 
00010     Redistribution and use in source and binary forms, with or without
00011     modification, are permitted provided that the following conditions
00012     are met:
00013 
00014       * Redistributions of source code must retain the above copyright
00015         notice, this list of conditions and the following disclaimer.
00016       * Redistributions in binary form must reproduce the above copyright
00017         notice, this list of conditions and the following disclaimer in the
00018         documentation and/or other materials provided with the distribution.
00019       * Neither the name of Intel Corporation nor the names of its
00020         contributors may be used to endorse or promote products derived
00021         from this software without specific prior written permission.
00022 
00023     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00024     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00025     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00026     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00027     HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00028     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00029     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00030     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00031     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00032     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00033     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00034 
00035 
00036 ------------------------------------------------------------------------
00037 
00038     Portions of this software are protected under the following patents:
00039         U.S. Patent 5,812,852
00040         U.S. Patent 6,792,599
00041         U.S. Patent 7,069,556
00042         U.S. Patent 7,328,433
00043         U.S. Patent 7,500,242
00044 
00045 </copyright> */
00046 
00047 #include "kmp.h"
00048 #include "kmp_atomic.h"
00049 #include "kmp_wrapper_getpid.h"
00050 #include "kmp_environment.h"
00051 #include "kmp_str.h"
00052 #include "kmp_settings.h"
00053 #include "kmp_i18n.h"
00054 #include "kmp_io.h"
00055 #include "kmp_error.h"
00056 
00057 #if OMPT_SUPPORT
00058 #include "ompt-specific.h"
00059 #endif
00060 
00061 /* these are temporary issues to be dealt with */
00062 #define KMP_USE_PRCTL 0
00063 #define KMP_USE_POOLED_ALLOC 0
00064 
00065 #if KMP_OS_WINDOWS
00066 #include <process.h>
00067 #endif
00068 
00069 
00070 #if defined(KMP_GOMP_COMPAT)
00071 char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes";
00072 #endif /* defined(KMP_GOMP_COMPAT) */
00073 
00074 char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: "
00075 #if OMP_30_ENABLED
00076     "3.1 (201107)";
00077 #else
00078     "2.5 (200505)";
00079 #endif
00080 
00081 #ifdef KMP_DEBUG
00082 
00083 char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable";
00084 
00085 char const __kmp_version_perf_v19[] = KMP_VERSION_PREFIX "perf v19: "
00086 #if KMP_PERF_V19 == KMP_ON
00087     "on";
00088 #elif KMP_PERF_V19 == KMP_OFF
00089     "off";
00090 #else
00091     #error "Must specify KMP_PERF_V19 option"
00092 #endif
00093 
00094 char const __kmp_version_perf_v106[] = KMP_VERSION_PREFIX "perf v106: "
00095 #if KMP_PERF_V106 == KMP_ON
00096     "on";
00097 #elif KMP_PERF_V106 == KMP_OFF
00098     "off";
00099 #else
00100     #error "Must specify KMP_PERF_V106 option"
00101 #endif
00102 
00103 #endif /* KMP_DEBUG */
00104 
00105 
00106 
00107 /* ------------------------------------------------------------------------ */
00108 /* ------------------------------------------------------------------------ */
00109 
00110 kmp_info_t __kmp_monitor;
00111 
00112 /* ------------------------------------------------------------------------ */
00113 /* ------------------------------------------------------------------------ */
00114 
00115 /* Forward declarations */
00116 
00117 
00118 void __kmp_cleanup( void );
00119 
00120 static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid );
00121 static void __kmp_initialize_team(
00122     kmp_team_t * team,
00123     int          new_nproc,
00124     #if OMP_30_ENABLED
00125         kmp_internal_control_t * new_icvs,
00126         ident_t *                loc
00127     #else
00128         int new_set_nproc, int new_set_dynamic, int new_set_nested,
00129         int new_set_blocktime, int new_bt_intervals, int new_bt_set
00130     #endif // OMP_30_ENABLED
00131 );
00132 static void __kmp_partition_places( kmp_team_t *team );
00133 static void __kmp_do_serial_initialize( void );
00134 
00135 
00136 #ifdef USE_LOAD_BALANCE
00137 static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc );
00138 #endif
00139 
00140 static int __kmp_expand_threads(int nWish, int nNeed);
00141 static int __kmp_unregister_root_other_thread( int gtid );
00142 static void __kmp_unregister_library( void ); // called by __kmp_internal_end()
00143 static void __kmp_reap_thread( kmp_info_t * thread, int is_root );
00144 static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
00145 
00146 /* ------------------------------------------------------------------------ */
00147 /* ------------------------------------------------------------------------ */
00148 
00149 /* Calculate the identifier of the current thread */
00150 /* fast (and somewhat portable) way to get unique */
00151 /* identifier of executing thread.                */
00152 /* returns KMP_GTID_DNE if we haven't been assigned a gtid   */
00153 
00154 int
00155 __kmp_get_global_thread_id( )
00156 {
00157     int i;
00158     kmp_info_t   **other_threads;
00159     size_t         stack_data;
00160     char          *stack_addr;
00161     size_t         stack_size;
00162     char          *stack_base;
00163 
00164     KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d  all_nproc=%d\n",
00165                       __kmp_nth, __kmp_all_nth ));
00166 
00167     /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to a
00168              parallel region, made it return KMP_GTID_DNE to force serial_initialize by
00169              caller.  Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
00170              __kmp_init_gtid for this to work.  */
00171 
00172     if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE;
00173 
00174 #ifdef KMP_TDATA_GTID
00175     if ( TCR_4(__kmp_gtid_mode) >= 3) {
00176         KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" ));
00177         return __kmp_gtid;
00178     }
00179 #endif
00180     if ( TCR_4(__kmp_gtid_mode) >= 2) {
00181         KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" ));
00182         return __kmp_gtid_get_specific();
00183     }
00184     KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" ));
00185 
00186     stack_addr    = (char*) & stack_data;
00187     other_threads = __kmp_threads;
00188 
00189     /*
00190         ATT: The code below is a source of potential bugs due to unsynchronized access to
00191         __kmp_threads array. For example:
00192             1. Current thread loads other_threads[i] to thr and checks it, it is non-NULL.
00193             2. Current thread is suspended by OS.
00194             3. Another thread unregisters and finishes (debug versions of free() may fill memory
00195                with something like 0xEF).
00196             4. Current thread is resumed.
00197             5. Current thread reads junk from *thr.
00198         TODO: Fix it.
00199         --ln
00200     */
00201 
00202     for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {
00203 
00204         kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
00205         if( !thr ) continue;
00206 
00207         stack_size =  (size_t)TCR_PTR(thr -> th.th_info.ds.ds_stacksize);
00208         stack_base =  (char *)TCR_PTR(thr -> th.th_info.ds.ds_stackbase);
00209 
00210         /* stack grows down -- search through all of the active threads */
00211 
00212         if( stack_addr <= stack_base ) {
00213             size_t stack_diff = stack_base - stack_addr;
00214 
00215             if( stack_diff <= stack_size ) {
00216                 /* The only way we can be closer than the allocated */
00217                 /* stack size is if we are running on this thread. */
00218                 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
00219                 return i;
00220             }
00221         }
00222     }
00223 
00224     /* get specific to try and determine our gtid */
00225     KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. failed to find "
00226                       "thread, using TLS\n" ));
00227     i = __kmp_gtid_get_specific();
00228 
00229     /*fprintf( stderr, "=== %d\n", i );  */ /* GROO */
00230 
00231     /* if we havn't been assigned a gtid, then return code */
00232     if( i<0 ) return i;
00233 
00234     /* dynamically updated stack window for uber threads to avoid get_specific call */
00235     if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
00236         KMP_FATAL( StackOverflow, i );
00237     }
00238 
00239     stack_base = (char *) other_threads[i] -> th.th_info.ds.ds_stackbase;
00240     if( stack_addr > stack_base ) {
00241         TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
00242         TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
00243           other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
00244     } else {
00245         TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
00246     }
00247 
00248     /* Reprint stack bounds for ubermaster since they have been refined */
00249     if ( __kmp_storage_map ) {
00250         char *stack_end = (char *) other_threads[i] -> th.th_info.ds.ds_stackbase;
00251         char *stack_beg = stack_end - other_threads[i] -> th.th_info.ds.ds_stacksize;
00252         __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
00253                                       other_threads[i] -> th.th_info.ds.ds_stacksize,
00254                                       "th_%d stack (refinement)", i );
00255     }
00256     return i;
00257 }
00258 
00259 int
00260 __kmp_get_global_thread_id_reg( )
00261 {
00262     int gtid;
00263 
00264     if ( !__kmp_init_serial ) {
00265         gtid = KMP_GTID_DNE;
00266     } else
00267 #ifdef KMP_TDATA_GTID
00268     if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
00269         KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
00270         gtid = __kmp_gtid;
00271     } else
00272 #endif
00273     if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
00274         KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
00275         gtid = __kmp_gtid_get_specific();
00276     } else {
00277         KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
00278         gtid = __kmp_get_global_thread_id();
00279     }
00280 
00281     /* we must be a new uber master sibling thread */
00282     if( gtid == KMP_GTID_DNE ) {
00283         KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. "
00284                         "Registering a new gtid.\n" ));
00285         __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
00286         if( !__kmp_init_serial ) {
00287             __kmp_do_serial_initialize();
00288             gtid = __kmp_gtid_get_specific();
00289         } else {
00290             gtid = __kmp_register_root(FALSE);
00291         }
00292         __kmp_release_bootstrap_lock( &__kmp_initz_lock );
00293         /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
00294     }
00295 
00296     KMP_DEBUG_ASSERT( gtid >=0 );
00297 
00298     return gtid;
00299 }
00300 
00301 /* caller must hold forkjoin_lock */
00302 void
00303 __kmp_check_stack_overlap( kmp_info_t *th )
00304 {
00305     int f;
00306     char *stack_beg = NULL;
00307     char *stack_end = NULL;
00308     int gtid;
00309 
00310     KA_TRACE(10,("__kmp_check_stack_overlap: called\n"));
00311     if ( __kmp_storage_map ) {
00312         stack_end = (char *) th -> th.th_info.ds.ds_stackbase;
00313         stack_beg = stack_end - th -> th.th_info.ds.ds_stacksize;
00314 
00315         gtid = __kmp_gtid_from_thread( th );
00316 
00317         if (gtid == KMP_GTID_MONITOR) {
00318             __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
00319                                      "th_%s stack (%s)", "mon",
00320                                      ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
00321         } else {
00322             __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
00323                                      "th_%d stack (%s)", gtid,
00324                                      ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" );
00325         }
00326     }
00327 
00328     /* No point in checking ubermaster threads since they use refinement and cannot overlap */
00329     if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid = __kmp_gtid_from_thread( th )))
00330     {
00331         KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n"));
00332         if ( stack_beg == NULL ) {
00333             stack_end = (char *) th -> th.th_info.ds.ds_stackbase;
00334             stack_beg = stack_end - th -> th.th_info.ds.ds_stacksize;
00335         }
00336 
00337         for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
00338             kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);
00339 
00340             if( f_th && f_th != th ) {
00341                 char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
00342                 char *other_stack_beg = other_stack_end -
00343                                         (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
00344                 if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
00345                    (stack_end > other_stack_beg && stack_end < other_stack_end)) {
00346 
00347                     /* Print the other stack values before the abort */
00348                     if ( __kmp_storage_map )
00349                         __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
00350                             (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
00351                             "th_%d stack (overlapped)",
00352                                                  __kmp_gtid_from_thread( f_th ) );
00353 
00354                     __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
00355                 }
00356             }
00357         }
00358     }
00359     KA_TRACE(10,("__kmp_check_stack_overlap: returning\n"));
00360 }
00361 
00362 
00363 /* ------------------------------------------------------------------------ */
00364 
00365 #ifndef KMP_DEBUG
00366 # define __kmp_static_delay( arg )     /* nothing to do */
00367 #else
00368 
00369 static void
00370 __kmp_static_delay( int arg )
00371 {
00372 /* Work around weird code-gen bug that causes assert to trip */
00373 # if KMP_ARCH_X86_64 && KMP_OS_LINUX
00374     KMP_ASSERT( arg != 0 );
00375 # else
00376     KMP_ASSERT( arg >= 0 );
00377 # endif
00378 }
00379 #endif /* KMP_DEBUG */
00380 
00381 static void
00382 __kmp_static_yield( int arg )
00383 {
00384     __kmp_yield( arg );
00385 }
00386 
00387 /*
00388  * Spin wait loop that first does pause, then yield, then sleep.
00389  * Wait until spinner is equal to checker to exit.
00390  *
00391  * A thread that calls __kmp_wait_sleep must make certain that another thread
00392  * calls __kmp_release to wake it back up up to prevent deadlocks!
00393  */
00394 
00395 void
00396 __kmp_wait_sleep( kmp_info_t *this_thr, 
00397                   volatile kmp_uint *spinner, 
00398                   kmp_uint checker, 
00399                   int final_spin
00400 )
00401 {
00402     /* note: we may not belong to a team at this point */
00403     register volatile kmp_uint    *spin      = spinner;
00404     register          kmp_uint     check     = checker;
00405     register          kmp_uint32   spins;
00406     register          int          hibernate;
00407                       int          th_gtid, th_tid;
00408 #if OMP_30_ENABLED
00409                       int          flag = FALSE;
00410 #endif /* OMP_30_ENABLED */
00411 
00412     th_gtid = this_thr->th.th_info.ds.ds_gtid;
00413 
00414     if( TCR_4(*spin) == check ) {
00415         return;
00416     }
00417 
00418 #if OMPT_SUPPORT
00419     if ((ompt_status == ompt_status_track_callback) &&
00420     (ompt_callbacks.ompt_callback(ompt_event_idle_begin))) {
00421       ompt_callbacks.ompt_callback(ompt_event_idle_begin)();
00422     }
00423 #endif
00424 
00425     KA_TRACE( 20, ("__kmp_wait_sleep: T#%d waiting for spin(%p) == %d\n",
00426                   th_gtid,
00427                   spin, check ) );
00428 
00429     /* setup for waiting */
00430     KMP_INIT_YIELD( spins );
00431 
00432     if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
00433         //
00434         // The worker threads cannot rely on the team struct existing at this
00435         // point.  Use the bt values cached in the thread struct instead.
00436         //
00437         #ifdef KMP_ADJUST_BLOCKTIME
00438             if ( __kmp_zero_bt && ! this_thr->th.th_team_bt_set ) {
00439                 /* force immediate suspend if not set by user and more threads than available procs */
00440                 hibernate = 0;
00441             } else {
00442                 hibernate = this_thr->th.th_team_bt_intervals;
00443             }
00444         #else
00445             hibernate = this_thr->th.th_team_bt_intervals;
00446         #endif /* KMP_ADJUST_BLOCKTIME */
00447         if ( hibernate == 0 ) {
00448             hibernate--;
00449         }
00450         hibernate += TCR_4( __kmp_global.g.g_time.dt.t_value );
00451 
00452         KF_TRACE( 20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
00453                       th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
00454                       hibernate - __kmp_global.g.g_time.dt.t_value ));
00455     }
00456 
00457     KMP_MB();
00458 
00459     /* main wait spin loop */
00460     while( TCR_4(*spin) != check ) {
00461         int in_pool;
00462 
00463         #if OMP_30_ENABLED
00464             //
00465             // If the task team is NULL, it means one of things:
00466             //   1) A newly-created thread is first being released by
00467             //      __kmp_fork_barrier(), and its task team has not been set up
00468             //      yet.
00469             //   2) All tasks have been executed to completion, this thread has
00470             //      decremented the task team's ref ct and possibly deallocated
00471             //      it, and should no longer reference it.
00472             //   3) Tasking is off for this region.  This could be because we
00473             //      are in a serialized region (perhaps the outer one), or else
00474             //      tasking was manually disabled (KMP_TASKING=0).
00475             //
00476             kmp_task_team_t * task_team = NULL;
00477             if ( __kmp_tasking_mode != tskm_immediate_exec ) {
00478                 task_team = this_thr->th.th_task_team;
00479                 if ( task_team != NULL ) {
00480                     if ( ! TCR_SYNC_4( task_team->tt.tt_active ) ) {
00481                         KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( this_thr->th.th_info.ds.ds_tid ) );
00482                         __kmp_unref_task_team( task_team, this_thr );
00483                     } else if ( KMP_TASKING_ENABLED( task_team, this_thr->th.th_task_state ) ) {
00484                         __kmp_execute_tasks( this_thr, th_gtid, spin, check, final_spin, &flag
00485                                              );
00486                     }
00487                 }; // if
00488             }; // if
00489         #endif /* OMP_30_ENABLED */
00490 
00491         if( TCR_4(__kmp_global.g.g_done) ) {
00492             if( __kmp_global.g.g_abort )
00493                 __kmp_abort_thread( );
00494             break;
00495         }
00496 
00497         __kmp_static_delay( 1 );
00498 
00499         /* if we are oversubscribed,
00500            or have waited a bit (and KMP_LIBRARY=throughput), then yield */
00501         KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
00502         // TODO: Should it be number of cores instead of thread contexts? Like:
00503         // KMP_YIELD( TCR_4(__kmp_nth) > __kmp_ncores );
00504         // Need performance improvement data to make the change...
00505         KMP_YIELD_SPIN( spins );
00506 
00507         //
00508         // Check if this thread was transferred from a team
00509         // to the thread pool (or vice-versa) while spinning.
00510         //
00511         in_pool = !!TCR_4(this_thr->th.th_in_pool);
00512         if ( in_pool != !!this_thr->th.th_active_in_pool ) {
00513             if ( in_pool ) {
00514                 //
00515                 // recently transferred from team to pool
00516                 //
00517                 KMP_TEST_THEN_INC32(
00518                   (kmp_int32 *) &__kmp_thread_pool_active_nth );
00519                 this_thr->th.th_active_in_pool = TRUE;
00520 
00521                 //
00522                 // Here, we cannot assert that
00523                 //
00524                 // KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth)
00525                 //  <= __kmp_thread_pool_nth );
00526                 //
00527                 // __kmp_thread_pool_nth is inc/dec'd by the master thread
00528                 // while the fork/join lock is held, whereas
00529                 // __kmp_thread_pool_active_nth is inc/dec'd asynchronously
00530                 // by the workers.  The two can get out of sync for brief
00531                 // periods of time.
00532                 //
00533             }
00534             else {
00535                 //
00536                 // recently transferred from pool to team
00537                 //
00538                 KMP_TEST_THEN_DEC32(
00539                   (kmp_int32 *) &__kmp_thread_pool_active_nth );
00540                 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
00541                 this_thr->th.th_active_in_pool = FALSE;
00542             }
00543         }
00544 
00545         #if OMP_30_ENABLED
00546             // Don't suspend if there is a likelihood of new tasks being spawned.
00547             if ( ( task_team != NULL ) && TCR_4(task_team->tt.tt_found_tasks) ) {
00548                 continue;
00549             }
00550         #endif /* OMP_30_ENABLED */
00551 
00552         /* Don't suspend if KMP_BLOCKTIME is set to "infinite" */
00553         if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
00554             continue;
00555         }
00556 
00557         /* if we have waited a bit more, fall asleep */
00558         if( TCR_4( __kmp_global.g.g_time.dt.t_value ) <= hibernate ) {
00559             continue;
00560         }
00561 
00562         KF_TRACE( 50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid ) );
00563 
00564         __kmp_suspend( th_gtid, spin, check );
00565 
00566         if( TCR_4( __kmp_global.g.g_done ) && __kmp_global.g.g_abort ) {
00567             __kmp_abort_thread( );
00568         }
00569 
00570         /* TODO */
00571         /* if thread is done with work and timesout, disband/free */
00572     }
00573 
00574 #if OMPT_SUPPORT
00575     if ((ompt_status == ompt_status_track_callback) &&
00576     (ompt_callbacks.ompt_callback(ompt_event_idle_end))) {
00577       ompt_callbacks.ompt_callback(ompt_event_idle_end)();
00578     }
00579 #endif
00580 
00581 }
00582 
00583 
00584 /*
00585  * Release the thread specified by target_thr from waiting by setting the location
00586  * specified by spin and resume the thread if indicated by the sleep parameter.
00587  *
00588  * A thread that calls __kmp_wait_sleep must call this function to wake up the
00589  * potentially sleeping thread and prevent deadlocks!
00590  */
00591 
00592 void
00593 __kmp_release( kmp_info_t *target_thr, volatile kmp_uint *spin,
00594                enum kmp_mem_fence_type fetchadd_fence )
00595 {
00596     kmp_uint old_spin;
00597     #ifdef KMP_DEBUG
00598         int target_gtid = target_thr->th.th_info.ds.ds_gtid;
00599         int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
00600     #endif
00601 
00602     KF_TRACE( 20, ( "__kmp_release: T#%d releasing T#%d spin(%p) fence_type(%d)\n",
00603                     gtid, target_gtid, spin, fetchadd_fence ));
00604 
00605     KMP_DEBUG_ASSERT( spin );
00606 
00607     KMP_DEBUG_ASSERT( fetchadd_fence == kmp_acquire_fence ||
00608                       fetchadd_fence == kmp_release_fence );
00609 
00610     old_spin = ( fetchadd_fence == kmp_acquire_fence )
00611                  ? KMP_TEST_THEN_ADD4_ACQ32( (volatile kmp_int32 *) spin )
00612                  : KMP_TEST_THEN_ADD4_32( (volatile kmp_int32 *) spin );
00613 
00614     KF_TRACE( 100, ( "__kmp_release: T#%d old spin(%p)=%d, set new spin=%d\n",
00615                      gtid, spin, old_spin, *spin ) );
00616 
00617     if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
00618         /* Only need to check sleep stuff if infinite block time not set */
00619         if ( old_spin & KMP_BARRIER_SLEEP_STATE ) {
00620             #ifndef KMP_DEBUG
00621                 int target_gtid = target_thr->th.th_info.ds.ds_gtid;
00622             #endif
00623             /* wake up thread if needed */
00624             KF_TRACE( 50, ( "__kmp_release: T#%d waking up thread T#%d since sleep spin(%p) set\n",
00625                             gtid, target_gtid, spin ));
00626             __kmp_resume( target_gtid, spin );
00627         } else {
00628             KF_TRACE( 50, ( "__kmp_release: T#%d don't wake up thread T#%d since sleep spin(%p) not set\n",
00629                             gtid, target_gtid, spin ));
00630         }
00631     }
00632 
00633 }
00634 
00635 /* ------------------------------------------------------------------------ */
00636 
00637 void
00638 __kmp_infinite_loop( void )
00639 {
00640     static int done = FALSE;
00641 
00642     while (! done) {
00643         KMP_YIELD( 1 );
00644     }
00645 }
00646 
00647 #define MAX_MESSAGE     512
00648 
00649 void
00650 __kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) {
00651     char buffer[MAX_MESSAGE];
00652     int node;
00653     va_list ap;
00654 
00655     va_start( ap, format);
00656     sprintf( buffer, "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format );
00657     __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
00658     __kmp_vprintf( kmp_err, buffer, ap );
00659 #if KMP_PRINT_DATA_PLACEMENT
00660     if(gtid >= 0) {
00661         if(p1 <= p2 && (char*)p2 - (char*)p1 == size) {
00662             if( __kmp_storage_map_verbose ) {
00663                 node = __kmp_get_host_node(p1);
00664                 if(node < 0)  /* doesn't work, so don't try this next time */
00665                     __kmp_storage_map_verbose = FALSE;
00666                 else {
00667                     char *last;
00668                     int lastNode;
00669                     int localProc = __kmp_get_cpu_from_gtid(gtid);
00670 
00671                     p1 = (void *)( (size_t)p1 & ~((size_t)PAGE_SIZE - 1) );
00672                     p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)PAGE_SIZE - 1) );
00673                     if(localProc >= 0)
00674                         __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid, localProc>>1);
00675                     else
00676                         __kmp_printf_no_lock("  GTID %d\n", gtid);
00677 # if KMP_USE_PRCTL  
00678 /* The more elaborate format is disabled for now because of the prctl hanging bug. */
00679                     do {
00680                         last = p1;
00681                         lastNode = node;
00682                         /* This loop collates adjacent pages with the same host node. */
00683                         do {
00684                             (char*)p1 += PAGE_SIZE;
00685                         } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
00686                         __kmp_printf_no_lock("    %p-%p memNode %d\n", last,
00687                                              (char*)p1 - 1, lastNode);
00688                     } while(p1 <= p2);
00689 # else
00690                     __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
00691                                          (char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1));
00692                     if(p1 < p2)  {
00693                         __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
00694                                              (char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2));
00695                     }
00696 # endif
00697                 }
00698             }
00699         } else
00700             __kmp_printf_no_lock("  %s\n", KMP_I18N_STR( StorageMapWarning ) );
00701     }
00702 #endif /* KMP_PRINT_DATA_PLACEMENT */
00703     __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
00704 }
00705 
00706 void
00707 __kmp_warn( char const * format, ... )
00708 {
00709     char buffer[MAX_MESSAGE];
00710     va_list ap;
00711 
00712     if ( __kmp_generate_warnings == kmp_warnings_off ) {
00713         return;
00714     }
00715 
00716     va_start( ap, format );
00717 
00718     snprintf( buffer, sizeof(buffer) , "OMP warning: %s\n", format );
00719     __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
00720     __kmp_vprintf( kmp_err, buffer, ap );
00721     __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
00722 
00723     va_end( ap );
00724 }
00725 
00726 void
00727 __kmp_abort_process()
00728 {
00729 
00730     // Later threads may stall here, but that's ok because abort() will kill them.
00731     __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );
00732 
00733     if ( __kmp_debug_buf ) {
00734         __kmp_dump_debug_buffer();
00735     }; // if
00736 
00737     if ( KMP_OS_WINDOWS ) {
00738         // Let other threads know of abnormal termination and prevent deadlock
00739         // if abort happened during library initialization or shutdown
00740         __kmp_global.g.g_abort = SIGABRT;
00741 
00742         /*
00743             On Windows* OS by default abort() causes pop-up error box, which stalls nightly testing.
00744             Unfortunately, we cannot reliably suppress pop-up error boxes. _set_abort_behavior()
00745             works well, but this function is not available in VS7 (this is not problem for DLL, but
00746             it is a problem for static OpenMP RTL). SetErrorMode (and so, timelimit utility) does
00747             not help, at least in some versions of MS C RTL.
00748 
00749             It seems following sequence is the only way to simulate abort() and avoid pop-up error
00750             box.
00751         */
00752         raise( SIGABRT );
00753         _exit( 3 );    // Just in case, if signal ignored, exit anyway.
00754     } else {
00755         abort();
00756     }; // if
00757 
00758     __kmp_infinite_loop();
00759     __kmp_release_bootstrap_lock( & __kmp_exit_lock );
00760 
00761 } // __kmp_abort_process
00762 
00763 void
00764 __kmp_abort_thread( void )
00765 {
00766     // TODO: Eliminate g_abort global variable and this function.
00767     // In case of abort just call abort(), it will kill all the threads.
00768     __kmp_infinite_loop();
00769 } // __kmp_abort_thread
00770 
00771 /* ------------------------------------------------------------------------ */
00772 
00773 /*
00774  * Print out the storage map for the major kmp_info_t thread data structures
00775  * that are allocated together.
00776  */
00777 
00778 static void
00779 __kmp_print_thread_storage_map( kmp_info_t *thr, int gtid )
00780 {
00781     __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid );
00782 
00783     __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t),
00784                              "th_%d.th_info", gtid );
00785 
00786     __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t),
00787                              "th_%d.th_local", gtid );
00788 
00789     __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
00790                              sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid );
00791 
00792     __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
00793                              &thr->th.th_bar[bs_plain_barrier+1],
00794                              sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid);
00795 
00796     __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
00797                              &thr->th.th_bar[bs_forkjoin_barrier+1],
00798                              sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid);
00799 
00800     #if KMP_FAST_REDUCTION_BARRIER
00801         __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
00802                              &thr->th.th_bar[bs_reduction_barrier+1],
00803                              sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid);
00804     #endif // KMP_FAST_REDUCTION_BARRIER
00805 }
00806 
00807 /*
00808  * Print out the storage map for the major kmp_team_t team data structures
00809  * that are allocated together.
00810  */
00811 
00812 static void
00813 __kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr )
00814 {
00815     int num_disp_buff = team->t.t_max_nproc > 1 ? KMP_MAX_DISP_BUF : 2;
00816     __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
00817                              header, team_id );
00818 
00819     __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
00820                              sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id );
00821 
00822 
00823     __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
00824                              sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id );
00825 
00826     __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
00827                              sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id );
00828 
00829     #if KMP_FAST_REDUCTION_BARRIER
00830         __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
00831                              sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id );
00832     #endif // KMP_FAST_REDUCTION_BARRIER
00833 
00834     __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
00835                              sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id );
00836 
00837     __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
00838                              sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id );
00839 
00840     __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
00841                              sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer",
00842                              header, team_id );
00843 
00844     /*
00845     __kmp_print_storage_map_gtid( -1, &team->t.t_set_nproc[0], &team->t.t_set_nproc[num_thr],
00846                              sizeof(int) * num_thr, "%s_%d.t_set_nproc", header, team_id );
00847 
00848     __kmp_print_storage_map_gtid( -1, &team->t.t_set_dynamic[0], &team->t.t_set_dynamic[num_thr],
00849                              sizeof(int) * num_thr, "%s_%d.t_set_dynamic", header, team_id );
00850 
00851     __kmp_print_storage_map_gtid( -1, &team->t.t_set_nested[0], &team->t.t_set_nested[num_thr],
00852                              sizeof(int) * num_thr, "%s_%d.t_set_nested", header, team_id );
00853 
00854     __kmp_print_storage_map_gtid( -1, &team->t.t_set_blocktime[0], &team->t.t_set_blocktime[num_thr],
00855                              sizeof(int) * num_thr, "%s_%d.t_set_nproc", header, team_id );
00856 
00857     __kmp_print_storage_map_gtid( -1, &team->t.t_set_bt_intervals[0], &team->t.t_set_bt_intervals[num_thr],
00858                              sizeof(int) * num_thr, "%s_%d.t_set_dynamic", header, team_id );
00859 
00860     __kmp_print_storage_map_gtid( -1, &team->t.t_set_bt_set[0], &team->t.t_set_bt_set[num_thr],
00861                              sizeof(int) * num_thr, "%s_%d.t_set_nested", header, team_id );
00862 
00863 #if OMP_30_ENABLED
00864     //__kmp_print_storage_map_gtid( -1, &team->t.t_set_max_active_levels[0], &team->t.t_set_max_active_levels[num_thr],
00865     //                        sizeof(int) * num_thr, "%s_%d.t_set_max_active_levels", header, team_id );
00866 
00867     __kmp_print_storage_map_gtid( -1, &team->t.t_set_sched[0], &team->t.t_set_sched[num_thr],
00868                              sizeof(kmp_r_sched_t) * num_thr, "%s_%d.t_set_sched", header, team_id );
00869 #endif // OMP_30_ENABLED
00870 #if OMP_40_ENABLED
00871     __kmp_print_storage_map_gtid( -1, &team->t.t_set_proc_bind[0], &team->t.t_set_proc_bind[num_thr],
00872                              sizeof(kmp_proc_bind_t) * num_thr, "%s_%d.t_set_proc_bind", header, team_id );
00873 #endif
00874     */
00875 
00876     __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
00877                              sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id );
00878 }
00879 
00880 static void __kmp_init_allocator() {}
00881 static void __kmp_fini_allocator() {}
00882 static void __kmp_fini_allocator_thread() {}
00883 
00884 /* ------------------------------------------------------------------------ */
00885 
00886 #ifdef GUIDEDLL_EXPORTS
00887 # if KMP_OS_WINDOWS
00888 
00889 
00890 static void
00891 __kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
00892     // TODO: Change to __kmp_break_bootstrap_lock().
00893     __kmp_init_bootstrap_lock( lck ); // make the lock released
00894 }
00895 
00896 static void
00897 __kmp_reset_locks_on_process_detach( int gtid_req ) {
00898     int i;
00899     int thread_count;
00900 
00901     // PROCESS_DETACH is expected to be called by a thread
00902     // that executes ProcessExit() or FreeLibrary().
00903     // OS terminates other threads (except the one calling ProcessExit or FreeLibrary).
00904     // So, it might be safe to access the __kmp_threads[] without taking the forkjoin_lock.
00905     // However, in fact, some threads can be still alive here, although being about to be terminated.
00906     // The threads in the array with ds_thread==0 are most suspicious.
00907     // Actually, it can be not safe to access the __kmp_threads[].
00908 
00909     // TODO: does it make sense to check __kmp_roots[] ?
00910 
00911     // Let's check that there are no other alive threads registered with the OMP lib.
00912     while( 1 ) {
00913         thread_count = 0;
00914         for( i = 0; i < __kmp_threads_capacity; ++i ) {
00915             if( !__kmp_threads ) continue;
00916             kmp_info_t* th = __kmp_threads[ i ];
00917             if( th == NULL ) continue;
00918             int gtid = th->th.th_info.ds.ds_gtid;
00919             if( gtid == gtid_req ) continue;
00920             if( gtid < 0 ) continue;
00921             DWORD exit_val;
00922             int alive = __kmp_is_thread_alive( th, &exit_val );
00923             if( alive ) {
00924             ++thread_count;
00925             }
00926         }
00927         if( thread_count == 0 ) break; // success
00928     }
00929 
00930     // Assume that I'm alone.
00931 
00932     // Now it might be probably safe to check and reset locks.
00933     // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset.
00934     __kmp_reset_lock( &__kmp_forkjoin_lock );
00935     #ifdef KMP_DEBUG
00936     __kmp_reset_lock( &__kmp_stdio_lock );
00937     #endif // KMP_DEBUG
00938 
00939 
00940 }
00941 
00942 BOOL WINAPI
00943 DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {
00944     //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
00945 
00946     switch( fdwReason ) {
00947 
00948         case DLL_PROCESS_ATTACH:
00949             KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" ));
00950 
00951             return TRUE;
00952 
00953         case DLL_PROCESS_DETACH:
00954             KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n",
00955                         __kmp_gtid_get_specific() ));
00956 
00957             if( lpReserved != NULL )
00958             {
00959                 // lpReserved is used for telling the difference:
00960                 //  lpReserved == NULL when FreeLibrary() was called,
00961                 //  lpReserved != NULL when the process terminates.
00962                 // When FreeLibrary() is called, worker threads remain alive.
00963                 // So they will release the forkjoin lock by themselves.
00964                 // When the process terminates, worker threads disappear triggering
00965                 // the problem of unreleased forkjoin lock as described below.
00966 
00967                 // A worker thread can take the forkjoin lock
00968                 // in __kmp_suspend()->__kmp_rml_decrease_load_before_sleep().
00969                 // The problem comes up if that worker thread becomes dead
00970                 // before it releases the forkjoin lock.
00971                 // The forkjoin lock remains taken, while the thread
00972                 // executing DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below
00973                 // will try to take the forkjoin lock and will always fail,
00974                 // so that the application will never finish [normally].
00975                 // This scenario is possible if __kmpc_end() has not been executed.
00976                 // It looks like it's not a corner case, but common cases:
00977                 // - the main function was compiled by an alternative compiler;
00978                 // - the main function was compiled by icl but without /Qopenmp (application with plugins);
00979                 // - application terminates by calling C exit(), Fortran CALL EXIT() or Fortran STOP.
00980                 // - alive foreign thread prevented __kmpc_end from doing cleanup.
00981 
00982                 // This is a hack to work around the problem.
00983                 // TODO: !!! to figure out something better.
00984                 __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );
00985             }
00986 
00987             __kmp_internal_end_library( __kmp_gtid_get_specific() );
00988 
00989             return TRUE;
00990 
00991         case DLL_THREAD_ATTACH:
00992             KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" ));
00993 
00994             /* if we wanted to register new siblings all the time here call
00995              * __kmp_get_gtid(); */
00996             return TRUE;
00997 
00998         case DLL_THREAD_DETACH:
00999             KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n",
01000                         __kmp_gtid_get_specific() ));
01001 
01002             __kmp_internal_end_thread( __kmp_gtid_get_specific() );
01003             return TRUE;
01004     }
01005 
01006     return TRUE;
01007 }
01008 
01009 # endif /* KMP_OS_WINDOWS */
01010 #endif /* GUIDEDLL_EXPORTS
01011 
01012 
01013 /* ------------------------------------------------------------------------ */
01014 
01015 /* Change the library type to "status" and return the old type */
01016 /* called from within initialization routines where __kmp_initz_lock is held */
01017 int
01018 __kmp_change_library( int status )
01019 {
01020     int old_status;
01021 
01022     old_status = __kmp_yield_init & 1;  // check whether KMP_LIBRARY=throughput (even init count)
01023 
01024     if (status) {
01025         __kmp_yield_init |= 1;  // throughput => turnaround (odd init count)
01026     }
01027     else {
01028         __kmp_yield_init &= ~1; // turnaround => throughput (even init count)
01029     }
01030 
01031     return old_status;  // return previous setting of whether KMP_LIBRARY=throughput
01032 }
01033 
01034 /* ------------------------------------------------------------------------ */
01035 /* ------------------------------------------------------------------------ */
01036 
01037 /* __kmp_parallel_deo --
01038  * Wait until it's our turn.
01039  */
01040 void
01041 __kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
01042 {
01043     int gtid = *gtid_ref;
01044 #ifdef BUILD_PARALLEL_ORDERED
01045     kmp_team_t *team = __kmp_team_from_gtid( gtid );
01046 #endif /* BUILD_PARALLEL_ORDERED */
01047 
01048     if( __kmp_env_consistency_check ) {
01049         if( __kmp_threads[gtid] -> th.th_root -> r.r_active )
01050             __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
01051     }
01052 #ifdef BUILD_PARALLEL_ORDERED
01053     if( !team -> t.t_serialized ) {
01054         kmp_uint32  spins;
01055 
01056         KMP_MB();
01057         KMP_WAIT_YIELD(&team -> t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
01058         KMP_MB();
01059     }
01060 #endif /* BUILD_PARALLEL_ORDERED */
01061 }
01062 
01063 /* __kmp_parallel_dxo --
01064  * Signal the next task.
01065  */
01066 
01067 void
01068 __kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
01069 {
01070     int gtid = *gtid_ref;
01071 #ifdef BUILD_PARALLEL_ORDERED
01072     int tid =  __kmp_tid_from_gtid( gtid );
01073     kmp_team_t *team = __kmp_team_from_gtid( gtid );
01074 #endif /* BUILD_PARALLEL_ORDERED */
01075 
01076     if( __kmp_env_consistency_check ) {
01077         if( __kmp_threads[gtid] -> th.th_root -> r.r_active )
01078             __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
01079     }
01080 #ifdef BUILD_PARALLEL_ORDERED
01081     if ( ! team -> t.t_serialized ) {
01082         KMP_MB();       /* Flush all pending memory write invalidates.  */
01083 
01084         /* use the tid of the next thread in this team */
01085         /* TODO repleace with general release procedure */
01086         team -> t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );
01087 
01088 #if OMPT_SUPPORT
01089     if ((ompt_status == ompt_status_track_callback) &&
01090         (ompt_callbacks.ompt_callback(ompt_event_release_ordered))) {
01091       /* accept blame for "ordered" waiting */
01092       kmp_info_t *this_thread = __kmp_threads[gtid];
01093       ompt_callbacks.ompt_callback(ompt_event_release_ordered)
01094         (this_thread->th.ompt_thread_info.wait_id);
01095     }
01096 #endif 
01097 
01098         KMP_MB();       /* Flush all pending memory write invalidates.  */
01099     }
01100 #endif /* BUILD_PARALLEL_ORDERED */
01101 }
01102 
01103 /* ------------------------------------------------------------------------ */
01104 /* ------------------------------------------------------------------------ */
01105 
01106 /* ------------------------------------------------------------------------ */
01107 /* ------------------------------------------------------------------------ */
01108 
01109 /* The BARRIER for a SINGLE process section is always explicit   */
01110 
01111 int
01112 __kmp_enter_single( int gtid, ident_t *id_ref, int push_ws )
01113 {
01114     int status;
01115     kmp_info_t *th;
01116     kmp_team_t *team;
01117 
01118     if( ! TCR_4(__kmp_init_parallel) )
01119         __kmp_parallel_initialize();
01120 
01121     th   = __kmp_threads[ gtid ];
01122     team = th -> th.th_team;
01123     status = 0;
01124 
01125     th->th.th_ident = id_ref;
01126 
01127     if ( team -> t.t_serialized ) {
01128         status = 1;
01129     } else {
01130         kmp_int32 old_this = th->th.th_local.this_construct;
01131 
01132         ++th->th.th_local.this_construct;
01133         /* try to set team count to thread count--success means thread got the
01134            single block
01135         */
01136         /* TODO: Should this be acquire or release? */
01137         status = KMP_COMPARE_AND_STORE_ACQ32(&team -> t.t_construct, old_this,
01138                                              th->th.th_local.this_construct);
01139     }
01140 
01141     if( __kmp_env_consistency_check ) {
01142         if (status && push_ws) {
01143             __kmp_push_workshare( gtid, ct_psingle, id_ref );
01144         } else {
01145             __kmp_check_workshare( gtid, ct_psingle, id_ref );
01146         }
01147     }
01148     return status;
01149 }
01150 
01151 void
01152 __kmp_exit_single( int gtid )
01153 {
01154     if( __kmp_env_consistency_check )
01155         __kmp_pop_workshare( gtid, ct_psingle, NULL );
01156 }
01157 
01158 
01159 /* ------------------------------------------------------------------------ */
01160 /* ------------------------------------------------------------------------ */
01161 
01162 static void
01163 __kmp_linear_barrier_gather( enum barrier_type bt,
01164                              kmp_info_t *this_thr, 
01165                              int gtid,
01166                              int tid, 
01167                              void (*reduce)(void *, void *)
01168                              )
01169 {
01170     register kmp_team_t    *team          = this_thr -> th.th_team;
01171     register kmp_bstate_t  *thr_bar       = & this_thr -> th.th_bar[ bt ].bb;
01172     register kmp_info_t   **other_threads = team -> t.t_threads;
01173 
01174     KA_TRACE( 20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
01175                    gtid, team->t.t_id, tid, bt ) );
01176 
01177     KMP_DEBUG_ASSERT( this_thr == other_threads[this_thr->th.th_info.ds.ds_tid] );
01178 
01179     /*
01180      * We now perform a linear reduction to signal that all
01181      * of the threads have arrived.
01182      *
01183      * Collect all the worker team member threads.
01184      */
01185     if ( ! KMP_MASTER_TID( tid )) {
01186 
01187         KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"
01188                         "arrived(%p): %u => %u\n",
01189                         gtid, team->t.t_id, tid,
01190                         __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
01191                         &thr_bar -> b_arrived, thr_bar -> b_arrived,
01192                         thr_bar -> b_arrived + KMP_BARRIER_STATE_BUMP
01193                       ) );
01194 
01195         /* mark arrival to master thread */
01196         //
01197         // After performing this write, a worker thread may not assume that
01198         // the team is valid any more - it could be deallocated by the master
01199         // thread at any time.
01200         //
01201         __kmp_release( other_threads[0], &thr_bar -> b_arrived, kmp_release_fence );
01202 
01203     } else {
01204         register kmp_balign_team_t *team_bar  = & team -> t.t_bar[ bt ];
01205         register int                nproc     = this_thr -> th.th_team_nproc;
01206         register int                i;
01207         register kmp_uint           new_state;
01208 
01209         /* Don't have to worry about sleep bit here or atomic since team setting */
01210         new_state = team_bar -> b_arrived + KMP_BARRIER_STATE_BUMP;
01211 
01212         /* Collect all the worker team member threads. */
01213         for (i = 1; i < nproc; i++) {
01214 #if KMP_CACHE_MANAGE
01215             /* prefetch next thread's arrived count */
01216             if ( i+1 < nproc )
01217                 KMP_CACHE_PREFETCH( &other_threads[ i+1 ] -> th.th_bar[ bt ].bb.b_arrived );
01218 #endif /* KMP_CACHE_MANAGE */
01219             KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
01220                             "arrived(%p) == %u\n",
01221                             gtid, team->t.t_id, tid,
01222                             __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
01223                             &other_threads[i] -> th.th_bar[ bt ].bb.b_arrived,
01224                             new_state ) );
01225 
01226             /* wait for worker thread to arrive */
01227             __kmp_wait_sleep( this_thr,
01228                               & other_threads[ i ] -> th.th_bar[ bt ].bb.b_arrived,
01229                               new_state, FALSE
01230                               );
01231 
01232             if (reduce) {
01233 
01234                 KA_TRACE( 100, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
01235                                  gtid, team->t.t_id, tid,
01236                                  __kmp_gtid_from_tid( i, team ), team->t.t_id, i ) );
01237 
01238                 (*reduce)( this_thr -> th.th_local.reduce_data,
01239                            other_threads[ i ] -> th.th_local.reduce_data );
01240 
01241             }
01242 
01243         }
01244 
01245         /* Don't have to worry about sleep bit here or atomic since team setting */
01246         team_bar -> b_arrived = new_state;
01247         KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d "
01248                         "arrived(%p) = %u\n",
01249                         gtid, team->t.t_id, tid, team->t.t_id,
01250                         &team_bar -> b_arrived, new_state ) );
01251     }
01252 
01253     KA_TRACE( 20, ( "__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
01254                     gtid, team->t.t_id, tid, bt ) );
01255 }
01256 
01257 
01258 static void
01259 __kmp_tree_barrier_gather( enum barrier_type bt, 
01260                            kmp_info_t *this_thr, 
01261                            int gtid, 
01262                            int tid,
01263                            void (*reduce) (void *, void *)
01264                            )
01265 {
01266     register kmp_team_t    *team          = this_thr -> th.th_team;
01267     register kmp_bstate_t  *thr_bar       = & this_thr -> th.th_bar[ bt ].bb;
01268     register kmp_info_t   **other_threads = team -> t.t_threads;
01269     register kmp_uint32     nproc         = this_thr -> th.th_team_nproc;
01270     register kmp_uint32     branch_bits   = __kmp_barrier_gather_branch_bits[ bt ];
01271     register kmp_uint32     branch_factor = 1 << branch_bits ;
01272     register kmp_uint32     child;
01273     register kmp_int32      child_tid;
01274     register kmp_uint       new_state;
01275 
01276     KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
01277                     gtid, team->t.t_id, tid, bt ) );
01278 
01279     KMP_DEBUG_ASSERT( this_thr == other_threads[this_thr->th.th_info.ds.ds_tid] );
01280 
01281     /*
01282      * We now perform a tree gather to wait until all
01283      * of the threads have arrived, and reduce any required data
01284      * as we go.
01285      */
01286 
01287     child_tid = (tid << branch_bits) + 1;
01288 
01289     if ( child_tid < nproc ) {
01290 
01291         /* parent threads wait for all their children to arrive */
01292         new_state = team -> t.t_bar[ bt ].b_arrived + KMP_BARRIER_STATE_BUMP;
01293         child = 1;
01294 
01295         do {
01296             register kmp_info_t   *child_thr = other_threads[ child_tid ];
01297             register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
01298 #if KMP_CACHE_MANAGE
01299             /* prefetch next thread's arrived count */
01300             if ( child+1 <= branch_factor && child_tid+1 < nproc )
01301                 KMP_CACHE_PREFETCH( &other_threads[ child_tid+1 ] -> th.th_bar[ bt ].bb.b_arrived );
01302 #endif /* KMP_CACHE_MANAGE */
01303             KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
01304                             "arrived(%p) == %u\n",
01305                             gtid, team->t.t_id, tid,
01306                             __kmp_gtid_from_tid( child_tid, team ), team->t.t_id, child_tid,
01307                             &child_bar -> b_arrived, new_state ) );
01308 
01309             /* wait for child to arrive */
01310             __kmp_wait_sleep( this_thr, &child_bar -> b_arrived, new_state, FALSE
01311                               );
01312 
01313             if (reduce) {
01314 
01315                 KA_TRACE( 100, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
01316                                  gtid, team->t.t_id, tid,
01317                                  __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
01318                                  child_tid ) );
01319 
01320                 (*reduce)( this_thr -> th.th_local.reduce_data,
01321                            child_thr -> th.th_local.reduce_data );
01322 
01323             }
01324 
01325             child++;
01326             child_tid++;
01327         }
01328         while ( child <= branch_factor && child_tid < nproc );
01329     }
01330 
01331     if ( !KMP_MASTER_TID(tid) ) {
01332         /* worker threads */
01333         register kmp_int32 parent_tid = (tid - 1) >> branch_bits;
01334 
01335         KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
01336                         "arrived(%p): %u => %u\n",
01337                         gtid, team->t.t_id, tid,
01338                         __kmp_gtid_from_tid( parent_tid, team ), team->t.t_id, parent_tid,
01339                         &thr_bar -> b_arrived, thr_bar -> b_arrived,
01340                         thr_bar -> b_arrived + KMP_BARRIER_STATE_BUMP
01341                       ) );
01342 
01343         /* mark arrival to parent thread */
01344         //
01345         // After performing this write, a worker thread may not assume that
01346         // the team is valid any more - it could be deallocated by the master
01347         // thread at any time.
01348         //
01349         __kmp_release( other_threads[parent_tid], &thr_bar -> b_arrived, kmp_release_fence );
01350 
01351     } else {
01352         /* Need to update the team arrived pointer if we are the master thread */
01353 
01354         if ( nproc > 1 )
01355             /* New value was already computed in above loop */
01356             team -> t.t_bar[ bt ].b_arrived = new_state;
01357         else
01358             team -> t.t_bar[ bt ].b_arrived += KMP_BARRIER_STATE_BUMP;
01359 
01360         KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %u\n",
01361                         gtid, team->t.t_id, tid, team->t.t_id,
01362                         &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived ) );
01363     }
01364 
01365     KA_TRACE( 20, ( "__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
01366                     gtid, team->t.t_id, tid, bt ) );
01367 }
01368 
01369 
01370 static void
01371 __kmp_hyper_barrier_gather( enum barrier_type bt, 
01372                             kmp_info_t *this_thr, 
01373                             int gtid, 
01374                             int tid,
01375                             void (*reduce) (void *, void *)
01376                             )
01377 {
01378     register kmp_team_t    *team          = this_thr -> th.th_team;
01379     register kmp_bstate_t  *thr_bar       = & this_thr -> th.th_bar[ bt ].bb;
01380     register kmp_info_t   **other_threads = team -> t.t_threads;
01381     register kmp_uint       new_state     = KMP_BARRIER_UNUSED_STATE;
01382     register kmp_uint32     num_threads   = this_thr -> th.th_team_nproc;
01383     register kmp_uint32     branch_bits   = __kmp_barrier_gather_branch_bits[ bt ];
01384     register kmp_uint32     branch_factor = 1 << branch_bits ;
01385     register kmp_uint32     offset;
01386     register kmp_uint32     level;
01387 
01388     KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
01389                     gtid, team->t.t_id, tid, bt ) );
01390 
01391     KMP_DEBUG_ASSERT( this_thr == other_threads[this_thr->th.th_info.ds.ds_tid] );
01392 
01393     /*
01394      * We now perform a hypercube-embedded tree gather to wait until all
01395      * of the threads have arrived, and reduce any required data
01396      * as we go.
01397      */
01398 
01399     for ( level=0, offset =1;
01400           offset < num_threads;
01401           level += branch_bits, offset <<= branch_bits )
01402     {
01403         register kmp_uint32     child;
01404         register kmp_int32  child_tid;
01405 
01406         if ( ((tid >> level) & (branch_factor - 1)) != 0 ) {
01407             register kmp_int32 parent_tid = tid & ~( (1 << (level + branch_bits)) -1 );
01408 
01409             KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
01410                             "arrived(%p): %u => %u\n",
01411                             gtid, team->t.t_id, tid,
01412                             __kmp_gtid_from_tid( parent_tid, team ), team->t.t_id, parent_tid,
01413                             &thr_bar -> b_arrived, thr_bar -> b_arrived,
01414                             thr_bar -> b_arrived + KMP_BARRIER_STATE_BUMP
01415                           ) );
01416 
01417             /* mark arrival to parent thread */
01418             //
01419             // After performing this write (in the last iteration of the
01420             // enclosing for loop), a worker thread may not assume that the
01421             // team is valid any more - it could be deallocated by the master
01422             // thread at any time.
01423             //
01424             __kmp_release( other_threads[parent_tid], &thr_bar -> b_arrived, kmp_release_fence );
01425             break;
01426         }
01427 
01428         /* parent threads wait for children to arrive */
01429 
01430         for ( child = 1, child_tid = tid + (1 << level);
01431               child < branch_factor && child_tid < num_threads;
01432               child++, child_tid += (1 << level) )
01433         {
01434             register kmp_info_t   *child_thr = other_threads[ child_tid ];
01435             register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
01436 #if KMP_CACHE_MANAGE
01437             register kmp_uint32 next_child_tid = child_tid + (1 << level);
01438             /* prefetch next thread's arrived count */
01439             if ( child+1 < branch_factor && next_child_tid < num_threads )
01440                 KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ] -> th.th_bar[ bt ].bb.b_arrived );
01441 #endif /* KMP_CACHE_MANAGE */
01442             /* Only read this arrived flag once per thread that needs it */
01443             if (new_state == KMP_BARRIER_UNUSED_STATE)
01444                 new_state = team -> t.t_bar[ bt ].b_arrived + KMP_BARRIER_STATE_BUMP;
01445 
01446             KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
01447                             "arrived(%p) == %u\n",
01448                             gtid, team->t.t_id, tid,
01449                             __kmp_gtid_from_tid( child_tid, team ), team->t.t_id, child_tid,
01450                             &child_bar -> b_arrived, new_state ) );
01451 
01452             /* wait for child to arrive */
01453             __kmp_wait_sleep( this_thr, &child_bar -> b_arrived, new_state, FALSE
01454                               );
01455 
01456             if (reduce) {
01457 
01458                 KA_TRACE( 100, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
01459                                  gtid, team->t.t_id, tid,
01460                                  __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
01461                                  child_tid ) );
01462 
01463                 (*reduce)( this_thr -> th.th_local.reduce_data,
01464                            child_thr -> th.th_local.reduce_data );
01465 
01466             }
01467         }
01468     }
01469 
01470 
01471     if ( KMP_MASTER_TID(tid) ) {
01472         /* Need to update the team arrived pointer if we are the master thread */
01473 
01474         if (new_state == KMP_BARRIER_UNUSED_STATE)
01475             team -> t.t_bar[ bt ].b_arrived += KMP_BARRIER_STATE_BUMP;
01476         else
01477             team -> t.t_bar[ bt ].b_arrived = new_state;
01478 
01479         KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %u\n",
01480                         gtid, team->t.t_id, tid, team->t.t_id,
01481                         &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived ) );
01482     }
01483 
01484     KA_TRACE( 20, ( "__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
01485                     gtid, team->t.t_id, tid, bt ) );
01486 
01487 }
01488 
01489 static void
01490 __kmp_linear_barrier_release( enum barrier_type bt, 
01491                               kmp_info_t *this_thr, 
01492                               int gtid, 
01493                               int tid, 
01494                               int propagate_icvs
01495                               )
01496 {
01497     register kmp_bstate_t *thr_bar = &this_thr -> th.th_bar[ bt ].bb;
01498     register kmp_team_t *team;
01499 
01500     if (KMP_MASTER_TID( tid )) {
01501         register int i;
01502         register kmp_uint32 nproc = this_thr -> th.th_team_nproc;
01503         register kmp_info_t **other_threads;
01504 
01505         team = __kmp_threads[ gtid ]-> th.th_team;
01506         KMP_DEBUG_ASSERT( team != NULL );
01507         other_threads = team -> t.t_threads;
01508 
01509         KA_TRACE( 20, ( "__kmp_linear_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
01510           gtid, team->t.t_id, tid, bt ) );
01511 
01512         /* release all of the worker threads */
01513         if (nproc > 1) {
01514             for (i = 1; i < nproc; i++) {
01515 #if KMP_CACHE_MANAGE
01516                 /* prefetch next thread's go flag */
01517                 if( i+1 < nproc )
01518                     KMP_CACHE_PREFETCH( &other_threads[ i+1 ]-> th.th_bar[ bt ].bb.b_go );
01519 #endif /* KMP_CACHE_MANAGE */
01520 
01521 #if KMP_BARRIER_ICV_PUSH
01522                 if ( propagate_icvs ) {
01523                     __kmp_init_implicit_task( team->t.t_ident,
01524                       team->t.t_threads[i], team, i, FALSE );
01525                     copy_icvs( &team->t.t_implicit_task_taskdata[i].td_icvs,
01526                       &team->t.t_implicit_task_taskdata[0].td_icvs );
01527                 }
01528 #endif // KMP_BARRIER_ICV_PUSH
01529 
01530                 KA_TRACE( 20, ( "__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
01531                                 "go(%p): %u => %u\n",
01532                                 gtid, team->t.t_id, tid,
01533                                 other_threads[i]->th.th_info.ds.ds_gtid, team->t.t_id, i,
01534                                 &other_threads[i]->th.th_bar[bt].bb.b_go,
01535                                 other_threads[i]->th.th_bar[bt].bb.b_go,
01536                                 other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP
01537                                 ) );
01538 
01539                 __kmp_release( other_threads[ i ],
01540                                &other_threads[ i ]-> th.th_bar[ bt ].bb.b_go, kmp_acquire_fence );
01541             }
01542         }
01543     } else {
01544         /* Wait for the MASTER thread to release us */
01545 
01546         KA_TRACE( 20, ( "__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
01547           gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
01548 
01549         __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE
01550                           );
01551 
01552         //
01553         // early exit for reaping threads releasing forkjoin barrier
01554         //
01555         if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
01556             return;
01557 
01558         //
01559         // The worker thread may now assume that the team is valid.
01560         //
01561         #ifdef KMP_DEBUG
01562             tid = __kmp_tid_from_gtid( gtid );
01563             team = __kmp_threads[ gtid ]-> th.th_team;
01564         #endif
01565         KMP_DEBUG_ASSERT( team != NULL );
01566 
01567         TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
01568         KA_TRACE( 20, ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
01569           gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
01570 
01571         KMP_MB();       /* Flush all pending memory write invalidates.  */
01572     }
01573 
01574     KA_TRACE( 20, ( "__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
01575       gtid, team->t.t_id, tid, bt ) );
01576 }
01577 
01578 
01579 static void
01580 __kmp_tree_barrier_release( enum barrier_type bt, 
01581                             kmp_info_t *this_thr, 
01582                             int gtid,
01583                             int tid, 
01584                             int propagate_icvs
01585                             )
01586 {
01587     /* handle fork barrier workers who aren't part of a team yet */
01588     register kmp_team_t    *team;
01589     register kmp_bstate_t  *thr_bar       = & this_thr -> th.th_bar[ bt ].bb;
01590     register kmp_uint32     nproc;
01591     register kmp_uint32     branch_bits   = __kmp_barrier_release_branch_bits[ bt ];
01592     register kmp_uint32     branch_factor = 1 << branch_bits ;
01593     register kmp_uint32     child;
01594     register kmp_int32      child_tid;
01595 
01596     /*
01597      * We now perform a tree release for all
01598      * of the threads that have been gathered
01599      */
01600 
01601     if ( ! KMP_MASTER_TID( tid )) {
01602         /* worker threads */
01603 
01604         KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n",
01605           gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
01606 
01607         /* wait for parent thread to release us */
01608         __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE
01609                           );
01610 
01611         //
01612         // early exit for reaping threads releasing forkjoin barrier
01613         //
01614         if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
01615             return;
01616 
01617         //
01618         // The worker thread may now assume that the team is valid.
01619         //
01620         team = __kmp_threads[ gtid ]-> th.th_team;
01621         KMP_DEBUG_ASSERT( team != NULL );
01622         tid = __kmp_tid_from_gtid( gtid );
01623 
01624         TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
01625         KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
01626           gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
01627 
01628         KMP_MB();       /* Flush all pending memory write invalidates.  */
01629 
01630     } else {
01631         team = __kmp_threads[ gtid ]-> th.th_team;
01632         KMP_DEBUG_ASSERT( team != NULL );
01633 
01634         KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
01635           gtid, team->t.t_id, tid, bt ) );
01636     }
01637 
01638 
01639     nproc     = this_thr -> th.th_team_nproc;
01640     child_tid = ( tid << branch_bits ) + 1;
01641 
01642     if ( child_tid < nproc ) {
01643         register kmp_info_t **other_threads = team -> t.t_threads;
01644         child = 1;
01645         /* parent threads release all their children */
01646 
01647         do {
01648             register kmp_info_t   *child_thr = other_threads[ child_tid ];
01649             register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
01650 #if KMP_CACHE_MANAGE
01651             /* prefetch next thread's go count */
01652             if ( child+1 <= branch_factor && child_tid+1 < nproc )
01653                 KMP_CACHE_PREFETCH( &other_threads[ child_tid+1 ] -> th.th_bar[ bt ].bb.b_go );
01654 #endif /* KMP_CACHE_MANAGE */
01655 
01656 #if KMP_BARRIER_ICV_PUSH
01657             if ( propagate_icvs ) {
01658                 __kmp_init_implicit_task( team->t.t_ident,
01659                   team->t.t_threads[child_tid], team, child_tid, FALSE );
01660                 copy_icvs( &team->t.t_implicit_task_taskdata[child_tid].td_icvs,
01661                   &team->t.t_implicit_task_taskdata[0].td_icvs );
01662             }
01663 #endif // KMP_BARRIER_ICV_PUSH
01664 
01665             KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)"
01666                             "go(%p): %u => %u\n",
01667                             gtid, team->t.t_id, tid,
01668                             __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
01669                             child_tid, &child_bar -> b_go, child_bar -> b_go,
01670                             child_bar -> b_go + KMP_BARRIER_STATE_BUMP ) );
01671 
01672             /* release child from barrier */
01673             __kmp_release( child_thr, &child_bar -> b_go, kmp_acquire_fence );
01674 
01675             child++;
01676             child_tid++;
01677         }
01678         while ( child <= branch_factor && child_tid < nproc );
01679     }
01680 
01681     KA_TRACE( 20, ( "__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
01682       gtid, team->t.t_id, tid, bt ) );
01683 }
01684 
01685 /* The reverse versions seem to beat the forward versions overall */
01686 #define KMP_REVERSE_HYPER_BAR
01687 #ifdef KMP_REVERSE_HYPER_BAR
01688 static void
01689 __kmp_hyper_barrier_release( enum barrier_type bt, 
01690                              kmp_info_t *this_thr, 
01691                              int gtid,
01692                              int tid, 
01693                              int propagate_icvs
01694                              )
01695 {
01696     /* handle fork barrier workers who aren't part of a team yet */
01697     register kmp_team_t    *team;
01698     register kmp_bstate_t  *thr_bar       = & this_thr -> th.th_bar[ bt ].bb;
01699     register kmp_info_t   **other_threads;
01700     register kmp_uint32     num_threads;
01701     register kmp_uint32     branch_bits   = __kmp_barrier_release_branch_bits[ bt ];
01702     register kmp_uint32     branch_factor = 1 << branch_bits;
01703     register kmp_uint32     child;
01704     register kmp_int32      child_tid;
01705     register kmp_uint32     offset;
01706     register kmp_uint32     level;
01707 
01708     /*
01709      * We now perform a hypercube-embedded tree release for all
01710      * of the threads that have been gathered, but in the exact
01711      * reverse order from the corresponding gather (for load balance.
01712      */
01713 
01714     if ( ! KMP_MASTER_TID( tid )) {
01715         /* worker threads */
01716 
01717         KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n",
01718           gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
01719 
01720         /* wait for parent thread to release us */
01721         __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE
01722                           );
01723 
01724         //
01725         // early exit for reaping threads releasing forkjoin barrier
01726         //
01727         if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
01728             return;
01729 
01730         //
01731         // The worker thread may now assume that the team is valid.
01732         //
01733         team = __kmp_threads[ gtid ]-> th.th_team;
01734         KMP_DEBUG_ASSERT( team != NULL );
01735         tid = __kmp_tid_from_gtid( gtid );
01736 
01737         TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
01738         KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
01739           gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
01740 
01741         KMP_MB();       /* Flush all pending memory write invalidates.  */
01742 
01743     } else {  /* KMP_MASTER_TID(tid) */
01744         team = __kmp_threads[ gtid ]-> th.th_team;
01745         KMP_DEBUG_ASSERT( team != NULL );
01746 
01747         KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
01748           gtid, team->t.t_id, tid, bt ) );
01749     }
01750 
01751     num_threads = this_thr -> th.th_team_nproc;
01752     other_threads = team -> t.t_threads;
01753 
01754     /* count up to correct level for parent */
01755     for ( level = 0, offset = 1;
01756           offset < num_threads && (((tid >> level) & (branch_factor-1)) == 0);
01757           level += branch_bits, offset <<= branch_bits );
01758 
01759     /* now go down from there */
01760     for ( level -= branch_bits, offset >>= branch_bits;
01761           offset != 0;
01762           level -= branch_bits, offset >>= branch_bits )
01763     {
01764         register kmp_uint32 child;
01765         register kmp_int32  child_tid;
01766 
01767         /* Now go in reverse order through the children, highest to lowest.
01768            Initial setting of child is conservative here. */
01769         child = num_threads >> ((level==0)?level:level-1);
01770         for ( child = (child < branch_factor-1) ? child : branch_factor-1,
01771                   child_tid = tid + (child << level);
01772               child >= 1;
01773               child--, child_tid -= (1 << level) )
01774         {
01775 
01776             if ( child_tid >= num_threads ) continue;   /* child doesn't exist so keep going */
01777             else {
01778                 register kmp_info_t   *child_thr = other_threads[ child_tid ];
01779                 register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
01780 #if KMP_CACHE_MANAGE
01781                 register kmp_uint32 next_child_tid = child_tid - (1 << level);
01782                 /* prefetch next thread's go count */
01783                 if ( child-1 >= 1 && next_child_tid < num_threads )
01784                     KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ]->th.th_bar[ bt ].bb.b_go );
01785 #endif /* KMP_CACHE_MANAGE */
01786 
01787 #if KMP_BARRIER_ICV_PUSH
01788                 if ( propagate_icvs ) {
01789                     KMP_DEBUG_ASSERT( team != NULL );
01790                     __kmp_init_implicit_task( team->t.t_ident,
01791                       team->t.t_threads[child_tid], team, child_tid, FALSE );
01792                     copy_icvs( &team->t.t_implicit_task_taskdata[child_tid].td_icvs,
01793                       &team->t.t_implicit_task_taskdata[0].td_icvs );
01794                 }
01795 #endif // KMP_BARRIER_ICV_PUSH
01796                 KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)"
01797                                 "go(%p): %u => %u\n",
01798                                 gtid, team->t.t_id, tid,
01799                                 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
01800                                 child_tid, &child_bar -> b_go, child_bar -> b_go,
01801                                 child_bar -> b_go + KMP_BARRIER_STATE_BUMP ) );
01802 
01803                 /* release child from barrier */
01804                 __kmp_release( child_thr, &child_bar -> b_go, kmp_acquire_fence );
01805             }
01806         }
01807     }
01808 
01809     KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
01810       gtid, team->t.t_id, tid, bt ) );
01811 }
01812 
01813 #else /* !KMP_REVERSE_HYPER_BAR */
01814 
01815 static void
01816 __kmp_hyper_barrier_release( enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, int propagate_icvs )
01817 {
01818     /* handle fork barrier workers who aren't part of a team yet */
01819     register kmp_team_t    *team;
01820     register kmp_bstate_t  *thr_bar       = & this_thr -> th.th_bar[ bt ].bb;
01821     register kmp_info_t   **other_threads;
01822     register kmp_uint32     num_threads;
01823     register kmp_uint32     branch_bits   = __kmp_barrier_release_branch_bits[ bt ];
01824     register kmp_uint32     branch_factor = 1 << branch_bits;
01825     register kmp_uint32     child;
01826     register kmp_int32      child_tid;
01827     register kmp_uint32     offset;
01828     register kmp_uint32     level;
01829 
01830     /*
01831      * We now perform a hypercube-embedded tree release for all
01832      * of the threads that have been gathered, but in the same order
01833      * as the gather.
01834      */
01835 
01836     if ( ! KMP_MASTER_TID( tid )) {
01837         /* worker threads */
01838 
01839         KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n",
01840           gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
01841 
01842         /* wait for parent thread to release us */
01843         __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE, NULL );
01844 
01845         //
01846         // early exit for reaping threads releasing forkjoin barrier
01847         //
01848         if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
01849             return;
01850 
01851         //
01852         // The worker thread may now assume that the team is valid.
01853         //
01854         team = __kmp_threads[ gtid ]-> th.th_team;
01855         KMP_DEBUG_ASSERT( team != NULL );
01856         tid = __kmp_tid_from_gtid( gtid );
01857 
01858         TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
01859         KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
01860                         gtid, ( team != NULL ) ? team->t.t_id : -1, tid,
01861                         &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
01862 
01863         KMP_MB();       /* Flush all pending memory write invalidates.  */
01864 
01865     } else {  /* KMP_MASTER_TID(tid) */
01866         team = __kmp_threads[ gtid ]-> th.th_team;
01867         KMP_DEBUG_ASSERT( team != NULL );
01868 
01869         KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) enter for barrier type %d\n",
01870           gtid, team->t.t_id, tid, bt ) );
01871     }
01872 
01873     /* Now set up team parameters since workers have been released */
01874     if ( team == NULL )  {
01875         /* handle fork barrier workers who are now part of a team */
01876         tid = __kmp_tid_from_gtid( gtid );
01877         team = __kmp_threads[ gtid ]-> th.th_team;
01878     }
01879     num_threads = this_thr -> th.th_team_nproc;
01880     other_threads = team -> t.t_threads;
01881 
01882     /* Go down the tree, level by level */
01883     for ( level = 0, offset = 1;
01884           offset < num_threads;
01885           level += branch_bits, offset <<= branch_bits )
01886     {
01887         register kmp_uint32 child;
01888         register kmp_int32  child_tid;
01889 
01890         if (((tid >> level) & (branch_factor - 1)) != 0)
01891             /* No need to go any lower than this, since this is the level
01892                parent would be notified */
01893             break;
01894 
01895         /* iterate through children on this level of the tree */
01896         for ( child = 1, child_tid = tid + (1 << level);
01897               child < branch_factor && child_tid < num_threads;
01898               child++, child_tid += (1 << level) )
01899         {
01900             register kmp_info_t   *child_thr = other_threads[ child_tid ];
01901             register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
01902 #if KMP_CACHE_MANAGE
01903             {
01904                 register kmp_uint32 next_child_tid = child_tid + (1 << level);
01905                 /* prefetch next thread's go count */
01906                 if ( child+1 < branch_factor && next_child_tid < num_threads )
01907                     KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ]->th.th_bar[ bt ].bb.b_go );
01908             }
01909 #endif /* KMP_CACHE_MANAGE */
01910 
01911 #if KMP_BARRIER_ICV_PUSH
01912             if ( propagate_icvs ) {
01913                 KMP_DEBUG_ASSERT( team != NULL );
01914                 __kmp_init_implicit_task( team->t.t_ident,
01915                   team->t.t_threads[child_tid], team, child_tid, FALSE );
01916                 copy_icvs( &team->t.t_implicit_task_taskdata[child_tid].td_icvs,
01917                   &team->t.t_implicit_task_taskdata[0].td_icvs );
01918             }
01919 #endif // KMP_BARRIER_ICV_PUSH
01920 
01921             KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) releasing "
01922                             "T#%d(%d:%d) go(%p): %u => %u\n",
01923                             gtid, team->t.t_id, tid,
01924                             __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
01925                             child_tid, &child_bar -> b_go, child_bar -> b_go,
01926                             child_bar -> b_go + KMP_BARRIER_STATE_BUMP ) );
01927 
01928             /* release child from barrier */
01929             __kmp_release( child_thr, &child_bar -> b_go, kmp_acquire_fence );
01930         }
01931     }
01932 
01933     KA_TRACE( 20, ( "__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
01934       gtid, team->t.t_id, tid, bt ) );
01935 }
01936 #endif /* KMP_REVERSE_HYPER_BAR */
01937 
01938 
01939 /*
01940  * Internal function to do a barrier.
01941  * If is_split is true, do a split barrier, otherwise, do a plain barrier
01942  * If reduce is non-NULL, do a split reduction barrier, otherwise, do a split barrier
01943  * Returns 0 if master thread, 1 if worker thread.
01944  */
01945 int
01946 __kmp_barrier( enum barrier_type bt, int gtid, int is_split,
01947                size_t reduce_size, void *reduce_data, void (*reduce)(void *, void *) )
01948 {
01949     register int          tid             = __kmp_tid_from_gtid( gtid );
01950     register kmp_info_t  *this_thr        = __kmp_threads[ gtid ];
01951     register kmp_team_t  *team            = this_thr -> th.th_team;
01952     register int status = 0;
01953     ompt_task_id_t my_task_id;
01954     ompt_parallel_id_t my_parallel_id;
01955 
01956     KA_TRACE( 15, ( "__kmp_barrier: T#%d(%d:%d) has arrived\n",
01957                     gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid) ) );
01958 #if OMPT_SUPPORT
01959     if ((ompt_status & ompt_status_track)) {
01960       my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id;
01961       my_parallel_id = team->t.ompt_team_info.parallel_id;
01962       
01963       this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
01964       if (this_thr->th.ompt_thread_info.state == ompt_state_wait_single) {
01965     if ((ompt_status == ompt_status_track_callback)) {
01966       if (ompt_callbacks.ompt_callback(ompt_event_single_others_end)) {
01967         ompt_callbacks.ompt_callback(ompt_event_single_others_end)
01968               (my_parallel_id, my_task_id);
01969       }
01970     }
01971       }
01972       if ((ompt_status == ompt_status_track_callback) &&
01973       ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
01974     ompt_callbacks.ompt_callback(ompt_event_barrier_begin)
01975           (my_parallel_id, my_task_id);
01976       }
01977     }
01978 #endif
01979 
01980     if ( ! team->t.t_serialized ) {
01981         #if OMP_30_ENABLED
01982             if ( __kmp_tasking_mode == tskm_extra_barrier ) {
01983                 __kmp_tasking_barrier( team, this_thr, gtid );
01984                 KA_TRACE( 15, ( "__kmp_barrier: T#%d(%d:%d) past tasking barrier\n",
01985                                gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid) ) );
01986             }
01987         #endif /* OMP_30_ENABLED */
01988 
01989         //
01990         // Copy the blocktime info to the thread, where __kmp_wait_sleep()
01991         // can access it when the team struct is not guaranteed to exist.
01992         //
01993         // See the note about the corresponding code in __kmp_join_barrier()
01994         // being performance-critical.
01995         //
01996         if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
01997             #if OMP_30_ENABLED
01998                 this_thr -> th.th_team_bt_intervals = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
01999                 this_thr -> th.th_team_bt_set = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
02000             #else
02001                 this_thr -> th.th_team_bt_intervals = team -> t.t_set_bt_intervals[tid];
02002                 this_thr -> th.th_team_bt_set= team -> t.t_set_bt_set[tid];
02003             #endif // OMP_30_ENABLED
02004         }
02005 
02006 
02007         if ( reduce != NULL ) {
02008             //KMP_DEBUG_ASSERT( is_split == TRUE );  // #C69956
02009             this_thr -> th.th_local.reduce_data = reduce_data;
02010         }
02011         if ( __kmp_barrier_gather_pattern[ bt ] == bp_linear_bar || __kmp_barrier_gather_branch_bits[ bt ] == 0 ) {
02012             __kmp_linear_barrier_gather( bt, this_thr, gtid, tid, reduce
02013                                          );
02014         } else if ( __kmp_barrier_gather_pattern[ bt ] == bp_tree_bar ) {
02015             __kmp_tree_barrier_gather( bt, this_thr, gtid, tid, reduce
02016                                        );
02017         } else {
02018             __kmp_hyper_barrier_gather( bt, this_thr, gtid, tid, reduce
02019                                         );
02020         }; // if
02021 
02022 
02023         KMP_MB();
02024 
02025         if ( KMP_MASTER_TID( tid ) ) {
02026             status = 0;
02027 
02028             #if OMP_30_ENABLED
02029                 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
02030                     __kmp_task_team_wait(  this_thr, team
02031                                            );
02032                     __kmp_task_team_setup( this_thr, team );
02033                 }
02034             #endif /* OMP_30_ENABLED */
02035 
02036         } else {
02037             status = 1;
02038         }
02039         if ( status == 1 || ! is_split ) {
02040             if ( __kmp_barrier_release_pattern[ bt ] == bp_linear_bar || __kmp_barrier_release_branch_bits[ bt ] == 0 ) {
02041                 __kmp_linear_barrier_release( bt, this_thr, gtid, tid, FALSE
02042                                               );
02043             } else if ( __kmp_barrier_release_pattern[ bt ] == bp_tree_bar ) {
02044                 __kmp_tree_barrier_release( bt, this_thr, gtid, tid, FALSE
02045                                             );
02046             } else {
02047                 __kmp_hyper_barrier_release( bt, this_thr, gtid, tid, FALSE
02048                                              );
02049             }
02050             #if OMP_30_ENABLED
02051                 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
02052                     __kmp_task_team_sync( this_thr, team );
02053                 }
02054             #endif /* OMP_30_ENABLED */
02055         }
02056 
02057 
02058     } else {    // Team is serialized.
02059 
02060         status = 0;
02061 
02062         #if OMP_30_ENABLED
02063             if ( __kmp_tasking_mode != tskm_immediate_exec ) {
02064                 //
02065                 // The task team should be NULL for serialized code.
02066                 // (tasks will be executed immediately).
02067                 //
02068                 KMP_DEBUG_ASSERT( team->t.t_task_team == NULL );
02069                 KMP_DEBUG_ASSERT( this_thr->th.th_task_team == NULL );
02070             }
02071         #endif /* OMP_30_ENABLED */
02072     }
02073 
02074     KA_TRACE( 15, ( "__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
02075                     gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid),
02076                     status ) );
02077 #if OMPT_SUPPORT
02078     if (ompt_status & ompt_status_track) {
02079       if ((ompt_status == ompt_status_track_callback) &&
02080       ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
02081     ompt_callbacks.ompt_callback(ompt_event_barrier_end)
02082           (my_parallel_id, my_task_id);
02083       }
02084       this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
02085     }
02086 #endif
02087     return status;
02088 }
02089 
02090 
02091 void
02092 __kmp_end_split_barrier( enum barrier_type bt, int gtid )
02093 {
02094     int         tid      = __kmp_tid_from_gtid( gtid );
02095     kmp_info_t *this_thr = __kmp_threads[ gtid ];
02096     kmp_team_t *team     = this_thr -> th.th_team;
02097 
02098     if( ! team -> t.t_serialized ) {
02099         if( KMP_MASTER_GTID( gtid ) ) {
02100             if ( __kmp_barrier_release_pattern[ bt ] == bp_linear_bar || __kmp_barrier_release_branch_bits[ bt ] == 0 ) {
02101                 __kmp_linear_barrier_release( bt, this_thr, gtid, tid, FALSE
02102                                               );
02103             } else if ( __kmp_barrier_release_pattern[ bt ] == bp_tree_bar ) {
02104                 __kmp_tree_barrier_release( bt, this_thr, gtid, tid, FALSE
02105                                             );
02106             } else {
02107                 __kmp_hyper_barrier_release( bt, this_thr, gtid, tid, FALSE
02108                                              );
02109             }; // if
02110             #if OMP_30_ENABLED
02111                 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
02112                     __kmp_task_team_sync( this_thr, team );
02113                 }; // if
02114             #endif /* OMP_30_ENABLED */
02115         }
02116     }
02117 }
02118 
02119 /* ------------------------------------------------------------------------ */
02120 /* ------------------------------------------------------------------------ */
02121 
02122 /*
02123  * determine if we can go parallel or must use a serialized parallel region and
02124  * how many threads we can use
02125  * set_nproc is the number of threads requested for the team
02126  * returns 0 if we should serialize or only use one thread,
02127  * otherwise the number of threads to use
02128  * The forkjoin lock is held by the caller.
02129  */
02130 static int
02131 __kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
02132    int master_tid, int set_nthreads
02133 )
02134 {
02135     int capacity;
02136     int new_nthreads;
02137     int use_rml_to_adjust_nth;
02138     KMP_DEBUG_ASSERT( __kmp_init_serial );
02139     KMP_DEBUG_ASSERT( root && parent_team );
02140 
02141     //
02142     // Initial check to see if we should use a serialized team.
02143     //
02144     if ( set_nthreads == 1 ) {
02145         KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d reserving 1 thread; requested %d threads\n",
02146                         __kmp_get_gtid(), set_nthreads ));
02147         return 1;
02148     }
02149     if ( ( !get__nested_2(parent_team,master_tid) && root->r.r_in_parallel )
02150        || ( __kmp_library == library_serial ) ) {
02151         KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team; requested %d threads\n",
02152                         __kmp_get_gtid(), set_nthreads ));
02153         return 1;
02154     }
02155 
02156     //
02157     // If dyn-var is set, dynamically adjust the number of desired threads,
02158     // according to the method specified by dynamic_mode.
02159     //
02160     new_nthreads = set_nthreads;
02161     use_rml_to_adjust_nth = FALSE;
02162     if ( ! get__dynamic_2( parent_team, master_tid ) ) {
02163         ;
02164     }
02165 #ifdef USE_LOAD_BALANCE
02166     else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
02167         new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
02168         if ( new_nthreads == 1 ) {
02169             KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
02170               master_tid ));
02171             return 1;
02172         }
02173         if ( new_nthreads < set_nthreads ) {
02174             KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
02175               master_tid, new_nthreads ));
02176         }
02177     }
02178 #endif /* USE_LOAD_BALANCE */
02179     else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
02180         new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
02181           : root->r.r_hot_team->t.t_nproc);
02182         if ( new_nthreads <= 1 ) {
02183             KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
02184               master_tid ));
02185             return 1;
02186         }
02187         if ( new_nthreads < set_nthreads ) {
02188             KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
02189               master_tid, new_nthreads ));
02190         }
02191         else {
02192             new_nthreads = set_nthreads;
02193         }
02194     }
02195     else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
02196         if ( set_nthreads > 2 ) {
02197             new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
02198             new_nthreads = ( new_nthreads % set_nthreads ) + 1;
02199             if ( new_nthreads == 1 ) {
02200                 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
02201                   master_tid ));
02202                 return 1;
02203             }
02204             if ( new_nthreads < set_nthreads ) {
02205                 KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
02206                   master_tid, new_nthreads ));
02207             }
02208         }
02209     }
02210     else {
02211         KMP_ASSERT( 0 );
02212     }
02213 
02214     //
02215     // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT.
02216     //
02217     if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
02218       root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
02219         int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
02220           root->r.r_hot_team->t.t_nproc );
02221         if ( tl_nthreads <= 0 ) {
02222             tl_nthreads = 1;
02223         }
02224 
02225         //
02226         // If dyn-var is false, emit a 1-time warning.
02227         //
02228         if ( ! get__dynamic_2( parent_team, master_tid )
02229           && ( ! __kmp_reserve_warn ) ) {
02230             __kmp_reserve_warn = 1;
02231             __kmp_msg(
02232                 kmp_ms_warning,
02233                 KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
02234                 KMP_HNT( Unset_ALL_THREADS ),
02235                 __kmp_msg_null
02236             );
02237         }
02238         if ( tl_nthreads == 1 ) {
02239             KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
02240               master_tid ));
02241             return 1;
02242         }
02243         KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
02244           master_tid, tl_nthreads ));
02245         new_nthreads = tl_nthreads;
02246     }
02247 
02248 
02249     //
02250     // Check if the threads array is large enough, or needs expanding.
02251     //
02252     // See comment in __kmp_register_root() about the adjustment if
02253     // __kmp_threads[0] == NULL.
02254     //
02255     capacity = __kmp_threads_capacity;
02256     if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
02257         --capacity;
02258     }
02259     if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
02260       root->r.r_hot_team->t.t_nproc ) > capacity ) {
02261         //
02262         // Expand the threads array.
02263         //
02264         int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
02265           root->r.r_hot_team->t.t_nproc ) - capacity;
02266         int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
02267         if ( slotsAdded < slotsRequired ) {
02268             //
02269             // The threads array was not expanded enough.
02270             //
02271             new_nthreads -= ( slotsRequired - slotsAdded );
02272             KMP_ASSERT( new_nthreads >= 1 );
02273 
02274             //
02275             // If dyn-var is false, emit a 1-time warning.
02276             //
02277             if ( ! get__dynamic_2( parent_team, master_tid )
02278               && ( ! __kmp_reserve_warn ) ) {
02279                 __kmp_reserve_warn = 1;
02280                 if ( __kmp_tp_cached ) {
02281                     __kmp_msg(
02282                         kmp_ms_warning,
02283                         KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
02284                         KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
02285                         KMP_HNT( PossibleSystemLimitOnThreads ),
02286                         __kmp_msg_null
02287                     );
02288                 }
02289                 else {
02290                     __kmp_msg(
02291                         kmp_ms_warning,
02292                         KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
02293                         KMP_HNT( SystemLimitOnThreads ),
02294                         __kmp_msg_null
02295                     );
02296                 }
02297             }
02298         }
02299     }
02300 
02301     if ( new_nthreads == 1 ) {
02302         KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
02303                         __kmp_get_gtid(), set_nthreads ) );
02304         return 1;
02305     }
02306 
02307     KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
02308                     __kmp_get_gtid(), new_nthreads, set_nthreads ));
02309     return new_nthreads;
02310 }
02311 
02312 /* ------------------------------------------------------------------------ */
02313 /* ------------------------------------------------------------------------ */
02314 
02315 /* allocate threads from the thread pool and assign them to the new team */
02316 /* we are assured that there are enough threads available, because we
02317  * checked on that earlier within critical section forkjoin */
02318 
02319 static void
02320 __kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
02321                          kmp_info_t *master_th, int master_gtid )
02322 {
02323     int         i;
02324 
02325     KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
02326     KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );
02327     KMP_MB();
02328 
02329     /* first, let's setup the master thread */
02330     master_th -> th.th_info .ds.ds_tid = 0;
02331     master_th -> th.th_team            = team;
02332     master_th -> th.th_team_nproc      = team -> t.t_nproc;
02333     master_th -> th.th_team_master     = master_th;
02334     master_th -> th.th_team_serialized = FALSE;
02335     master_th -> th.th_dispatch        = & team -> t.t_dispatch[ 0 ];
02336 
02337     /* make sure we are not the optimized hot team */
02338     if ( team != root->r.r_hot_team ) {
02339 
02340         /* install the master thread */
02341         team -> t.t_threads[ 0 ]    = master_th;
02342         __kmp_initialize_info( master_th, team, 0, master_gtid );
02343 
02344         /* now, install the worker threads */
02345         for ( i=1 ;  i < team->t.t_nproc ; i++ ) {
02346 
02347             /* fork or reallocate a new thread and install it in team */
02348             team -> t.t_threads[ i ] =  __kmp_allocate_thread( root, team, i );
02349             KMP_DEBUG_ASSERT( team->t.t_threads[i] );
02350             KMP_DEBUG_ASSERT( team->t.t_threads[i]->th.th_team == team );
02351             /* align team and thread arrived states */
02352             KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%u, plain=%u\n",
02353                             __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
02354                             __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
02355                             team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
02356                             team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
02357 
02358             { // Initialize threads' barrier data.
02359                 int b;
02360                 kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
02361                 for ( b = 0; b < bs_last_barrier; ++ b ) {
02362                     balign[ b ].bb.b_arrived        = team->t.t_bar[ b ].b_arrived;
02363                 }; // for b
02364             }
02365         }
02366 
02367 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
02368         __kmp_partition_places( team );
02369 #endif
02370 
02371     }
02372 
02373     KMP_MB();
02374 }
02375 
02376 
02377 /* most of the work for a fork */
02378 /* return true if we really went parallel, false if serialized */
02379 int
02380 __kmp_fork_call(
02381     ident_t   * loc,
02382     int         gtid,
02383     int         exec_master, // 0 - GNU native code, master doesn't invoke microtask
02384                              // 1 - Intel code, master invokes microtask
02385                              // 2 - MS native code, use special invoker
02386     kmp_int32   argc,
02387     microtask_t microtask,
02388     launch_t    invoker,
02389 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
02390 #if KMP_ARCH_X86_64 && KMP_OS_LINUX
02391     va_list   * ap
02392 #else
02393     va_list     ap
02394 #endif
02395     )
02396 {
02397     void          **argv;
02398     int             i;
02399     int             master_tid;
02400     int             master_this_cons;
02401     int             master_last_cons;
02402     kmp_team_t     *team;
02403     kmp_team_t     *parent_team;
02404     kmp_info_t     *master_th;
02405     kmp_root_t     *root;
02406     int             nthreads;
02407     int             master_active;
02408     int             master_set_numthreads;
02409     int             level;
02410 
02411 #if OMPT_SUPPORT
02412     ompt_state_t prev_state;
02413 #endif
02414 
02415     KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
02416 
02417     /* initialize if needed */
02418     KMP_DEBUG_ASSERT( __kmp_init_serial );
02419     if( ! TCR_4(__kmp_init_parallel) )
02420         __kmp_parallel_initialize();
02421 
02422     /* setup current data */
02423     master_th     = __kmp_threads[ gtid ];
02424 
02425     parent_team   = master_th -> th.th_team;
02426     master_tid    = master_th -> th.th_info.ds.ds_tid;
02427 
02428 #if OMPT_SUPPORT
02429     ompt_parallel_id_t ompt_parallel_id = 
02430       __ompt_parallel_id_new(master_th, master_tid);
02431     ompt_task_id_t ompt_task_id = 0; // FIXME -- johnmc
02432     ompt_frame_t  *ompt_frame = 0; // FIXME -- johnmc
02433     if (ompt_status & ompt_status_track) {
02434       master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
02435       if ((ompt_status == ompt_status_track_callback) &&
02436       ompt_callbacks.ompt_callback(ompt_event_parallel_create)) {
02437     ompt_callbacks.ompt_callback(ompt_event_parallel_create)
02438       (ompt_task_id, ompt_frame,
02439        ompt_parallel_id, (void *) microtask);
02440       }
02441     }
02442 #endif
02443 
02444     master_this_cons = master_th -> th.th_local.this_construct;
02445     master_last_cons = master_th -> th.th_local.last_construct;
02446     root          = master_th -> th.th_root;
02447     master_active = root -> r.r_active;
02448     master_set_numthreads = master_th -> th.th_set_nproc;
02449 #if OMP_30_ENABLED
02450     // Nested level will be an index in the nested nthreads array
02451     level         = parent_team->t.t_level;
02452 #endif // OMP_30_ENABLED
02453 
02454 
02455     master_th->th.th_ident = loc;
02456 
02457 #if OMP_30_ENABLED && KMP_DEBUG
02458     if ( __kmp_tasking_mode != tskm_immediate_exec ) {
02459         KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team );
02460     }
02461 #endif // OMP_30_ENABLED
02462 
02463     /* determine how many new threads we can use */
02464     __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
02465 
02466 #if OMP_30_ENABLED
02467     if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
02468         nthreads = 1;
02469     }
02470     else
02471 #endif // OMP_30_ENABLED
02472 
02473     {
02474         nthreads = master_set_numthreads ?
02475             master_set_numthreads : get__nproc_2( parent_team, master_tid );
02476         nthreads = __kmp_reserve_threads( root, parent_team, master_tid, nthreads
02477         );
02478     }
02479     KMP_DEBUG_ASSERT( nthreads > 0 );
02480 
02481     /* If we temporarily changed the set number of threads then restore it now */
02482     master_th -> th.th_set_nproc = 0;
02483 
02484 
02485     /* create a serialized parallel region? */
02486     if ( nthreads == 1 ) {
02487         /* josh todo: hypothetical question: what do we do for OS X*? */
02488 #if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
02489         void *   args[ argc ];
02490 #else
02491         void * * args = (void**) alloca( argc * sizeof( void * ) );
02492 #endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 ) */
02493 
02494         __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
02495         KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
02496 
02497         __kmpc_serialized_parallel(loc, gtid);
02498 
02499         if ( exec_master == 0 ) {
02500             // we were called from GNU native code
02501             KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
02502             return FALSE;
02503         } else if ( exec_master == 1 ) {
02504             /* TODO this sucks, use the compiler itself to pass args! :) */
02505             argv = args;
02506             for( i=argc-1; i >= 0; --i )
02507             /* TODO: revert workaround for Intel(R) 64 tracker #96 */
02508             #if KMP_ARCH_X86_64 && KMP_OS_LINUX
02509                 *argv++ = va_arg( *ap, void * );
02510             #else
02511                 *argv++ = va_arg( ap, void * );
02512             #endif
02513             master_th -> th.th_serial_team -> t.t_ident =  loc;
02514             KMP_MB();
02515 
02516 #if OMPT_SUPPORT
02517         ompt_lw_taskteam_t lw_taskteam;
02518         void **exit_runtime_p = 
02519           &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame);
02520         if (ompt_status & ompt_status_track) {
02521           __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, microtask, 
02522                       ompt_parallel_id);
02523 
02524           /* OMPT state */
02525           master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
02526 
02527 #if 0
02528           /* OMPT event */
02529           if ((ompt_status & ompt_status_track_callback) &&
02530           ompt_callbacks.ompt_callback(ompt_event_parallel_create)) {
02531         ompt_callbacks.ompt_callback(ompt_event_parallel_create)
02532           (lw_taskteam.ompt_task_info.task_id,
02533            &lw_taskteam.ompt_task_info.frame,
02534            lw_taskteam.ompt_team_info.parallel_id,
02535            (void *) microtask
02536            );
02537           }
02538 #endif
02539           __ompt_lw_taskteam_link(&lw_taskteam, master_th);
02540         }
02541 #else
02542         void *dummy;
02543         void **exit_runtime_p = &dummy;
02544 #endif
02545 
02546            __kmp_invoke_microtask( microtask, gtid, 0, argc, args, exit_runtime_p );
02547 
02548 #if OMPT_SUPPORT
02549         if (ompt_status & ompt_status_track) {
02550           lw_taskteam.ompt_task_info.frame.exit_runtime_frame = 0;
02551           __ompt_lw_taskteam_unlink(&lw_taskteam, master_th);
02552           if ((ompt_status & ompt_status_track_callback) &&
02553           ompt_callbacks.ompt_callback(ompt_event_parallel_exit)) {
02554         ompt_callbacks.ompt_callback(ompt_event_parallel_exit)
02555           (lw_taskteam.ompt_task_info.task_id,
02556            &lw_taskteam.ompt_task_info.frame,
02557            lw_taskteam.ompt_team_info.parallel_id,
02558            (void *) microtask
02559           );
02560           }
02561           master_th->th.ompt_thread_info.state = prev_state;
02562         }
02563 #endif
02564         }
02565         else {
02566             KMP_ASSERT2( exec_master <= 1, "__kmp_fork_call: unknown parameter exec_master" );
02567         }
02568 
02569         KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid ));
02570 
02571         KMP_MB();
02572         return FALSE;
02573     }
02574 
02575 #if OMP_30_ENABLED
02576     // GEH: only modify the executing flag in the case when not serialized
02577     //      serialized case is handled in kmpc_serialized_parallel
02578     KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
02579                     parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
02580                     master_th->th.th_current_task->td_icvs.max_active_levels ) );
02581     // TODO: GEH - cannot do this assertion because root thread not set up as executing
02582     // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );
02583     master_th->th.th_current_task->td_flags.executing = 0;
02584 #endif
02585 
02586     /* Increment our nested depth level */
02587     KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
02588 
02589 #if OMP_30_ENABLED
02590     //
02591     // See if we need to make a copy of the ICVs.
02592     //
02593     int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
02594     if ( ( level + 1 < __kmp_nested_nth.used ) &&
02595       ( __kmp_nested_nth.nth[level + 1] != nthreads_icv ) ) {
02596         nthreads_icv = __kmp_nested_nth.nth[level + 1];
02597     }
02598     else {
02599         nthreads_icv = 0;  // don't update
02600     }
02601 
02602 #if OMP_40_ENABLED
02603     //
02604     // Figure out the proc_bind_policy for the new team.
02605     //
02606     kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
02607     kmp_proc_bind_t proc_bind_icv; // proc_bind_default means don't update
02608 
02609     if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
02610         proc_bind = proc_bind_false;
02611         proc_bind_icv = proc_bind_default;
02612     }
02613     else {
02614         proc_bind_icv = master_th->th.th_current_task->td_icvs.proc_bind;
02615         if ( proc_bind == proc_bind_default ) {
02616             //
02617             // No proc_bind clause was specified, so use the current value
02618             // of proc-bind-var for this parallel region.
02619             //
02620             proc_bind = proc_bind_icv;
02621         }
02622         else {
02623             //
02624             // The proc_bind policy was specified explicitly on the parallel
02625             // clause.  This overrides the proc-bind-var for this parallel
02626             // region, but does not change proc-bind-var.
02627             //
02628         }
02629 
02630         //
02631         // Figure the value of proc-bind-var for the child threads.
02632         //
02633         if ( ( level + 1 < __kmp_nested_proc_bind.used )
02634           && ( __kmp_nested_proc_bind.bind_types[level + 1] != proc_bind_icv ) ) {
02635             proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
02636         }
02637         else {
02638             proc_bind_icv = proc_bind_default;
02639         }
02640     }
02641 
02642     //
02643     // Reset for next parallel region
02644     //
02645     master_th->th.th_set_proc_bind = proc_bind_default;
02646 #endif /* OMP_40_ENABLED */
02647 
02648     if ( ( nthreads_icv > 0 )
02649 #if OMP_40_ENABLED
02650       || ( proc_bind_icv != proc_bind_default )
02651 #endif /* OMP_40_ENABLED */
02652       )
02653     {
02654         kmp_internal_control_t new_icvs;
02655         copy_icvs( & new_icvs, & master_th->th.th_current_task->td_icvs );
02656         new_icvs.next = NULL;
02657 
02658         if ( nthreads_icv > 0 ) {
02659             new_icvs.nproc = nthreads_icv;
02660         }
02661 
02662 #if OMP_40_ENABLED
02663         if ( proc_bind_icv != proc_bind_default ) {
02664             new_icvs.proc_bind = proc_bind_icv;
02665         }
02666 #endif /* OMP_40_ENABLED */
02667 
02668         /* allocate a new parallel team */
02669         KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
02670         team = __kmp_allocate_team(root, nthreads, nthreads,
02671 #if OMPT_SUPPORT
02672           ompt_parallel_id,
02673 #endif
02674 #if OMP_40_ENABLED
02675           proc_bind,
02676 #endif
02677           &new_icvs, argc );
02678     } else
02679 #endif /* OMP_30_ENABLED */
02680     {
02681         /* allocate a new parallel team */
02682         KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) );
02683         team = __kmp_allocate_team(root, nthreads, nthreads,
02684 #if OMPT_SUPPORT
02685           ompt_parallel_id,
02686 #endif
02687 #if OMP_40_ENABLED
02688                 proc_bind,
02689 #endif
02690 #if OMP_30_ENABLED
02691                 &master_th->th.th_current_task->td_icvs,
02692 #else
02693                 parent_team->t.t_set_nproc[master_tid],
02694                 parent_team->t.t_set_dynamic[master_tid],
02695                 parent_team->t.t_set_nested[master_tid],
02696                 parent_team->t.t_set_blocktime[master_tid],
02697                 parent_team->t.t_set_bt_intervals[master_tid],
02698                 parent_team->t.t_set_bt_set[master_tid],
02699 #endif // OMP_30_ENABLED
02700                 argc );
02701     }
02702 
02703     KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n",
02704             team ) );
02705 
02706     /* setup the new team */
02707     team->t.t_master_tid = master_tid;
02708     team->t.t_master_this_cons = master_this_cons;
02709     team->t.t_master_last_cons = master_last_cons;
02710 
02711     team->t.t_parent     = parent_team;
02712     TCW_SYNC_PTR(team->t.t_pkfn, microtask);
02713     team->t.t_invoke     = invoker;  /* TODO move this to root, maybe */
02714     team->t.t_ident      = loc;
02715 
02716 #if OMP_30_ENABLED
02717     // TODO: parent_team->t.t_level == INT_MAX ???
02718     team->t.t_level        = parent_team->t.t_level + 1;
02719     team->t.t_active_level = parent_team->t.t_active_level + 1;
02720     team->t.t_sched      = get__sched_2( parent_team, master_tid ); // set master's schedule as new run-time schedule
02721 
02722 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
02723     if ( __kmp_inherit_fp_control ) {
02724         __kmp_store_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
02725         __kmp_store_mxcsr( &team->t.t_mxcsr );
02726         team->t.t_mxcsr &= KMP_X86_MXCSR_MASK;
02727         team->t.t_fp_control_saved = TRUE;
02728     }
02729     else {
02730         team->t.t_fp_control_saved = FALSE;
02731     }
02732 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
02733 
02734     if ( __kmp_tasking_mode != tskm_immediate_exec ) {
02735         //
02736         // Set the master thread's task team to the team's task team.
02737         // Unless this is the hot team, it should be NULL.
02738         //
02739         KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team );
02740         KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
02741                         __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
02742                         parent_team, team->t.t_task_team, team ) );
02743         master_th->th.th_task_team = team->t.t_task_team;
02744         KMP_DEBUG_ASSERT( ( master_th->th.th_task_team == NULL ) || ( team == root->r.r_hot_team ) ) ;
02745     }
02746 #endif // OMP_30_ENABLED
02747 
02748     KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
02749                 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
02750     KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
02751                       ( team->t.t_master_tid == 0 &&
02752                         ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
02753     KMP_MB();
02754 
02755     /* now, setup the arguments */
02756     argv = (void**) team -> t.t_argv;
02757     for( i=argc-1; i >= 0; --i )
02758 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
02759 #if KMP_ARCH_X86_64 && KMP_OS_LINUX
02760         *argv++ = va_arg( *ap, void * );
02761 #else
02762         *argv++ = va_arg( ap, void * );
02763 #endif
02764 
02765     /* now actually fork the threads */
02766 
02767     team->t.t_master_active = master_active;
02768     if (!root -> r.r_active)  /* Only do the assignment if it makes a difference to prevent cache ping-pong */
02769         root -> r.r_active = TRUE;
02770 
02771     __kmp_fork_team_threads( root, team, master_th, gtid );
02772 
02773 #if 0
02774 #if OMPT_SUPPORT
02775     if (ompt_status & ompt_status_track) {
02776       master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
02777       if ((ompt_status == ompt_status_track_callback) &&
02778       ompt_callbacks.ompt_callback(ompt_event_parallel_create)) {
02779     int  tid = __kmp_tid_from_gtid( gtid );
02780     ompt_callbacks.ompt_callback(ompt_event_parallel_create)
02781       (team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
02782        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame),
02783        team->t.ompt_team_info.parallel_id,
02784        (void *) microtask);
02785       }
02786     }
02787 #endif
02788 #endif
02789 
02790     __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
02791 
02792 
02793 
02794     /* now go on and do the work */
02795     KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
02796     KMP_MB();
02797 
02798     KF_TRACE( 10, ( "__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n", root, team, master_th, gtid ) );
02799 
02800 
02801     __kmp_internal_fork( loc, gtid, team );
02802     KF_TRACE( 10, ( "__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n", root, team, master_th, gtid ) );
02803 
02804     if (! exec_master) {
02805         KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
02806         return TRUE;
02807     }
02808 
02809     /* Invoke microtask for MASTER thread */
02810     KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
02811                 gtid, team->t.t_id, team->t.t_pkfn ) );
02812 
02813     if (! team->t.t_invoke( gtid )) {
02814         KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
02815     }
02816     KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
02817         gtid, team->t.t_id, team->t.t_pkfn ) );
02818     KMP_MB();       /* Flush all pending memory write invalidates.  */
02819 
02820     KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid ));
02821 
02822 #if OMPT_SUPPORT
02823     if (ompt_status & ompt_status_track) {
02824       master_th->th.ompt_thread_info.state = prev_state;
02825     }
02826 #endif
02827 
02828     return TRUE;
02829 }
02830 
02831 
02832 void
02833 __kmp_join_call(ident_t *loc, int gtid)
02834 {
02835     kmp_team_t     *team;
02836     kmp_team_t     *parent_team;
02837     kmp_info_t     *master_th;
02838     kmp_root_t     *root;
02839     int             master_active;
02840     int             i;
02841 
02842     KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid ));
02843 
02844     /* setup current data */
02845     master_th     = __kmp_threads[ gtid ];
02846 
02847 
02848 #if OMPT_SUPPORT
02849     if (ompt_status & ompt_status_track) {
02850       master_th->th.ompt_thread_info.state = ompt_state_overhead;
02851     }
02852 #endif
02853 
02854     root          = master_th -> th.th_root;
02855     team          = master_th -> th.th_team;
02856     parent_team   = team->t.t_parent;
02857 
02858     master_th->th.th_ident = loc;
02859 
02860 #if OMP_30_ENABLED && KMP_DEBUG
02861     if ( __kmp_tasking_mode != tskm_immediate_exec ) {
02862         KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
02863                          __kmp_gtid_from_thread( master_th ), team,
02864                          team -> t.t_task_team, master_th->th.th_task_team) );
02865         KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team );
02866     }
02867 #endif // OMP_30_ENABLED
02868 
02869     if( team->t.t_serialized ) {
02870         __kmpc_end_serialized_parallel( loc, gtid );
02871         return;
02872     }
02873 
02874     master_active = team->t.t_master_active;
02875 
02876     __kmp_internal_join( loc, gtid, team );
02877     KMP_MB();
02878 
02879 #if 0
02880 #if OMPT_SUPPORT
02881    if ((ompt_status & ompt_status_track_callback) &&
02882        ompt_callbacks.ompt_callback(ompt_event_parallel_exit)) {
02883      int  tid = __kmp_tid_from_gtid( gtid );
02884      ompt_callbacks.ompt_callback(ompt_event_parallel_exit)
02885        (team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
02886         &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame),
02887         team->t.ompt_team_info.parallel_id,
02888         (void *) team->t.t_pkfn);
02889    }
02890 #endif
02891 #endif
02892 
02893 #if OMPT_SUPPORT
02894    ompt_parallel_info_t parallel_info;
02895    if ((ompt_status & ompt_status_track_callback) &&
02896        ompt_callbacks.ompt_callback(ompt_event_parallel_exit)) {
02897      int  tid = __kmp_tid_from_gtid( gtid );
02898      parallel_info =  (ompt_parallel_info_t)
02899        { .parent_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id,
02900      .parent_task_frame = &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame),
02901      .parallel_id = team->t.ompt_team_info.parallel_id,
02902      .parallel_function = (void *) team->t.t_pkfn 
02903        };
02904    }
02905 #endif
02906 
02907     /* do cleanup and restore the parent team */
02908     master_th -> th.th_info .ds.ds_tid = team -> t.t_master_tid;
02909     master_th -> th.th_local.this_construct = team -> t.t_master_this_cons;
02910     master_th -> th.th_local.last_construct = team -> t.t_master_last_cons;
02911 
02912     master_th -> th.th_dispatch =
02913                 & parent_team -> t.t_dispatch[ team -> t.t_master_tid ];
02914 
02915     /* jc: The following lock has instructions with REL and ACQ semantics,
02916        separating the parallel user code called in this parallel region
02917        from the serial user code called after this function returns.
02918     */
02919     __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
02920 
02921     /* Decrement our nested depth level */
02922     KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
02923     KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );
02924 
02925     #if OMP_30_ENABLED
02926     KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
02927                    0, master_th, team ) );
02928     __kmp_pop_current_task_from_thread( master_th );
02929     #endif // OMP_30_ENABLED
02930 
02931 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
02932     //
02933     // Restore master thread's partition.
02934     //
02935     master_th -> th.th_first_place = team -> t.t_first_place;
02936     master_th -> th.th_last_place = team -> t.t_last_place;
02937 #endif /* OMP_40_ENABLED */
02938 
02939 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
02940     if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
02941         __kmp_clear_x87_fpu_status_word();
02942         __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
02943         __kmp_load_mxcsr( &team->t.t_mxcsr );
02944     }
02945 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
02946 
02947     if ( root -> r.r_active != master_active )
02948         root -> r.r_active = master_active;
02949 
02950     __kmp_free_team( root, team ); /* this will free worker threads */
02951 
02952     /* this race was fun to find.  make sure the following is in the critical
02953      * region otherwise assertions may fail occasiounally since the old team
02954      * may be reallocated and the hierarchy appears inconsistant.  it is
02955      * actually safe to run and won't cause any bugs, but will cause thoose
02956      * assertion failures.  it's only one deref&assign so might as well put this
02957      * in the critical region */
02958     master_th -> th.th_team        =   parent_team;
02959     master_th -> th.th_team_nproc  =   parent_team -> t.t_nproc;
02960     master_th -> th.th_team_master =   parent_team -> t.t_threads[0];
02961     master_th -> th.th_team_serialized = parent_team -> t.t_serialized;
02962 
02963     /* restore serialized team, if need be */
02964     if( parent_team -> t.t_serialized &&
02965         parent_team != master_th->th.th_serial_team &&
02966         parent_team != root->r.r_root_team ) {
02967             __kmp_free_team( root, master_th -> th.th_serial_team );
02968             master_th -> th.th_serial_team = parent_team;
02969     }
02970 
02971 #if OMP_30_ENABLED
02972     if ( __kmp_tasking_mode != tskm_immediate_exec ) {
02973         //
02974         // Copy the task team from the new child / old parent team
02975         // to the thread.  If non-NULL, copy the state flag also.
02976         //
02977         if ( ( master_th -> th.th_task_team = parent_team -> t.t_task_team ) != NULL ) {
02978             master_th -> th.th_task_state = master_th -> th.th_task_team -> tt.tt_state;
02979         }
02980         KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
02981                         __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
02982                         parent_team ) );
02983     }
02984 #endif /* OMP_30_ENABLED */
02985 
02986     #if OMP_30_ENABLED
02987          // TODO: GEH - cannot do this assertion because root thread not set up as executing
02988          // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );
02989          master_th->th.th_current_task->td_flags.executing = 1;
02990     #endif // OMP_30_ENABLED
02991 
02992     __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
02993 
02994 #if OMPT_SUPPORT
02995    if ((ompt_status & ompt_status_track_callback) &&
02996        ompt_callbacks.ompt_callback(ompt_event_parallel_exit)) {
02997      ompt_callbacks.ompt_callback(ompt_event_parallel_exit)
02998        (parallel_info.parent_task_id, parallel_info.parent_task_frame,
02999     parallel_info.parallel_id, parallel_info.parallel_function);
03000    }
03001 #endif
03002 
03003     KMP_MB();
03004     KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid ));
03005 }
03006 
03007 /* ------------------------------------------------------------------------ */
03008 /* ------------------------------------------------------------------------ */
03009 
03010 /* Check whether we should push an internal control record onto the
03011    serial team stack.  If so, do it.  */
03012 void
03013 __kmp_save_internal_controls ( kmp_info_t * thread )
03014 {
03015 
03016     if ( thread -> th.th_team != thread -> th.th_serial_team ) {
03017         return;
03018     }
03019     if (thread -> th.th_team -> t.t_serialized > 1) {
03020         int push = 0;
03021 
03022         if (thread -> th.th_team -> t.t_control_stack_top == NULL) {
03023             push = 1;
03024         } else {
03025             if ( thread -> th.th_team -> t.t_control_stack_top -> serial_nesting_level !=
03026                  thread -> th.th_team -> t.t_serialized ) {
03027                 push = 1;
03028             }
03029         }
03030         if (push) {  /* push a record on the serial team's stack */
03031             kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t));
03032 
03033 #if OMP_30_ENABLED
03034             copy_icvs( control, & thread->th.th_current_task->td_icvs );
03035 #else
03036             control->nproc        = thread->th.th_team->t.t_set_nproc[0];
03037             control->dynamic      = thread->th.th_team->t.t_set_dynamic[0];
03038             control->nested       = thread->th.th_team->t.t_set_nested[0];
03039             control->blocktime    = thread->th.th_team->t.t_set_blocktime[0];
03040             control->bt_intervals = thread->th.th_team->t.t_set_bt_intervals[0];
03041             control->bt_set       = thread->th.th_team->t.t_set_bt_set[0];
03042 #endif // OMP_30_ENABLED
03043 
03044             control->serial_nesting_level = thread->th.th_team->t.t_serialized;
03045 
03046             control->next = thread -> th.th_team -> t.t_control_stack_top;
03047             thread -> th.th_team -> t.t_control_stack_top = control;
03048         }
03049     }
03050 }
03051 
03052 /* Changes set_nproc */
03053 void
03054 __kmp_set_num_threads( int new_nth, int gtid )
03055 {
03056     kmp_info_t *thread;
03057     kmp_root_t *root;
03058 
03059     KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
03060     KMP_DEBUG_ASSERT( __kmp_init_serial );
03061 
03062     if (new_nth < 1)
03063         new_nth = 1;
03064     else if (new_nth > __kmp_max_nth)
03065         new_nth = __kmp_max_nth;
03066 
03067     thread = __kmp_threads[gtid];
03068 
03069     __kmp_save_internal_controls( thread );
03070 
03071     set__nproc( thread, new_nth );
03072 
03073     //
03074     // If this omp_set_num_threads() call will cause the hot team size to be
03075     // reduced (in the absence of a num_threads clause), then reduce it now,
03076     // rather than waiting for the next parallel region.
03077     //
03078     root = thread->th.th_root;
03079     if ( __kmp_init_parallel && ( ! root->r.r_active )
03080       && ( root->r.r_hot_team->t.t_nproc > new_nth ) ) {
03081         kmp_team_t *hot_team = root->r.r_hot_team;
03082         int f;
03083 
03084         __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
03085 
03086 
03087 #if OMP_30_ENABLED
03088         if ( __kmp_tasking_mode != tskm_immediate_exec ) {
03089             kmp_task_team_t *task_team = hot_team->t.t_task_team;
03090             if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
03091                 //
03092                 // Signal the worker threads (esp. the extra ones) to stop
03093                 // looking for tasks while spin waiting.  The task teams
03094                 // are reference counted and will be deallocated by the
03095                 // last worker thread.
03096                 //
03097                 KMP_DEBUG_ASSERT( hot_team->t.t_nproc > 1 );
03098                 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
03099                 KMP_MB();
03100 
03101                 KA_TRACE( 20, ( "__kmp_set_num_threads: setting task_team %p to NULL\n",
03102                   &hot_team->t.t_task_team ) );
03103                   hot_team->t.t_task_team = NULL;
03104             }
03105             else {
03106                 KMP_DEBUG_ASSERT( task_team == NULL );
03107             }
03108         }
03109 #endif // OMP_30_ENABLED
03110 
03111         //
03112         // Release the extra threads we don't need any more.
03113         //
03114         for ( f = new_nth;  f < hot_team->t.t_nproc; f++ ) {
03115             KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
03116             __kmp_free_thread( hot_team->t.t_threads[f] );
03117             hot_team->t.t_threads[f] =  NULL;
03118         }
03119         hot_team->t.t_nproc = new_nth;
03120 
03121 
03122         __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
03123 
03124         //
03125         // Update the t_nproc field in the threads that are still active.
03126         //
03127         for( f=0 ; f < new_nth; f++ ) {
03128             KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
03129             hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
03130         }
03131 #if KMP_MIC
03132         // Special flag in case omp_set_num_threads() call
03133         hot_team -> t.t_size_changed = -1;
03134 #endif
03135     }
03136 
03137 }
03138 
03139 #if OMP_30_ENABLED
03140 /* Changes max_active_levels */
03141 void
03142 __kmp_set_max_active_levels( int gtid, int max_active_levels )
03143 {
03144     kmp_info_t *thread;
03145 
03146     KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
03147     KMP_DEBUG_ASSERT( __kmp_init_serial );
03148 
03149     // validate max_active_levels
03150     if( max_active_levels < 0 ) {
03151         KMP_WARNING( ActiveLevelsNegative, max_active_levels );
03152         // We ignore this call if the user has specified a negative value.
03153         // The current setting won't be changed. The last valid setting will be used.
03154         // A warning will be issued (if warnings are allowed as controlled by the KMP_WARNINGS env var).
03155         KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
03156         return;
03157     }
03158     if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
03159         // it's OK, the max_active_levels is within the valid range: [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ]
03160         // We allow a zero value. (implementation defined behavior)
03161     } else {
03162         KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT  );
03163         max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
03164         // Current upper limit is MAX_INT. (implementation defined behavior)
03165         // If the input exceeds the upper limit, we correct the input to be the upper limit. (implementation defined behavior)
03166         // Actually, the flow should never get here until we use MAX_INT limit.
03167     }
03168     KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
03169 
03170     thread = __kmp_threads[ gtid ];
03171 
03172     __kmp_save_internal_controls( thread );
03173 
03174     set__max_active_levels( thread, max_active_levels );
03175 
03176 }
03177 
03178 /* Gets max_active_levels */
03179 int
03180 __kmp_get_max_active_levels( int gtid )
03181 {
03182     kmp_info_t *thread;
03183 
03184     KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) );
03185     KMP_DEBUG_ASSERT( __kmp_init_serial );
03186 
03187     thread = __kmp_threads[ gtid ];
03188     KMP_DEBUG_ASSERT( thread -> th.th_current_task );
03189     KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
03190         gtid, thread -> th.th_current_task, thread -> th.th_current_task -> td_icvs.max_active_levels ) );
03191     return thread -> th.th_current_task -> td_icvs.max_active_levels;
03192 }
03193 
03194 /* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
03195 void
03196 __kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk )
03197 {
03198     kmp_info_t *thread;
03199 //    kmp_team_t *team;
03200 
03201     KF_TRACE( 10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk ));
03202     KMP_DEBUG_ASSERT( __kmp_init_serial );
03203 
03204     // Check if the kind parameter is valid, correct if needed.
03205     // Valid parameters should fit in one of two intervals - standard or extended:
03206     //       <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
03207     // 2008-01-25: 0,  1 - 4,       5,         100,     101 - 102, 103
03208     if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
03209        ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
03210     {
03211         // TODO: Hint needs attention in case we change the default schedule.
03212         __kmp_msg(
03213             kmp_ms_warning,
03214             KMP_MSG( ScheduleKindOutOfRange, kind ),
03215             KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ),
03216             __kmp_msg_null
03217         );
03218         kind = kmp_sched_default;
03219         chunk = 0;         // ignore chunk value in case of bad kind
03220     }
03221 
03222     thread = __kmp_threads[ gtid ];
03223 
03224     __kmp_save_internal_controls( thread );
03225 
03226     if ( kind < kmp_sched_upper_std ) {
03227         if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
03228             // differ static chunked vs. unchunked:
03229             // chunk should be invalid to indicate unchunked schedule (which is the default)
03230             thread -> th.th_current_task -> td_icvs.sched.r_sched_type = kmp_sch_static;
03231         } else {
03232             thread -> th.th_current_task -> td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
03233         }
03234     } else {
03235         //    __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
03236         thread -> th.th_current_task -> td_icvs.sched.r_sched_type =
03237             __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
03238     }
03239     if ( kind == kmp_sched_auto ) {
03240         // ignore parameter chunk for schedule auto
03241         thread -> th.th_current_task -> td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
03242     } else {
03243         thread -> th.th_current_task -> td_icvs.sched.chunk = chunk;
03244     }
03245 }
03246 
03247 /* Gets def_sched_var ICV values */
03248 void
03249 __kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk )
03250 {
03251     kmp_info_t     *thread;
03252     enum sched_type th_type;
03253     int             i;
03254 
03255     KF_TRACE( 10, ("__kmp_get_schedule: thread %d\n", gtid ));
03256     KMP_DEBUG_ASSERT( __kmp_init_serial );
03257 
03258     thread = __kmp_threads[ gtid ];
03259 
03260     //th_type = thread -> th.th_team -> t.t_set_sched[ thread->th.th_info.ds.ds_tid ].r_sched_type;
03261     th_type = thread -> th.th_current_task -> td_icvs.sched.r_sched_type;
03262 
03263     switch ( th_type ) {
03264     case kmp_sch_static:
03265     case kmp_sch_static_greedy:
03266     case kmp_sch_static_balanced:
03267         *kind = kmp_sched_static;
03268         *chunk = 0;   // chunk was not set, try to show this fact via zero value
03269         return;
03270     case kmp_sch_static_chunked:
03271         *kind = kmp_sched_static;
03272         break;
03273     case kmp_sch_dynamic_chunked:
03274         *kind = kmp_sched_dynamic;
03275         break;
03276     case kmp_sch_guided_chunked:
03277     case kmp_sch_guided_iterative_chunked:
03278     case kmp_sch_guided_analytical_chunked:
03279         *kind = kmp_sched_guided;
03280         break;
03281     case kmp_sch_auto:
03282         *kind = kmp_sched_auto;
03283         break;
03284     case kmp_sch_trapezoidal:
03285         *kind = kmp_sched_trapezoidal;
03286         break;
03287 /*
03288     case kmp_sch_static_steal:
03289         *kind = kmp_sched_static_steal;
03290         break;
03291 */
03292     default:
03293         KMP_FATAL( UnknownSchedulingType, th_type );
03294     }
03295 
03296     //*chunk = thread -> th.th_team -> t.t_set_sched[ thread->th.th_info.ds.ds_tid ].chunk;
03297     *chunk = thread -> th.th_current_task -> td_icvs.sched.chunk;
03298 }
03299 
03300 int
03301 __kmp_get_ancestor_thread_num( int gtid, int level ) {
03302 
03303     int ii, dd;
03304     kmp_team_t *team;
03305 
03306     KF_TRACE( 10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
03307     KMP_DEBUG_ASSERT( __kmp_init_serial );
03308 
03309     // validate level
03310     if( level == 0 ) return 0;
03311     if( level < 0 ) return -1;
03312     team = __kmp_threads[ gtid ] -> th.th_team;
03313     ii = team -> t.t_level;
03314     if( level > ii ) return -1;
03315     if( ii == level ) return __kmp_tid_from_gtid( gtid );
03316 
03317     dd = team -> t.t_serialized;
03318     level++;
03319     while( ii > level )
03320     {
03321         for( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
03322         {
03323         }
03324         if( ( team -> t.t_serialized ) && ( !dd ) ) {
03325             team = team->t.t_parent;
03326             continue;
03327         }
03328         if( ii > level ) {
03329             team = team->t.t_parent;
03330             dd = team -> t.t_serialized;
03331             ii--;
03332         }
03333     }
03334 
03335     return ( dd > 1 ) ? ( 0 ) : ( team -> t.t_master_tid );
03336 }
03337 
03338 int
03339 __kmp_get_team_size( int gtid, int level ) {
03340 
03341     int ii, dd;
03342     kmp_team_t *team;
03343 
03344     KF_TRACE( 10, ("__kmp_get_team_size: thread %d %d\n", gtid, level ));
03345     KMP_DEBUG_ASSERT( __kmp_init_serial );
03346 
03347     // validate level
03348     if( level == 0 ) return 1;
03349     if( level < 0 ) return -1;
03350     team = __kmp_threads[ gtid ] -> th.th_team;
03351     ii = team -> t.t_level;
03352     if( level > ii ) return -1;
03353 
03354     while( ii > level )
03355     {
03356         for( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
03357         {
03358         }
03359         if( team -> t.t_serialized && ( !dd ) ) {
03360             team = team->t.t_parent;
03361             continue;
03362         }
03363         if( ii > level ) {
03364             team = team->t.t_parent;
03365             ii--;
03366         }
03367     }
03368 
03369     return team -> t.t_nproc;
03370 }
03371 
03372 #endif // OMP_30_ENABLED
03373 
03374 kmp_r_sched_t
03375 __kmp_get_schedule_global() {
03376 // This routine created because pairs (__kmp_sched, __kmp_chunk) and (__kmp_static, __kmp_guided)
03377 // may be changed by kmp_set_defaults independently. So one can get the updated schedule here.
03378 
03379     kmp_r_sched_t r_sched;
03380 
03381     // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, __kmp_guided
03382     // __kmp_sched should keep original value, so that user can set KMP_SCHEDULE multiple times,
03383     // and thus have different run-time schedules in different roots (even in OMP 2.5)
03384     if ( __kmp_sched == kmp_sch_static ) {
03385         r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed schedule (balanced or greedy)
03386     } else if ( __kmp_sched == kmp_sch_guided_chunked ) {
03387         r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed schedule (iterative or analytical)
03388     } else {
03389         r_sched.r_sched_type = __kmp_sched;  // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
03390     }
03391 
03392     if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) { // __kmp_chunk may be wrong here (if it was not ever set)
03393         r_sched.chunk = KMP_DEFAULT_CHUNK;
03394     } else {
03395         r_sched.chunk = __kmp_chunk;
03396     }
03397 
03398     return r_sched;
03399 }
03400 
03401 /* ------------------------------------------------------------------------ */
03402 /* ------------------------------------------------------------------------ */
03403 
03404 
03405 /*
03406  * Allocate (realloc == FALSE) * or reallocate (realloc == TRUE)
03407  * at least argc number of *t_argv entries for the requested team.
03408  */
03409 static void
03410 __kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc )
03411 {
03412 
03413     KMP_DEBUG_ASSERT( team );
03414     if( !realloc || argc > team -> t.t_max_argc ) {
03415 
03416         KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
03417                          team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
03418 #if (KMP_PERF_V106 == KMP_ON)
03419         /* if previously allocated heap space for args, free them */
03420         if ( realloc && team -> t.t_argv != &team -> t.t_inline_argv[0] )
03421             __kmp_free( (void *) team -> t.t_argv );
03422 
03423         if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
03424             /* use unused space in the cache line for arguments */
03425             team -> t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
03426             KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
03427                              team->t.t_id, team->t.t_max_argc ));
03428             team -> t.t_argv = &team -> t.t_inline_argv[0];
03429             if ( __kmp_storage_map ) {
03430                 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
03431                                          &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
03432                                          (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES),
03433                                          "team_%d.t_inline_argv",
03434                                          team->t.t_id );
03435             }
03436         } else {
03437             /* allocate space for arguments in the heap */
03438             team -> t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
03439                                      KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
03440             KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
03441                              team->t.t_id, team->t.t_max_argc ));
03442             team -> t.t_argv     = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
03443             if ( __kmp_storage_map ) {
03444                 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
03445                                          sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv",
03446                                          team->t.t_id );
03447             }
03448         }
03449 #else /* KMP_PERF_V106 == KMP_OFF */
03450         if ( realloc )
03451             __kmp_free( (void*) team -> t.t_argv );
03452         team -> t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
03453                              KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
03454         KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
03455                          team->t.t_id, team->t.t_max_argc ));
03456         team -> t.t_argv     = __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc );
03457         if ( __kmp_storage_map ) {
03458             __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
03459                                      sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv", team->t.t_id );
03460         }
03461 #endif /* KMP_PERF_V106 */
03462 
03463     }
03464 }
03465 
03466 static void
03467 __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth)
03468 {
03469     int i;
03470     int num_disp_buff = max_nth > 1 ? KMP_MAX_DISP_BUF : 2;
03471 #if KMP_USE_POOLED_ALLOC
03472     char *ptr = __kmp_allocate(max_nth *
03473                             ( sizeof(kmp_info_t*) + sizeof(dispatch_shared_info_t)*2
03474                                + sizeof(kmp_disp_t) + sizeof(int)*6
03475 #  if OMP_30_ENABLED
03476                                //+ sizeof(int)
03477                                + sizeof(kmp_r_sched_t)
03478                                + sizeof(kmp_taskdata_t)
03479 #  endif // OMP_30_ENABLED
03480                         )     );
03481 
03482     team -> t.t_threads          = (kmp_info_t**) ptr; ptr += sizeof(kmp_info_t*) * max_nth;
03483     team -> t.t_disp_buffer      = (dispatch_shared_info_t*) ptr;
03484                                    ptr += sizeof(dispatch_shared_info_t) * num_disp_buff;
03485     team -> t.t_dispatch         = (kmp_disp_t*) ptr; ptr += sizeof(kmp_disp_t) * max_nth;
03486     team -> t.t_set_nproc        = (int*) ptr; ptr += sizeof(int) * max_nth;
03487     team -> t.t_set_dynamic      = (int*) ptr; ptr += sizeof(int) * max_nth;
03488     team -> t.t_set_nested       = (int*) ptr; ptr += sizeof(int) * max_nth;
03489     team -> t.t_set_blocktime    = (int*) ptr; ptr += sizeof(int) * max_nth;
03490     team -> t.t_set_bt_intervals = (int*) ptr; ptr += sizeof(int) * max_nth;
03491     team -> t.t_set_bt_set       = (int*) ptr;
03492 #  if OMP_30_ENABLED
03493     ptr += sizeof(int) * max_nth;
03494     //team -> t.t_set_max_active_levels = (int*) ptr; ptr += sizeof(int) * max_nth;
03495     team -> t.t_set_sched        = (kmp_r_sched_t*) ptr;
03496     ptr += sizeof(kmp_r_sched_t) * max_nth;
03497     team -> t.t_implicit_task_taskdata = (kmp_taskdata_t*) ptr;
03498     ptr += sizeof(kmp_taskdata_t) * max_nth;
03499 #  endif // OMP_30_ENABLED
03500 #else
03501 
03502     team -> t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth );
03503     team -> t.t_disp_buffer = (dispatch_shared_info_t*)
03504         __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff );
03505     team -> t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth );
03506     #if OMP_30_ENABLED
03507     //team -> t.t_set_max_active_levels = (int*) __kmp_allocate( sizeof(int) * max_nth );
03508     //team -> t.t_set_sched = (kmp_r_sched_t*) __kmp_allocate( sizeof(kmp_r_sched_t) * max_nth );
03509     team -> t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth );
03510     #else
03511     team -> t.t_set_nproc = (int*) __kmp_allocate( sizeof(int) * max_nth );
03512     team -> t.t_set_dynamic = (int*) __kmp_allocate( sizeof(int) * max_nth );
03513     team -> t.t_set_nested = (int*) __kmp_allocate( sizeof(int) * max_nth );
03514     team -> t.t_set_blocktime = (int*) __kmp_allocate( sizeof(int) * max_nth );
03515     team -> t.t_set_bt_intervals = (int*) __kmp_allocate( sizeof(int) * max_nth );
03516     team -> t.t_set_bt_set = (int*) __kmp_allocate( sizeof(int) * max_nth );
03517 #  endif // OMP_30_ENABLED
03518 #endif
03519     team->t.t_max_nproc = max_nth;
03520 
03521     /* setup dispatch buffers */
03522     for(i = 0 ; i < num_disp_buff; ++i)
03523         team -> t.t_disp_buffer[i].buffer_index = i;
03524 }
03525 
03526 static void
03527 __kmp_free_team_arrays(kmp_team_t *team) {
03528     /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
03529     int i;
03530     for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
03531         if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
03532             __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
03533             team->t.t_dispatch[ i ].th_disp_buffer = NULL;
03534         }; // if
03535     }; // for
03536     __kmp_free(team->t.t_threads);
03537     #if !KMP_USE_POOLED_ALLOC
03538         __kmp_free(team->t.t_disp_buffer);
03539         __kmp_free(team->t.t_dispatch);
03540         #if OMP_30_ENABLED
03541         //__kmp_free(team->t.t_set_max_active_levels);
03542         //__kmp_free(team->t.t_set_sched);
03543         __kmp_free(team->t.t_implicit_task_taskdata);
03544         #else
03545         __kmp_free(team->t.t_set_nproc);
03546         __kmp_free(team->t.t_set_dynamic);
03547         __kmp_free(team->t.t_set_nested);
03548         __kmp_free(team->t.t_set_blocktime);
03549         __kmp_free(team->t.t_set_bt_intervals);
03550         __kmp_free(team->t.t_set_bt_set);
03551     #  endif // OMP_30_ENABLED
03552     #endif
03553     team->t.t_threads     = NULL;
03554     team->t.t_disp_buffer = NULL;
03555     team->t.t_dispatch    = NULL;
03556 #if OMP_30_ENABLED
03557     //team->t.t_set_sched   = 0;
03558     //team->t.t_set_max_active_levels = 0;
03559     team->t.t_implicit_task_taskdata = 0;
03560 #else
03561     team->t.t_set_nproc   = 0;
03562     team->t.t_set_dynamic = 0;
03563     team->t.t_set_nested  = 0;
03564     team->t.t_set_blocktime   = 0;
03565     team->t.t_set_bt_intervals = 0;
03566     team->t.t_set_bt_set  = 0;
03567 #endif // OMP_30_ENABLED
03568 }
03569 
03570 static void
03571 __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
03572     kmp_info_t **oldThreads = team->t.t_threads;
03573 
03574     #if !KMP_USE_POOLED_ALLOC
03575         __kmp_free(team->t.t_disp_buffer);
03576         __kmp_free(team->t.t_dispatch);
03577         #if OMP_30_ENABLED
03578         //__kmp_free(team->t.t_set_max_active_levels);
03579         //__kmp_free(team->t.t_set_sched);
03580         __kmp_free(team->t.t_implicit_task_taskdata);
03581         #else
03582         __kmp_free(team->t.t_set_nproc);
03583         __kmp_free(team->t.t_set_dynamic);
03584         __kmp_free(team->t.t_set_nested);
03585         __kmp_free(team->t.t_set_blocktime);
03586         __kmp_free(team->t.t_set_bt_intervals);
03587         __kmp_free(team->t.t_set_bt_set);
03588     #  endif // OMP_30_ENABLED
03589     #endif
03590     __kmp_allocate_team_arrays(team, max_nth);
03591 
03592     memcpy(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
03593 
03594     __kmp_free(oldThreads);
03595 }
03596 
03597 static kmp_internal_control_t
03598 __kmp_get_global_icvs( void ) {
03599 
03600 #if OMP_30_ENABLED
03601     kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
03602 #endif /* OMP_30_ENABLED */
03603 
03604 #if OMP_40_ENABLED
03605     KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
03606 #endif /* OMP_40_ENABLED */
03607 
03608     kmp_internal_control_t g_icvs = {
03609       0,                            //int serial_nesting_level; //corresponds to the value of the th_team_serialized field
03610       __kmp_dflt_nested,            //int nested;               //internal control for nested parallelism (per thread)
03611       __kmp_global.g.g_dynamic,                                 //internal control for dynamic adjustment of threads (per thread)
03612       __kmp_dflt_team_nth,
03613                                     //int nproc;                //internal control for # of threads for next parallel region (per thread)
03614                                     // (use a max ub on value if __kmp_parallel_initialize not called yet)
03615       __kmp_dflt_blocktime,         //int blocktime;            //internal control for blocktime
03616       __kmp_bt_intervals,           //int bt_intervals;         //internal control for blocktime intervals
03617       __kmp_env_blocktime,          //int bt_set;               //internal control for whether blocktime is explicitly set
03618 #if OMP_30_ENABLED
03619       __kmp_dflt_max_active_levels, //int max_active_levels;    //internal control for max_active_levels
03620       r_sched,                      //kmp_r_sched_t sched;      //internal control for runtime schedule {sched,chunk} pair
03621 #endif /* OMP_30_ENABLED */
03622 #if OMP_40_ENABLED
03623       __kmp_nested_proc_bind.bind_types[0],
03624 #endif /* OMP_40_ENABLED */
03625       NULL                          //struct kmp_internal_control *next;
03626     };
03627 
03628     return g_icvs;
03629 }
03630 
03631 static kmp_internal_control_t
03632 __kmp_get_x_global_icvs( const kmp_team_t *team ) {
03633 
03634     #if OMP_30_ENABLED
03635     kmp_internal_control_t gx_icvs;
03636     gx_icvs.serial_nesting_level = 0; // probably =team->t.t_serial like in save_inter_controls
03637     copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
03638     gx_icvs.next = NULL;
03639     #else
03640     kmp_internal_control_t gx_icvs =
03641     {
03642       0,
03643       team->t.t_set_nested[0],
03644       team->t.t_set_dynamic[0],
03645       team->t.t_set_nproc[0],
03646       team->t.t_set_blocktime[0],
03647       team->t.t_set_bt_intervals[0],
03648       team->t.t_set_bt_set[0],
03649       NULL                          //struct kmp_internal_control *next;
03650     };
03651     #endif // OMP_30_ENABLED
03652 
03653     return gx_icvs;
03654 }
03655 
03656 static void
03657 __kmp_initialize_root( kmp_root_t *root )
03658 {
03659     int           f;
03660     kmp_team_t   *root_team;
03661     kmp_team_t   *hot_team;
03662     size_t        disp_size, dispatch_size, bar_size;
03663     int           hot_team_max_nth;
03664 #if OMP_30_ENABLED
03665     kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals
03666     kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
03667 #endif // OMP_30_ENABLED
03668     KMP_DEBUG_ASSERT( root );
03669     KMP_ASSERT( ! root->r.r_begin );
03670 
03671     /* setup the root state structure */
03672     __kmp_init_lock( &root->r.r_begin_lock );
03673     root -> r.r_begin        = FALSE;
03674     root -> r.r_active       = FALSE;
03675     root -> r.r_in_parallel  = 0;
03676     root -> r.r_blocktime    = __kmp_dflt_blocktime;
03677     root -> r.r_nested       = __kmp_dflt_nested;
03678 
03679     /* setup the root team for this task */
03680     /* allocate the root team structure */
03681     KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) );
03682 
03683     int gtid = __kmp_gtid_get_specific();
03684     kmp_info_t *ti = ompt_get_thread_gtid(gtid);
03685     ompt_parallel_id_t ompt_parallel_id_1 = __ompt_parallel_id_new(ti, gtid);
03686 
03687     root_team =
03688         __kmp_allocate_team(
03689             root,
03690             1,                                                         // new_nproc
03691             1,                                                         // max_nproc
03692 #if OMPT_SUPPORT
03693           ompt_parallel_id_1,
03694 #endif
03695 #if OMP_40_ENABLED
03696             __kmp_nested_proc_bind.bind_types[0],
03697 #endif
03698 #if OMP_30_ENABLED
03699             &r_icvs,
03700 #else
03701             __kmp_dflt_team_nth_ub,                                    // num_treads
03702             __kmp_global.g.g_dynamic,                                  // dynamic
03703             __kmp_dflt_nested,                                         // nested
03704             __kmp_dflt_blocktime,                                      // blocktime
03705             __kmp_bt_intervals,                                        // bt_intervals
03706             __kmp_env_blocktime,                                       // bt_set
03707 #endif // OMP_30_ENABLED
03708             0                                                          // argc
03709         );
03710 
03711     KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) );
03712 
03713     root -> r.r_root_team = root_team;
03714     root_team -> t.t_control_stack_top = NULL;
03715 
03716     /* initialize root team */
03717     root_team -> t.t_threads[0] = NULL;
03718     root_team -> t.t_nproc      = 1;
03719     root_team -> t.t_serialized = 1;
03720 #if OMP_30_ENABLED
03721     // TODO???: root_team -> t.t_max_active_levels = __kmp_dflt_max_active_levels;
03722     root_team -> t.t_sched.r_sched_type = r_sched.r_sched_type;
03723     root_team -> t.t_sched.chunk        = r_sched.chunk;
03724 #endif // OMP_30_ENABLED
03725     KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
03726                     root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
03727 
03728     /* setup the  hot team for this task */
03729     /* allocate the hot team structure */
03730     KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) );
03731 
03732     ompt_parallel_id_t ompt_parallel_id_2 = __ompt_parallel_id_new(ti, gtid);
03733 
03734     hot_team =
03735         __kmp_allocate_team(
03736             root,
03737             1,                                                         // new_nproc
03738             __kmp_dflt_team_nth_ub * 2,                                // max_nproc
03739 #if OMPT_SUPPORT
03740           ompt_parallel_id_2,
03741 #endif
03742 #if OMP_40_ENABLED
03743             __kmp_nested_proc_bind.bind_types[0],
03744 #endif
03745 #if OMP_30_ENABLED
03746             &r_icvs,
03747 #else
03748             __kmp_dflt_team_nth_ub,                                    // num_treads
03749             __kmp_global.g.g_dynamic,                                  // dynamic
03750             __kmp_dflt_nested,                                         // nested
03751             __kmp_dflt_blocktime,                                      // blocktime
03752             __kmp_bt_intervals,                                        // bt_intervals
03753             __kmp_env_blocktime,                                       // bt_set
03754 #endif // OMP_30_ENABLED
03755             0                                                          // argc
03756         );
03757     KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
03758 
03759     root -> r.r_hot_team = hot_team;
03760     root_team -> t.t_control_stack_top = NULL;
03761 
03762     /* first-time initialization */
03763     hot_team -> t.t_parent = root_team;
03764 
03765     /* initialize hot team */
03766     hot_team_max_nth = hot_team->t.t_max_nproc;
03767     for ( f = 0; f < hot_team_max_nth; ++ f ) {
03768         hot_team -> t.t_threads[ f ] = NULL;
03769     }; // for
03770     hot_team -> t.t_nproc = 1;
03771 #if OMP_30_ENABLED
03772     // TODO???: hot_team -> t.t_max_active_levels = __kmp_dflt_max_active_levels;
03773     hot_team -> t.t_sched.r_sched_type = r_sched.r_sched_type;
03774     hot_team -> t.t_sched.chunk        = r_sched.chunk;
03775 #endif // OMP_30_ENABLED
03776 #if KMP_MIC
03777     hot_team -> t.t_size_changed = 0;
03778 #endif
03779 
03780 }
03781 
03782 #ifdef KMP_DEBUG
03783 
03784 
03785 typedef struct kmp_team_list_item {
03786     kmp_team_p const *           entry;
03787     struct kmp_team_list_item *  next;
03788 } kmp_team_list_item_t;
03789 typedef kmp_team_list_item_t * kmp_team_list_t;
03790 
03791 
03792 static void
03793 __kmp_print_structure_team_accum(    // Add team to list of teams.
03794     kmp_team_list_t     list,        // List of teams.
03795     kmp_team_p const *  team         // Team to add.
03796 ) {
03797 
03798     // List must terminate with item where both entry and next are NULL.
03799     // Team is added to the list only once.
03800     // List is sorted in ascending order by team id.
03801     // Team id is *not* a key.
03802 
03803     kmp_team_list_t l;
03804 
03805     KMP_DEBUG_ASSERT( list != NULL );
03806     if ( team == NULL ) {
03807         return;
03808     }; // if
03809 
03810     __kmp_print_structure_team_accum( list, team->t.t_parent );
03811     __kmp_print_structure_team_accum( list, team->t.t_next_pool );
03812 
03813     // Search list for the team.
03814     l = list;
03815     while ( l->next != NULL && l->entry != team ) {
03816         l = l->next;
03817     }; // while
03818     if ( l->next != NULL ) {
03819         return;  // Team has been added before, exit.
03820     }; // if
03821 
03822     // Team is not found. Search list again for insertion point.
03823     l = list;
03824     while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
03825         l = l->next;
03826     }; // while
03827 
03828     // Insert team.
03829     {
03830         kmp_team_list_item_t * item =
03831             (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof(  kmp_team_list_item_t ) );
03832         * item = * l;
03833         l->entry = team;
03834         l->next  = item;
03835     }
03836 
03837 }
03838 
03839 static void
03840 __kmp_print_structure_team(
03841     char const *       title,
03842     kmp_team_p const * team
03843 
03844 ) {
03845     __kmp_printf( "%s", title );
03846     if ( team != NULL ) {
03847         __kmp_printf( "%2x %p\n", team->t.t_id, team );
03848     } else {
03849         __kmp_printf( " - (nil)\n" );
03850     }; // if
03851 }
03852 
03853 static void
03854 __kmp_print_structure_thread(
03855     char const *       title,
03856     kmp_info_p const * thread
03857 
03858 ) {
03859     __kmp_printf( "%s", title );
03860     if ( thread != NULL ) {
03861         __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
03862     } else {
03863         __kmp_printf( " - (nil)\n" );
03864     }; // if
03865 }
03866 
03867 static void
03868 __kmp_print_structure(
03869     void
03870 ) {
03871 
03872     kmp_team_list_t list;
03873 
03874     // Initialize list of teams.
03875     list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) );
03876     list->entry = NULL;
03877     list->next  = NULL;
03878 
03879     __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
03880     {
03881         int gtid;
03882         for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
03883             __kmp_printf( "%2d", gtid );
03884             if ( __kmp_threads != NULL ) {
03885                 __kmp_printf( " %p", __kmp_threads[ gtid ] );
03886             }; // if
03887             if ( __kmp_root != NULL ) {
03888                 __kmp_printf( " %p", __kmp_root[ gtid ] );
03889             }; // if
03890             __kmp_printf( "\n" );
03891         }; // for gtid
03892     }
03893 
03894     // Print out __kmp_threads array.
03895     __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" );
03896     if ( __kmp_threads != NULL ) {
03897         int gtid;
03898         for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
03899             kmp_info_t const * thread = __kmp_threads[ gtid ];
03900             if ( thread != NULL ) {
03901                 __kmp_printf( "GTID %2d %p:\n", gtid, thread );
03902                 __kmp_printf(                 "    Our Root:        %p\n", thread->th.th_root );
03903                 __kmp_print_structure_team(   "    Our Team:     ",        thread->th.th_team );
03904                 __kmp_print_structure_team(   "    Serial Team:  ",        thread->th.th_serial_team );
03905                 __kmp_printf(                 "    Threads:      %2d\n",   thread->th.th_team_nproc );
03906                 __kmp_print_structure_thread( "    Master:       ",        thread->th.th_team_master );
03907                 __kmp_printf(                 "    Serialized?:  %2d\n",   thread->th.th_team_serialized );
03908                 __kmp_printf(                 "    Set NProc:    %2d\n",   thread->th.th_set_nproc );
03909 #if OMP_40_ENABLED
03910                 __kmp_printf(                 "    Set Proc Bind: %2d\n",  thread->th.th_set_proc_bind );
03911 #endif
03912                 __kmp_print_structure_thread( "    Next in pool: ",        thread->th.th_next_pool );
03913                 __kmp_printf( "\n" );
03914                 __kmp_print_structure_team_accum( list, thread->th.th_team );
03915                 __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
03916             }; // if
03917         }; // for gtid
03918     } else {
03919         __kmp_printf( "Threads array is not allocated.\n" );
03920     }; // if
03921 
03922     // Print out __kmp_root array.
03923     __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" );
03924     if ( __kmp_root != NULL ) {
03925         int gtid;
03926         for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
03927             kmp_root_t const * root = __kmp_root[ gtid ];
03928             if ( root != NULL ) {
03929                 __kmp_printf( "GTID %2d %p:\n", gtid, root );
03930                 __kmp_print_structure_team(   "    Root Team:    ",      root->r.r_root_team );
03931                 __kmp_print_structure_team(   "    Hot Team:     ",      root->r.r_hot_team );
03932                 __kmp_print_structure_thread( "    Uber Thread:  ",      root->r.r_uber_thread );
03933                 __kmp_printf(                 "    Active?:      %2d\n", root->r.r_active );
03934                 __kmp_printf(                 "    Nested?:      %2d\n", root->r.r_nested );
03935                 __kmp_printf(                 "    In Parallel:  %2d\n", root->r.r_in_parallel );
03936                 __kmp_printf( "\n" );
03937                 __kmp_print_structure_team_accum( list, root->r.r_root_team );
03938                 __kmp_print_structure_team_accum( list, root->r.r_hot_team );
03939             }; // if
03940         }; // for gtid
03941     } else {
03942         __kmp_printf( "Ubers array is not allocated.\n" );
03943     }; // if
03944 
03945     __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" );
03946     while ( list->next != NULL ) {
03947         kmp_team_p const * team = list->entry;
03948         int i;
03949         __kmp_printf( "Team %2x %p:\n", team->t.t_id, team );
03950         __kmp_print_structure_team( "    Parent Team:      ",      team->t.t_parent );
03951         __kmp_printf(               "    Master TID:       %2d\n", team->t.t_master_tid );
03952         __kmp_printf(               "    Max threads:      %2d\n", team->t.t_max_nproc );
03953         __kmp_printf(               "    Levels of serial: %2d\n", team->t.t_serialized );
03954         __kmp_printf(               "    Number threads:   %2d\n", team->t.t_nproc );
03955         for ( i = 0; i < team->t.t_nproc; ++ i ) {
03956             __kmp_printf(           "    Thread %2d:      ", i );
03957             __kmp_print_structure_thread( "", team->t.t_threads[ i ] );
03958         }; // for i
03959         __kmp_print_structure_team( "    Next in pool:     ",      team->t.t_next_pool );
03960         __kmp_printf( "\n" );
03961         list = list->next;
03962     }; // while
03963 
03964     // Print out __kmp_thread_pool and __kmp_team_pool.
03965     __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" );
03966     __kmp_print_structure_thread(   "Thread pool:          ", (kmp_info_t *)__kmp_thread_pool );
03967     __kmp_print_structure_team(     "Team pool:            ", (kmp_team_t *)__kmp_team_pool );
03968     __kmp_printf( "\n" );
03969 
03970     // Free team list.
03971     while ( list != NULL ) {
03972         kmp_team_list_item_t * item = list;
03973         list = list->next;
03974         KMP_INTERNAL_FREE( item );
03975     }; // while
03976 
03977 }
03978 
03979 #endif
03980 
03981 
03982 //---------------------------------------------------------------------------
03983 //  Stuff for per-thread fast random number generator
03984 //  Table of primes
03985 
03986 static const unsigned __kmp_primes[] = {
03987   0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
03988   0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
03989   0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
03990   0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
03991   0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
03992   0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
03993   0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
03994   0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
03995   0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
03996   0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
03997   0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
03998   0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
03999   0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
04000   0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
04001   0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
04002   0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
04003 };
04004 
04005 //---------------------------------------------------------------------------
04006 //  __kmp_get_random: Get a random number using a linear congruential method.
04007 
04008 unsigned short
04009 __kmp_get_random( kmp_info_t * thread )
04010 {
04011   unsigned x = thread -> th.th_x;
04012   unsigned short r = x>>16;
04013 
04014   thread -> th.th_x = x*thread->th.th_a+1;
04015 
04016   KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
04017          thread->th.th_info.ds.ds_tid, r) );
04018 
04019   return r;
04020 }
04021 //--------------------------------------------------------
04022 // __kmp_init_random: Initialize a random number generator
04023 
04024 void
04025 __kmp_init_random( kmp_info_t * thread )
04026 {
04027   unsigned seed = thread->th.th_info.ds.ds_tid;
04028 
04029   thread -> th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))];
04030   thread -> th.th_x = (seed+1)*thread->th.th_a+1;
04031   KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread -> th.th_a) );
04032 }
04033 
04034 
04035 #if KMP_OS_WINDOWS
04036 /* reclaim array entries for root threads that are already dead, returns number reclaimed */
04037 static int
04038 __kmp_reclaim_dead_roots(void) {
04039     int i, r = 0;
04040 
04041     for(i = 0; i < __kmp_threads_capacity; ++i) {
04042         if( KMP_UBER_GTID( i ) &&
04043           !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
04044           !__kmp_root[i]->r.r_active ) { // AC: reclaim only roots died in non-active state
04045             r += __kmp_unregister_root_other_thread(i);
04046         }
04047     }
04048     return r;
04049 }
04050 #endif
04051 
04052 /*
04053    This function attempts to create free entries in __kmp_threads and __kmp_root, and returns the number of
04054    free entries generated.
04055 
04056    For Windows* OS static library, the first mechanism used is to reclaim array entries for root threads that are
04057    already dead.
04058 
04059    On all platforms, expansion is attempted on the arrays __kmp_threads_ and __kmp_root, with appropriate
04060    update to __kmp_threads_capacity.  Array capacity is increased by doubling with clipping to
04061     __kmp_tp_capacity, if threadprivate cache array has been created.
04062    Synchronization with __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.
04063 
04064    After any dead root reclamation, if the clipping value allows array expansion to result in the generation
04065    of a total of nWish free slots, the function does that expansion.  If not, but the clipping value allows
04066    array expansion to result in the generation of a total of nNeed free slots, the function does that expansion.
04067    Otherwise, nothing is done beyond the possible initial root thread reclamation.  However, if nNeed is zero,
04068    a best-effort attempt is made to fulfil nWish as far as possible, i.e. the function will attempt to create
04069    as many free slots as possible up to nWish.
04070 
04071    If any argument is negative, the behavior is undefined.
04072 */
04073 static int
04074 __kmp_expand_threads(int nWish, int nNeed) {
04075     int added = 0;
04076     int old_tp_cached;
04077     int __kmp_actual_max_nth;
04078 
04079     if(nNeed > nWish) /* normalize the arguments */
04080         nWish = nNeed;
04081 #if KMP_OS_WINDOWS && !defined GUIDEDLL_EXPORTS 
04082 /* only for Windows static library */
04083     /* reclaim array entries for root threads that are already dead */
04084     added = __kmp_reclaim_dead_roots();
04085 
04086     if(nNeed) {
04087         nNeed -= added;
04088         if(nNeed < 0)
04089             nNeed = 0;
04090     }
04091     if(nWish) {
04092         nWish -= added;
04093         if(nWish < 0)
04094             nWish = 0;
04095     }
04096 #endif
04097     if(nWish <= 0)
04098         return added;
04099 
04100     while(1) {
04101         int nTarget;
04102         int minimumRequiredCapacity;
04103         int newCapacity;
04104         kmp_info_t **newThreads;
04105         kmp_root_t **newRoot;
04106 
04107         //
04108         // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth.
04109         // If __kmp_max_nth is set to some value less than __kmp_sys_max_nth
04110         // by the user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may
04111         // become > __kmp_max_nth in one of two ways:
04112         //
04113         // 1) The initialization thread (gtid = 0) exits.  __kmp_threads[0]
04114         //    may not be resused by another thread, so we may need to increase
04115         //    __kmp_threads_capacity to __kmp_max_threads + 1.
04116         //
04117         // 2) New foreign root(s) are encountered.  We always register new
04118         //    foreign roots.  This may cause a smaller # of threads to be
04119         //    allocated at subsequent parallel regions, but the worker threads
04120         //    hang around (and eventually go to sleep) and need slots in the
04121         //    __kmp_threads[] array.
04122         //
04123         // Anyway, that is the reason for moving the check to see if
04124         // __kmp_max_threads was exceeded into __kmp_reseerve_threads()
04125         // instead of having it performed here. -BB
04126         //
04127         old_tp_cached = __kmp_tp_cached;
04128         __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
04129         KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
04130 
04131         /* compute expansion headroom to check if we can expand and whether to aim for nWish or nNeed */
04132         nTarget = nWish;
04133         if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
04134             /* can't fulfil nWish, so try nNeed */
04135             if(nNeed) {
04136                 nTarget = nNeed;
04137                 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
04138                     /* possible expansion too small -- give up */
04139                     break;
04140                 }
04141             } else {
04142                 /* best-effort */
04143                 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
04144                 if(!nTarget) {
04145                     /* can expand at all -- give up */
04146                     break;
04147                 }
04148             }
04149         }
04150         minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
04151 
04152         newCapacity = __kmp_threads_capacity;
04153         do{
04154             newCapacity =
04155                 newCapacity <= (__kmp_actual_max_nth >> 1) ?
04156                 (newCapacity << 1) :
04157                 __kmp_actual_max_nth;
04158         } while(newCapacity < minimumRequiredCapacity);
04159         newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
04160         newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity );
04161         memcpy(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*));
04162         memcpy(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*));
04163         memset(newThreads + __kmp_threads_capacity, 0,
04164                (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*));
04165         memset(newRoot + __kmp_threads_capacity, 0,
04166                (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*));
04167 
04168         if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
04169             /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has allocated a threadprivate cache
04170                while we were allocating the expanded array, and our new capacity is larger than the threadprivate
04171                cache capacity, so we should deallocate the expanded arrays and try again.  This is the first check
04172                of a double-check pair.
04173             */
04174             __kmp_free(newThreads);
04175             continue; /* start over and try again */
04176         }
04177         __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
04178         if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
04179             /* Same check as above, but this time with the lock so we can be sure if we can succeed. */
04180             __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
04181             __kmp_free(newThreads);
04182             continue; /* start over and try again */
04183         } else {
04184             /* success */
04185             // __kmp_free( __kmp_threads ); // ATT: It leads to crash. Need to be investigated. 
04186             //
04187             // I don't want to put a TCR_PTR macro around every read of the
04188             // __kmp_threads array, so just ingore this write of it.
04189             //
04190             TC_IGNORE({ *(kmp_info_t**volatile*)&__kmp_threads = newThreads; });
04191             TC_IGNORE({ *(kmp_root_t**volatile*)&__kmp_root = newRoot; });
04192             added += newCapacity - __kmp_threads_capacity;
04193             TC_IGNORE({ *(volatile int*)&__kmp_threads_capacity = newCapacity; });
04194             __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
04195             break; /* succeded, so we can exit the loop */
04196         }
04197     }
04198     return added;
04199 }
04200 
04201 /* register the current thread as a root thread and obtain our gtid */
04202 /* we must have the __kmp_initz_lock held at this point */
04203 /* Argument TRUE only if are the thread that calls from __kmp_do_serial_initialize() */
04204 int
04205 __kmp_register_root( int initial_thread )
04206 {
04207     kmp_info_t *root_thread;
04208     kmp_root_t *root;
04209     int         gtid;
04210     int         capacity;
04211     __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
04212     KA_TRACE( 20, ("__kmp_register_root: entered\n"));
04213     KMP_MB();
04214 
04215 
04216     /*
04217         2007-03-02:
04218 
04219         If initial thread did not invoke OpenMP RTL yet, and this thread is not an initial one,
04220         "__kmp_all_nth >= __kmp_threads_capacity" condition does not work as expected -- it may
04221         return false (that means there is at least one empty slot in __kmp_threads array), but it
04222         is possible the only free slot is #0, which is reserved for initial thread and so cannot be
04223         used for this one. Following code workarounds this bug.
04224 
04225         However, right solution seems to be not reserving slot #0 for initial thread because:
04226             (1) there is no magic in slot #0,
04227             (2) we cannot detect initial thread reliably (the first thread which does serial
04228                 initialization may be not a real initial thread).
04229     */
04230     capacity = __kmp_threads_capacity;
04231     if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
04232         -- capacity;
04233     }; // if
04234 
04235     /* see if there are too many threads */
04236     if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
04237         if ( __kmp_tp_cached ) {
04238             __kmp_msg(
04239                 kmp_ms_fatal,
04240                 KMP_MSG( CantRegisterNewThread ),
04241                 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
04242                 KMP_HNT( PossibleSystemLimitOnThreads ),
04243                 __kmp_msg_null
04244             );
04245         }
04246         else {
04247             __kmp_msg(
04248                 kmp_ms_fatal,
04249                 KMP_MSG( CantRegisterNewThread ),
04250                 KMP_HNT( SystemLimitOnThreads ),
04251                 __kmp_msg_null
04252             );
04253         }
04254     }; // if
04255 
04256     /* find an available thread slot */
04257     /* Don't reassign the zero slot since we need that to only be used by initial
04258        thread */
04259     for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ );
04260     KA_TRACE( 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
04261     KMP_ASSERT( gtid < __kmp_threads_capacity );
04262 
04263     /* update global accounting */
04264     __kmp_all_nth ++;
04265     TCW_4(__kmp_nth, __kmp_nth + 1);
04266 
04267     //
04268     // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
04269     // for low numbers of procs, and method #2 (keyed API call) for higher
04270     // numbers of procs.
04271     //
04272     if ( __kmp_adjust_gtid_mode ) {
04273         if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
04274             if ( TCR_4(__kmp_gtid_mode) != 2) {
04275                 TCW_4(__kmp_gtid_mode, 2);
04276             }
04277         }
04278         else {
04279             if (TCR_4(__kmp_gtid_mode) != 1 ) {
04280                 TCW_4(__kmp_gtid_mode, 1);
04281             }
04282         }
04283     }
04284 
04285 #ifdef KMP_ADJUST_BLOCKTIME
04286     /* Adjust blocktime to zero if necessary            */
04287     /* Middle initialization might not have ocurred yet */
04288     if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
04289         if ( __kmp_nth > __kmp_avail_proc ) {
04290             __kmp_zero_bt = TRUE;
04291         }
04292     }
04293 #endif /* KMP_ADJUST_BLOCKTIME */
04294 
04295     /* setup this new hierarchy */
04296     if( ! ( root = __kmp_root[gtid] )) {
04297         root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) );
04298         KMP_DEBUG_ASSERT( ! root->r.r_root_team );
04299     }
04300 
04301     __kmp_initialize_root( root );
04302 
04303     /* setup new root thread structure */
04304     if( root -> r.r_uber_thread ) {
04305         root_thread = root -> r.r_uber_thread;
04306     } else {
04307         root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
04308         if ( __kmp_storage_map ) {
04309             __kmp_print_thread_storage_map( root_thread, gtid );
04310         }
04311         root_thread -> th.th_info .ds.ds_gtid = gtid;
04312         root_thread -> th.th_root =  root;
04313         if( __kmp_env_consistency_check ) {
04314             root_thread -> th.th_cons = __kmp_allocate_cons_stack( gtid );
04315         }
04316         #if USE_FAST_MEMORY
04317             __kmp_initialize_fast_memory( root_thread );
04318         #endif /* USE_FAST_MEMORY */
04319 
04320         #if KMP_USE_BGET
04321             KMP_DEBUG_ASSERT( root_thread -> th.th_local.bget_data == NULL );
04322             __kmp_initialize_bget( root_thread );
04323         #endif
04324         __kmp_init_random( root_thread );  // Initialize random number generator
04325     }
04326 
04327     /* setup the serial team held in reserve by the root thread */
04328     if( ! root_thread -> th.th_serial_team ) {
04329         #if OMP_30_ENABLED
04330             kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
04331         #endif // OMP_30_ENABLED
04332         KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) );
04333 
04334     kmp_info_t *ti = ompt_get_thread_gtid(gtid);
04335     ompt_parallel_id_t ompt_parallel_id_3 = __ompt_parallel_id_new(ti, gtid);
04336 
04337         root_thread -> th.th_serial_team = __kmp_allocate_team( root, 1, 1,
04338 #if OMPT_SUPPORT
04339           ompt_parallel_id_3,
04340 #endif
04341 #if OMP_40_ENABLED
04342           proc_bind_default,
04343 #endif
04344 #if OMP_30_ENABLED
04345           &r_icvs,
04346 #else
04347           __kmp_dflt_team_nth_ub,
04348           __kmp_global.g.g_dynamic,
04349           __kmp_dflt_nested,
04350           __kmp_dflt_blocktime,
04351           __kmp_bt_intervals,
04352           __kmp_env_blocktime,
04353 #endif // OMP_30_ENABLED
04354           0 );
04355     }
04356     KMP_ASSERT( root_thread -> th.th_serial_team );
04357     KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n",
04358       root_thread -> th.th_serial_team ) );
04359 
04360     /* drop root_thread into place */
04361     TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
04362 
04363     root -> r.r_root_team -> t.t_threads[0] = root_thread;
04364     root -> r.r_hot_team  -> t.t_threads[0] = root_thread;
04365     root_thread -> th.th_serial_team -> t.t_threads[0] = root_thread;
04366     root -> r.r_uber_thread = root_thread;
04367 
04368     /* initialize the thread, get it ready to go */
04369     __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
04370 
04371     /* prepare the master thread for get_gtid() */
04372     __kmp_gtid_set_specific( gtid );
04373     #ifdef KMP_TDATA_GTID
04374         __kmp_gtid = gtid;
04375     #endif
04376     __kmp_create_worker( gtid, root_thread, __kmp_stksize );
04377     KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
04378     TCW_4(__kmp_init_gtid, TRUE);
04379 
04380     KA_TRACE( 20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
04381                     gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
04382                     root -> r.r_hot_team -> t.t_id, 0, KMP_INIT_BARRIER_STATE,
04383                     KMP_INIT_BARRIER_STATE ) );
04384     { // Initialize barrier data.
04385         int b;
04386         for ( b = 0; b < bs_last_barrier; ++ b ) {
04387             root_thread->th.th_bar[ b ].bb.b_arrived        = KMP_INIT_BARRIER_STATE;
04388         }; // for
04389     }
04390     KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
04391 
04392 
04393 #if KMP_OS_WINDOWS || KMP_OS_LINUX
04394     if ( TCR_4(__kmp_init_middle) ) {
04395         __kmp_affinity_set_init_mask( gtid, TRUE );
04396     }
04397 #endif /* KMP_OS_WINDOWS || KMP_OS_LINUX */
04398 
04399     __kmp_root_counter ++;
04400 
04401     KMP_MB();
04402     __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
04403 
04404     return gtid;
04405 }
04406 
04407 /* Resets a root thread and clear its root and hot teams.
04408    Returns the number of __kmp_threads entries directly and indirectly freed.
04409 */
04410 static int
04411 __kmp_reset_root(int gtid, kmp_root_t *root)
04412 {
04413     kmp_team_t * root_team = root->r.r_root_team;
04414     kmp_team_t * hot_team  = root->r.r_hot_team;
04415     int          n         = hot_team->t.t_nproc;
04416     int i;
04417 
04418     KMP_DEBUG_ASSERT( ! root->r.r_active );
04419 
04420     root->r.r_root_team = NULL;
04421     root->r.r_hot_team  = NULL;
04422         // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team before call
04423         // to __kmp_free_team().
04424     __kmp_free_team( root, root_team );
04425     __kmp_free_team( root, hot_team );
04426 
04427 #if OMP_30_ENABLED
04428     //
04429     // Before we can reap the thread, we need to make certain that all
04430     // other threads in the teams that had this root as ancestor have stopped trying to steal tasks.
04431     //
04432     if ( __kmp_tasking_mode != tskm_immediate_exec ) {
04433         __kmp_wait_to_unref_task_teams();
04434     }
04435 #endif /* OMP_30_ENABLED */
04436 
04437     #if KMP_OS_WINDOWS
04438         /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */
04439         KA_TRACE( 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n",
04440             (LPVOID)&(root->r.r_uber_thread->th),
04441             root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
04442         __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
04443     #endif /* KMP_OS_WINDOWS */
04444 
04445     TCW_4(__kmp_nth, __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
04446     __kmp_reap_thread( root->r.r_uber_thread, 1 );
04447 
04448         // We canot put root thread to __kmp_thread_pool, so we have to reap it istead of freeing.
04449     root->r.r_uber_thread = NULL;
04450     /* mark root as no longer in use */
04451     root -> r.r_begin = FALSE;
04452 
04453     return n;
04454 }
04455 
04456 void
04457 __kmp_unregister_root_current_thread( int gtid )
04458 {
04459     kmp_root_t *root = __kmp_root[gtid];
04460 
04461     KA_TRACE( 1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
04462     KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
04463     KMP_ASSERT( KMP_UBER_GTID( gtid ));
04464     KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
04465     KMP_ASSERT( root->r.r_active == FALSE );
04466 
04467     /* this lock should be ok, since unregister_root_current_thread is never called during
04468      * and abort, only during a normal close.  furthermore, if you have the
04469      * forkjoin lock, you should never try to get the initz lock */
04470 
04471     __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
04472 
04473     KMP_MB();
04474 
04475     __kmp_reset_root(gtid, root);
04476 
04477     /* free up this thread slot */
04478     __kmp_gtid_set_specific( KMP_GTID_DNE );
04479 #ifdef KMP_TDATA_GTID
04480     __kmp_gtid = KMP_GTID_DNE;
04481 #endif
04482 
04483     KMP_MB();
04484     KC_TRACE( 10, ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
04485 
04486     __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
04487 }
04488 
04489 /* __kmp_forkjoin_lock must be already held
04490    Unregisters a root thread that is not the current thread.  Returns the number of
04491    __kmp_threads entries freed as a result.
04492  */
04493 static int
04494 __kmp_unregister_root_other_thread( int gtid )
04495 {
04496     kmp_root_t *root = __kmp_root[gtid];
04497     int r;
04498 
04499     KA_TRACE( 1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
04500     KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
04501     KMP_ASSERT( KMP_UBER_GTID( gtid ));
04502     KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
04503     KMP_ASSERT( root->r.r_active == FALSE );
04504 
04505     r = __kmp_reset_root(gtid, root);
04506     KC_TRACE( 10, ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
04507     return r;
04508 }
04509 
04510 #if OMP_30_ENABLED
04511 
04512 #if KMP_DEBUG
04513 void __kmp_task_info() {
04514 
04515     kmp_int32 gtid       = __kmp_entry_gtid();
04516     kmp_int32 tid        = __kmp_tid_from_gtid( gtid );
04517     kmp_info_t *this_thr = __kmp_threads[ gtid ];
04518     kmp_team_t *steam    = this_thr -> th.th_serial_team;
04519     kmp_team_t *team     = this_thr -> th.th_team;
04520 
04521     __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
04522         gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
04523 }
04524 #endif // KMP_DEBUG
04525 
04526 #endif // OMP_30_ENABLED
04527 
04528 /* TODO optimize with one big memclr, take out what isn't needed,
04529  * split responsility to workers as much as possible, and delay
04530  * initialization of features as much as possible  */
04531 static void
04532 __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid )
04533 {
04534     /* this_thr->th.th_info.ds.ds_gtid is setup in kmp_allocate_thread/create_worker
04535      * this_thr->th.th_serial_team is setup in __kmp_allocate_thread */
04536 
04537     KMP_DEBUG_ASSERT( this_thr != NULL );
04538     KMP_DEBUG_ASSERT( this_thr -> th.th_serial_team );
04539     KMP_DEBUG_ASSERT( team );
04540     KMP_DEBUG_ASSERT( team -> t.t_threads  );
04541     KMP_DEBUG_ASSERT( team -> t.t_dispatch );
04542     KMP_DEBUG_ASSERT( team -> t.t_threads[0] );
04543     KMP_DEBUG_ASSERT( team -> t.t_threads[0] -> th.th_root );
04544 
04545     KMP_MB();
04546 
04547     TCW_SYNC_PTR(this_thr->th.th_team, team);
04548 
04549     this_thr->th.th_info.ds.ds_tid  = tid;
04550     this_thr->th.th_set_nproc       = 0;
04551 #if OMP_40_ENABLED
04552     this_thr->th.th_set_proc_bind   = proc_bind_default;
04553 # if (KMP_OS_WINDOWS || KMP_OS_LINUX)
04554     this_thr->th.th_new_place       = this_thr->th.th_current_place;
04555 # endif
04556 #endif
04557     this_thr->th.th_root            = team -> t.t_threads[0] -> th.th_root;
04558 
04559     /* setup the thread's cache of the team structure */
04560     this_thr->th.th_team_nproc      = team -> t.t_nproc;
04561     this_thr->th.th_team_master     = team -> t.t_threads[0];
04562     this_thr->th.th_team_serialized = team -> t.t_serialized;
04563     TCW_PTR(this_thr->th.th_sleep_loc, NULL);
04564 
04565 #if OMP_30_ENABLED
04566     KMP_DEBUG_ASSERT( team -> t.t_implicit_task_taskdata );
04567     this_thr->th.th_task_state = 0;
04568 
04569     KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
04570                     tid, gtid, this_thr, this_thr->th.th_current_task ) );
04571 
04572     __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
04573 
04574     KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
04575                     tid, gtid, this_thr, this_thr->th.th_current_task ) );
04576     // TODO: Initialize ICVs from parent; GEH - isn't that already done in __kmp_initialize_team()?
04577 #endif // OMP_30_ENABLED
04578 
04579     /* TODO no worksharing in speculative threads */
04580     this_thr -> th.th_dispatch      = &team -> t.t_dispatch[ tid ];
04581 
04582     this_thr->th.th_local.this_construct = 0;
04583     this_thr->th.th_local.last_construct = 0;
04584 
04585 #ifdef BUILD_TV
04586     this_thr->th.th_local.tv_data = 0;
04587 #endif
04588 
04589     if ( ! this_thr->th.th_pri_common ) {
04590         this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) );
04591         if ( __kmp_storage_map ) {
04592             __kmp_print_storage_map_gtid(
04593                 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
04594                 sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid
04595             );
04596         }; // if
04597         this_thr->th.th_pri_head = NULL;
04598     }; // if
04599 
04600     /* Initialize dynamic dispatch */
04601     {
04602         volatile kmp_disp_t *dispatch = this_thr -> th.th_dispatch;
04603         /*
04604          * Use team max_nproc since this will never change for the team.
04605          */
04606         size_t disp_size = sizeof( dispatch_private_info_t ) *
04607             ( team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF );
04608         KD_TRACE( 10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
04609         KMP_ASSERT( dispatch );
04610         KMP_DEBUG_ASSERT( team -> t.t_dispatch );
04611         KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
04612 
04613         dispatch->th_disp_index = 0;
04614 
04615         if( ! dispatch -> th_disp_buffer )  {
04616             dispatch -> th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
04617 
04618             if ( __kmp_storage_map ) {
04619                 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
04620                                          &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF ],
04621                                          disp_size, "th_%d.th_dispatch.th_disp_buffer "
04622                                          "(team_%d.t_dispatch[%d].th_disp_buffer)",
04623                                          gtid, team->t.t_id, gtid );
04624             }
04625         } else {
04626             memset( & dispatch -> th_disp_buffer[0], '\0', disp_size );
04627         }
04628 
04629         dispatch -> th_dispatch_pr_current = 0;
04630         dispatch -> th_dispatch_sh_current = 0;
04631 
04632         dispatch -> th_deo_fcn = 0;             /* ORDERED     */
04633         dispatch -> th_dxo_fcn = 0;             /* END ORDERED */
04634     }
04635 
04636     this_thr->th.th_next_pool = NULL;
04637 
04638     KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
04639     KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
04640 
04641 #if OMPT_SUPPORT
04642     this_thr->th.ompt_thread_info.state = ompt_state_overhead;
04643     this_thr->th.ompt_thread_info.wait_id = 0;
04644     this_thr->th.ompt_thread_info.next_task_id = 1;
04645     this_thr->th.ompt_thread_info.next_parallel_id = tid+1;
04646     //this_thr->th.ompt_thread_info.next_parallel_id = 1;
04647     this_thr->th.ompt_thread_info.lw_taskteam = NULL;
04648 #endif
04649 
04650     KMP_MB();
04651 }
04652 
04653 
04654 /* allocate a new thread for the requesting team.  this is only called from within a
04655  * forkjoin critical section.  we will first try to get an available thread from the
04656  * thread pool.  if none is available, we will fork a new one assuming we are able
04657  * to create a new one.  this should be assured, as the caller should check on this
04658  * first.
04659  */
04660 kmp_info_t *
04661 __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
04662 {
04663     kmp_team_t  *serial_team;
04664     kmp_info_t  *new_thr;
04665     int          new_gtid;
04666 
04667     KA_TRACE( 20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
04668     KMP_DEBUG_ASSERT( root && team );
04669     KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
04670     KMP_MB();
04671 
04672     /* first, try to get one from the thread pool */
04673     if ( __kmp_thread_pool ) {
04674 
04675         new_thr = (kmp_info_t*)__kmp_thread_pool;
04676         __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
04677         if ( new_thr == __kmp_thread_pool_insert_pt ) {
04678             __kmp_thread_pool_insert_pt = NULL;
04679         }
04680         TCW_4(new_thr->th.th_in_pool, FALSE);
04681         //
04682         // Don't touch th_active_in_pool or th_active.
04683         // The worker thread adjusts those flags as it sleeps/awakens.
04684         //
04685 
04686         __kmp_thread_pool_nth--;
04687 
04688         KA_TRACE( 20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",
04689                     __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
04690         KMP_ASSERT(       ! new_thr -> th.th_team );
04691         KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
04692         KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
04693 
04694         /* setup the thread structure */
04695         __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
04696         KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
04697 
04698         TCW_4(__kmp_nth, __kmp_nth + 1);
04699 
04700 #ifdef KMP_ADJUST_BLOCKTIME
04701         /* Adjust blocktime back to zero if necessar      y */
04702         /* Middle initialization might not have ocurred yet */
04703         if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
04704             if ( __kmp_nth > __kmp_avail_proc ) {
04705                 __kmp_zero_bt = TRUE;
04706             }
04707         }
04708 #endif /* KMP_ADJUST_BLOCKTIME */
04709 
04710         KF_TRACE( 10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",
04711                     __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
04712 
04713         KMP_MB();
04714         return new_thr;
04715     }
04716 
04717 
04718     /* no, well fork a new one */
04719     KMP_ASSERT( __kmp_nth    == __kmp_all_nth );
04720     KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
04721 
04722     //
04723     // If this is the first worker thread the RTL is creating, then also
04724     // launch the monitor thread.  We try to do this as early as possible.
04725     //
04726     if ( ! TCR_4( __kmp_init_monitor ) ) {
04727         __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
04728         if ( ! TCR_4( __kmp_init_monitor ) ) {
04729             KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) );
04730             TCW_4( __kmp_init_monitor, 1 );
04731             __kmp_create_monitor( & __kmp_monitor );
04732             KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) );
04733         }
04734         __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
04735     }
04736 
04737     KMP_MB();
04738     for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
04739         KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
04740     }
04741 
04742     /* allocate space for it. */
04743     new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) );
04744 
04745     TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
04746 
04747     if ( __kmp_storage_map ) {
04748         __kmp_print_thread_storage_map( new_thr, new_gtid );
04749     }
04750 
04751     /* add the reserve serialized team, initialized from the team's master thread */
04752     {
04753     #if OMP_30_ENABLED
04754     kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
04755     #endif // OMP_30_ENABLED
04756     KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) );
04757 
04758     kmp_info_t *ti = ompt_get_thread_gtid(new_gtid);
04759     ompt_parallel_id_t ompt_parallel_id_4 = __ompt_parallel_id_new(ti, new_gtid);
04760 
04761     new_thr -> th.th_serial_team = serial_team =
04762         (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
04763 #if OMPT_SUPPORT
04764           ompt_parallel_id_4,
04765 #endif
04766 #if OMP_40_ENABLED
04767                                            proc_bind_default,
04768 #endif
04769 #if OMP_30_ENABLED
04770                                            &r_icvs,
04771 #else
04772                                            team->t.t_set_nproc[0],
04773                                            team->t.t_set_dynamic[0],
04774                                            team->t.t_set_nested[0],
04775                                            team->t.t_set_blocktime[0],
04776                                            team->t.t_set_bt_intervals[0],
04777                                            team->t.t_set_bt_set[0],
04778 #endif // OMP_30_ENABLED
04779                                            0 );
04780     }
04781     KMP_ASSERT ( serial_team );
04782     serial_team -> t.t_threads[0] = new_thr;
04783     KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
04784       new_thr ) );
04785 
04786     /* setup the thread structures */
04787     __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
04788 
04789     #if USE_FAST_MEMORY
04790         __kmp_initialize_fast_memory( new_thr );
04791     #endif /* USE_FAST_MEMORY */
04792 
04793     #if KMP_USE_BGET
04794         KMP_DEBUG_ASSERT( new_thr -> th.th_local.bget_data == NULL );
04795         __kmp_initialize_bget( new_thr );
04796     #endif
04797 
04798     __kmp_init_random( new_thr );  // Initialize random number generator
04799 
04800     /* Initialize these only once when thread is grabbed for a team allocation */
04801     KA_TRACE( 20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
04802                     __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
04803 
04804     new_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_go = KMP_INIT_BARRIER_STATE;
04805     new_thr->th.th_bar[ bs_plain_barrier    ].bb.b_go = KMP_INIT_BARRIER_STATE;
04806     #if KMP_FAST_REDUCTION_BARRIER
04807     new_thr->th.th_bar[ bs_reduction_barrier ].bb.b_go = KMP_INIT_BARRIER_STATE;
04808     #endif // KMP_FAST_REDUCTION_BARRIER
04809 
04810     new_thr->th.th_spin_here = FALSE;
04811     new_thr->th.th_next_waiting = 0;
04812 
04813 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
04814     new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
04815     new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
04816     new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
04817     new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
04818 #endif
04819 
04820     TCW_4(new_thr->th.th_in_pool, FALSE);
04821     new_thr->th.th_active_in_pool = FALSE;
04822     TCW_4(new_thr->th.th_active, TRUE);
04823 
04824     /* adjust the global counters */
04825     __kmp_all_nth ++;
04826     __kmp_nth ++;
04827 
04828     //
04829     // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search)
04830     // for low numbers of procs, and method #2 (keyed API call) for higher
04831     // numbers of procs.
04832     //
04833     if ( __kmp_adjust_gtid_mode ) {
04834         if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
04835             if ( TCR_4(__kmp_gtid_mode) != 2) {
04836                 TCW_4(__kmp_gtid_mode, 2);
04837             }
04838         }
04839         else {
04840             if (TCR_4(__kmp_gtid_mode) != 1 ) {
04841                 TCW_4(__kmp_gtid_mode, 1);
04842             }
04843         }
04844     }
04845 
04846 #ifdef KMP_ADJUST_BLOCKTIME
04847     /* Adjust blocktime back to zero if necessary       */
04848     /* Middle initialization might not have ocurred yet */
04849     if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
04850         if ( __kmp_nth > __kmp_avail_proc ) {
04851             __kmp_zero_bt = TRUE;
04852         }
04853     }
04854 #endif /* KMP_ADJUST_BLOCKTIME */
04855 
04856     /* actually fork it and create the new worker thread */
04857     KF_TRACE( 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
04858     __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
04859     KF_TRACE( 10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
04860 
04861 
04862     KA_TRACE( 20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
04863     KMP_MB();
04864     return new_thr;
04865 }
04866 
04867 /*
04868  * reinitialize team for reuse.
04869  *
04870  * The hot team code calls this case at every fork barrier, so EPCC barrier
04871  * test are extremely sensitive to changes in it, esp. writes to the team
04872  * struct, which cause a cache invalidation in all threads.
04873  *
04874  * IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!!
04875  */
04876 static void
04877 __kmp_reinitialize_team(
04878     kmp_team_t *  team,
04879     int           new_nproc,
04880     #if OMP_30_ENABLED
04881         kmp_internal_control_t * new_icvs,
04882         ident_t *                loc
04883     #else
04884         int new_set_nproc, int new_set_dynamic, int new_set_nested,
04885         int new_set_blocktime, int new_bt_intervals, int new_bt_set
04886     #endif // OMP_30_ENABLED
04887 ) {
04888     int f;
04889     #if OMP_30_ENABLED
04890         KMP_DEBUG_ASSERT( team && new_nproc && new_icvs );
04891         KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
04892         team->t.t_ident = loc;
04893     #else
04894         KMP_DEBUG_ASSERT( team && new_nproc && new_set_nproc );
04895     #endif // OMP_30_ENABLED
04896 
04897     team->t.t_id = KMP_GEN_TEAM_ID();
04898 
04899 #if KMP_BARRIER_ICV_PULL
04900     //
04901     // Copy the ICV's to the team structure, where all of the worker threads
04902     // can access them and make their own copies after the barrier.
04903     //
04904     copy_icvs( &team->t.t_initial_icvs, new_icvs );
04905 
04906     //
04907     // Set up the master thread's copy of the ICV's.  __kmp_fork_call()
04908     // assumes they are already set in the master thread.
04909     // FIXME - change that code to use the team->t.t_initial_icvs copy
04910     // and eliminate this copy.
04911     //
04912     __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
04913     copy_icvs( &team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs );
04914     KF_TRACE( 10, ( "__kmp_reinitialize_team2: T#%d this_thread=%p team=%p\n",
04915                     0, team->t.t_threads[0], team ) );
04916 
04917 #elif KMP_BARRIER_ICV_PUSH
04918     //
04919     // Set the ICV's in the master thread only.
04920     // They will be propagated by the fork barrier.
04921     //
04922     __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
04923     copy_icvs( &team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs );
04924     KF_TRACE( 10, ( "__kmp_reinitialize_team2: T#%d this_thread=%p team=%p\n",
04925                     0, team->t.t_threads[0], team ) );
04926 
04927 #else
04928     //
04929     // Copy the icvs to each of the threads.  This takes O(nthreads) time.
04930     //
04931     for( f=0 ; f<new_nproc ; f++) {
04932 # if OMP_30_ENABLED
04933         // TODO: GEH - pass in better source location info since usually NULL here
04934         KF_TRACE( 10, ( "__kmp_reinitialize_team1: T#%d this_thread=%p team=%p\n",
04935                         f, team->t.t_threads[f], team ) );
04936         __kmp_init_implicit_task( loc, team->t.t_threads[f], team, f, FALSE );
04937         copy_icvs( &team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs );
04938         KF_TRACE( 10, ( "__kmp_reinitialize_team2: T#%d this_thread=%p team=%p\n",
04939                         f, team->t.t_threads[f], team ) );
04940 # else
04941         team -> t.t_set_nproc[f]   = new_set_nproc;
04942         team -> t.t_set_dynamic[f] = new_set_dynamic;
04943         team -> t.t_set_nested[f]  = new_set_nested;
04944         team -> t.t_set_blocktime[f]   = new_set_blocktime;
04945         team -> t.t_set_bt_intervals[f] = new_bt_intervals;
04946         team -> t.t_set_bt_set[f]  = new_bt_set;
04947 # endif // OMP_30_ENABLED
04948     }
04949 
04950 #endif // KMP_BARRIER_ICV_PUSH || KMP_BARRIER_ICV_PULL
04951 
04952 }
04953 
04954 /* initialize the team data structure
04955  * this assumes the t_threads and t_max_nproc are already set
04956  * also, we don't touch the arguments */
04957 static void
04958 __kmp_initialize_team(
04959     kmp_team_t * team,
04960     int          new_nproc,
04961     #if OMP_30_ENABLED
04962         kmp_internal_control_t * new_icvs,
04963         ident_t *                loc
04964     #else
04965         int new_set_nproc, int new_set_dynamic, int new_set_nested,
04966         int new_set_blocktime, int new_bt_intervals, int new_bt_set
04967     #endif // OMP_30_ENABLED
04968 ) {
04969     /* verify */
04970     KMP_DEBUG_ASSERT( team );
04971     KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
04972     KMP_DEBUG_ASSERT( team->t.t_threads );
04973     KMP_MB();
04974 
04975     team -> t.t_master_tid  = 0;    /* not needed */
04976     /* team -> t.t_master_bar;        not needed */
04977     team -> t.t_serialized  = 0;
04978     team -> t.t_nproc       = new_nproc;
04979 
04980     /* team -> t.t_parent     = NULL; TODO not needed & would mess up hot team */
04981     team -> t.t_next_pool   = NULL;
04982     /* memset( team -> t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess up hot team */
04983 
04984     TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
04985     team -> t.t_invoke      = NULL; /* not needed */
04986 
04987 #if OMP_30_ENABLED
04988     // TODO???: team -> t.t_max_active_levels       = new_max_active_levels;
04989     team -> t.t_sched       = new_icvs->sched;
04990 #endif // OMP_30_ENABLED
04991 
04992 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
04993     team -> t.t_fp_control_saved = FALSE; /* not needed */
04994     team -> t.t_x87_fpu_control_word = 0; /* not needed */
04995     team -> t.t_mxcsr = 0;                /* not needed */
04996 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
04997 
04998     team -> t.t_construct   = 0;
04999     __kmp_init_lock( & team -> t.t_single_lock );
05000 
05001     team -> t.t_ordered .dt.t_value = 0;
05002     team -> t.t_master_active = FALSE;
05003 
05004     memset( & team -> t.t_taskq, '\0', sizeof( kmp_taskq_t ));
05005 
05006 #ifdef KMP_DEBUG
05007     team -> t.t_copypriv_data = NULL;  /* not necessary, but nice for debugging */
05008 #endif
05009     team -> t.t_copyin_counter = 0;    /* for barrier-free copyin implementation */
05010 
05011     team -> t.t_control_stack_top = NULL;
05012 
05013     __kmp_reinitialize_team(
05014         team, new_nproc,
05015         #if OMP_30_ENABLED
05016             new_icvs,
05017             loc
05018         #else
05019             new_set_nproc, new_set_dynamic, new_set_nested,
05020             new_set_blocktime, new_bt_intervals, new_bt_set
05021         #endif // OMP_30_ENABLED
05022     );
05023 
05024     KMP_MB();
05025 }
05026 
05027 #if KMP_OS_LINUX
05028 /* Sets full mask for thread and returns old mask, no changes to structures. */
05029 static void
05030 __kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
05031 {
05032     if ( KMP_AFFINITY_CAPABLE() ) {
05033         int status;
05034         if ( old_mask != NULL ) {
05035             status = __kmp_get_system_affinity( old_mask, TRUE );
05036             int error = errno;
05037             if ( status != 0 ) {
05038                 __kmp_msg(
05039                     kmp_ms_fatal,
05040                     KMP_MSG( ChangeThreadAffMaskError ),
05041                     KMP_ERR( error ),
05042                     __kmp_msg_null
05043                 );
05044             }
05045         }
05046         __kmp_set_system_affinity( __kmp_affinity_get_fullMask(), TRUE );
05047     }
05048 }
05049 #endif
05050 
05051 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
05052 
05053 //
05054 // __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
05055 // It calculats the worker + master thread's partition based upon the parent
05056 // thread's partition, and binds each worker to a thread in thier partition.
05057 // The master thread's partition should already include its current binding.
05058 //
05059 static void
05060 __kmp_partition_places( kmp_team_t *team )
05061 {
05062     //
05063     // Copy the master thread's place partion to the team struct
05064     //
05065     kmp_info_t *master_th = team->t.t_threads[0];
05066     KMP_DEBUG_ASSERT( master_th != NULL );
05067     kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
05068     int first_place = master_th->th.th_first_place;
05069     int last_place = master_th->th.th_last_place;
05070     int masters_place = master_th->th.th_current_place;
05071     team->t.t_first_place = first_place;
05072     team->t.t_last_place = last_place;
05073 
05074     KA_TRACE( 20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
05075        proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
05076        masters_place, first_place, last_place ) );
05077 
05078     switch ( proc_bind ) {
05079 
05080         case proc_bind_default:
05081         //
05082         // serial teams might have the proc_bind policy set to
05083         // proc_bind_default.  It doesn't matter, as we don't
05084         // rebind the master thread for any proc_bind policy.
05085         //
05086         KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
05087         break;
05088 
05089         case proc_bind_master:
05090         {
05091             int f;
05092             int n_th = team->t.t_nproc;
05093             for ( f = 1; f < n_th; f++ ) {
05094                 kmp_info_t *th = team->t.t_threads[f];
05095                 KMP_DEBUG_ASSERT( th != NULL );
05096                 th->th.th_first_place = first_place;
05097                 th->th.th_last_place = last_place;
05098                 th->th.th_new_place = masters_place;
05099 
05100                 KA_TRACE( 100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
05101                   __kmp_gtid_from_thread( team->t.t_threads[f] ),
05102                   team->t.t_id, f, masters_place, first_place, last_place ) );
05103             }
05104         }
05105         break;
05106 
05107         case proc_bind_close:
05108         {
05109             int f;
05110             int n_th = team->t.t_nproc;
05111             int n_places;
05112             if ( first_place <= last_place ) {
05113                 n_places = last_place - first_place + 1;
05114             }
05115             else {
05116                 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
05117             }
05118             if ( n_th <= n_places ) {
05119                 int place = masters_place;
05120                 for ( f = 1; f < n_th; f++ ) {
05121                     kmp_info_t *th = team->t.t_threads[f];
05122                     KMP_DEBUG_ASSERT( th != NULL );
05123 
05124                     if ( place == last_place ) {
05125                         place = first_place;
05126                     }
05127                     else if ( place == __kmp_affinity_num_masks - 1) {
05128                         place = 0;
05129                     }
05130                     else {
05131                         place++;
05132                     }
05133                     th->th.th_first_place = first_place;
05134                     th->th.th_last_place = last_place;
05135                     th->th.th_new_place = place;
05136 
05137                     KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
05138                        __kmp_gtid_from_thread( team->t.t_threads[f] ),
05139                        team->t.t_id, f, place, first_place, last_place ) );
05140                 }
05141             }
05142             else {
05143                 int S, rem, gap, s_count;
05144                 S = n_th / n_places;
05145                 s_count = 0;
05146                 rem = n_th - ( S * n_places );
05147                 gap = rem > 0 ? n_places/rem : n_places;
05148                 int place = masters_place;
05149                 int gap_ct = gap;
05150                 for ( f = 0; f < n_th; f++ ) {
05151                     kmp_info_t *th = team->t.t_threads[f];
05152                     KMP_DEBUG_ASSERT( th != NULL );
05153 
05154                     th->th.th_first_place = first_place;
05155                     th->th.th_last_place = last_place;
05156                     th->th.th_new_place = place;
05157                     s_count++;
05158 
05159                     if ( (s_count == S) && rem && (gap_ct == gap) ) {
05160                         // do nothing, add an extra thread to place on next iteration
05161                     }
05162                     else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
05163                         // we added an extra thread to this place; move to next place
05164                         if ( place == last_place ) {
05165                             place = first_place;
05166                         }
05167                         else if ( place == __kmp_affinity_num_masks - 1) {
05168                             place = 0;
05169                         }
05170                         else {
05171                             place++;
05172                         }
05173                         s_count = 0;
05174                         gap_ct = 1;
05175                         rem--;
05176                     }
05177                     else if (s_count == S) { // place full; don't add extra
05178                         if ( place == last_place ) {
05179                             place = first_place;
05180                         }
05181                         else if ( place == __kmp_affinity_num_masks - 1) {
05182                             place = 0;
05183                         }
05184                         else {
05185                             place++;
05186                         }
05187                         gap_ct++;
05188                         s_count = 0;
05189                     }
05190 
05191                     KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
05192                       __kmp_gtid_from_thread( team->t.t_threads[f] ),
05193                       team->t.t_id, f, th->th.th_new_place, first_place,
05194                       last_place ) );
05195                 }
05196                 KMP_DEBUG_ASSERT( place == masters_place );
05197             }
05198         }
05199         break;
05200 
05201         case proc_bind_spread:
05202         {
05203             int f;
05204             int n_th = team->t.t_nproc;
05205             int n_places;
05206             if ( first_place <= last_place ) {
05207                 n_places = last_place - first_place + 1;
05208             }
05209             else {
05210                 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
05211             }
05212             if ( n_th <= n_places ) {
05213                 int place = masters_place;
05214                 int S = n_places/n_th;
05215                 int s_count, rem, gap, gap_ct;
05216                 rem = n_places - n_th*S;
05217                 gap = rem ? n_th/rem : 1;
05218                 gap_ct = gap;
05219                 for ( f = 0; f < n_th; f++ ) {
05220                     kmp_info_t *th = team->t.t_threads[f];
05221                     KMP_DEBUG_ASSERT( th != NULL );
05222 
05223                     th->th.th_first_place = place;
05224                     th->th.th_new_place = place;
05225                     s_count = 1;
05226                     while (s_count < S) {
05227                         if ( place == last_place ) {
05228                             place = first_place;
05229                         }
05230                         else if ( place == __kmp_affinity_num_masks - 1) {
05231                             place = 0;
05232                         }
05233                         else {
05234                             place++;
05235                         }
05236                         s_count++;
05237                     }
05238                     if (rem && (gap_ct == gap)) {
05239                         if ( place == last_place ) {
05240                             place = first_place;
05241                         }
05242                         else if ( place == __kmp_affinity_num_masks - 1) {
05243                             place = 0;
05244                         }
05245                         else {
05246                             place++;
05247                         }
05248                         rem--;
05249                         gap_ct = 0;
05250                     }
05251                     th->th.th_last_place = place;
05252                     gap_ct++;
05253 
05254                     if ( place == last_place ) {
05255                         place = first_place;
05256                     }
05257                     else if ( place == __kmp_affinity_num_masks - 1) {
05258                         place = 0;
05259                     }
05260                     else {
05261                         place++;
05262                     }
05263 
05264                     KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
05265                       __kmp_gtid_from_thread( team->t.t_threads[f] ),
05266                       team->t.t_id, f, th->th.th_new_place,
05267                       th->th.th_first_place, th->th.th_last_place ) );
05268                 }
05269                 KMP_DEBUG_ASSERT( place == masters_place );
05270             }
05271             else {
05272                 int S, rem, gap, s_count;
05273                 S = n_th / n_places;
05274                 s_count = 0;
05275                 rem = n_th - ( S * n_places );
05276                 gap = rem > 0 ? n_places/rem : n_places;
05277                 int place = masters_place;
05278                 int gap_ct = gap;
05279                 for ( f = 0; f < n_th; f++ ) {
05280                     kmp_info_t *th = team->t.t_threads[f];
05281                     KMP_DEBUG_ASSERT( th != NULL );
05282 
05283                     th->th.th_first_place = place;
05284                     th->th.th_last_place = place;
05285                     th->th.th_new_place = place;
05286                     s_count++;
05287 
05288                     if ( (s_count == S) && rem && (gap_ct == gap) ) {
05289                         // do nothing, add an extra thread to place on next iteration
05290                     }
05291                     else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
05292                         // we added an extra thread to this place; move on to next place
05293                         if ( place == last_place ) {
05294                             place = first_place;
05295                         }
05296                         else if ( place == __kmp_affinity_num_masks - 1) {
05297                             place = 0;
05298                         }
05299                         else {
05300                             place++;
05301                         }
05302                         s_count = 0;
05303                         gap_ct = 1;
05304                         rem--;
05305                     }
05306                     else if (s_count == S) { // place is full; don't add extra thread
05307                         if ( place == last_place ) {
05308                             place = first_place;
05309                         }
05310                         else if ( place == __kmp_affinity_num_masks - 1) {
05311                             place = 0;
05312                         }
05313                         else {
05314                             place++;
05315                         }
05316                         gap_ct++;
05317                         s_count = 0;
05318                     }
05319 
05320                     KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
05321                        __kmp_gtid_from_thread( team->t.t_threads[f] ),
05322                        team->t.t_id, f, th->th.th_new_place,
05323                        th->th.th_first_place, th->th.th_last_place) );
05324                 }
05325                 KMP_DEBUG_ASSERT( place == masters_place );
05326             }
05327         }
05328         break;
05329 
05330         default:
05331         break;
05332     }
05333 
05334     KA_TRACE( 20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
05335 }
05336 
05337 #endif /* OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX) */
05338 
05339 /* allocate a new team data structure to use.  take one off of the free pool if available */
05340 kmp_team_t *
05341 __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
05342 #if OMPT_SUPPORT
05343     ompt_parallel_id_t ompt_parallel_id,
05344 #endif
05345 #if OMP_40_ENABLED
05346     kmp_proc_bind_t new_proc_bind,
05347 #endif
05348 #if OMP_30_ENABLED
05349     kmp_internal_control_t *new_icvs,
05350 #else
05351     int new_set_nproc, int new_set_dynamic, int new_set_nested,
05352     int new_set_blocktime, int new_bt_intervals, int new_bt_set,
05353 #endif
05354     int argc )
05355 {
05356     int f;
05357     kmp_team_t *team;
05358     char *ptr;
05359     size_t size;
05360 
05361     KA_TRACE( 20, ("__kmp_allocate_team: called\n"));
05362     KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
05363     KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
05364     KMP_MB();
05365 
05366     //
05367     // optimization to use a "hot" team for the top level,
05368     // as it is usually the same
05369     //
05370     if ( ! root->r.r_active  &&  new_nproc > 1 ) {
05371 
05372         KMP_DEBUG_ASSERT( new_nproc == max_nproc );
05373 
05374         team =  root -> r.r_hot_team;
05375 
05376 #if OMP_30_ENABLED && KMP_DEBUG
05377         if ( __kmp_tasking_mode != tskm_immediate_exec ) {
05378             KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team = %p before reinit\n",
05379                            team -> t.t_task_team ));
05380         }
05381 #endif
05382 
05383         /* has the number of threads changed? */
05384         if( team -> t.t_nproc > new_nproc ) {
05385             KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
05386 
05387 #if KMP_MIC
05388             team -> t.t_size_changed = 1;
05389 #endif
05390 #if OMP_30_ENABLED
05391             if ( __kmp_tasking_mode != tskm_immediate_exec ) {
05392                 kmp_task_team_t *task_team = team->t.t_task_team;
05393                 if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
05394                     //
05395                     // Signal the worker threads (esp. the extra ones) to stop
05396                     // looking for tasks while spin waiting.  The task teams
05397                     // are reference counted and will be deallocated by the
05398                     // last worker thread.
05399                     //
05400                     KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
05401                     TCW_SYNC_4( task_team->tt.tt_active, FALSE );
05402                     KMP_MB();
05403 
05404                     KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
05405                       &team->t.t_task_team ) );
05406                       team->t.t_task_team = NULL;
05407                 }
05408                 else {
05409                     KMP_DEBUG_ASSERT( task_team == NULL );
05410                 }
05411             }
05412 #endif // OMP_30_ENABLED
05413 
05414             /* release the extra threads we don't need any more */
05415             for( f = new_nproc  ;  f < team->t.t_nproc  ;  f++ ) {
05416                 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
05417                 __kmp_free_thread( team->t.t_threads[ f ] );
05418                 team -> t.t_threads[ f ] =  NULL;
05419             }
05420 
05421             team -> t.t_nproc =  new_nproc;
05422 #if OMP_30_ENABLED
05423             // TODO???: team -> t.t_max_active_levels = new_max_active_levels;
05424             team -> t.t_sched =  new_icvs->sched;
05425 #endif
05426             __kmp_reinitialize_team( team, new_nproc,
05427 #if OMP_30_ENABLED
05428               new_icvs,
05429               root->r.r_uber_thread->th.th_ident
05430 #else
05431               new_set_nproc, new_set_dynamic, new_set_nested,
05432               new_set_blocktime, new_bt_intervals, new_bt_set
05433 #endif
05434             );
05435 
05436 #if OMP_30_ENABLED
05437             if ( __kmp_tasking_mode != tskm_immediate_exec ) {
05438                 kmp_task_team_t *task_team = team->t.t_task_team;
05439                 if ( task_team != NULL ) {
05440                     KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
05441                     task_team->tt.tt_nproc = new_nproc;
05442                     task_team->tt.tt_unfinished_threads = new_nproc;
05443                     task_team->tt.tt_ref_ct = new_nproc - 1;
05444                 }
05445             }
05446 #endif
05447 
05448             /* update the remaining threads */
05449             for( f = 0  ;  f < new_nproc  ;  f++ ) {
05450                 team -> t.t_threads[ f ] -> th.th_team_nproc = team->t.t_nproc;
05451             }
05452 
05453 #if OMP_30_ENABLED
05454             // restore the current task state of the master thread: should be the implicit task
05455             KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
05456                        0, team->t.t_threads[0], team ) );
05457 
05458             __kmp_push_current_task_to_thread( team -> t.t_threads[ 0 ], team, 0 );
05459 #endif
05460 
05461 #ifdef KMP_DEBUG
05462             for ( f = 0; f < team->t.t_nproc; f++ ) {
05463                 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
05464                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
05465             }
05466 #endif
05467 
05468 #if OMP_40_ENABLED
05469             team->t.t_proc_bind = new_proc_bind;
05470 # if KMP_OS_WINDOWS || KMP_OS_LINUX
05471             __kmp_partition_places( team );
05472 # endif
05473 #endif
05474 
05475         }
05476         else if ( team -> t.t_nproc < new_nproc ) {
05477 #if KMP_OS_LINUX
05478             kmp_affin_mask_t *old_mask;
05479             if ( KMP_AFFINITY_CAPABLE() ) {
05480                 KMP_CPU_ALLOC(old_mask);
05481             }
05482 #endif
05483 
05484             KA_TRACE( 20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
05485 
05486 #if KMP_MIC
05487             team -> t.t_size_changed = 1;
05488 #endif
05489 
05490 
05491             if(team -> t.t_max_nproc < new_nproc) {
05492                 /* reallocate larger arrays */
05493                 __kmp_reallocate_team_arrays(team, new_nproc);
05494                 __kmp_reinitialize_team( team, new_nproc,
05495 #if OMP_30_ENABLED
05496                   new_icvs,
05497                   NULL  // TODO: !!!
05498 #else
05499                   new_set_nproc, new_set_dynamic, new_set_nested,
05500                   new_set_blocktime, new_bt_intervals, new_bt_set
05501 #endif
05502                 );
05503             }
05504 
05505 #if KMP_OS_LINUX
05506             /* Temporarily set full mask for master thread before
05507                creation of workers. The reason is that workers inherit
05508                the affinity from master, so if a lot of workers are
05509                created on the single core quickly, they don't get
05510                a chance to set their own affinity for a long time.
05511             */
05512             __kmp_set_thread_affinity_mask_full_tmp( old_mask );
05513 #endif
05514 
05515             /* allocate new threads for the hot team */
05516             for( f = team->t.t_nproc  ;  f < new_nproc  ;  f++ ) {
05517                 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
05518                 KMP_DEBUG_ASSERT( new_worker );
05519                 team->t.t_threads[ f ] = new_worker;
05520                 new_worker->th.th_team_nproc = team->t.t_nproc;
05521 
05522                 KA_TRACE( 20, ("__kmp_allocate_team: team %d init T#%d arrived: join=%u, plain=%u\n",
05523                                 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
05524                                 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
05525                                 team->t.t_bar[bs_plain_barrier].b_arrived ) );
05526 
05527                 { // Initialize barrier data for new threads.
05528                     int b;
05529                     kmp_balign_t * balign = new_worker->th.th_bar;
05530                     for ( b = 0; b < bp_last_bar; ++ b ) {
05531                         balign[ b ].bb.b_arrived        = team->t.t_bar[ b ].b_arrived;
05532                     }
05533                 }
05534             }
05535 
05536 #if KMP_OS_LINUX
05537             if ( KMP_AFFINITY_CAPABLE() ) {
05538                 /* Restore initial master thread's affinity mask */
05539                 __kmp_set_system_affinity( old_mask, TRUE );
05540                 KMP_CPU_FREE(old_mask);
05541             }
05542 #endif
05543 
05544             /* make sure everyone is syncronized */
05545             __kmp_initialize_team( team, new_nproc,
05546 #if OMP_30_ENABLED
05547               new_icvs,
05548               root->r.r_uber_thread->th.th_ident
05549 #else
05550               new_set_nproc, new_set_dynamic, new_set_nested,
05551               new_set_blocktime, new_bt_intervals, new_bt_set
05552 #endif
05553             );
05554 
05555 #if OMP_30_ENABLED
05556             if ( __kmp_tasking_mode != tskm_immediate_exec ) {
05557                 kmp_task_team_t *task_team = team->t.t_task_team;
05558                 if ( task_team != NULL ) {
05559                     KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
05560                     task_team->tt.tt_nproc = new_nproc;
05561                     task_team->tt.tt_unfinished_threads = new_nproc;
05562                     task_team->tt.tt_ref_ct = new_nproc - 1;
05563                 }
05564             }
05565 #endif
05566 
05567             /* reinitialize the old threads */
05568             for( f = 0  ;  f < team->t.t_nproc  ;  f++ )
05569                 __kmp_initialize_info( team->t.t_threads[ f ], team, f,
05570                                        __kmp_gtid_from_tid( f, team ) );
05571 #ifdef KMP_DEBUG
05572             for ( f = 0; f < team->t.t_nproc; ++ f ) {
05573                 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
05574                     team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
05575             }
05576 #endif
05577 
05578 #if OMP_40_ENABLED
05579             team->t.t_proc_bind = new_proc_bind;
05580 # if KMP_OS_WINDOWS || KMP_OS_LINUX
05581             __kmp_partition_places( team );
05582 # endif
05583 #endif
05584 
05585          }
05586          else {
05587             KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" ));
05588 #if KMP_MIC
05589             // This case can mean that omp_set_num_threads() was called and the hot team size
05590             // was already reduced, so we check the special flag
05591             if ( team -> t.t_size_changed == -1 ) {
05592                 team -> t.t_size_changed = 1;
05593             } else {
05594                 team -> t.t_size_changed = 0;
05595             }
05596 #endif
05597 
05598 #if OMP_30_ENABLED
05599             // TODO???: team -> t.t_max_active_levels = new_max_active_levels;
05600             team -> t.t_sched =  new_icvs->sched;
05601 #endif
05602 
05603             __kmp_reinitialize_team( team, new_nproc,
05604 #if OMP_30_ENABLED
05605               new_icvs,
05606               root->r.r_uber_thread->th.th_ident
05607 #else
05608               new_set_nproc, new_set_dynamic, new_set_nested,
05609               new_set_blocktime, new_bt_intervals, new_bt_set
05610 #endif
05611             );
05612 
05613 #if OMP_30_ENABLED
05614             KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
05615                            0, team->t.t_threads[0], team ) );
05616             __kmp_push_current_task_to_thread( team -> t.t_threads[ 0 ], team, 0 );
05617 #endif
05618 
05619 #if OMP_40_ENABLED
05620 # if (KMP_OS_WINDOWS || KMP_OS_LINUX)
05621             if ( team->t.t_proc_bind == new_proc_bind ) {
05622                 KA_TRACE( 200, ("__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
05623                   team->t.t_id, new_proc_bind, team->t.t_first_place,
05624                   team->t.t_last_place ) );
05625             }
05626             else {
05627                 team->t.t_proc_bind = new_proc_bind;
05628                 __kmp_partition_places( team );
05629             }
05630 # else
05631             if ( team->t.t_proc_bind != new_proc_bind ) {
05632                 team->t.t_proc_bind = new_proc_bind;
05633             }
05634 # endif /* (KMP_OS_WINDOWS || KMP_OS_LINUX) */
05635 #endif /* OMP_40_ENABLED */
05636         }
05637 
05638         /* reallocate space for arguments if necessary */
05639         __kmp_alloc_argv_entries( argc, team, TRUE );
05640         team -> t.t_argc     = argc;
05641         //
05642         // The hot team re-uses the previous task team,
05643         // if untouched during the previous release->gather phase.
05644         //
05645 
05646         KF_TRACE( 10, ( " hot_team = %p\n", team ) );
05647 
05648 #if OMP_30_ENABLED && KMP_DEBUG
05649         if ( __kmp_tasking_mode != tskm_immediate_exec ) {
05650             KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team = %p after reinit\n",
05651               team -> t.t_task_team ));
05652         }
05653 #endif
05654 
05655 #if OMPT_SUPPORT
05656         __ompt_team_assign_id(team, ompt_parallel_id);
05657 #endif
05658 
05659         KMP_MB();
05660 
05661         return team;
05662     }
05663 
05664     /* next, let's try to take one from the team pool */
05665     KMP_MB();
05666     for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
05667     {
05668         /* TODO: consider resizing undersized teams instead of reaping them, now that we have a resizing mechanism */
05669         if ( team->t.t_max_nproc >= max_nproc ) {
05670             /* take this team from the team pool */
05671             __kmp_team_pool = team->t.t_next_pool;
05672 
05673             /* setup the team for fresh use */
05674             __kmp_initialize_team( team, new_nproc,
05675 #if OMP_30_ENABLED
05676               new_icvs,
05677               NULL // TODO: !!!
05678 #else
05679               new_set_nproc, new_set_dynamic, new_set_nested,
05680               new_set_blocktime, new_bt_intervals, new_bt_set
05681 #endif
05682             );
05683 
05684 #if OMP_30_ENABLED
05685             KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
05686                             &team->t.t_task_team ) );
05687             team -> t.t_task_team = NULL;
05688 #endif
05689 
05690             /* reallocate space for arguments if necessary */
05691             __kmp_alloc_argv_entries( argc, team, TRUE );
05692             team -> t.t_argc     = argc;
05693 
05694             KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
05695                             team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
05696             { // Initialize barrier data.
05697                 int b;
05698                 for ( b = 0; b < bs_last_barrier; ++ b) {
05699                     team->t.t_bar[ b ].b_arrived        = KMP_INIT_BARRIER_STATE;
05700                 }
05701             }
05702 
05703 #if OMP_40_ENABLED
05704             team->t.t_proc_bind = new_proc_bind;
05705 #endif
05706 
05707             KA_TRACE( 20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
05708 
05709 #if OMPT_SUPPORT
05710         __ompt_team_assign_id(team, ompt_parallel_id);
05711 #endif
05712 
05713             KMP_MB();
05714 
05715             return team;
05716         }
05717 
05718         /* reap team if it is too small, then loop back and check the next one */
05719         /* not sure if this is wise, but, will be redone during the hot-teams rewrite. */
05720         /* TODO: Use technique to find the right size hot-team, don't reap them */
05721         team =  __kmp_reap_team( team );
05722         __kmp_team_pool = team;
05723     }
05724 
05725     /* nothing available in the pool, no matter, make a new team! */
05726     KMP_MB();
05727     team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) );
05728 
05729     /* and set it up */
05730     team -> t.t_max_nproc   = max_nproc;
05731     /* NOTE well, for some reason allocating one big buffer and dividing it
05732      * up seems to really hurt performance a lot on the P4, so, let's not use
05733      * this... */
05734     __kmp_allocate_team_arrays( team, max_nproc );
05735     __kmp_initialize_team( team, new_nproc,
05736 #if OMP_30_ENABLED
05737       new_icvs,
05738       NULL // TODO: !!!
05739 #else
05740       new_set_nproc, new_set_dynamic, new_set_nested,
05741       new_set_blocktime, new_bt_intervals, new_bt_set
05742 #endif
05743     );
05744 
05745 #if OMP_30_ENABLED
05746     KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team %p to NULL\n",
05747                     &team->t.t_task_team ) );
05748     team -> t.t_task_team = NULL;    // to be removed, as __kmp_allocate zeroes memory, no need to duplicate
05749 #endif
05750 
05751     if ( __kmp_storage_map ) {
05752         __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc );
05753     }
05754 
05755     /* allocate space for arguments */
05756     __kmp_alloc_argv_entries( argc, team, FALSE );
05757     team -> t.t_argc        = argc;
05758 
05759     KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
05760                     team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
05761     { // Initialize barrier data.
05762         int b;
05763         for ( b = 0; b < bs_last_barrier; ++ b ) {
05764             team->t.t_bar[ b ].b_arrived        = KMP_INIT_BARRIER_STATE;
05765         }
05766     }
05767 
05768 #if OMP_40_ENABLED
05769     team->t.t_proc_bind = new_proc_bind;
05770 #endif
05771 
05772 #if OMPT_SUPPORT
05773     __ompt_team_assign_id(team, ompt_parallel_id);
05774 #endif
05775 
05776     KMP_MB();
05777 
05778     KA_TRACE( 20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
05779 
05780     return team;
05781 }
05782 
05783 /* TODO implement hot-teams at all levels */
05784 /* TODO implement lazy thread release on demand (disband request) */
05785 
05786 /* free the team.  return it to the team pool.  release all the threads
05787  * associated with it */
05788 void
05789 __kmp_free_team( kmp_root_t *root, kmp_team_t *team )
05790 {
05791     int f;
05792     KA_TRACE( 20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
05793 
05794     /* verify state */
05795     KMP_DEBUG_ASSERT( root );
05796     KMP_DEBUG_ASSERT( team );
05797     KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
05798     KMP_DEBUG_ASSERT( team->t.t_threads );
05799 
05800     /* team is done working */
05801     TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library.
05802     team -> t.t_copyin_counter = 0; // init counter for possible reuse
05803     // Do not reset pointer to parent team to NULL for hot teams.
05804 
05805     /* if we are a nested team, release our threads */
05806     if( team != root->r.r_hot_team ) {
05807 
05808 #if OMP_30_ENABLED
05809         if ( __kmp_tasking_mode != tskm_immediate_exec ) {
05810             kmp_task_team_t *task_team = team->t.t_task_team;
05811             if ( task_team != NULL ) {
05812                 //
05813                 // Signal the worker threads to stop looking for tasks while
05814                 // spin waiting.  The task teams are reference counted and will
05815                 // be deallocated by the last worker thread via the thread's
05816                 // pointer to the task team.
05817                 //
05818                 KA_TRACE( 20, ( "__kmp_free_team: deactivating task_team %p\n",
05819                                 task_team ) );
05820                 KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
05821                 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
05822                 KMP_MB();
05823                 team->t.t_task_team = NULL;
05824             }
05825         }
05826 #endif /* OMP_30_ENABLED */
05827 
05828         // Reset pointer to parent team only for non-hot teams.
05829         team -> t.t_parent = NULL;
05830 
05831 
05832         /* free the worker threads */
05833         for ( f = 1; f < team->t.t_nproc; ++ f ) {
05834             KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
05835             __kmp_free_thread( team->t.t_threads[ f ] );
05836             team->t.t_threads[ f ] = NULL;
05837         }
05838 
05839 
05840         /* put the team back in the team pool */
05841         /* TODO limit size of team pool, call reap_team if pool too large */
05842         team -> t.t_next_pool  = (kmp_team_t*) __kmp_team_pool;
05843         __kmp_team_pool        = (volatile kmp_team_t*) team;
05844     }
05845 
05846     KMP_MB();
05847 }
05848 
05849 
05850 /* reap the team.  destroy it, reclaim all its resources and free its memory */
05851 kmp_team_t *
05852 __kmp_reap_team( kmp_team_t *team )
05853 {
05854     kmp_team_t *next_pool = team -> t.t_next_pool;
05855 
05856     KMP_DEBUG_ASSERT( team );
05857     KMP_DEBUG_ASSERT( team -> t.t_dispatch    );
05858     KMP_DEBUG_ASSERT( team -> t.t_disp_buffer );
05859     KMP_DEBUG_ASSERT( team -> t.t_threads     );
05860     #if OMP_30_ENABLED
05861     #else
05862     KMP_DEBUG_ASSERT( team -> t.t_set_nproc   );
05863     #endif
05864     KMP_DEBUG_ASSERT( team -> t.t_argv        );
05865 
05866     /* TODO clean the threads that are a part of this? */
05867 
05868     /* free stuff */
05869 
05870     __kmp_free_team_arrays( team );
05871 #if (KMP_PERF_V106 == KMP_ON)
05872     if ( team -> t.t_argv != &team -> t.t_inline_argv[0] )
05873         __kmp_free( (void*) team -> t.t_argv );
05874 #else
05875     __kmp_free( (void*) team -> t.t_argv );
05876 #endif
05877     __kmp_free( team );
05878 
05879     KMP_MB();
05880     return next_pool;
05881 }
05882 
05883 //
05884 // Free the thread.  Don't reap it, just place it on the pool of available
05885 // threads.
05886 //
05887 // Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid
05888 // binding for the affinity mechanism to be useful.
05889 //
05890 // Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.
05891 // However, we want to avoid a potential performance problem by always
05892 // scanning through the list to find the correct point at which to insert
05893 // the thread (potential N**2 behavior).  To do this we keep track of the
05894 // last place a thread struct was inserted (__kmp_thread_pool_insert_pt).
05895 // With single-level parallelism, threads will always be added to the tail
05896 // of the list, kept track of by __kmp_thread_pool_insert_pt.  With nested
05897 // parallelism, all bets are off and we may need to scan through the entire
05898 // free list.
05899 //
05900 // This change also has a potentially large performance benefit, for some
05901 // applications.  Previously, as threads were freed from the hot team, they
05902 // would be placed back on the free list in inverse order.  If the hot team
05903 // grew back to it's original size, then the freed thread would be placed
05904 // back on the hot team in reverse order.  This could cause bad cache
05905 // locality problems on programs where the size of the hot team regularly
05906 // grew and shrunk.
05907 //
05908 // Now, for single-level parallelism, the OMP tid is alway == gtid.
05909 //
05910 void
05911 __kmp_free_thread( kmp_info_t *this_th )
05912 {
05913     int gtid;
05914     kmp_info_t **scan;
05915 
05916     KA_TRACE( 20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
05917                 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
05918 
05919     KMP_DEBUG_ASSERT( this_th );
05920 
05921 
05922     /* put thread back on the free pool */
05923     TCW_PTR(this_th->th.th_team, NULL);
05924     TCW_PTR(this_th->th.th_root, NULL);
05925     TCW_PTR(this_th->th.th_dispatch, NULL);               /* NOT NEEDED */
05926 
05927     //
05928     // If the __kmp_thread_pool_insert_pt is already past the new insert
05929     // point, then we need to re-scan the entire list.
05930     //
05931     gtid = this_th->th.th_info.ds.ds_gtid;
05932     if ( __kmp_thread_pool_insert_pt != NULL ) {
05933         KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
05934         if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
05935              __kmp_thread_pool_insert_pt = NULL;
05936         }
05937     }
05938 
05939     //
05940     // Scan down the list to find the place to insert the thread.
05941     // scan is the address of a link in the list, possibly the address of
05942     // __kmp_thread_pool itself.
05943     //
05944     // In the absence of nested parallism, the for loop will have 0 iterations.
05945     //
05946     if ( __kmp_thread_pool_insert_pt != NULL ) {
05947         scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
05948     }
05949     else {
05950         scan = (kmp_info_t **)&__kmp_thread_pool;
05951     }
05952     for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
05953       scan = &( (*scan)->th.th_next_pool ) );
05954 
05955     //
05956     // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
05957     // to its address.
05958     //
05959     TCW_PTR(this_th->th.th_next_pool, *scan);
05960     __kmp_thread_pool_insert_pt = *scan = this_th;
05961     KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
05962       || ( this_th->th.th_info.ds.ds_gtid
05963       < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
05964     TCW_4(this_th->th.th_in_pool, TRUE);
05965     __kmp_thread_pool_nth++;
05966 
05967     TCW_4(__kmp_nth, __kmp_nth - 1);
05968 
05969 #ifdef KMP_ADJUST_BLOCKTIME
05970     /* Adjust blocktime back to user setting or default if necessary */
05971     /* Middle initialization might never have ocurred                */
05972     if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
05973         KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
05974         if ( __kmp_nth <= __kmp_avail_proc ) {
05975             __kmp_zero_bt = FALSE;
05976         }
05977     }
05978 #endif /* KMP_ADJUST_BLOCKTIME */
05979 
05980     KMP_MB();
05981 }
05982 
05983 void
05984 __kmp_join_barrier( int gtid )
05985 {
05986     register kmp_info_t   *this_thr       = __kmp_threads[ gtid ];
05987     register kmp_team_t   *team;
05988     register kmp_uint      count;
05989     register kmp_uint      nproc;
05990     kmp_info_t            *master_thread;
05991     int                    tid;
05992     #ifdef KMP_DEBUG
05993         int                    team_id;
05994     #endif /* KMP_DEBUG */
05995 
05996     KMP_MB();
05997 
05998     /* get current info */
05999     team          = this_thr -> th.th_team;
06000     /*    nproc         = team -> t.t_nproc;*/
06001     nproc         = this_thr -> th.th_team_nproc;
06002     KMP_DEBUG_ASSERT( nproc == team->t.t_nproc );
06003     tid           = __kmp_tid_from_gtid(gtid);
06004     #ifdef KMP_DEBUG
06005         team_id       = team -> t.t_id;
06006     #endif /* KMP_DEBUG */
06007     /*    master_thread = team -> t.t_threads[0];*/
06008     master_thread = this_thr -> th.th_team_master;
06009     #ifdef KMP_DEBUG
06010         if ( master_thread != team->t.t_threads[0] ) {
06011             __kmp_print_structure();
06012         }
06013     #endif /* KMP_DEBUG */
06014     KMP_DEBUG_ASSERT( master_thread == team->t.t_threads[0] );
06015     KMP_MB();
06016 
06017     /* verify state */
06018     KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
06019     KMP_DEBUG_ASSERT( TCR_PTR(this_thr->th.th_team) );
06020     KMP_DEBUG_ASSERT( TCR_PTR(this_thr->th.th_root) );
06021     KMP_DEBUG_ASSERT( this_thr == team -> t.t_threads[tid] );
06022 
06023     KA_TRACE( 10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",
06024                    gtid, team_id, tid ));
06025 #if OMPT_SUPPORT
06026     if ((ompt_status == ompt_status_track_callback) &&
06027         ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) {
06028       int  tid = __kmp_tid_from_gtid( gtid );
06029       ompt_callbacks.ompt_callback(ompt_event_barrier_begin)
06030         (team->t.ompt_team_info.parallel_id,
06031      team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
06032     }
06033     this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
06034 #endif
06035 
06036     #if OMP_30_ENABLED
06037         if ( __kmp_tasking_mode == tskm_extra_barrier ) {
06038             __kmp_tasking_barrier( team, this_thr, gtid );
06039 
06040             KA_TRACE( 10, ("__kmp_join_barrier: T#%d(%d:%d) past taking barrier\n",
06041                            gtid, team_id, tid ));
06042         }
06043         #ifdef KMP_DEBUG
06044         if ( __kmp_tasking_mode != tskm_immediate_exec ) {
06045             KA_TRACE( 20, ( "__kmp_join_barrier: T#%d, old team = %d, old task_team = %p, th_task_team = %p\n",
06046                              __kmp_gtid_from_thread( this_thr ), team_id, team -> t.t_task_team,
06047                              this_thr->th.th_task_team ) );
06048             KMP_DEBUG_ASSERT( this_thr->th.th_task_team == team->t.t_task_team );
06049         }
06050         #endif /* KMP_DEBUG */
06051     #endif /* OMP_30_ENABLED */
06052 
06053     //
06054     // Copy the blocktime info to the thread, where __kmp_wait_sleep()
06055     // can access it when the team struct is not guaranteed to exist.
06056     //
06057     // Doing these loads causes a cache miss slows down EPCC parallel by 2x.
06058     // As a workaround, we do not perform the copy if blocktime=infinite,
06059     // since the values are not used by __kmp_wait_sleep() in that case.
06060     //
06061     if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
06062         #if OMP_30_ENABLED
06063             this_thr -> th.th_team_bt_intervals = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
06064             this_thr -> th.th_team_bt_set = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
06065         #else
06066             this_thr -> th.th_team_bt_intervals = team -> t.t_set_bt_intervals[tid];
06067             this_thr -> th.th_team_bt_set= team -> t.t_set_bt_set[tid];
06068         #endif // OMP_30_ENABLED
06069     }
06070 
06071     #if KMP_OS_WINDOWS
06072         // AC: wait here until monitor has started. This is a fix for CQ232808.
06073         //     The reason is that if the library is loaded/unloaded in a loop with small (parallel)
06074         //     work in between, then there is high probability that monitor thread started after
06075         //     the library shutdown. At shutdown it is too late to cope with the problem, because
06076         //     when the master is in DllMain (process detach) the monitor has no chances to start
06077         //     (it is blocked), and master has no means to inform the monitor that the library has gone,
06078         //     because all the memory which the monitor can access is going to be released/reset.
06079         //
06080         //     The moment before barrier_gather sounds appropriate, because master needs to
06081         //     wait for all workers anyway, and we want this to happen as late as possible,
06082         //     but before the shutdown which may happen after the barrier.
06083         if( KMP_MASTER_TID( tid ) && TCR_4(__kmp_init_monitor) < 2 ) {
06084             __kmp_wait_sleep( this_thr, (volatile kmp_uint32*)&__kmp_init_monitor, 2, 0
06085                               );
06086         }
06087     #endif
06088 
06089 
06090     if ( __kmp_barrier_gather_pattern[ bs_forkjoin_barrier ] == bp_linear_bar || __kmp_barrier_gather_branch_bits[ bs_forkjoin_barrier ] == 0 ) {
06091         __kmp_linear_barrier_gather( bs_forkjoin_barrier, this_thr, gtid, tid, NULL
06092                                      );
06093     } else if ( __kmp_barrier_gather_pattern[ bs_forkjoin_barrier ] == bp_tree_bar ) {
06094         __kmp_tree_barrier_gather( bs_forkjoin_barrier, this_thr, gtid, tid, NULL
06095                                    );
06096     } else {
06097         __kmp_hyper_barrier_gather( bs_forkjoin_barrier, this_thr, gtid, tid, NULL
06098                                     );
06099     }; // if
06100 
06101 
06102     //
06103     // From this point on, the team data structure may be deallocated
06104     // at any time by the master thread - it is unsafe to reference it
06105     // in any of the worker threads.
06106     //
06107     // Any per-team data items that need to be referenced before the end
06108     // of the barrier should be moved to the kmp_task_team_t structs.
06109     //
06110 
06111     #if OMP_30_ENABLED
06112         if ( KMP_MASTER_TID( tid ) ) {
06113             if ( __kmp_tasking_mode != tskm_immediate_exec ) {
06114                 // Master shouldn't call decrease_load().         // TODO: enable master threads.
06115                 // Master should have th_may_decrease_load == 0.  // TODO: enable master threads.
06116                 __kmp_task_team_wait( this_thr, team
06117                                       );
06118             }
06119         }
06120     #endif /* OMP_30_ENABLED */
06121 
06122     #if KMP_DEBUG
06123         if( KMP_MASTER_TID( tid )) {
06124             KA_TRACE( 15, ( "__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
06125                             gtid, team_id, tid, nproc ));
06126         }
06127     #endif /* KMP_DEBUG */
06128 
06129     /* TODO now, mark worker threads as done so they may be disbanded */
06130 
06131     KMP_MB();       /* Flush all pending memory write invalidates.  */
06132     KA_TRACE( 10, ("__kmp_join_barrier: T#%d(%d:%d) leaving\n",
06133                    gtid, team_id, tid ));
06134 #if OMPT_SUPPORT
06135    if ((ompt_status == ompt_status_track_callback) &&
06136        ompt_callbacks.ompt_callback(ompt_event_barrier_end)) {
06137      int  tid = __kmp_tid_from_gtid( gtid );
06138      ompt_callbacks.ompt_callback(ompt_event_barrier_end)
06139        (team->t.ompt_team_info.parallel_id,
06140     team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
06141    }
06142    // return to default state
06143    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
06144 #endif
06145 
06146 }
06147 
06148 
06149 /* TODO release worker threads' fork barriers as we are ready instead of all at once */
06150 
06151 void
06152 __kmp_fork_barrier( int gtid, int tid )
06153 {
06154     kmp_info_t *this_thr = __kmp_threads[ gtid ];
06155     kmp_team_t *team     = ( tid == 0 ) ? this_thr -> th.th_team : NULL;
06156 
06157     KA_TRACE( 10, ( "__kmp_fork_barrier: T#%d(%d:%d) has arrived\n",
06158                     gtid, ( team != NULL ) ? team->t.t_id : -1, tid ));
06159 
06160     /* th_team pointer only valid for master thread here */
06161     if ( KMP_MASTER_TID( tid ) ) {
06162 
06163 
06164 #ifdef KMP_DEBUG
06165 
06166         register kmp_info_t **other_threads = team -> t.t_threads;
06167         register int          i;
06168 
06169         /* verify state */
06170         KMP_MB();
06171 
06172         for( i = 1; i < team -> t.t_nproc ; i++ ) {
06173             KA_TRACE( 500, ( "__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork "
06174                              "go == %u.\n",
06175                              gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
06176                              team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
06177                              other_threads[i]->th.th_bar[ bs_forkjoin_barrier ].bb.b_go ) );
06178 
06179             KMP_DEBUG_ASSERT( ( TCR_4( other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go )
06180                                 & ~(KMP_BARRIER_SLEEP_STATE) )
06181                                == KMP_INIT_BARRIER_STATE );
06182             KMP_DEBUG_ASSERT( other_threads[i]->th.th_team == team );
06183 
06184         }
06185 #endif
06186 
06187 #if OMP_30_ENABLED
06188         if ( __kmp_tasking_mode != tskm_immediate_exec ) {
06189             __kmp_task_team_setup( this_thr, team );
06190         }
06191 #endif /* OMP_30_ENABLED */
06192 
06193         //
06194         // The master thread may have changed its blocktime between the
06195         // join barrier and the fork barrier.
06196         //
06197         // Copy the blocktime info to the thread, where __kmp_wait_sleep()
06198         // can access it when the team struct is not guaranteed to exist.
06199         //
06200         // See the note about the corresponding code in __kmp_join_barrier()
06201         // being performance-critical.
06202         //
06203         if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
06204 #if OMP_30_ENABLED
06205             this_thr -> th.th_team_bt_intervals = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
06206             this_thr -> th.th_team_bt_set = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
06207 #else
06208             this_thr -> th.th_team_bt_intervals = team -> t.t_set_bt_intervals[tid];
06209             this_thr -> th.th_team_bt_set= team -> t.t_set_bt_set[tid];
06210 #endif // OMP_30_ENABLED
06211         }
06212     } // master
06213 
06214     if ( __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] == bp_linear_bar || __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] == 0 ) {
06215         __kmp_linear_barrier_release( bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
06216                                       );
06217     } else if ( __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] == bp_tree_bar ) {
06218         __kmp_tree_barrier_release( bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
06219                                     );
06220     } else {
06221         __kmp_hyper_barrier_release( bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
06222                                      );
06223     }; // if
06224 
06225     //
06226     // early exit for reaping threads releasing forkjoin barrier
06227     //
06228     if ( TCR_4(__kmp_global.g.g_done) ) {
06229 
06230 #if OMP_30_ENABLED
06231         if ( this_thr->th.th_task_team != NULL ) {
06232             if ( KMP_MASTER_TID( tid ) ) {
06233                 TCW_PTR(this_thr->th.th_task_team, NULL);
06234             }
06235             else {
06236                 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
06237             }
06238         }
06239 #endif /* OMP_30_ENABLED */
06240 
06241         KA_TRACE( 10, ( "__kmp_fork_barrier: T#%d is leaving early\n", gtid ));
06242         return;
06243     }
06244 
06245     //
06246     // We can now assume that a valid team structure has been allocated
06247     // by the master and propagated to all worker threads.
06248     //
06249     // The current thread, however, may not be part of the team, so we can't
06250     // blindly assume that the team pointer is non-null.
06251     //
06252     team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
06253     KMP_DEBUG_ASSERT( team != NULL );
06254     tid = __kmp_tid_from_gtid( gtid );
06255 
06256 #if OMP_30_ENABLED
06257 
06258 # if KMP_BARRIER_ICV_PULL
06259     //
06260     // FIXME - after __kmp_fork_call() is modified to not look at the
06261     // master thread's implicit task ICV's, remove the ! KMP_MASTER_TID
06262     // restriction from this if condition.
06263     //
06264     if (! KMP_MASTER_TID( tid ) ) {
06265         //
06266         // Copy the initial ICV's from the team struct to the implicit task
06267         // for this tid.
06268         //
06269         __kmp_init_implicit_task( team->t.t_ident, team->t.t_threads[tid],
06270           team, tid, FALSE );
06271         copy_icvs( &team->t.t_implicit_task_taskdata[tid].td_icvs,
06272           &team->t.t_initial_icvs );
06273     }
06274 # endif // KMP_BARRIER_ICV_PULL
06275 
06276     if ( __kmp_tasking_mode != tskm_immediate_exec ) {
06277         __kmp_task_team_sync( this_thr, team );
06278     }
06279 
06280 #endif /* OMP_30_ENABLED */
06281 
06282 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
06283     kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
06284     if ( proc_bind == proc_bind_intel ) {
06285 #endif
06286 #if KMP_MIC
06287         //
06288         // Call dynamic affinity settings
06289         //
06290         if( __kmp_affinity_type == affinity_balanced && team->t.t_size_changed ) {
06291             __kmp_balanced_affinity( tid, team->t.t_nproc );
06292         }
06293 #endif
06294 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
06295     }
06296     else if ( ( proc_bind != proc_bind_false )
06297               && ( proc_bind != proc_bind_disabled )) {
06298         if ( this_thr->th.th_new_place == this_thr->th.th_current_place ) {
06299             KA_TRACE( 100, ( "__kmp_fork_barrier: T#%d already in correct place %d\n",
06300                              __kmp_gtid_from_thread( this_thr ), this_thr->th.th_current_place ) );
06301         }
06302         else {
06303             __kmp_affinity_set_place( gtid );
06304         }
06305     }
06306 #endif
06307 
06308     KA_TRACE( 10, ( "__kmp_fork_barrier: T#%d(%d:%d) is leaving\n",
06309       gtid, team->t.t_id, tid ));
06310 }
06311 
06312 
06313 /* ------------------------------------------------------------------------ */
06314 /* ------------------------------------------------------------------------ */
06315 
06316 void *
06317 __kmp_launch_thread( kmp_info_t *this_thr )
06318 {
06319     int                   gtid = this_thr->th.th_info.ds.ds_gtid;
06320 /*    void                 *stack_data;*/
06321     kmp_team_t *(*volatile pteam);
06322 
06323     KMP_MB();
06324     KA_TRACE( 10, ("__kmp_launch_thread: T#%d start\n", gtid ) );
06325 
06326     if( __kmp_env_consistency_check ) {
06327         this_thr -> th.th_cons = __kmp_allocate_cons_stack( gtid );  // ATT: Memory leak?
06328     }
06329 
06330 #if OMPT_SUPPORT
06331    if (ompt_status & ompt_status_track) {
06332      this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0);
06333      if ((ompt_status == ompt_status_track_callback) &&
06334          ompt_callbacks.ompt_callback(ompt_event_thread_create)) {
06335        ompt_callbacks.ompt_callback(ompt_event_thread_create)();
06336      }
06337    }
06338 #endif
06339 
06340     /* This is the place where threads wait for work */
06341     while( ! TCR_4(__kmp_global.g.g_done) ) {
06342         KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
06343         KMP_MB();
06344 
06345         /* wait for work to do */
06346         KA_TRACE( 20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid ));
06347     
06348 
06349 #if OMPT_SUPPORT
06350     if (ompt_status & ompt_status_track) {
06351       this_thr->th.ompt_thread_info.state = ompt_state_idle;
06352       if ((ompt_status == ompt_status_track_callback) &&
06353           ompt_callbacks.ompt_callback(ompt_event_idle_begin)) {
06354         ompt_callbacks.ompt_callback(ompt_event_idle_begin)();
06355       }
06356     }
06357 #endif
06358 
06359         /* No tid yet since not part of a team */
06360         __kmp_fork_barrier( gtid, KMP_GTID_DNE );
06361 
06362 #if OMPT_SUPPORT
06363     if (ompt_status & ompt_status_track) {
06364       this_thr->th.ompt_thread_info.state = ompt_state_overhead;
06365       if ((ompt_status == ompt_status_track_callback) &&
06366           ompt_callbacks.ompt_callback(ompt_event_idle_end)) {
06367         ompt_callbacks.ompt_callback(ompt_event_idle_end)();
06368       }
06369     }
06370 #endif
06371 
06372         pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
06373 
06374         /* have we been allocated? */
06375         if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
06376             /* we were just waken up, so run our new task */
06377             if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
06378                 int rc;
06379                 KA_TRACE( 20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
06380                     gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn ));
06381 
06382 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
06383                 if ( __kmp_inherit_fp_control && (*pteam)->t.t_fp_control_saved ) {
06384                     __kmp_clear_x87_fpu_status_word();
06385                     __kmp_load_x87_fpu_control_word( &(*pteam)->t.t_x87_fpu_control_word );
06386                     __kmp_load_mxcsr( &(*pteam)->t.t_mxcsr );
06387                 }
06388 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
06389 
06390 #if OMPT_SUPPORT
06391         if (ompt_status & ompt_status_track) {
06392           this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
06393         }
06394 #endif
06395 
06396                 rc = (*pteam) -> t.t_invoke( gtid );
06397                 KMP_ASSERT( rc );
06398 
06399 #if OMPT_SUPPORT
06400         if (ompt_status & ompt_status_track) {
06401           /* no frame set while outside task */
06402           int tid = __kmp_tid_from_gtid(gtid); 
06403           (*pteam)->t.t_implicit_task_taskdata[tid].
06404             ompt_task_info.frame.exit_runtime_frame = 0;
06405 
06406           this_thr->th.ompt_thread_info.state = ompt_state_overhead;
06407         }
06408 #endif
06409                 KMP_MB();
06410                 KA_TRACE( 20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
06411                         gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn ));
06412             }
06413 
06414             /* join barrier after parallel region */
06415             __kmp_join_barrier( gtid );
06416         }
06417     }
06418     TCR_SYNC_PTR(__kmp_global.g.g_done);
06419 
06420 #if OMPT_SUPPORT
06421    if ((ompt_status == ompt_status_track_callback) &&
06422        ompt_callbacks.ompt_callback(ompt_event_thread_exit)) {
06423      ompt_callbacks.ompt_callback(ompt_event_thread_exit)();
06424    }
06425 #endif
06426 
06427 #if OMP_30_ENABLED
06428     if ( TCR_PTR( this_thr->th.th_task_team ) != NULL ) {
06429         __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
06430     }
06431 #endif /* OMP_30_ENABLED */
06432 
06433     /* run the destructors for the threadprivate data for this thread */
06434     __kmp_common_destroy_gtid( gtid ); 
06435 
06436     KA_TRACE( 10, ("__kmp_launch_thread: T#%d done\n", gtid ) );
06437     KMP_MB();
06438     return this_thr;
06439 }
06440 
06441 /* ------------------------------------------------------------------------ */
06442 /* ------------------------------------------------------------------------ */
06443 
06444 
06445 
06446 void
06447 __kmp_internal_end_dest( void *specific_gtid )
06448 {
06449     #ifdef __INTEL_COMPILER
06450         #pragma warning( push )
06451         #pragma warning( disable:  810 ) // conversion from "void *" to "int" may lose significant bits
06452     #endif
06453     // Make sure no significant bits are lost
06454     int gtid = (kmp_intptr_t)specific_gtid - 1;
06455     #ifdef __INTEL_COMPILER
06456         #pragma warning( pop )
06457     #endif
06458 
06459     KA_TRACE( 30, ("__kmp_internal_end_dest: T#%d\n", gtid));
06460     /* NOTE: the gtid is stored as gitd+1 in the thread-local-storage
06461      * this is because 0 is reserved for the nothing-stored case */
06462 
06463     /* josh: One reason for setting the gtid specific data even when it is being
06464        destroyed by pthread is to allow gtid lookup through thread specific data
06465        (__kmp_gtid_get_specific).  Some of the code, especially stat code,
06466        that gets executed in the call to __kmp_internal_end_thread, actually
06467        gets the gtid through the thread specific data.  Setting it here seems
06468        rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread
06469        to run smoothly.
06470        todo: get rid of this after we remove the dependence on
06471        __kmp_gtid_get_specific
06472     */
06473     if(gtid >= 0 && KMP_UBER_GTID(gtid))
06474         __kmp_gtid_set_specific( gtid );
06475     #ifdef KMP_TDATA_GTID
06476         __kmp_gtid = gtid;
06477     #endif
06478     __kmp_internal_end_thread( gtid );
06479 }
06480 
06481 #if KMP_OS_UNIX && GUIDEDLL_EXPORTS
06482 
06483 // 2009-09-08 (lev): It looks the destructor does not work. In simple test cases destructors work
06484 // perfectly, but in real libiomp5.so I have no evidence it is ever called. However, -fini linker
06485 // option in makefile.mk works fine.
06486 
06487 __attribute__(( destructor ))
06488 void
06489 __kmp_internal_end_dtor( void )
06490 {
06491     __kmp_internal_end_atexit();
06492 }
06493 
06494 void
06495 __kmp_internal_end_fini( void )
06496 {
06497     __kmp_internal_end_atexit();
06498 }
06499 
06500 #endif
06501 
06502 /* [Windows] josh: when the atexit handler is called, there may still be more than one thread alive */
06503 void
06504 __kmp_internal_end_atexit( void )
06505 {
06506     KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) );
06507     /* [Windows]
06508        josh: ideally, we want to completely shutdown the library in this atexit handler, but
06509        stat code that depends on thread specific data for gtid fails because that data becomes
06510        unavailable at some point during the shutdown, so we call __kmp_internal_end_thread
06511        instead.  We should eventually remove the dependency on __kmp_get_specific_gtid in the
06512        stat code and use __kmp_internal_end_library to cleanly shutdown the library.
06513 
06514 // TODO: Can some of this comment about GVS be removed?
06515        I suspect that the offending stat code is executed when the calling thread tries to
06516        clean up a dead root thread's data structures, resulting in GVS code trying to close
06517        the GVS structures for that thread, but since the stat code uses
06518        __kmp_get_specific_gtid to get the gtid with the assumption that the calling thread is
06519        cleaning up itself instead of another thread, it gets confused.  This happens because
06520        allowing a thread to unregister and cleanup another thread is a recent modification for
06521        addressing an issue with Maxon Cinema4D.  Based on the current design (20050722), a
06522        thread may end up trying to unregister another thread only if thread death does not
06523        trigger the calling of __kmp_internal_end_thread.  For Linux* OS, there is the thread
06524        specific data destructor function to detect thread death.  For Windows dynamic, there
06525        is DllMain(THREAD_DETACH).  For Windows static, there is nothing.  Thus, the
06526        workaround is applicable only for Windows static stat library.
06527     */
06528     __kmp_internal_end_library( -1 );
06529     #if KMP_OS_WINDOWS
06530         __kmp_close_console();
06531     #endif
06532 }
06533 
06534 static void
06535 __kmp_reap_thread(
06536     kmp_info_t * thread,
06537     int is_root
06538 ) {
06539 
06540     // It is assumed __kmp_forkjoin_lock is aquired.
06541 
06542     int gtid;
06543 
06544     KMP_DEBUG_ASSERT( thread != NULL );
06545 
06546     gtid = thread->th.th_info.ds.ds_gtid;
06547 
06548     if ( ! is_root ) {
06549 
06550         if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
06551             /* Assume the threads are at the fork barrier here */
06552             KA_TRACE( 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
06553             /* Need release fence here to prevent seg faults for tree forkjoin barrier (GEH) */
06554             __kmp_release(
06555                 thread,
06556                 &thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go,
06557                 kmp_release_fence
06558             );
06559         }; // if
06560 
06561 
06562         // Terminate OS thread.
06563         __kmp_reap_worker( thread );
06564 
06565         //
06566         // The thread was killed asynchronously.  If it was actively
06567         // spinning in the in the thread pool, decrement the global count.
06568         //
06569         // There is a small timing hole here - if the worker thread was
06570         // just waking up after sleeping in the pool, had reset it's
06571         // th_active_in_pool flag but not decremented the global counter
06572         // __kmp_thread_pool_active_nth yet, then the global counter
06573         // might not get updated.
06574         //
06575         // Currently, this can only happen as the library is unloaded,
06576         // so there are no harmful side effects.
06577         //
06578         if ( thread->th.th_active_in_pool ) {
06579             thread->th.th_active_in_pool = FALSE;
06580             KMP_TEST_THEN_DEC32(
06581               (kmp_int32 *) &__kmp_thread_pool_active_nth );
06582             KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
06583         }
06584 
06585         // Decrement # of [worker] threads in the pool.
06586         KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
06587         --__kmp_thread_pool_nth;
06588     }; // if
06589 
06590     // Free the fast memory for tasking
06591     #if USE_FAST_MEMORY
06592         __kmp_free_fast_memory( thread );
06593     #endif /* USE_FAST_MEMORY */
06594 
06595     __kmp_suspend_uninitialize_thread( thread );
06596 
06597     KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
06598     TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
06599 
06600     -- __kmp_all_nth;
06601     // __kmp_nth was decremented when thread is added to the pool.
06602 
06603 #ifdef KMP_ADJUST_BLOCKTIME
06604     /* Adjust blocktime back to user setting or default if necessary */
06605     /* Middle initialization might never have ocurred                */
06606     if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
06607         KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
06608         if ( __kmp_nth <= __kmp_avail_proc ) {
06609             __kmp_zero_bt = FALSE;
06610         }
06611     }
06612 #endif /* KMP_ADJUST_BLOCKTIME */
06613 
06614     /* free the memory being used */
06615     if( __kmp_env_consistency_check ) {
06616         if ( thread->th.th_cons ) {
06617             __kmp_free_cons_stack( thread->th.th_cons );
06618             thread->th.th_cons = NULL;
06619         }; // if
06620     }
06621 
06622     if ( thread->th.th_pri_common != NULL ) {
06623         __kmp_free( thread->th.th_pri_common );
06624         thread->th.th_pri_common = NULL;
06625     }; // if
06626 
06627     #if KMP_USE_BGET
06628         if ( thread->th.th_local.bget_data != NULL ) {
06629             __kmp_finalize_bget( thread );
06630         }; // if
06631     #endif
06632 
06633 #if (KMP_OS_WINDOWS || KMP_OS_LINUX)
06634     if ( thread->th.th_affin_mask != NULL ) {
06635         KMP_CPU_FREE( thread->th.th_affin_mask );
06636         thread->th.th_affin_mask = NULL;
06637     }; // if
06638 #endif /* (KMP_OS_WINDOWS || KMP_OS_LINUX) */
06639 
06640     __kmp_reap_team( thread->th.th_serial_team );
06641     thread->th.th_serial_team = NULL;
06642     __kmp_free( thread );
06643 
06644     KMP_MB();
06645 
06646 } // __kmp_reap_thread
06647 
06648 static void
06649 __kmp_internal_end(void)
06650 {
06651     int i;
06652 
06653     /* First, unregister the library */
06654     __kmp_unregister_library();
06655 
06656     #if KMP_OS_WINDOWS
06657         /* In Win static library, we can't tell when a root actually dies, so we
06658            reclaim the data structures for any root threads that have died but not
06659            unregistered themselves, in order to shut down cleanly.
06660            In Win dynamic library we also can't tell when a thread dies.
06661         */
06662         __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of dead roots
06663     #endif
06664 
06665     for( i=0 ; i<__kmp_threads_capacity ; i++ )
06666         if( __kmp_root[i] )
06667             if( __kmp_root[i] -> r.r_active )
06668                 break;
06669     KMP_MB();       /* Flush all pending memory write invalidates.  */
06670     TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
06671 
06672     if ( i < __kmp_threads_capacity ) {
06673         // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??
06674         KMP_MB();       /* Flush all pending memory write invalidates.  */
06675 
06676         //
06677         // Need to check that monitor was initialized before reaping it.
06678         // If we are called form __kmp_atfork_child (which sets
06679         // __kmp_init_parallel = 0), then __kmp_monitor will appear to
06680         // contain valid data, but it is only valid in the parent process,
06681         // not the child.
06682         //
06683         // One of the possible fixes for CQ138434 / CQ140126
06684         // (used in 20091103_dreamworks patch)
06685         //
06686         // New behavior (201008): instead of keying off of the flag
06687         // __kmp_init_parallel, the monitor thread creation is keyed off
06688         // of the new flag __kmp_init_monitor.
06689         //
06690         __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
06691         if ( TCR_4( __kmp_init_monitor ) ) {
06692             __kmp_reap_monitor( & __kmp_monitor );
06693             TCW_4( __kmp_init_monitor, 0 );
06694         }
06695         __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
06696         KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
06697     } else {
06698         /* TODO move this to cleanup code */
06699         #ifdef KMP_DEBUG
06700             /* make sure that everything has properly ended */
06701             for ( i = 0; i < __kmp_threads_capacity; i++ ) {
06702                 if( __kmp_root[i] ) {
06703                     KMP_ASSERT( ! KMP_UBER_GTID( i ) );
06704                     KMP_ASSERT( ! __kmp_root[i] -> r.r_active );
06705                 }
06706             }
06707         #endif
06708 
06709         KMP_MB();
06710 
06711         // Reap the worker threads.
06712         // This is valid for now, but be careful if threads are reaped sooner.
06713         while ( __kmp_thread_pool != NULL ) {    // Loop thru all the thread in the pool.
06714             // Get the next thread from the pool.
06715             kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
06716             __kmp_thread_pool = thread->th.th_next_pool;
06717             // Reap it.
06718             thread->th.th_next_pool = NULL;
06719             thread->th.th_in_pool = FALSE;
06720             __kmp_reap_thread( thread, 0 );
06721         }; // while
06722         __kmp_thread_pool_insert_pt = NULL;
06723 
06724         // Reap teams.
06725         while ( __kmp_team_pool != NULL ) {     // Loop thru all the teams in the pool.
06726             // Get the next team from the pool.
06727             kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
06728             __kmp_team_pool = team->t.t_next_pool;
06729             // Reap it.
06730             team->t.t_next_pool = NULL;
06731             __kmp_reap_team( team );
06732         }; // while
06733 
06734         #if OMP_30_ENABLED
06735             __kmp_reap_task_teams( );
06736         #endif /* OMP_30_ENABLED */
06737 
06738         for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
06739             // TBD: Add some checking...
06740             // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );
06741         }
06742 
06743         /* Make sure all threadprivate destructors get run by joining with all worker
06744            threads before resetting this flag */
06745         TCW_SYNC_4(__kmp_init_common, FALSE);
06746 
06747         KA_TRACE( 10, ("__kmp_internal_end: all workers reaped\n" ) );
06748         KMP_MB();
06749 
06750         //
06751         // See note above: One of the possible fixes for CQ138434 / CQ140126
06752         //
06753         // FIXME: push both code fragments down and CSE them?
06754         // push them into __kmp_cleanup() ?
06755         //
06756         __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
06757         if ( TCR_4( __kmp_init_monitor ) ) {
06758             __kmp_reap_monitor( & __kmp_monitor );
06759             TCW_4( __kmp_init_monitor, 0 );
06760         }
06761         __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
06762         KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) );
06763 
06764     } /* else !__kmp_global.t_active */
06765     TCW_4(__kmp_init_gtid, FALSE);
06766     KMP_MB();       /* Flush all pending memory write invalidates.  */
06767 
06768 
06769     __kmp_cleanup();
06770 #if OMPT_SUPPORT
06771     ompt_fini();
06772 #endif
06773 }
06774 
06775 void
06776 __kmp_internal_end_library( int gtid_req )
06777 {
06778     int i;
06779 
06780     /* if we have already cleaned up, don't try again, it wouldn't be pretty */
06781     /* this shouldn't be a race condition because __kmp_internal_end() is the
06782      * only place to clear __kmp_serial_init */
06783     /* we'll check this later too, after we get the lock */
06784     // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundaant,
06785     // because the next check will work in any case. 
06786     if( __kmp_global.g.g_abort ) {
06787         KA_TRACE( 11, ("__kmp_internal_end_library: abort, exiting\n" ));
06788         /* TODO abort? */
06789         return;
06790     }
06791     if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
06792         KA_TRACE( 10, ("__kmp_internal_end_library: already finished\n" ));
06793         return;
06794     }
06795 
06796 
06797     KMP_MB();       /* Flush all pending memory write invalidates.  */
06798 
06799     /* find out who we are and what we should do */
06800     {
06801         int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
06802         KA_TRACE( 10, ("__kmp_internal_end_library: enter T#%d  (%d)\n", gtid, gtid_req ));
06803         if( gtid == KMP_GTID_SHUTDOWN ) {
06804             KA_TRACE( 10, ("__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
06805             return;
06806         } else if( gtid == KMP_GTID_MONITOR ) {
06807             KA_TRACE( 10, ("__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
06808             return;
06809         } else if( gtid == KMP_GTID_DNE ) {
06810             KA_TRACE( 10, ("__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
06811             /* we don't know who we are, but we may still shutdown the library */
06812         } else if( KMP_UBER_GTID( gtid )) {
06813             /* unregister ourselves as an uber thread.  gtid is no longer valid */
06814             if( __kmp_root[gtid] -> r.r_active ) {
06815                 __kmp_global.g.g_abort = -1;
06816                 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
06817                 KA_TRACE( 10, ("__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
06818                 return;
06819             } else {
06820                 KA_TRACE( 10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
06821                 __kmp_unregister_root_current_thread( gtid );
06822             }
06823         } else {
06824             /* worker threads may call this function through the atexit handler, if they call exit() */
06825             /* For now, skip the usual subsequent processing and just dump the debug buffer.
06826                TODO: do a thorough shutdown instead
06827             */
06828             #ifdef DUMP_DEBUG_ON_EXIT
06829                 if ( __kmp_debug_buf )
06830                     __kmp_dump_debug_buffer( );
06831             #endif
06832             return;
06833         }
06834     }
06835     /* synchronize the termination process */
06836     __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
06837 
06838     /* have we already finished */
06839     if( __kmp_global.g.g_abort ) {
06840         KA_TRACE( 10, ("__kmp_internal_end_library: abort, exiting\n" ));
06841         /* TODO abort? */
06842         __kmp_release_bootstrap_lock( &__kmp_initz_lock );
06843         return;
06844     }
06845     if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
06846         __kmp_release_bootstrap_lock( &__kmp_initz_lock );
06847         return;
06848     }
06849 
06850     /* We need this lock to enforce mutex between this reading of
06851        __kmp_threads_capacity and the writing by __kmp_register_root.
06852        Alternatively, we can use a counter of roots that is
06853        atomically updated by __kmp_get_global_thread_id_reg,
06854        __kmp_do_serial_initialize and __kmp_internal_end_*.
06855     */
06856     __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
06857 
06858     /* now we can safely conduct the actual termination */
06859     __kmp_internal_end();
06860 
06861     __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
06862     __kmp_release_bootstrap_lock( &__kmp_initz_lock );
06863 
06864     KA_TRACE( 10, ("__kmp_internal_end_library: exit\n" ) );
06865 
06866     #ifdef DUMP_DEBUG_ON_EXIT
06867         if ( __kmp_debug_buf )
06868             __kmp_dump_debug_buffer();
06869     #endif
06870 
06871     #if KMP_OS_WINDOWS
06872         __kmp_close_console();
06873     #endif
06874 
06875     __kmp_fini_allocator();
06876 
06877 } // __kmp_internal_end_library
06878 
06879 void
06880 __kmp_internal_end_thread( int gtid_req )
06881 {
06882     int i;
06883 
06884     /* if we have already cleaned up, don't try again, it wouldn't be pretty */
06885     /* this shouldn't be a race condition because __kmp_internal_end() is the
06886      * only place to clear __kmp_serial_init */
06887     /* we'll check this later too, after we get the lock */
06888     // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant,
06889     // because the next check will work in any case. 
06890     if( __kmp_global.g.g_abort ) {
06891         KA_TRACE( 11, ("__kmp_internal_end_thread: abort, exiting\n" ));
06892         /* TODO abort? */
06893         return;
06894     }
06895     if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
06896         KA_TRACE( 10, ("__kmp_internal_end_thread: already finished\n" ));
06897         return;
06898     }
06899 
06900     KMP_MB();       /* Flush all pending memory write invalidates.  */
06901 
06902     /* find out who we are and what we should do */
06903     {
06904         int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
06905         KA_TRACE( 10, ("__kmp_internal_end_thread: enter T#%d  (%d)\n", gtid, gtid_req ));
06906         if( gtid == KMP_GTID_SHUTDOWN ) {
06907             KA_TRACE( 10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
06908             return;
06909         } else if( gtid == KMP_GTID_MONITOR ) {
06910             KA_TRACE( 10, ("__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
06911             return;
06912         } else if( gtid == KMP_GTID_DNE ) {
06913             KA_TRACE( 10, ("__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
06914             return;
06915             /* we don't know who we are */
06916         } else if( KMP_UBER_GTID( gtid )) {
06917         /* unregister ourselves as an uber thread.  gtid is no longer valid */
06918             if( __kmp_root[gtid] -> r.r_active ) {
06919                 __kmp_global.g.g_abort = -1;
06920                 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
06921                 KA_TRACE( 10, ("__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
06922                 return;
06923             } else {
06924                 KA_TRACE( 10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
06925                 __kmp_unregister_root_current_thread( gtid );
06926             }
06927         } else {
06928             /* just a worker thread, let's leave */
06929             KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
06930 
06931             #if OMP_30_ENABLED
06932                 if ( gtid >= 0 ) {
06933                     kmp_info_t *this_thr = __kmp_threads[ gtid ];
06934                     if (TCR_PTR(this_thr->th.th_task_team) != NULL) {
06935                         __kmp_unref_task_team(this_thr->th.th_task_team, this_thr);
06936                     }
06937                 }
06938             #endif /* OMP_30_ENABLED */
06939 
06940             KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
06941             return;
06942         }
06943     }
06944     #if defined GUIDEDLL_EXPORTS
06945     // AC: lets not shutdown the Linux* OS dynamic library at the exit of uber thread,
06946     //     because we will better shutdown later in the library destructor.
06947     //     The reason of this change is performance problem when non-openmp thread
06948     //     in a loop forks and joins many openmp threads. We can save a lot of time
06949     //     keeping worker threads alive until the program shutdown.
06950     // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) and
06951     //     Windows(DPD200287443) that occurs when using critical sections from foreign threads.
06952         KA_TRACE( 10, ("__kmp_internal_end_thread: exiting\n") );
06953         return;
06954     #endif
06955     /* synchronize the termination process */
06956     __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
06957 
06958     /* have we already finished */
06959     if( __kmp_global.g.g_abort ) {
06960         KA_TRACE( 10, ("__kmp_internal_end_thread: abort, exiting\n" ));
06961         /* TODO abort? */
06962         __kmp_release_bootstrap_lock( &__kmp_initz_lock );
06963         return;
06964     }
06965     if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
06966         __kmp_release_bootstrap_lock( &__kmp_initz_lock );
06967         return;
06968     }
06969 
06970     /* We need this lock to enforce mutex between this reading of
06971        __kmp_threads_capacity and the writing by __kmp_register_root.
06972        Alternatively, we can use a counter of roots that is
06973        atomically updated by __kmp_get_global_thread_id_reg,
06974        __kmp_do_serial_initialize and __kmp_internal_end_*.
06975     */
06976 
06977     /* should we finish the run-time?  are all siblings done? */
06978     __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
06979 
06980     for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
06981         if ( KMP_UBER_GTID( i ) ) {
06982             KA_TRACE( 10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
06983             __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
06984             __kmp_release_bootstrap_lock( &__kmp_initz_lock );
06985             return;
06986         };
06987     }
06988 
06989     /* now we can safely conduct the actual termination */
06990 
06991     __kmp_internal_end();
06992 
06993     __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
06994     __kmp_release_bootstrap_lock( &__kmp_initz_lock );
06995 
06996     KA_TRACE( 10, ("__kmp_internal_end_thread: exit\n" ) );
06997 
06998     #ifdef DUMP_DEBUG_ON_EXIT
06999         if ( __kmp_debug_buf )
07000             __kmp_dump_debug_buffer();
07001     #endif
07002 } // __kmp_internal_end_thread
07003 
07004 // -------------------------------------------------------------------------------------------------
07005 // Library registration stuff.
07006 
07007 static long   __kmp_registration_flag = 0;
07008     // Random value used to indicate library initialization.
07009 static char * __kmp_registration_str  = NULL;
07010     // Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.
07011 
07012 
07013 static inline
07014 char *
07015 __kmp_reg_status_name() {
07016     /*
07017         On RHEL 3u5 if linked statically, getpid() returns different values in each thread.
07018         If registration and unregistration go in different threads (omp_misc_other_root_exit.cpp test case),
07019         the name of registered_lib_env env var can not be found, because the name will contain different pid.
07020     */
07021     return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() );
07022 } // __kmp_reg_status_get
07023 
07024 
07025 void
07026 __kmp_register_library_startup(
07027     void
07028 ) {
07029 
07030     char * name   = __kmp_reg_status_name();  // Name of the environment variable.
07031     int    done   = 0;
07032     union {
07033         double dtime;
07034         long   ltime;
07035     } time;
07036     #if KMP_OS_WINDOWS
07037         __kmp_initialize_system_tick();
07038     #endif
07039     __kmp_read_system_time( & time.dtime );
07040     __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
07041     __kmp_registration_str =
07042         __kmp_str_format(
07043             "%p-%lx-%s",
07044             & __kmp_registration_flag,
07045             __kmp_registration_flag,
07046             KMP_LIBRARY_FILE
07047         );
07048 
07049     KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
07050 
07051     while ( ! done ) {
07052 
07053         char * value  = NULL; // Actual value of the environment variable.
07054 
07055         // Set environment variable, but do not overwrite if it is exist.
07056         __kmp_env_set( name, __kmp_registration_str, 0 );
07057         // Check the variable is written.
07058         value = __kmp_env_get( name );
07059         if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
07060 
07061             done = 1;    // Ok, environment variable set successfully, exit the loop.
07062 
07063         } else {
07064 
07065             // Oops. Write failed. Another copy of OpenMP RTL is in memory.
07066             // Check whether it alive or dead.
07067             int    neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.
07068             char * tail          = value;
07069             char * flag_addr_str = NULL;
07070             char * flag_val_str  = NULL;
07071             char const * file_name     = NULL;
07072             __kmp_str_split( tail, '-', & flag_addr_str, & tail );
07073             __kmp_str_split( tail, '-', & flag_val_str,  & tail );
07074             file_name = tail;
07075             if ( tail != NULL ) {
07076                 long * flag_addr = 0;
07077                 long   flag_val  = 0;
07078                 sscanf( flag_addr_str, "%p",  & flag_addr );
07079                 sscanf( flag_val_str,  "%lx", & flag_val  );
07080                 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) {
07081                     // First, check whether environment-encoded address is mapped into addr space.
07082                     // If so, dereference it to see if it still has the right value.
07083 
07084                     if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
07085                         neighbor = 1;
07086                     } else {
07087                         // If not, then we know the other copy of the library is no longer running.
07088                         neighbor = 2;
07089                     }; // if
07090                 }; // if
07091             }; // if
07092             switch ( neighbor ) {
07093                 case 0 :      // Cannot parse environment variable -- neighbor status unknown.
07094                     // Assume it is the incompatible format of future version of the library.
07095                     // Assume the other library is alive.
07096                     // WARN( ... ); // TODO: Issue a warning.
07097                     file_name = "unknown library";
07098                     // Attention! Falling to the next case. That's intentional.
07099                 case 1 : {    // Neighbor is alive.
07100                     // Check it is allowed.
07101                     char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" );
07102                     if ( ! __kmp_str_match_true( duplicate_ok ) ) {
07103                         // That's not allowed. Issue fatal error.
07104                         __kmp_msg(
07105                             kmp_ms_fatal,
07106                             KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
07107                             KMP_HNT( DuplicateLibrary ),
07108                             __kmp_msg_null
07109                         );
07110                     }; // if
07111                     KMP_INTERNAL_FREE( duplicate_ok );
07112                     __kmp_duplicate_library_ok = 1;
07113                     done = 1;    // Exit the loop.
07114                 } break;
07115                 case 2 : {    // Neighbor is dead.
07116                     // Clear the variable and try to register library again.
07117                     __kmp_env_unset( name );
07118                 }  break;
07119                 default : {
07120                     KMP_DEBUG_ASSERT( 0 );
07121                 } break;
07122             }; // switch
07123 
07124         }; // if
07125         KMP_INTERNAL_FREE( (void *) value );
07126 
07127     }; // while
07128     KMP_INTERNAL_FREE( (void *) name );
07129 
07130 } // func __kmp_register_library_startup
07131 
07132 
07133 void
07134 __kmp_unregister_library( void ) {
07135 
07136     char * name  = __kmp_reg_status_name();
07137     char * value = __kmp_env_get( name );
07138 
07139     KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
07140     KMP_DEBUG_ASSERT( __kmp_registration_str  != NULL );
07141     if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
07142         // Ok, this is our variable. Delete it.
07143         __kmp_env_unset( name );
07144     }; // if
07145 
07146     KMP_INTERNAL_FREE( __kmp_registration_str );
07147     KMP_INTERNAL_FREE( value );
07148     KMP_INTERNAL_FREE( name );
07149 
07150     __kmp_registration_flag = 0;
07151     __kmp_registration_str  = NULL;
07152 
07153 } // __kmp_unregister_library
07154 
07155 
07156 // End of Library registration stuff.
07157 // -------------------------------------------------------------------------------------------------
07158 
07159 static void
07160 __kmp_do_serial_initialize( void )
07161 {
07162     int i, gtid;
07163     int size;
07164 
07165     KA_TRACE( 10, ("__kmp_serial_initialize: enter\n" ) );
07166 
07167     KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 );
07168     KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 );
07169     KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 );
07170     KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 );
07171     KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) );
07172 
07173     __kmp_validate_locks();
07174 
07175     /* Initialize internal memory allocator */
07176     __kmp_init_allocator();
07177 
07178     /* Register the library startup via an environment variable
07179        and check to see whether another copy of the library is already
07180        registered. */
07181 
07182     __kmp_register_library_startup( );
07183 
07184     /* TODO reinitialization of library */
07185     if( TCR_4(__kmp_global.g.g_done) ) {
07186        KA_TRACE( 10, ("__kmp_do_serial_initialize: reinitialization of library\n" ) );
07187     }
07188 
07189     __kmp_global.g.g_abort = 0;
07190     TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
07191 
07192     /* initialize the locks */
07193     __kmp_init_lock( & __kmp_global_lock     );
07194     __kmp_init_queuing_lock( & __kmp_dispatch_lock );
07195     __kmp_init_lock( & __kmp_debug_lock      );
07196     __kmp_init_atomic_lock( & __kmp_atomic_lock     );
07197     __kmp_init_atomic_lock( & __kmp_atomic_lock_1i  );
07198     __kmp_init_atomic_lock( & __kmp_atomic_lock_2i  );
07199     __kmp_init_atomic_lock( & __kmp_atomic_lock_4i  );
07200     __kmp_init_atomic_lock( & __kmp_atomic_lock_4r  );
07201     __kmp_init_atomic_lock( & __kmp_atomic_lock_8i  );
07202     __kmp_init_atomic_lock( & __kmp_atomic_lock_8r  );
07203     __kmp_init_atomic_lock( & __kmp_atomic_lock_8c  );
07204     __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
07205     __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
07206     __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
07207     __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
07208     __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
07209     __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock  );
07210     __kmp_init_bootstrap_lock( & __kmp_exit_lock      );
07211     __kmp_init_bootstrap_lock( & __kmp_monitor_lock   );
07212     __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
07213 
07214     /* conduct initialization and initial setup of configuration */
07215 
07216     __kmp_runtime_initialize();
07217 
07218     // Some global variable initialization moved here from kmp_env_initialize()
07219 #ifdef KMP_DEBUG
07220     kmp_diag = 0;
07221 #endif
07222     __kmp_abort_delay = 0;
07223 
07224     // From __kmp_init_dflt_team_nth()
07225     /* assume the entire machine will be used */
07226     __kmp_dflt_team_nth_ub = __kmp_xproc;
07227     if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
07228         __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
07229     }
07230     if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
07231         __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
07232     }
07233     __kmp_max_nth = __kmp_sys_max_nth;
07234     __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
07235 
07236     // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part
07237     __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
07238     __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
07239     __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
07240     // From "KMP_LIBRARY" part of __kmp_env_initialize()
07241     __kmp_library = library_throughput;
07242     // From KMP_SCHEDULE initialization
07243     __kmp_static = kmp_sch_static_balanced;
07244     // AC: do not use analytical here, because it is non-monotonous
07245     //__kmp_guided = kmp_sch_guided_iterative_chunked;
07246     #if OMP_30_ENABLED
07247     //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no need to repeate assignment
07248     #endif // OMP_30_ENABLED
07249     // Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch bit control and barrier method
07250     // control parts
07251     #if KMP_FAST_REDUCTION_BARRIER
07252         #define kmp_reduction_barrier_gather_bb ((int)1)
07253         #define kmp_reduction_barrier_release_bb ((int)1)
07254         #define kmp_reduction_barrier_gather_pat bp_hyper_bar
07255         #define kmp_reduction_barrier_release_pat bp_hyper_bar
07256     #endif // KMP_FAST_REDUCTION_BARRIER
07257     for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
07258         __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
07259         __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
07260         __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
07261         __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
07262         #if KMP_FAST_REDUCTION_BARRIER
07263         if( i == bs_reduction_barrier ) { // tested and confirmed on ALTIX only ( lin_64 ): hyper,1
07264             __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
07265             __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
07266             __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
07267             __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
07268         }
07269         #endif // KMP_FAST_REDUCTION_BARRIER
07270     }
07271     #if KMP_FAST_REDUCTION_BARRIER
07272         #undef kmp_reduction_barrier_release_pat
07273         #undef kmp_reduction_barrier_gather_pat
07274         #undef kmp_reduction_barrier_release_bb
07275         #undef kmp_reduction_barrier_gather_bb
07276     #endif // KMP_FAST_REDUCTION_BARRIER
07277     #if KMP_MIC
07278         // AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC
07279         __kmp_barrier_gather_branch_bits [ 0 ] = 3;  // plane gather
07280         __kmp_barrier_release_branch_bits[ 1 ] = 1;  // forkjoin release
07281     #endif
07282 
07283     // From KMP_CHECKS initialization
07284 #ifdef KMP_DEBUG
07285     __kmp_env_checks = TRUE;   /* development versions have the extra checks */
07286 #else
07287     __kmp_env_checks = FALSE;  /* port versions do not have the extra checks */
07288 #endif
07289 
07290     // From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization
07291     __kmp_foreign_tp = TRUE;
07292 
07293     __kmp_global.g.g_dynamic = FALSE;
07294     __kmp_global.g.g_dynamic_mode = dynamic_default;
07295 
07296     __kmp_env_initialize( NULL );
07297     // Print all messages in message catalog for testing purposes.
07298     #ifdef KMP_DEBUG
07299         char const * val = __kmp_env_get( "KMP_DUMP_CATALOG" );
07300         if ( __kmp_str_match_true( val ) ) {
07301             kmp_str_buf_t buffer;
07302             __kmp_str_buf_init( & buffer );
07303             __kmp_i18n_dump_catalog( buffer );
07304             __kmp_printf( "%s", buffer.str );
07305             __kmp_str_buf_free( & buffer );
07306         }; // if
07307         __kmp_env_free( & val );
07308     #endif
07309 
07310     // Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part
07311     __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
07312 
07313     //  omalyshe: This initialisation beats env var setting.
07314     //__kmp_load_balance_interval = 1.0;
07315 
07316     // If the library is shut down properly, both pools must be NULL. Just in case, set them
07317     // to NULL -- some memory may leak, but subsequent code will work even if pools are not freed.
07318     KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
07319     KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
07320     KMP_DEBUG_ASSERT( __kmp_team_pool   == NULL );
07321     __kmp_thread_pool = NULL;
07322     __kmp_thread_pool_insert_pt = NULL;
07323     __kmp_team_pool   = NULL;
07324 
07325     /* Allocate all of the variable sized records */
07326     /* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are expandable */
07327     /* Since allocation is cache-aligned, just add extra padding at the end */
07328     size = (sizeof(kmp_info_t*) + sizeof(kmp_root_t*))*__kmp_threads_capacity + CACHE_LINE;
07329     __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
07330     __kmp_root    = (kmp_root_t**) ((char*)__kmp_threads + sizeof(kmp_info_t*) * __kmp_threads_capacity );
07331 
07332     /* init thread counts */
07333     KMP_DEBUG_ASSERT( __kmp_all_nth == 0 ); // Asserts fail if the library is reinitializing and
07334     KMP_DEBUG_ASSERT( __kmp_nth == 0 );     // something was wrong in termination. 
07335     __kmp_all_nth = 0;
07336     __kmp_nth     = 0;
07337 
07338     /* setup the uber master thread and hierarchy */
07339     gtid = __kmp_register_root( TRUE );
07340     KA_TRACE( 10, ("__kmp_do_serial_initialize  T#%d\n", gtid ));
07341     KMP_ASSERT( KMP_UBER_GTID( gtid ) );
07342     KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
07343 
07344     KMP_MB();       /* Flush all pending memory write invalidates.  */
07345 
07346     __kmp_common_initialize();
07347 
07348     #if KMP_OS_UNIX
07349         /* invoke the child fork handler */
07350         __kmp_register_atfork();
07351     #endif
07352 
07353     #if ! defined GUIDEDLL_EXPORTS
07354         {
07355             /* Invoke the exit handler when the program finishes, only for static library.
07356                For dynamic library, we already have _fini and DllMain.
07357              */
07358             int rc = atexit( __kmp_internal_end_atexit );
07359             if ( rc != 0 ) {
07360                 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
07361             }; // if
07362         }
07363     #endif
07364 
07365     #if KMP_HANDLE_SIGNALS
07366         #if KMP_OS_UNIX
07367             /* NOTE: make sure that this is called before the user installs
07368              *          their own signal handlers so that the user handlers
07369              *          are called first.  this way they can return false,
07370              *          not call our handler, avoid terminating the library,
07371              *          and continue execution where they left off. */
07372             __kmp_install_signals( FALSE );
07373         #endif /* KMP_OS_UNIX */
07374         #if KMP_OS_WINDOWS
07375             __kmp_install_signals( TRUE );
07376         #endif /* KMP_OS_WINDOWS */
07377     #endif
07378 
07379     /* we have finished the serial initialization */
07380     __kmp_init_counter ++;
07381 
07382     __kmp_init_serial = TRUE;
07383 
07384     if (__kmp_settings) {
07385         __kmp_env_print();
07386     }
07387 
07388     KMP_MB();
07389 
07390     KA_TRACE( 10, ("__kmp_do_serial_initialize: exit\n" ) );
07391 }
07392 
07393 void
07394 __kmp_serial_initialize( void )
07395 {
07396     if ( __kmp_init_serial ) {
07397         return;
07398     }
07399     __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
07400     if ( __kmp_init_serial ) {
07401         __kmp_release_bootstrap_lock( &__kmp_initz_lock );
07402         return;
07403     }
07404     __kmp_do_serial_initialize();
07405     __kmp_release_bootstrap_lock( &__kmp_initz_lock );
07406 }
07407 
07408 static void
07409 __kmp_do_middle_initialize( void )
07410 {
07411     int i, j;
07412     int prev_dflt_team_nth;
07413 
07414     if( !__kmp_init_serial ) {
07415         __kmp_do_serial_initialize();
07416     }
07417 
07418     KA_TRACE( 10, ("__kmp_middle_initialize: enter\n" ) );
07419 
07420     //
07421     // Save the previous value for the __kmp_dflt_team_nth so that
07422     // we can avoid some reinitialization if it hasn't changed.
07423     //
07424     prev_dflt_team_nth = __kmp_dflt_team_nth;
07425 
07426 #if KMP_OS_WINDOWS || KMP_OS_LINUX
07427     //
07428     // __kmp_affinity_initialize() will try to set __kmp_ncores to the
07429     // number of cores on the machine.
07430     //
07431     __kmp_affinity_initialize();
07432 
07433     //
07434     // Run through the __kmp_threads array and set the affinity mask
07435     // for each root thread that is currently registered with the RTL.
07436     //
07437     for ( i = 0; i < __kmp_threads_capacity; i++ ) {
07438         if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
07439             __kmp_affinity_set_init_mask( i, TRUE );
07440         }
07441     }
07442 #endif /* KMP_OS_WINDOWS || KMP_OS_LINUX */
07443 
07444     KMP_ASSERT( __kmp_xproc > 0 );
07445     if ( __kmp_avail_proc == 0 ) {
07446         __kmp_avail_proc = __kmp_xproc;
07447     }
07448 
07449     // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), correct them now
07450     j = 0;
07451     while ( __kmp_nested_nth.used && ! __kmp_nested_nth.nth[ j ] ) {
07452         __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
07453         j++;
07454     }
07455 
07456     if ( __kmp_dflt_team_nth == 0 ) {
07457 #ifdef KMP_DFLT_NTH_CORES
07458         //
07459         // Default #threads = #cores
07460         //
07461         __kmp_dflt_team_nth = __kmp_ncores;
07462         KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
07463           __kmp_dflt_team_nth ) );
07464 #else
07465         //
07466         // Default #threads = #available OS procs
07467         //
07468         __kmp_dflt_team_nth = __kmp_avail_proc;
07469         KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
07470           __kmp_dflt_team_nth ) );
07471 #endif /* KMP_DFLT_NTH_CORES */
07472     }
07473 
07474     if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
07475         __kmp_dflt_team_nth = KMP_MIN_NTH;
07476     }
07477     if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
07478         __kmp_dflt_team_nth = __kmp_sys_max_nth;
07479     }
07480 
07481     //
07482     // There's no harm in continuing if the following check fails,
07483     // but it indicates an error in the previous logic.
07484     //
07485     KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
07486 
07487     if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
07488         //
07489         // Run through the __kmp_threads array and set the num threads icv
07490         // for each root thread that is currently registered with the RTL
07491         // (which has not already explicitly set its nthreads-var with a
07492         // call to omp_set_num_threads()).
07493         //
07494         for ( i = 0; i < __kmp_threads_capacity; i++ ) {
07495             kmp_info_t *thread = __kmp_threads[ i ];
07496             if ( thread == NULL ) continue;
07497 #if OMP_30_ENABLED
07498             if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue;
07499 #else
07500             if ( thread->th.th_team->t.t_set_nproc[ thread->th.th_info.ds.ds_tid ]  != 0 ) continue;
07501 #endif /* OMP_30_ENABLED */
07502 
07503             set__nproc_p( __kmp_threads[ i ], __kmp_dflt_team_nth );
07504         }
07505     }
07506     KA_TRACE( 20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
07507       __kmp_dflt_team_nth) );
07508 
07509 #ifdef KMP_ADJUST_BLOCKTIME
07510     /* Adjust blocktime to zero if necessary */
07511     /* now that __kmp_avail_proc is set      */
07512     if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
07513         KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
07514         if ( __kmp_nth > __kmp_avail_proc ) {
07515             __kmp_zero_bt = TRUE;
07516         }
07517     }
07518 #endif /* KMP_ADJUST_BLOCKTIME */
07519 
07520     /* we have finished middle initialization */
07521     TCW_SYNC_4(__kmp_init_middle, TRUE);
07522 
07523     KA_TRACE( 10, ("__kmp_do_middle_initialize: exit\n" ) );
07524 }
07525 
07526 void
07527 __kmp_middle_initialize( void )
07528 {
07529     if ( __kmp_init_middle ) {
07530         return;
07531     }
07532     __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
07533     if ( __kmp_init_middle ) {
07534         __kmp_release_bootstrap_lock( &__kmp_initz_lock );
07535         return;
07536     }
07537     __kmp_do_middle_initialize();
07538     __kmp_release_bootstrap_lock( &__kmp_initz_lock );
07539 }
07540 
07541 void
07542 __kmp_parallel_initialize( void )
07543 {
07544     int gtid = __kmp_entry_gtid();      // this might be a new root
07545 
07546     /* syncronize parallel initialization (for sibling) */
07547     if( TCR_4(__kmp_init_parallel) ) return;
07548     __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
07549     if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; }
07550 
07551     /* TODO reinitialization after we have already shut down */
07552     if( TCR_4(__kmp_global.g.g_done) ) {
07553         KA_TRACE( 10, ("__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
07554         __kmp_infinite_loop();
07555     }
07556 
07557     /* jc: The lock __kmp_initz_lock is already held, so calling __kmp_serial_initialize
07558            would cause a deadlock.  So we call __kmp_do_serial_initialize directly.
07559     */
07560     if( !__kmp_init_middle ) {
07561         __kmp_do_middle_initialize();
07562     }
07563 
07564     /* begin initialization */
07565     KA_TRACE( 10, ("__kmp_parallel_initialize: enter\n" ) );
07566     KMP_ASSERT( KMP_UBER_GTID( gtid ) );
07567 
07568 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
07569     //
07570     // Save the FP control regs.
07571     // Worker threads will set theirs to these values at thread startup.
07572     //
07573     __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
07574     __kmp_store_mxcsr( &__kmp_init_mxcsr );
07575     __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
07576 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
07577 
07578 #if KMP_OS_UNIX
07579 # if KMP_HANDLE_SIGNALS
07580     /*  must be after __kmp_serial_initialize  */
07581     __kmp_install_signals( TRUE );
07582 # endif
07583 #endif
07584 
07585     __kmp_suspend_initialize();
07586 
07587 #  if defined(USE_LOAD_BALANCE)
07588     if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
07589         __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
07590     }
07591 #else
07592     if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
07593         __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
07594     }
07595 #endif
07596 
07597     if ( __kmp_version ) {
07598         __kmp_print_version_2();
07599     }
07600 
07601     /* we have finished parallel initialization */
07602     TCW_SYNC_4(__kmp_init_parallel, TRUE);
07603 
07604     KMP_MB();
07605     KA_TRACE( 10, ("__kmp_parallel_initialize: exit\n" ) );
07606 
07607     __kmp_release_bootstrap_lock( &__kmp_initz_lock );
07608 }
07609 
07610 
07611 /* ------------------------------------------------------------------------ */
07612 
07613 void
07614 __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
07615   kmp_team_t *team )
07616 {
07617     kmp_disp_t *dispatch;
07618 
07619     KMP_MB();
07620 
07621     /* none of the threads have encountered any constructs, yet. */
07622     this_thr->th.th_local.this_construct = 0;
07623     this_thr->th.th_local.last_construct = 0;
07624 #if KMP_CACHE_MANAGE
07625     KMP_CACHE_PREFETCH( &this_thr -> th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
07626 #endif /* KMP_CACHE_MANAGE */
07627     dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
07628     KMP_DEBUG_ASSERT( dispatch );
07629     KMP_DEBUG_ASSERT( team -> t.t_dispatch );
07630     //KMP_DEBUG_ASSERT( this_thr -> th.th_dispatch == &team -> t.t_dispatch[ this_thr->th.th_info.ds.ds_tid ] );
07631 
07632     dispatch -> th_disp_index = 0;    /* reset the dispatch buffer counter */
07633 
07634     if( __kmp_env_consistency_check )
07635         __kmp_push_parallel( gtid, team->t.t_ident );
07636 
07637     KMP_MB();       /* Flush all pending memory write invalidates.  */
07638 }
07639 
07640 void
07641 __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr,
07642   kmp_team_t *team )
07643 {
07644     if( __kmp_env_consistency_check )
07645         __kmp_pop_parallel( gtid, team->t.t_ident );
07646 }
07647 
07648 int
07649 __kmp_invoke_task_func( int gtid )
07650 {
07651     int          rc;
07652     int          tid      = __kmp_tid_from_gtid( gtid );
07653     kmp_info_t  *this_thr = __kmp_threads[ gtid ];
07654     kmp_team_t  *team     = this_thr -> th.th_team;
07655 
07656      
07657 #if OMPT_SUPPORT
07658     void **exit_runtime_p = 
07659       &(team->t.t_implicit_task_taskdata[tid].
07660     ompt_task_info.frame.exit_runtime_frame);
07661 #else
07662     void *dummy;
07663     void **exit_runtime_p = &dummy;
07664 #endif
07665 
07666     __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
07667     rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
07668                  gtid, tid, (int) team->t.t_argc, 
07669                  (void **) team->t.t_argv, exit_runtime_p );
07670 
07671 #if OMPT_SUPPORT
07672     team->t.t_implicit_task_taskdata[tid].
07673       ompt_task_info.frame.exit_runtime_frame = 0;
07674 #endif
07675 
07676     __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
07677 
07678     return rc;
07679 }
07680 
07681 /* this sets the requested number of threads for the next parallel region
07682  * encountered by this team */
07683 /* since this should be enclosed in the forkjoin critical section it
07684  * should avoid race conditions with assymmetrical nested parallelism */
07685 
07686 void
07687 __kmp_push_num_threads( ident_t *id, int gtid, int num_threads )
07688 {
07689     kmp_info_t *thr = __kmp_threads[gtid];
07690 
07691     if( num_threads > 0 )
07692         thr -> th.th_set_nproc = num_threads;
07693 }
07694 
07695 #if OMP_40_ENABLED
07696 
07697 //
07698 // Set the proc_bind var to use in the following parallel region.
07699 //
07700 void
07701 __kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind )
07702 {
07703     kmp_info_t *thr = __kmp_threads[gtid];
07704     thr -> th.th_set_proc_bind = proc_bind;
07705 }
07706 
07707 #endif /* OMP_40_ENABLED */
07708 
07709 /* Launch the worker threads into the microtask. */
07710 
07711 void
07712 __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team )
07713 {
07714     kmp_info_t *this_thr = __kmp_threads[gtid];
07715 
07716 #ifdef KMP_DEBUG
07717     int f;
07718 #endif /* KMP_DEBUG */
07719 
07720     KMP_DEBUG_ASSERT( team );
07721     KMP_DEBUG_ASSERT( this_thr -> th.th_team  ==  team );
07722     KMP_ASSERT(       KMP_MASTER_GTID(gtid) );
07723     KMP_MB();       /* Flush all pending memory write invalidates.  */
07724 
07725     team -> t.t_construct = 0;          /* no single directives seen yet */
07726     team -> t.t_ordered.dt.t_value = 0; /* thread 0 enters the ordered section first */
07727 
07728     /* Reset the identifiers on the dispatch buffer */
07729     KMP_DEBUG_ASSERT( team -> t.t_disp_buffer );
07730     if ( team->t.t_max_nproc > 1 ) {
07731         int i;
07732         for (i = 0; i <  KMP_MAX_DISP_BUF; ++i)
07733             team -> t.t_disp_buffer[ i ].buffer_index = i;
07734     } else {
07735         team -> t.t_disp_buffer[ 0 ].buffer_index = 0;
07736     }
07737 
07738     KMP_MB();       /* Flush all pending memory write invalidates.  */
07739     KMP_ASSERT( this_thr -> th.th_team  ==  team );
07740 
07741 #ifdef KMP_DEBUG
07742     for( f=0 ; f<team->t.t_nproc ; f++ ) {
07743         KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
07744                           team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
07745     }
07746 #endif /* KMP_DEBUG */
07747 
07748     /* release the worker threads so they may begin working */
07749     __kmp_fork_barrier( gtid, 0 );
07750 }
07751 
07752 
07753 void
07754 __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team )
07755 {
07756     kmp_info_t *this_thr = __kmp_threads[gtid];
07757 
07758     KMP_DEBUG_ASSERT( team );
07759     KMP_DEBUG_ASSERT( this_thr -> th.th_team  ==  team );
07760     KMP_ASSERT(       KMP_MASTER_GTID(gtid) );
07761     KMP_MB();       /* Flush all pending memory write invalidates.  */
07762 
07763     /* Join barrier after fork */
07764 
07765 #ifdef KMP_DEBUG
07766     if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
07767         __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]);
07768         __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
07769                      gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
07770         __kmp_print_structure();
07771     }
07772     KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
07773                      __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
07774 #endif /* KMP_DEBUG */
07775 
07776     __kmp_join_barrier( gtid );  /* wait for everyone */
07777 
07778     KMP_MB();       /* Flush all pending memory write invalidates.  */
07779     KMP_ASSERT( this_thr -> th.th_team  ==  team );
07780 }
07781 
07782 
07783 /* ------------------------------------------------------------------------ */
07784 /* ------------------------------------------------------------------------ */
07785 
07786 #ifdef USE_LOAD_BALANCE
07787 
07788 //
07789 // Return the worker threads actively spinning in the hot team, if we
07790 // are at the outermost level of parallelism.  Otherwise, return 0.
07791 //
07792 static int
07793 __kmp_active_hot_team_nproc( kmp_root_t *root )
07794 {
07795     int i;
07796     int retval;
07797     kmp_team_t *hot_team;
07798 
07799     if ( root->r.r_active ) {
07800         return 0;
07801     }
07802     hot_team = root->r.r_hot_team;
07803     if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
07804         return hot_team->t.t_nproc - 1;  // Don't count master thread
07805     }
07806 
07807     //
07808     // Skip the master thread - it is accounted for elsewhere.
07809     //
07810     retval = 0;
07811     for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
07812         if ( hot_team->t.t_threads[i]->th.th_active ) {
07813             retval++;
07814         }
07815     }
07816     return retval;
07817 }
07818 
07819 //
07820 // Perform an automatic adjustment to the number of
07821 // threads used by the next parallel region.
07822 //
07823 static int
07824 __kmp_load_balance_nproc( kmp_root_t *root, int set_nproc )
07825 {
07826     int retval;
07827     int pool_active;
07828     int hot_team_active;
07829     int team_curr_active;
07830     int system_active;
07831 
07832     KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
07833                 root, set_nproc ) );
07834     KMP_DEBUG_ASSERT( root );
07835     #if OMP_30_ENABLED
07836     KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
07837     #else
07838     KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_set_dynamic[0] == TRUE );
07839     #endif
07840     KMP_DEBUG_ASSERT( set_nproc > 1 );
07841 
07842     if ( set_nproc == 1) {
07843         KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) );
07844         return 1;
07845     }
07846 
07847     //
07848     // Threads that are active in the thread pool, active in the hot team
07849     // for this particular root (if we are at the outer par level), and
07850     // the currently executing thread (to become the master) are available
07851     // to add to the new team, but are currently contributing to the system
07852     // load, and must be accounted for.
07853     //
07854     pool_active = TCR_4(__kmp_thread_pool_active_nth);
07855     hot_team_active = __kmp_active_hot_team_nproc( root );
07856     team_curr_active = pool_active + hot_team_active + 1;
07857 
07858     //
07859     // Check the system load.
07860     //
07861     system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
07862     KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
07863       system_active, pool_active, hot_team_active ) );
07864 
07865     if ( system_active < 0 ) {
07866         //
07867         // There was an error reading the necessary info from /proc,
07868         // so use the thread limit algorithm instead.  Once we set
07869         // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit,
07870         // we shouldn't wind up getting back here.
07871         //
07872         __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
07873         KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" );
07874 
07875         //
07876         // Make this call behave like the thread limit algorithm.
07877         //
07878         retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
07879           : root->r.r_hot_team->t.t_nproc);
07880         if ( retval > set_nproc ) {
07881             retval = set_nproc;
07882         }
07883         if ( retval < KMP_MIN_NTH ) {
07884             retval = KMP_MIN_NTH;
07885         }
07886 
07887         KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
07888         return retval;
07889     }
07890 
07891     //
07892     // There is a slight delay in the load balance algorithm in detecting
07893     // new running procs.  The real system load at this instant should be
07894     // at least as large as the #active omp thread that are available to
07895     // add to the team.
07896     //
07897     if ( system_active < team_curr_active ) {
07898         system_active = team_curr_active;
07899     }
07900     retval = __kmp_avail_proc - system_active + team_curr_active;
07901     if ( retval > set_nproc ) {
07902         retval = set_nproc;
07903     }
07904     if ( retval < KMP_MIN_NTH ) {
07905         retval = KMP_MIN_NTH;
07906     }
07907 
07908     KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
07909     return retval;
07910 } // __kmp_load_balance_nproc()
07911 
07912 #endif /* USE_LOAD_BALANCE */
07913 
07914 
07915 /* ------------------------------------------------------------------------ */
07916 /* ------------------------------------------------------------------------ */
07917 
07918 /* NOTE: this is called with the __kmp_init_lock held */
07919 void
07920 __kmp_cleanup( void )
07921 {
07922     int f;
07923 
07924     KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) );
07925 
07926     if (TCR_4(__kmp_init_parallel)) {
07927 #if KMP_HANDLE_SIGNALS
07928         __kmp_remove_signals();
07929 #endif
07930         TCW_4(__kmp_init_parallel, FALSE);
07931     }
07932 
07933     if (TCR_4(__kmp_init_middle)) {
07934 #if KMP_OS_WINDOWS || KMP_OS_LINUX
07935         __kmp_affinity_uninitialize();
07936 #endif /* KMP_OS_WINDOWS || KMP_OS_LINUX */
07937         TCW_4(__kmp_init_middle, FALSE);
07938     }
07939 
07940     KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) );
07941 
07942     if (__kmp_init_serial) {
07943 
07944         __kmp_runtime_destroy();
07945 
07946         __kmp_init_serial = FALSE;
07947     }
07948 
07949     for ( f = 0; f < __kmp_threads_capacity; f++ ) {
07950         if ( __kmp_root[ f ] != NULL ) {
07951             __kmp_free( __kmp_root[ f ] );
07952             __kmp_root[ f ] = NULL;
07953         }
07954     }
07955     __kmp_free( __kmp_threads );
07956     // __kmp_threads and __kmp_root were allocated at once, as single block, so there is no need in
07957     // freeing __kmp_root.
07958     __kmp_threads = NULL;
07959     __kmp_root    = NULL;
07960     __kmp_threads_capacity = 0;
07961 
07962     __kmp_cleanup_user_locks();
07963 
07964     #if KMP_OS_LINUX || KMP_OS_WINDOWS
07965         KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file );
07966         __kmp_cpuinfo_file = NULL;
07967     #endif /* KMP_OS_LINUX || KMP_OS_WINDOWS */
07968 
07969     KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
07970     __kmp_nested_nth.nth = NULL;
07971     __kmp_nested_nth.size = 0;
07972     __kmp_nested_nth.used = 0;
07973 
07974     __kmp_i18n_catclose();
07975 
07976     KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );
07977 }
07978 
07979 /* ------------------------------------------------------------------------ */
07980 /* ------------------------------------------------------------------------ */
07981 
07982 int
07983 __kmp_ignore_mppbeg( void )
07984 {
07985     char *env;
07986 
07987     if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) {
07988         if (__kmp_str_match_false( env ))
07989             return FALSE;
07990     }
07991     // By default __kmpc_begin() is no-op.
07992     return TRUE;
07993 }
07994 
07995 int
07996 __kmp_ignore_mppend( void )
07997 {
07998     char *env;
07999 
08000     if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) {
08001         if (__kmp_str_match_false( env ))
08002             return FALSE;
08003     }
08004     // By default __kmpc_end() is no-op.
08005     return TRUE;
08006 }
08007 
08008 void
08009 __kmp_internal_begin( void )
08010 {
08011     int gtid;
08012     kmp_root_t *root;
08013 
08014     /* this is a very important step as it will register new sibling threads
08015      * and assign these new uber threads a new gtid */
08016     gtid = __kmp_entry_gtid();
08017     root = __kmp_threads[ gtid ] -> th.th_root;
08018     KMP_ASSERT( KMP_UBER_GTID( gtid ));
08019 
08020     if( root->r.r_begin ) return;
08021     __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
08022     if( root->r.r_begin ) {
08023         __kmp_release_lock( & root->r.r_begin_lock, gtid );
08024         return;
08025     }
08026 
08027     root -> r.r_begin = TRUE;
08028 
08029     __kmp_release_lock( & root->r.r_begin_lock, gtid );
08030 }
08031 
08032 
08033 /* ------------------------------------------------------------------------ */
08034 /* ------------------------------------------------------------------------ */
08035 
08036 void
08037 __kmp_user_set_library (enum library_type arg)
08038 {
08039     int gtid;
08040     kmp_root_t *root;
08041     kmp_info_t *thread;
08042 
08043     /* first, make sure we are initialized so we can get our gtid */
08044 
08045     gtid = __kmp_entry_gtid();
08046     thread = __kmp_threads[ gtid ];
08047 
08048     root = thread -> th.th_root;
08049 
08050     KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
08051     if (root->r.r_in_parallel) { /* Must be called in serial section of top-level thread */
08052         KMP_WARNING( SetLibraryIncorrectCall );
08053         return;
08054     }
08055 
08056     switch ( arg ) {
08057     case library_serial :
08058         thread -> th.th_set_nproc = 0;
08059         set__nproc_p( thread, 1 );
08060         break;
08061     case library_turnaround :
08062         thread -> th.th_set_nproc = 0;
08063         set__nproc_p( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
08064         break;
08065     case library_throughput :
08066         thread -> th.th_set_nproc = 0;
08067         set__nproc_p( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
08068         break;
08069     default:
08070         KMP_FATAL( UnknownLibraryType, arg );
08071     }
08072 
08073     __kmp_aux_set_library ( arg );
08074 }
08075 
08076 void
08077 __kmp_aux_set_stacksize( size_t arg )
08078 {
08079     if (! __kmp_init_serial)
08080         __kmp_serial_initialize();
08081 
08082 #if KMP_OS_DARWIN
08083     if (arg & (0x1000 - 1)) {
08084         arg &= ~(0x1000 - 1);
08085         if(arg + 0x1000) /* check for overflow if we round up */
08086             arg += 0x1000;
08087     }
08088 #endif
08089     __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
08090 
08091     /* only change the default stacksize before the first parallel region */
08092     if (! TCR_4(__kmp_init_parallel)) {
08093         size_t value = arg;       /* argument is in bytes */
08094 
08095         if (value < __kmp_sys_min_stksize )
08096             value = __kmp_sys_min_stksize ;
08097         else if (value > KMP_MAX_STKSIZE)
08098             value = KMP_MAX_STKSIZE;
08099 
08100         __kmp_stksize = value;
08101 
08102         __kmp_env_stksize = TRUE;    /* was KMP_STACKSIZE specified? */
08103     }
08104 
08105     __kmp_release_bootstrap_lock( &__kmp_initz_lock );
08106 }
08107 
08108 /* set the behaviour of the runtime library */
08109 /* TODO this can cause some odd behaviour with sibling parallelism... */
08110 void
08111 __kmp_aux_set_library (enum library_type arg)
08112 {
08113     __kmp_library = arg;
08114 
08115     switch ( __kmp_library ) {
08116     case library_serial :
08117         {
08118             KMP_INFORM( LibraryIsSerial );
08119             (void) __kmp_change_library( TRUE );
08120         }
08121         break;
08122     case library_turnaround :
08123         (void) __kmp_change_library( TRUE );
08124         break;
08125     case library_throughput :
08126         (void) __kmp_change_library( FALSE );
08127         break;
08128     default:
08129         KMP_FATAL( UnknownLibraryType, arg );
08130     }
08131 }
08132 
08133 /* ------------------------------------------------------------------------ */
08134 /* ------------------------------------------------------------------------ */
08135 
08136 void
08137 __kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid)
08138 {
08139     int blocktime = arg;        /* argument is in milliseconds */
08140     int bt_intervals;
08141     int bt_set;
08142 
08143     __kmp_save_internal_controls( thread );
08144 
08145     /* Normalize and set blocktime for the teams */
08146     if (blocktime < KMP_MIN_BLOCKTIME)
08147         blocktime = KMP_MIN_BLOCKTIME;
08148     else if (blocktime > KMP_MAX_BLOCKTIME)
08149         blocktime = KMP_MAX_BLOCKTIME;
08150 
08151     set__blocktime_team( thread -> th.th_team, tid, blocktime );
08152     set__blocktime_team( thread -> th.th_serial_team, 0, blocktime );
08153 
08154     /* Calculate and set blocktime intervals for the teams */
08155     bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
08156 
08157     set__bt_intervals_team( thread -> th.th_team, tid, bt_intervals );
08158     set__bt_intervals_team( thread -> th.th_serial_team, 0, bt_intervals );
08159 
08160     /* Set whether blocktime has been set to "TRUE" */
08161     bt_set = TRUE;
08162 
08163     set__bt_set_team( thread -> th.th_team, tid, bt_set );
08164     set__bt_set_team( thread -> th.th_serial_team, 0, bt_set );
08165     KF_TRACE(10, ( "kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n",
08166                   __kmp_gtid_from_tid(tid, thread->th.th_team),
08167                   thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups ) );
08168 }
08169 
08170 void
08171 __kmp_aux_set_defaults(
08172     char const * str,
08173     int          len
08174 ) {
08175     if ( ! __kmp_init_serial ) {
08176         __kmp_serial_initialize();
08177     };
08178     __kmp_env_initialize( str );
08179 
08180     if (__kmp_settings) {
08181         __kmp_env_print();
08182     }
08183 } // __kmp_aux_set_defaults
08184 
08185 /* ------------------------------------------------------------------------ */
08186 
08187 /*
08188  * internal fast reduction routines
08189  */
08190 
08191 // implementation rev. 0.4
08192 // AT: determine CPU, and always use 'critical method' if non-Intel
08193 // AT: test loc != NULL
08194 // AT: what to return if lck == NULL
08195 // AT: tune the cut-off point for atomic reduce method
08196 // AT: tune what to return depending on the CPU and platform configuration
08197 // AT: tune what to return depending on team size
08198 // AT: move this function out to kmp_csupport.c
08199 PACKED_REDUCTION_METHOD_T
08200 __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
08201         kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
08202         kmp_critical_name *lck )
08203 {
08204 
08205     // Default reduction method: critical construct ( lck != NULL, like in current PAROPT )
08206     // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method can be selected by RTL
08207     // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method can be selected by RTL
08208     // Finally, it's up to OpenMP RTL to make a decision on which method to select among generated by PAROPT.
08209 
08210     PACKED_REDUCTION_METHOD_T retval;
08211 
08212     int team_size;
08213 
08214     KMP_DEBUG_ASSERT( loc );    // it would be nice to test ( loc != 0 )
08215     KMP_DEBUG_ASSERT( lck );    // it would be nice to test ( lck != 0 )
08216 
08217     #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
08218     #define FAST_REDUCTION_TREE_METHOD_GENERATED   ( ( reduce_data ) && ( reduce_func ) )
08219 
08220     retval = critical_reduce_block;
08221 
08222     team_size = __kmp_get_team_num_threads( global_tid ); // another choice of getting a team size ( with 1 dynamic deference ) is slower
08223 
08224     if( team_size == 1 ) {
08225 
08226         retval = empty_reduce_block;
08227 
08228     } else {
08229 
08230         int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
08231         int tree_available   = FAST_REDUCTION_TREE_METHOD_GENERATED;
08232 
08233         #if KMP_ARCH_X86_64
08234 
08235             #if KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_DARWIN
08236                 #if KMP_MIC
08237                     #define REDUCTION_TEAMSIZE_CUTOFF 8
08238                 #else // KMP_MIC
08239                     #define REDUCTION_TEAMSIZE_CUTOFF 4
08240                 #endif // KMP_MIC
08241                 if( tree_available ) {
08242                     if( team_size <= REDUCTION_TEAMSIZE_CUTOFF ) {
08243                         if ( atomic_available ) {
08244                             retval = atomic_reduce_block;
08245                         }
08246                     } else {
08247                         retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
08248                     }
08249                 } else if ( atomic_available ) {
08250                     retval = atomic_reduce_block;
08251                 }
08252             #else
08253                 #error "Unknown or unsupported OS"
08254             #endif // KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_DARWIN
08255 
08256         #elif KMP_ARCH_X86
08257 
08258             #if KMP_OS_LINUX || KMP_OS_WINDOWS
08259 
08260                 // similar to win_32
08261                 // 4x1x2 fxqlin04, the 'linear,linear' barrier
08262 
08263                 // similar to lin_32
08264                 // 4x1x2 fxqwin04, the 'linear,linear' barrier
08265 
08266                 // actual measurement shows that the critical section method is better if team_size <= 8;
08267                 // what happenes when team_size > 8 ? ( no machine to test )
08268 
08269                 // TO DO: need to run a 32-bit code on Intel(R) 64
08270                 // TO DO: test the 'hyper,hyper,1,1' barrier
08271 
08272                 // basic tuning
08273 
08274                 if( atomic_available ) {
08275                     if( num_vars <= 2 ) { // && ( team_size <= 8 ) due to false-sharing ???
08276                         retval = atomic_reduce_block;
08277                     }
08278                 } // otherwise: use critical section
08279 
08280             #elif KMP_OS_DARWIN
08281 
08282 
08283                 if( atomic_available && ( num_vars <= 3 ) ) {
08284                         retval = atomic_reduce_block;
08285                 } else if( tree_available ) {
08286                     if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) {
08287                         retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
08288                     }
08289                 } // otherwise: use critical section
08290 
08291             #else
08292                 #error "Unknown or unsupported OS"
08293             #endif
08294 
08295         #else
08296             #error "Unknown or unsupported architecture"
08297         #endif
08298 
08299     }
08300 
08301     //AT: TO DO: critical block method not implemented by PAROPT
08302     //if( retval == __kmp_critical_reduce_block ) {
08303     //  if( lck == NULL ) { // critical block method not implemented by PAROPT
08304     //  }
08305     //}
08306 
08307     // tune what to return depending on the CPU and platform configuration
08308     //           (sometimes tree method is slower than critical)
08309 
08310     // probably tune what to return depending on team size
08311 
08312 
08313     // KMP_FORCE_REDUCTION
08314 
08315     if( __kmp_force_reduction_method != reduction_method_not_defined ) {
08316 
08317         PACKED_REDUCTION_METHOD_T forced_retval;
08318 
08319         int atomic_available, tree_available;
08320 
08321         switch( ( forced_retval = __kmp_force_reduction_method ) )
08322         {
08323             case critical_reduce_block:
08324                 KMP_ASSERT( lck );              // lck should be != 0
08325                 if( team_size <= 1 ) {
08326                     forced_retval = empty_reduce_block;
08327                 }
08328                 break;
08329 
08330             case atomic_reduce_block:
08331                 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
08332                 KMP_ASSERT( atomic_available ); // atomic_available should be != 0
08333                 break;
08334 
08335             case tree_reduce_block:
08336                 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
08337                 KMP_ASSERT( tree_available );   // tree_available should be != 0
08338                 #if KMP_FAST_REDUCTION_BARRIER
08339                 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
08340                 #endif
08341                 break;
08342 
08343             default:
08344                 KMP_ASSERT( 0 ); // "unsupported method specified"
08345         }
08346 
08347         retval = forced_retval;
08348     }
08349 
08350     KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) );
08351 
08352     #undef FAST_REDUCTION_TREE_METHOD_GENERATED
08353     #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
08354 
08355     return ( retval );
08356 }
08357 
08358 // this function is for testing set/get/determine reduce method
08359 kmp_int32
08360 __kmp_get_reduce_method( void ) {
08361     return ( ( __kmp_entry_thread() -> th.th_local.packed_reduction_method ) >> 8 );
08362 }
08363 
08364 /* ------------------------------------------------------------------------ */

Generated on 25 Aug 2013 for libomp_oss by  doxygen 1.6.1