kmp_csupport.c

00001 /*
00002  * kmp_csupport.c -- kfront linkage support for OpenMP.
00003  * $Revision: 42263 $
00004  * $Date: 2013-04-04 11:03:19 -0500 (Thu, 04 Apr 2013) $
00005  */
00006 
00007 /* <copyright>
00008     Copyright (c) 1997-2013 Intel Corporation.  All Rights Reserved.
00009 
00010     Redistribution and use in source and binary forms, with or without
00011     modification, are permitted provided that the following conditions
00012     are met:
00013 
00014       * Redistributions of source code must retain the above copyright
00015         notice, this list of conditions and the following disclaimer.
00016       * Redistributions in binary form must reproduce the above copyright
00017         notice, this list of conditions and the following disclaimer in the
00018         documentation and/or other materials provided with the distribution.
00019       * Neither the name of Intel Corporation nor the names of its
00020         contributors may be used to endorse or promote products derived
00021         from this software without specific prior written permission.
00022 
00023     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00024     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00025     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00026     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00027     HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00028     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00029     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00030     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00031     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00032     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00033     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00034 
00035 
00036 ------------------------------------------------------------------------
00037 
00038     Portions of this software are protected under the following patents:
00039         U.S. Patent 5,812,852
00040         U.S. Patent 6,792,599
00041         U.S. Patent 7,069,556
00042         U.S. Patent 7,328,433
00043         U.S. Patent 7,500,242
00044 
00045 </copyright> */
00046 
00047 #include "omp.h"        /* extern "C" declarations of user-visible routines */
00048 #include "kmp.h"
00049 #include "kmp_i18n.h"
00050 #include "kmp_error.h"
00051 
00052 #include "ompt-internal.h"
00053 #include "ompt-specific.h"
00054 
00055 #define MAX_MESSAGE 512
00056 
00057 /* ------------------------------------------------------------------------ */
00058 /* ------------------------------------------------------------------------ */
00059 
00060 /*  flags will be used in future, e.g., to implement */
00061 /*  openmp_strict library restrictions               */
00062 
00072 void
00073 __kmpc_begin(ident_t *loc, kmp_int32 flags)
00074 {
00075     // By default __kmp_ignore_mppbeg() returns TRUE.
00076     if (__kmp_ignore_mppbeg() == FALSE) {
00077         __kmp_internal_begin();
00078 
00079         KC_TRACE( 10, ("__kmpc_begin: called\n" ) );
00080     }
00081 #if OMPT_SUPPORT
00082     ompt_init();
00083 #endif
00084 }
00085 
00093 void
00094 __kmpc_end(ident_t *loc)
00095 {
00096     // By default, __kmp_ignore_mppend() returns TRUE, which makes this __kmpc_end() call a no-op.
00097     // However, this can be overridden with the KMP_IGNORE_MPPEND environment variable.
00098     // If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() returns FALSE and __kmpc_end()
00099     // will unregister this root (which can cause library shutdown).
00100     if (__kmp_ignore_mppend() == FALSE) {
00101         KC_TRACE( 10, ("__kmpc_end: called\n" ) );
00102         KA_TRACE( 30, ("__kmpc_end\n" ));
00103 
00104         __kmp_internal_end_thread( -1 );
00105     }
00106 }
00107 
00127 kmp_int32
00128 __kmpc_global_thread_num(ident_t *loc)
00129 {
00130     kmp_int32 gtid = __kmp_entry_gtid();
00131 
00132     KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) );
00133 
00134     return gtid;
00135 }
00136 
00150 kmp_int32
00151 __kmpc_global_num_threads(ident_t *loc)
00152 {
00153     KC_TRACE( 10, ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_nth ) );
00154 
00155     return TCR_4(__kmp_nth);
00156 }
00157 
00164 kmp_int32
00165 __kmpc_bound_thread_num(ident_t *loc)
00166 {
00167     KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) );
00168     return __kmp_tid_from_gtid( __kmp_entry_gtid() );
00169 }
00170 
00176 kmp_int32
00177 __kmpc_bound_num_threads(ident_t *loc)
00178 {
00179     KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) );
00180 
00181     return __kmp_entry_thread() -> th.th_team -> t.t_nproc;
00182 }
00183 
00190 kmp_int32
00191 __kmpc_ok_to_fork(ident_t *loc)
00192 {
00193 #ifndef KMP_DEBUG
00194 
00195     return TRUE;
00196 
00197 #else
00198 
00199     const char *semi2;
00200     const char *semi3;
00201     int line_no;
00202 
00203     if (__kmp_par_range == 0) {
00204         return TRUE;
00205     }
00206     semi2 = loc->psource;
00207     if (semi2 == NULL) {
00208         return TRUE;
00209     }
00210     semi2 = strchr(semi2, ';');
00211     if (semi2 == NULL) {
00212         return TRUE;
00213     }
00214     semi2 = strchr(semi2 + 1, ';');
00215     if (semi2 == NULL) {
00216         return TRUE;
00217     }
00218     if (__kmp_par_range_filename[0]) {
00219         const char *name = semi2 - 1;
00220         while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
00221             name--;
00222         }
00223         if ((*name == '/') || (*name == ';')) {
00224             name++;
00225         }
00226         if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
00227             return __kmp_par_range < 0;
00228         }
00229     }
00230     semi3 = strchr(semi2 + 1, ';');
00231     if (__kmp_par_range_routine[0]) {
00232         if ((semi3 != NULL) && (semi3 > semi2)
00233           && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
00234             return __kmp_par_range < 0;
00235         }
00236     }
00237     if (sscanf(semi3 + 1, "%d", &line_no) == 1) {
00238         if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
00239             return __kmp_par_range > 0;
00240         }
00241         return __kmp_par_range < 0;
00242     }
00243     return TRUE;
00244 
00245 #endif /* KMP_DEBUG */
00246 
00247 }
00248 
00254 kmp_int32
00255 __kmpc_in_parallel( ident_t *loc )
00256 {
00257     return __kmp_entry_thread() -> th.th_root -> r.r_active;
00258 }
00259 
00269 void
00270 __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads )
00271 {
00272     KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
00273       global_tid, num_threads ) );
00274 
00275     __kmp_push_num_threads( loc, global_tid, num_threads );
00276 }
00277 
00278 void
00279 __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid )
00280 {
00281     KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) );
00282 
00283     /* the num_threads are automatically popped */
00284 }
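
A minimal user-level sketch of where this entry point comes into play, assuming an
OpenMP compiler that targets this RTL: a num_threads clause is typically lowered to a
__kmpc_push_num_threads() call issued just before the matching __kmpc_fork_call(), and
the pushed value applies only to that one parallel region.

    #include <omp.h>
    #include <stdio.h>

    int main(void)
    {
        /* num_threads(4) is expected to become
           __kmpc_push_num_threads(&loc, gtid, 4) followed by the fork. */
        #pragma omp parallel num_threads(4)
        {
            printf("hello from T#%d of %d\n",
                   omp_get_thread_num(), omp_get_num_threads());
        }
        return 0;
    }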
00285 
00286 
00287 #if OMP_40_ENABLED
00288 
00289 void
00290 __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind )
00291 {
00292     KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n",
00293       global_tid, proc_bind ) );
00294 
00295     __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind );
00296 }
00297 
00298 #endif /* OMP_40_ENABLED */
00299 
00300 
00310 void
00311 __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
00312 {
00313   int         gtid = __kmp_entry_gtid();
00314   // maybe saving thr_state would be enough here
00315   {
00316     va_list     ap;
00317     va_start(   ap, microtask );
00318 
00319 #if OMPT_SUPPORT
00320     kmp_info_t *master_th = __kmp_threads[ gtid ];
00321     kmp_team_t *parent_team = master_th->th.th_team;
00322     int tid = __kmp_tid_from_gtid( gtid );
00323     parent_team->t.t_implicit_task_taskdata[tid].
00324       ompt_task_info.frame.reenter_runtime_frame = 
00325       __builtin_frame_address(0); 
00326 #endif
00327 
00328     __kmp_fork_call( loc, gtid, TRUE,
00329             argc,
00330             VOLATILE_CAST(microtask_t) microtask,
00331             VOLATILE_CAST(launch_t)    __kmp_invoke_task_func,
00332 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
00333 #if KMP_ARCH_X86_64 && KMP_OS_LINUX
00334             &ap
00335 #else
00336             ap
00337 #endif
00338             );
00339 
00340     __kmp_join_call( loc, gtid );
00341 
00342     va_end( ap );
00343 
00344 #if OMPT_SUPPORT
00345     parent_team->t.t_implicit_task_taskdata[tid].
00346       ompt_task_info.frame.reenter_runtime_frame = 0;
00347 #endif
00348   }
00349 }
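
A rough sketch of the calling convention, assuming the ident_t/kmpc_micro declarations
from kmp.h; the names outlined_body, loc and run_parallel are illustrative only, not the
exact code any particular compiler emits. The compiler outlines the region body into a
microtask that receives the global and bound thread ids plus the shared arguments, and
passes it to __kmpc_fork_call() together with the argument count.

    #include <stdio.h>
    #include "kmp.h"   /* internal RTL declarations (ident_t, kmpc_micro, ...) */

    /* outlined body of "#pragma omp parallel": executed by every team thread */
    static void outlined_body( kmp_int32 *gtid, kmp_int32 *bound_tid, int *n )
    {
        printf( "T#%d sees n=%d\n", (int)*gtid, *n );
    }

    void run_parallel( int n )
    {
        static ident_t loc = { 0, KMP_IDENT_KMPC, 0, 0, ";unknown;run_parallel;0;0;;" };
        /* argc == 1: one shared argument (&n) is forwarded to each invocation */
        __kmpc_fork_call( &loc, 1, (kmpc_micro)outlined_body, &n );
    }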
00350 
00351 
00352 //
00353 // I don't think this function should ever have been exported.
00354 // The __kmpc_ prefix was misapplied.  I'm fairly certain that no generated
00355 // openmp code ever called it, but it's been exported from the RTL for so
00356 // long that I'm afraid to remove the definition.
00357 //
00358 int
00359 __kmpc_invoke_task_func( int gtid )
00360 {
00361     return __kmp_invoke_task_func( gtid );
00362 }
00363 
00371 void
00372 __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
00373 {
00374     kmp_info_t *this_thr;
00375     kmp_team_t *serial_team;
00376 
00377     KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );
00378 
00379     /* skip all this code for autopar serialized loops since it results in
00380        unacceptable overhead */
00381     if( loc == NULL || !(loc->flags & KMP_IDENT_AUTOPAR ) )
00382     {
00383 
00384         if( ! TCR_4( __kmp_init_parallel ) )
00385             __kmp_parallel_initialize();
00386 
00387         this_thr     = __kmp_threads[ global_tid ];
00388         serial_team  = this_thr -> th.th_serial_team;
00389 
00390         /* utilize the serialized team held by this thread */
00391         KMP_DEBUG_ASSERT( serial_team );
00392         KMP_MB();
00393 
00394 #if OMP_30_ENABLED
00395         if ( __kmp_tasking_mode != tskm_immediate_exec ) {
00396             KMP_DEBUG_ASSERT( this_thr -> th.th_task_team == this_thr -> th.th_team -> t.t_task_team );
00397             KMP_DEBUG_ASSERT( serial_team -> t.t_task_team == NULL );
00398             KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
00399                 global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
00400             this_thr -> th.th_task_team = NULL;
00401         }
00402 #endif // OMP_30_ENABLED
00403 
00404 #if OMP_40_ENABLED
00405         kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
00406         if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
00407              proc_bind = proc_bind_false;
00408         }
00409         else if ( proc_bind == proc_bind_default ) {
00410             //
00411             // No proc_bind clause was specified, so use the current value
00412             // of proc-bind-var for this parallel region.
00413             //
00414             proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
00415         }
00416         //
00417         // Reset for next parallel region
00418         //
00419         this_thr->th.th_set_proc_bind = proc_bind_default;
00420 #endif /* OMP_40_ENABLED */
00421 
00422         if( this_thr -> th.th_team != serial_team ) {
00423 #if OMP_30_ENABLED
00424             // Nested level will be an index in the nested nthreads array
00425             int level = this_thr->th.th_team->t.t_level;
00426 #endif
00427             if( serial_team -> t.t_serialized ) {
00428                 /* this serial team was already used
00429                  * TODO increase performance by making these locks more specific */
00430                 kmp_team_t *new_team;
00431                 int tid = this_thr->th.th_info.ds.ds_tid;
00432 
00433                 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
00434 
00435 #if OMPT_SUPPORT
00436                 ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(this_thr, global_tid);
00437 #endif
00438                 new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
00439 #if OMPT_SUPPORT
00440                                                ompt_parallel_id,
00441 #endif
00442 #if OMP_40_ENABLED
00443                                                proc_bind,
00444 #endif
00445 #if OMP_30_ENABLED
00446                                                & this_thr->th.th_current_task->td_icvs,
00447 #else
00448                                                this_thr->th.th_team->t.t_set_nproc[tid],
00449                                                this_thr->th.th_team->t.t_set_dynamic[tid],
00450                                                this_thr->th.th_team->t.t_set_nested[tid],
00451                                                this_thr->th.th_team->t.t_set_blocktime[tid],
00452                                                this_thr->th.th_team->t.t_set_bt_intervals[tid],
00453                                                this_thr->th.th_team->t.t_set_bt_set[tid],
00454 #endif // OMP_30_ENABLED
00455                                                0);
00456                 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
00457                 KMP_ASSERT( new_team );
00458 
00459                 /* setup new serialized team and install it */
00460                 new_team -> t.t_threads[0] = this_thr;
00461                 new_team -> t.t_parent = this_thr -> th.th_team;
00462                 serial_team = new_team;
00463                 this_thr -> th.th_serial_team = serial_team;
00464 
00465                 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
00466                     global_tid, serial_team ) );
00467 
00468 
00469                 /* TODO the above breaks the requirement that if we run out of
00470                  * resources, then we can still guarantee that serialized teams
00471                  * are ok, since we may need to allocate a new one */
00472             } else {
00473                 KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
00474                     global_tid, serial_team ) );
00475             }
00476 
00477             /* we have to initialize this serial team */
00478             KMP_DEBUG_ASSERT( serial_team->t.t_threads );
00479             KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
00480             KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
00481             serial_team -> t.t_ident         = loc;
00482             serial_team -> t.t_serialized    = 1;
00483             serial_team -> t.t_nproc         = 1;
00484             serial_team -> t.t_parent        = this_thr->th.th_team;
00485 #if OMP_30_ENABLED
00486             serial_team -> t.t_sched         = this_thr->th.th_team->t.t_sched;
00487 #endif // OMP_30_ENABLED
00488             this_thr -> th.th_team           = serial_team;
00489             serial_team -> t.t_master_tid    = this_thr->th.th_info.ds.ds_tid;
00490 
00491 #if OMP_30_ENABLED
00492             KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d curtask=%p\n",
00493                 global_tid, this_thr->th.th_current_task ) );
00494             KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
00495             this_thr->th.th_current_task->td_flags.executing = 0;
00496 
00497             __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );
00498 
00499             /* TODO: GEH: do the ICVs work for nested serialized teams?  Don't we need an implicit task for
00500                           each serialized task represented by team->t.t_serialized? */
00501             copy_icvs(
00502                 & this_thr->th.th_current_task->td_icvs,
00503                 & this_thr->th.th_current_task->td_parent->td_icvs );
00504 
00505             // Thread value exists in the nested nthreads array for the next nested level
00506             if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
00507                 this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
00508             }
00509 
00510 #if OMP_40_ENABLED
00511             if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
00512                 this_thr->th.th_current_task->td_icvs.proc_bind
00513                   = __kmp_nested_proc_bind.bind_types[ level + 1 ];
00514             }
00515 #endif /* OMP_40_ENABLED */
00516 
00517 #else /* pre-3.0 icv's */
00518             serial_team -> t.t_set_nproc[0]  = serial_team->t.t_parent->
00519                                                t.t_set_nproc[serial_team->
00520                                                t.t_master_tid];
00521             serial_team -> t.t_set_dynamic[0] = serial_team->t.t_parent->
00522                                                t.t_set_dynamic[serial_team->
00523                                                t.t_master_tid];
00524             serial_team -> t.t_set_nested[0] = serial_team->t.t_parent->
00525                                                t.t_set_nested[serial_team->
00526                                                t.t_master_tid];
00527             serial_team -> t.t_set_blocktime[0]  = serial_team->t.t_parent->
00528                                                t.t_set_blocktime[serial_team->
00529                                                t.t_master_tid];
00530             serial_team -> t.t_set_bt_intervals[0] = serial_team->t.t_parent->
00531                                                t.t_set_bt_intervals[serial_team->
00532                                                t.t_master_tid];
00533             serial_team -> t.t_set_bt_set[0] = serial_team->t.t_parent->
00534                                                t.t_set_bt_set[serial_team->
00535                                                t.t_master_tid];
00536 #endif // OMP_30_ENABLED
00537             this_thr -> th.th_info.ds.ds_tid = 0;
00538 
00539             /* set thread cache values */
00540             this_thr -> th.th_team_nproc     = 1;
00541             this_thr -> th.th_team_master    = this_thr;
00542             this_thr -> th.th_team_serialized = 1;
00543 
00544 #if OMP_30_ENABLED
00545             serial_team -> t.t_level        = serial_team -> t.t_parent -> t.t_level + 1;
00546             serial_team -> t.t_active_level = serial_team -> t.t_parent -> t.t_active_level;
00547 #endif // OMP_30_ENABLED
00548 
00549 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
00550             if ( __kmp_inherit_fp_control ) {
00551                 __kmp_store_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
00552                 __kmp_store_mxcsr( &serial_team->t.t_mxcsr );
00553                 serial_team->t.t_mxcsr &= KMP_X86_MXCSR_MASK;
00554                 serial_team->t.t_fp_control_saved = TRUE;
00555             } else {
00556                 serial_team->t.t_fp_control_saved = FALSE;
00557             }
00558 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
00559             /* check if we need to allocate dispatch buffers stack */
00560             KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
00561             if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
00562                 serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
00563                         __kmp_allocate( sizeof( dispatch_private_info_t ) );
00564             }
00565             this_thr -> th.th_dispatch = serial_team->t.t_dispatch;
00566 
00567             KMP_MB();
00568 
00569         } else {
00570             /* this serialized team is already being used,
00571              * that's fine, just add another nested level */
00572             KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
00573             KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
00574             KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );
00575             ++ serial_team -> t.t_serialized;
00576             this_thr -> th.th_team_serialized = serial_team -> t.t_serialized;
00577 
00578 #if OMP_30_ENABLED
00579             // Nested level will be an index in the nested nthreads array
00580             int level = this_thr->th.th_team->t.t_level;
00581             // Thread value exists in the nested nthreads array for the next nested level
00582             if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
00583                 this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
00584             }
00585             serial_team -> t.t_level++;
00586             KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
00587                  global_tid, serial_team, serial_team -> t.t_level ) );
00588 #else
00589             KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing team %p for nested serialized parallel region\n",
00590                 global_tid, serial_team ) );
00591 #endif // OMP_30_ENABLED
00592 
00593             /* allocate/push dispatch buffers stack */
00594             KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
00595             {
00596                 dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
00597                         __kmp_allocate( sizeof( dispatch_private_info_t ) );
00598                 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
00599                 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
00600             }
00601             this_thr -> th.th_dispatch = serial_team->t.t_dispatch;
00602 
00603             KMP_MB();
00604         }
00605 
00606         if ( __kmp_env_consistency_check )
00607             __kmp_push_parallel( global_tid, NULL );
00608     }
00609 }
00610 
00618 void
00619 __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
00620 {
00621     kmp_internal_control_t *top;
00622     kmp_info_t *this_thr;
00623     kmp_team_t *serial_team;
00624 
00625     KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) );
00626 
00627     /* skip all this code for autopar serialized loops since it results in
00628        unacceptable overhead */
00629     if( loc == NULL || !(loc->flags & KMP_IDENT_AUTOPAR ) )
00630     {
00631 
00632         if( ! TCR_4( __kmp_init_parallel ) )
00633             __kmp_parallel_initialize();
00634 
00635         this_thr    = __kmp_threads[ global_tid ];
00636         serial_team = this_thr->th.th_serial_team;
00637 
00638         KMP_MB();
00639         KMP_DEBUG_ASSERT( serial_team );
00640         KMP_ASSERT(       serial_team -> t.t_serialized );
00641         KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team );
00642         KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team );
00643         KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
00644         KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );
00645 
00646         /* If necessary, pop the internal control stack values and replace the team values */
00647         top = serial_team -> t.t_control_stack_top;
00648         if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) {
00649 #if OMP_30_ENABLED
00650                 copy_icvs(
00651                     &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs,
00652                     top );
00653 #else
00654                 serial_team -> t.t_set_nproc[0]   = top -> nproc;
00655                 serial_team -> t.t_set_dynamic[0] = top -> dynamic;
00656                 serial_team -> t.t_set_nested[0]  = top -> nested;
00657                 serial_team -> t.t_set_blocktime[0]   = top -> blocktime;
00658                 serial_team -> t.t_set_bt_intervals[0] = top -> bt_intervals;
00659                 serial_team -> t.t_set_bt_set[0]  = top -> bt_set;
00660 #endif // OMP_30_ENABLED
00661             serial_team -> t.t_control_stack_top = top -> next;
00662             __kmp_free(top);
00663         }
00664 
00665 #if OMP_30_ENABLED
00666         //if( serial_team -> t.t_serialized > 1 )
00667         serial_team -> t.t_level--;
00668 #endif // OMP_30_ENABLED
00669 
00670         /* pop dispatch buffers stack */
00671         KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
00672         {
00673             dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
00674             serial_team->t.t_dispatch->th_disp_buffer =
00675                 serial_team->t.t_dispatch->th_disp_buffer->next;
00676             __kmp_free( disp_buffer );
00677         }
00678 
00679         -- serial_team -> t.t_serialized;
00680         if ( serial_team -> t.t_serialized == 0 ) {
00681 
00682             /* return to the parallel section */
00683 
00684 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
00685             if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) {
00686                 __kmp_clear_x87_fpu_status_word();
00687                 __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
00688                 __kmp_load_mxcsr( &serial_team->t.t_mxcsr );
00689             }
00690 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
00691 
00692             this_thr -> th.th_team           = serial_team -> t.t_parent;
00693             this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid;
00694 
00695             /* restore values cached in the thread */
00696             this_thr -> th.th_team_nproc     = serial_team -> t.t_parent -> t.t_nproc;          /*  JPH */
00697             this_thr -> th.th_team_master    = serial_team -> t.t_parent -> t.t_threads[0];     /* JPH */
00698             this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized;
00699 
00700             /* TODO the below shouldn't need to be adjusted for serialized teams */
00701             this_thr -> th.th_dispatch       = & this_thr -> th.th_team ->
00702                          t.t_dispatch[ serial_team -> t.t_master_tid ];
00703 
00704 #if OMP_30_ENABLED
00705             __kmp_pop_current_task_from_thread( this_thr );
00706 
00707             KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 );
00708             this_thr -> th.th_current_task -> td_flags.executing = 1;
00709 
00710             if ( __kmp_tasking_mode != tskm_immediate_exec ) {
00711                 //
00712                 // Copy the task team from the new child / old parent team
00713                 // to the thread.  If non-NULL, copy the state flag also.
00714                 //
00715                 if ( ( this_thr -> th.th_task_team = this_thr -> th.th_team -> t.t_task_team ) != NULL ) {
00716                     this_thr -> th.th_task_state = this_thr -> th.th_task_team -> tt.tt_state;
00717                 }
00718                 KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
00719                   global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
00720             }
00721 #endif // OMP_30_ENABLED
00722 
00723         }
00724         else {
00725 
00726 #if OMP_30_ENABLED
00727             if ( __kmp_tasking_mode != tskm_immediate_exec ) {
00728                 KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n",
00729                   global_tid, serial_team, serial_team -> t.t_serialized ) );
00730             }
00731 #endif // OMP_30_ENABLED
00732 
00733         }
00734 
00735     if ( __kmp_env_consistency_check )
00736         __kmp_pop_parallel( global_tid, NULL );
00737     }
00738 }
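
A small user-level sketch, assuming an OpenMP compiler that targets this RTL: when a
parallel region is known to run with a single thread (for example because its if()
clause is false), the outlined body is typically bracketed with
__kmpc_serialized_parallel()/__kmpc_end_serialized_parallel() instead of a real
fork/join.

    #include <omp.h>
    #include <stdio.h>

    int main(void)
    {
        int run_in_parallel = 0;
        /* if(0): the region executes serialized via the entry points above */
        #pragma omp parallel if(run_in_parallel)
        {
            printf("team size inside the region: %d\n", omp_get_num_threads());
        }
        return 0;
    }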
00739 
00752 void
00753 __kmpc_flush(ident_t *loc, ...)
00754 {
00755     KC_TRACE( 10, ("__kmpc_flush: called\n" ) );
00756 
00757     /* need an explicit __mf() here since the library uses volatile instead */
00758     KMP_MB();       /* Flush all pending memory write invalidates.  */
00759 
00760     // This is not an OMP 3.0 feature.
00761     // The OMP_30_ENABLED macro is used here only to keep this change out of the 10.1 release;
00762     // the change will go to the mainline first.
00763     #if OMP_30_ENABLED
00764         #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
00765             #if KMP_MIC
00766                 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
00767                 // We shouldn't need it, though, since the ABI rules require that
00768                 // * If the compiler generates NGO stores it also generates the fence
00769                 // * If users hand-code NGO stores they should insert the fence
00770                 // therefore no incomplete unordered stores should be visible.
00771             #else
00772                 // C74404
00773                 // This addresses non-temporal store instructions (sfence needed).
00774                 // The clflush instruction is also addressed (mfence needed).
00775                 // The non-temporal load instruction movntdqa should probably be addressed as well.
00776                 // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
00777                 if ( ! __kmp_cpuinfo.initialized ) {
00778                     __kmp_query_cpuid( & __kmp_cpuinfo );
00779                 }; // if
00780                 if ( ! __kmp_cpuinfo.sse2 ) {
00781                     // CPU cannot execute SSE2 instructions.
00782                 } else {
00783                     #if defined( __GNUC__ ) && !defined( __INTEL_COMPILER )
00784                     __sync_synchronize();
00785                     #else
00786                     _mm_mfence();
00787                     #endif // __GNUC__
00788                 }; // if
00789             #endif // KMP_MIC
00790         #else
00791             #error Unknown or unsupported architecture
00792         #endif
00793     #endif // OMP_30_ENABLED
00794 
00795 }
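
A minimal user-level sketch, assuming an OpenMP compiler that targets this RTL:
"#pragma omp flush" is typically lowered to a call to __kmpc_flush(&loc), which on x86
amounts to a full memory fence as described above. (The explicit flush next to the
barrier below is redundant, since a barrier implies a flush; it is shown only to
illustrate the lowering.)

    #include <omp.h>
    #include <stdio.h>

    int shared_value = 0;

    int main(void)
    {
        #pragma omp parallel num_threads(2)
        {
            if (omp_get_thread_num() == 0)
                shared_value = 42;
            #pragma omp flush      /* expected to become __kmpc_flush(&loc) */
            #pragma omp barrier
            printf("T#%d sees shared_value=%d\n",
                   omp_get_thread_num(), shared_value);
        }
        return 0;
    }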
00796 
00797 /* -------------------------------------------------------------------------- */
00798 
00799 /* -------------------------------------------------------------------------- */
00800 
00808 void
00809 __kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
00810 {
00811     int explicit_barrier_flag;
00812     KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );
00813 
00814     if (! TCR_4(__kmp_init_parallel))
00815         __kmp_parallel_initialize();
00816 
00817     if ( __kmp_env_consistency_check ) {
00818         if ( loc == 0 ) {
00819             KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
00820         }; // if
00821 
00822         __kmp_check_barrier( global_tid, ct_barrier, loc );
00823     }
00824 
00825     __kmp_threads[ global_tid ]->th.th_ident = loc;
00826     // TODO: explicit barrier_wait_id:
00827     //   this function is called when a 'barrier' directive is present or at the
00828     //   implicit barrier at the end of a worksharing construct.
00829     // 1) better to add a per-thread barrier counter to a thread data structure
00830     // 2) set it to 0 when a new team is created
00831     // 3) no sync is required
00832 
00833     __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
00834 }
00835 
00836 /* The BARRIER for a MASTER section is always explicit   */
00843 kmp_int32
00844 __kmpc_master(ident_t *loc, kmp_int32 global_tid)
00845 {
00846     int status = 0;
00847 
00848     KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) );
00849 
00850     if( ! TCR_4( __kmp_init_parallel ) )
00851         __kmp_parallel_initialize();
00852 
00853     if( KMP_MASTER_GTID( global_tid ))
00854         status = 1;
00855 
00856 #if OMPT_SUPPORT
00857    if (status) {
00858      kmp_info_t  *this_thr        = __kmp_threads[ global_tid ];
00859      kmp_team_t  *team            = this_thr -> th.th_team;
00860      if ((ompt_status == ompt_status_track_callback)) {
00861        if (ompt_callbacks.ompt_callback(ompt_event_master_begin)) {
00862          int  tid = __kmp_tid_from_gtid( global_tid );
00863          ompt_callbacks.ompt_callback(ompt_event_master_begin)(
00864            team->t.ompt_team_info.parallel_id,
00865            team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
00866        }
00867      }
00868    }
00869 #endif
00870 
00871     if ( __kmp_env_consistency_check ) {
00872         if (status)
00873             __kmp_push_sync( global_tid, ct_master, loc, NULL );
00874         else
00875             __kmp_check_sync( global_tid, ct_master, loc, NULL );
00876     }
00877 
00878     return status;
00879 }
00880 
00889 void
00890 __kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
00891 {
00892     KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );
00893 
00894     KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
00895 
00896 #if OMPT_SUPPORT
00897     kmp_info_t  *this_thr        = __kmp_threads[ global_tid ];
00898     kmp_team_t  *team            = this_thr -> th.th_team;
00899     if ((ompt_status == ompt_status_track_callback)) {
00900       if (ompt_callbacks.ompt_callback(ompt_event_master_end)) {
00901         int  tid = __kmp_tid_from_gtid( global_tid ); 
00902         ompt_callbacks.ompt_callback(ompt_event_master_end)
00903       (team->t.ompt_team_info.parallel_id,
00904        team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
00905       }
00906     }
00907 #endif
00908 
00909     if ( __kmp_env_consistency_check ) {
00910         if( global_tid < 0 )
00911             KMP_WARNING( ThreadIdentInvalid );
00912 
00913         if( KMP_MASTER_GTID( global_tid ))
00914             __kmp_pop_sync( global_tid, ct_master, loc );
00915     }
00916 }
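
A short user-level sketch, assuming an OpenMP compiler that targets this RTL: the
master construct is typically lowered to a test of __kmpc_master()'s return value, with
__kmpc_end_master() called only on the thread that entered the block; there is no
implied barrier.

    #include <omp.h>
    #include <stdio.h>

    int main(void)
    {
        #pragma omp parallel
        {
            /* expected shape of the generated code:
                 if (__kmpc_master(&loc, gtid)) {
                     ...body...;
                     __kmpc_end_master(&loc, gtid);
                 }                                   */
            #pragma omp master
            {
                printf("only the master thread prints: T#%d\n",
                       omp_get_thread_num());
            }
        }
        return 0;
    }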
00917 
00925 void
00926 __kmpc_ordered( ident_t * loc, kmp_int32 gtid )
00927 {
00928     int cid = 0;
00929     kmp_info_t *th;
00930     KMP_DEBUG_ASSERT( __kmp_init_serial );
00931 
00932     KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid ));
00933 
00934     if (! TCR_4(__kmp_init_parallel))
00935         __kmp_parallel_initialize();
00936 
00937 
00938     th = __kmp_threads[ gtid ];
00939 
00940 #if OMPT_SUPPORT
00941     if (ompt_status & ompt_status_track) {
00942 
00943       /* OMPT state update */
00944       th->th.ompt_thread_info.wait_id = (uint64_t) loc;
00945       th->th.ompt_thread_info.state = ompt_state_wait_ordered;
00946 
00947 
00948       /* OMPT event callback */
00949       if (ompt_status & ompt_status_track_callback) {
00950     if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) {
00951       ompt_callbacks.ompt_callback(ompt_event_wait_ordered)
00952         (th->th.ompt_thread_info.wait_id);
00953     }
00954       }
00955 
00956     }
00957 #endif // OMPT_SUPPORT
00958 
00959     if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
00960         (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );
00961     else
00962         __kmp_parallel_deo( & gtid, & cid, loc );
00963 
00964 
00965 #if OMPT_SUPPORT
00966     if (ompt_status & ompt_status_track) {
00967 
00968       /* OMPT state update */
00969       th->th.ompt_thread_info.state = ompt_state_work_parallel;
00970       th->th.ompt_thread_info.wait_id = 0;
00971 
00972       /* OMPT event callback */
00973       if ((ompt_status & ompt_status_track_callback) &&
00974       ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) {
00975     ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)
00976       (th->th.ompt_thread_info.wait_id);
00977       }
00978 
00979     }
00980 #endif // OMPT_SUPPORT
00981 }
00982 
00990 void
00991 __kmpc_end_ordered( ident_t * loc, kmp_int32 gtid )
00992 {
00993     int cid = 0;
00994     kmp_info_t *th;
00995 
00996     KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) );
00997 
00998 
00999     th = __kmp_threads[ gtid ];
01000 
01001     if ( th -> th.th_dispatch -> th_dxo_fcn != 0 )
01002         (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );
01003     else
01004         __kmp_parallel_dxo( & gtid, & cid, loc );
01005 
01006 #if OMPT_SUPPORT
01007     if ((ompt_status == ompt_status_track_callback) &&
01008     ompt_callbacks.ompt_callback(ompt_event_release_ordered)) {
01009       ompt_callbacks.ompt_callback(ompt_event_release_ordered)
01010     (th->th.ompt_thread_info.wait_id);
01011     }
01012 #endif // OMPT_SUPPORT
01013 }
01014 
01015 inline void
01016 __kmp_static_yield( int arg ) { // AC: needed in macro __kmp_acquire_user_lock_with_checks
01017     __kmp_yield( arg );
01018 }
01019 
01020 static kmp_user_lock_p
01021 __kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )
01022 {
01023     kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
01024 
01025     //
01026     // Because of the double-check, the following load
01027     // doesn't need to be volatile.
01028     //
01029     kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
01030 
01031     if ( lck == NULL ) {
01032         void * idx;
01033 
01034         // Allocate & initialize the lock.
01035         // Remember allocated locks in a table so they can be freed in __kmp_cleanup()
01036         lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section );
01037         __kmp_init_user_lock_with_checks( lck );
01038         __kmp_set_user_lock_location( lck, loc );
01039 
01040         //
01041         // Use a cmpxchg instruction to slam the start of the critical
01042         // section with the lock pointer.  If another thread beat us
01043         // to it, deallocate the lock, and use the lock that the other
01044         // thread allocated.
01045         //
01046         int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck );
01047 
01048         if ( status == 0 ) {
01049             // Deallocate the lock and reload the value.
01050             __kmp_destroy_user_lock_with_checks( lck );
01051             __kmp_user_lock_free( &idx, gtid, lck );
01052             lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
01053             KMP_DEBUG_ASSERT( lck != NULL );
01054         }
01055     }
01056     return lck;
01057 }
01058 
01069 void
01070 __kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
01071 
01072     kmp_user_lock_p lck;
01073 
01074     KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
01075 
01076     //TODO: add THR_OVHD_STATE
01077 
01078     KMP_CHECK_USER_LOCK_INIT();
01079 
01080     if ( ( __kmp_user_lock_kind == lk_tas )
01081       && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
01082         lck = (kmp_user_lock_p)crit;
01083     }
01084 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
01085     else if ( ( __kmp_user_lock_kind == lk_futex )
01086       && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
01087         lck = (kmp_user_lock_p)crit;
01088     }
01089 #endif
01090     else { // ticket, queuing or drdpa
01091         lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
01092     }
01093     
01094     if ( __kmp_env_consistency_check )
01095         __kmp_push_sync( global_tid, ct_critical, loc, lck );
01096 
01097     /* since the critical directive binds to all threads, not just
01098      * the current team we have to check this even if we are in a
01099      * serialized team */
01100     /* also, even if we are the uber thread, we still have to acquire the lock,
01101      * as we have to contend with sibling threads */
01102 
01103     // The value of 'crit' serves as the critical_id of the critical section directive.
01104 
01105     __kmp_acquire_user_lock_with_checks( lck, global_tid );
01106 
01107 
01108     KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
01109 } // __kmpc_critical
01110 
01120 void
01121 __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
01122 {
01123     kmp_user_lock_p lck;
01124 
01125     KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid ));
01126 
01127     if ( ( __kmp_user_lock_kind == lk_tas )
01128       && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
01129         lck = (kmp_user_lock_p)crit;
01130     }
01131 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
01132     else if ( ( __kmp_user_lock_kind == lk_futex )
01133       && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
01134         lck = (kmp_user_lock_p)crit;
01135     }
01136 #endif
01137     else { // ticket, queuing or drdpa
01138         lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit));
01139     }
01140 
01141     KMP_ASSERT(lck != NULL);
01142 
01143     if ( __kmp_env_consistency_check )
01144         __kmp_pop_sync( global_tid, ct_critical, loc );
01145 
01146     // The value of 'crit' serves as the critical_id of the critical section directive.
01147 
01148     __kmp_release_user_lock_with_checks( lck, global_tid );
01149 
01150 #if OMPT_SUPPORT
01151     if ((ompt_status == ompt_status_track_callback) &&
01152     (ompt_callbacks.ompt_callback(ompt_event_release_critical))) {
01153       ompt_callbacks.ompt_callback(ompt_event_release_critical)((uint64_t) lck);
01154     }
01155 #endif
01156 
01157     KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
01158 }
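
A short user-level sketch, assuming an OpenMP compiler that targets this RTL: for each
critical name the compiler is expected to emit a zero-initialized static
kmp_critical_name object and bracket the protected body with
__kmpc_critical(&loc, gtid, &name_var) and __kmpc_end_critical(&loc, gtid, &name_var),
letting __kmp_get_critical_section_ptr() above lazily install the actual lock.

    #include <omp.h>
    #include <stdio.h>

    int counter = 0;

    int main(void)
    {
        #pragma omp parallel
        {
            #pragma omp critical(update_counter)
            {
                counter++;     /* serialized across all threads of the program */
            }
        }
        printf("counter=%d\n", counter);
        return 0;
    }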
01159 
01168 kmp_int32
01169 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid)
01170 {
01171     int status;
01172 
01173     KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) );
01174 
01175     if (! TCR_4(__kmp_init_parallel))
01176         __kmp_parallel_initialize();
01177 
01178     if ( __kmp_env_consistency_check )
01179         __kmp_check_barrier( global_tid, ct_barrier, loc );
01180 
01181     status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL );
01182 
01183     return (status != 0) ? 0 : 1;
01184 }
01185 
01195 void
01196 __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid)
01197 {
01198     KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid ));
01199 
01200     __kmp_end_split_barrier ( bs_plain_barrier, global_tid );
01201 }
01202 
01213 kmp_int32
01214 __kmpc_barrier_master_nowait( ident_t * loc, kmp_int32 global_tid )
01215 {
01216     kmp_int32 ret;
01217 
01218     KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid ));
01219 
01220     if (! TCR_4(__kmp_init_parallel))
01221         __kmp_parallel_initialize();
01222 
01223     if ( __kmp_env_consistency_check ) {
01224         if ( loc == 0 ) {
01225             KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
01226         }
01227         __kmp_check_barrier( global_tid, ct_barrier, loc );
01228     }
01229 
01230     __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
01231 
01232     ret = __kmpc_master (loc, global_tid);
01233 
01234     if ( __kmp_env_consistency_check ) {
01235         /*  there is no __kmpc_end_master call, so the (stats)  */
01236         /*  actions of __kmpc_end_master are done here          */
01237 
01238         if ( global_tid < 0 ) {
01239             KMP_WARNING( ThreadIdentInvalid );
01240         }
01241         if (ret) {
01242             /* only one thread should do the pop since only */
01243             /* one did the push (see __kmpc_master())       */
01244 
01245             __kmp_pop_sync( global_tid, ct_master, loc );
01246         }
01247     }
01248 
01249     return (ret);
01250 }
01251 
01252 /* The BARRIER for a SINGLE process section is always explicit   */
01264 kmp_int32
01265 __kmpc_single(ident_t *loc, kmp_int32 global_tid)
01266 {
01267     kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );
01268 #if OMPT_SUPPORT
01269     kmp_info_t  *this_thr        = __kmp_threads[ global_tid ];
01270     kmp_team_t  *team            = this_thr -> th.th_team;
01271     int  tid = __kmp_tid_from_gtid( global_tid );
01272     if ((ompt_status == ompt_status_track_callback)) {
01273       if (rc) {
01274         if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) {
01275           ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)
01276         (team->t.ompt_team_info.parallel_id,
01277          team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
01278         }
01279       } else {
01280         if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) {
01281           ompt_callbacks.ompt_callback(ompt_event_single_others_begin)
01282         (team->t.ompt_team_info.parallel_id,
01283          team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
01284         }
01285         this_thr->th.ompt_thread_info.state = ompt_state_wait_single;
01286       }
01287     }
01288 #endif
01289 
01290     return rc;
01291 }
01292 
01302 void
01303 __kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
01304 {
01305     __kmp_exit_single( global_tid );
01306 #if OMPT_SUPPORT
01307      kmp_info_t  *this_thr        = __kmp_threads[ global_tid ];
01308      kmp_team_t  *team            = this_thr -> th.th_team;
01309      if ((ompt_status == ompt_status_track_callback)) {
01310        if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) {
01311          int  tid = __kmp_tid_from_gtid( global_tid );
01312          ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)
01313        (team->t.ompt_team_info.parallel_id,
01314         team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
01315        }
01316      }
01317 #endif
01318 }
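
A short user-level sketch, assuming an OpenMP compiler that targets this RTL: the
single construct is typically lowered to a test of __kmpc_single()'s return value, with
__kmpc_end_single() called by the thread that executed the block, followed by a
__kmpc_barrier() unless a nowait clause is present.

    #include <omp.h>
    #include <stdio.h>

    int main(void)
    {
        #pragma omp parallel
        {
            #pragma omp single
            {
                printf("executed by exactly one thread: T#%d\n",
                       omp_get_thread_num());
            }
            /* implicit barrier here, omitted if 'nowait' is specified */
        }
        return 0;
    }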
01319 
01327 void
01328 __kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid )
01329 {
01330     KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
01331 #if OMPT_SUPPORT
01332      kmp_info_t  *this_thr        = __kmp_threads[ global_tid ];
01333      kmp_team_t  *team            = this_thr -> th.th_team;
01334      int  tid = __kmp_tid_from_gtid( global_tid );
01335      if ((ompt_status == ompt_status_track_callback)) {
01336        if (ompt_callbacks.ompt_callback(ompt_event_loop_end)) {
01337          ompt_callbacks.ompt_callback(ompt_event_loop_end)
01338        (team->t.ompt_team_info.parallel_id,
01339         team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id);
01340        }
01341      }
01342 #endif
01343 
01344     if ( __kmp_env_consistency_check )
01345      __kmp_pop_workshare( global_tid, ct_pdo, loc );
01346 }
01347 
01348 /*
01349  * User routines which take C-style arguments (call by value)
01350  * different from the Fortran equivalent routines
01351  */
01352 
01353 void
01354 ompc_set_num_threads( int arg )
01355 {
01356 // !!!!! TODO: check the per-task binding
01357     __kmp_set_num_threads( arg, __kmp_entry_gtid() );
01358 }
01359 
01360 void
01361 ompc_set_dynamic( int flag )
01362 {
01363     kmp_info_t *thread;
01364 
01365     /* For the thread-private implementation of the internal controls */
01366     thread = __kmp_entry_thread();
01367 
01368     __kmp_save_internal_controls( thread );
01369 
01370     set__dynamic( thread, flag ? TRUE : FALSE );
01371 }
01372 
01373 void
01374 ompc_set_nested( int flag )
01375 {
01376     kmp_info_t *thread;
01377 
01378     /* For the thread-private internal controls implementation */
01379     thread = __kmp_entry_thread();
01380 
01381     __kmp_save_internal_controls( thread );
01382 
01383     set__nested( thread, flag ? TRUE : FALSE );
01384 }
01385 
01386 #if OMP_30_ENABLED
01387 
01388 void
01389 ompc_set_max_active_levels( int max_active_levels )
01390 {
01391     /* TO DO */
01392     /* we want per-task implementation of this internal control */
01393 
01394     /* For the per-thread internal controls implementation */
01395     __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels );
01396 }
01397 
01398 void
01399 ompc_set_schedule( omp_sched_t kind, int modifier )
01400 {
01401 // !!!!! TODO: check the per-task binding
01402     __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier );
01403 }
01404 
01405 int
01406 ompc_get_ancestor_thread_num( int level )
01407 {
01408     return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level );
01409 }
01410 
01411 int
01412 ompc_get_team_size( int level )
01413 {
01414     return __kmp_get_team_size( __kmp_entry_gtid(), level );
01415 }
01416 
01417 #endif // OMP_30_ENABLED
01418 
01419 void
01420 kmpc_set_stacksize( int arg )
01421 {
01422     // __kmp_aux_set_stacksize initializes the library if needed
01423     __kmp_aux_set_stacksize( arg );
01424 }
01425 
01426 void
01427 kmpc_set_stacksize_s( size_t arg )
01428 {
01429     // __kmp_aux_set_stacksize initializes the library if needed
01430     __kmp_aux_set_stacksize( arg );
01431 }
01432 
01433 void
01434 kmpc_set_blocktime( int arg )
01435 {
01436     int gtid, tid;
01437     kmp_info_t *thread;
01438 
01439     gtid = __kmp_entry_gtid();
01440     tid = __kmp_tid_from_gtid(gtid);
01441     thread = __kmp_thread_from_gtid(gtid);
01442 
01443     __kmp_aux_set_blocktime( arg, thread, tid );
01444 }
01445 
01446 void
01447 kmpc_set_library( int arg )
01448 {
01449     // __kmp_user_set_library initializes the library if needed
01450     __kmp_user_set_library( (enum library_type)arg );
01451 }
01452 
01453 void
01454 kmpc_set_defaults( char const * str )
01455 {
01456     // __kmp_aux_set_defaults initializes the library if needed
01457     __kmp_aux_set_defaults( str, strlen( str ) );
01458 }
01459 
01460 #ifdef OMP_30_ENABLED
01461 
01462 int
01463 kmpc_set_affinity_mask_proc( int proc, void **mask )
01464 {
01465 #if defined(KMP_STUB) || !(KMP_OS_WINDOWS || KMP_OS_LINUX)
01466     return -1;
01467 #else
01468     if ( ! TCR_4(__kmp_init_middle) ) {
01469         __kmp_middle_initialize();
01470     }
01471     return __kmp_aux_set_affinity_mask_proc( proc, mask );
01472 #endif
01473 }
01474 
01475 int
01476 kmpc_unset_affinity_mask_proc( int proc, void **mask )
01477 {
01478 #if defined(KMP_STUB) || !(KMP_OS_WINDOWS || KMP_OS_LINUX)
01479     return -1;
01480 #else
01481     if ( ! TCR_4(__kmp_init_middle) ) {
01482         __kmp_middle_initialize();
01483     }
01484     return __kmp_aux_unset_affinity_mask_proc( proc, mask );
01485 #endif
01486 }
01487 
01488 int
01489 kmpc_get_affinity_mask_proc( int proc, void **mask )
01490 {
01491 #if defined(KMP_STUB) || !(KMP_OS_WINDOWS || KMP_OS_LINUX)
01492     return -1;
01493 #else
01494     if ( ! TCR_4(__kmp_init_middle) ) {
01495         __kmp_middle_initialize();
01496     }
01497     return __kmp_aux_get_affinity_mask_proc( proc, mask );
01498 #endif
01499 }
01500 
01501 #endif /* OMP_30_ENABLED */
01502 
01503 /* -------------------------------------------------------------------------- */
01544 void
01545 __kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit )
01546 {
01547     void **data_ptr;
01548 
01549     KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid ));
01550 
01551     KMP_MB();
01552 
01553     data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data;
01554 
01555     if ( __kmp_env_consistency_check ) {
01556         if ( loc == 0 ) {
01557             KMP_WARNING( ConstructIdentInvalid );
01558         }
01559     }
01560 
01561     /* ToDo: Optimize the following two barriers into some kind of split barrier */
01562 
01563     if (didit) *data_ptr = cpy_data;
01564 
01565     /* This barrier is not a barrier region boundary */
01566     __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
01567 
01568     if (! didit) (*cpy_func)( cpy_data, *data_ptr );
01569 
01570     /* Consider next barrier the user-visible barrier for barrier region boundaries */
01571     /* Nesting checks are already handled by the single construct checks */
01572 
01573     __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
01574 }
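
A short user-level sketch, assuming an OpenMP compiler that targets this RTL: for a
copyprivate clause the compiler is expected to collect the addresses of the listed
variables, generate a copy helper matching cpy_func above, and have every thread call
__kmpc_copyprivate() with didit set only on the thread that executed the single block.

    #include <omp.h>
    #include <stdio.h>

    int main(void)
    {
        int seed;
        #pragma omp parallel private(seed)
        {
            #pragma omp single copyprivate(seed)
            {
                seed = 12345;   /* computed once, broadcast to every thread */
            }
            printf("T#%d got seed=%d\n", omp_get_thread_num(), seed);
        }
        return 0;
    }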
01575 
01576 /* -------------------------------------------------------------------------- */
01577 
01578 #define INIT_LOCK                 __kmp_init_user_lock_with_checks
01579 #define INIT_NESTED_LOCK          __kmp_init_nested_user_lock_with_checks
01580 #define ACQUIRE_LOCK              __kmp_acquire_user_lock_with_checks
01581 #define ACQUIRE_LOCK_TIMED        __kmp_acquire_user_lock_with_checks_timed
01582 #define ACQUIRE_NESTED_LOCK       __kmp_acquire_nested_user_lock_with_checks
01583 #define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed
01584 #define RELEASE_LOCK              __kmp_release_user_lock_with_checks
01585 #define RELEASE_NESTED_LOCK       __kmp_release_nested_user_lock_with_checks
01586 #define TEST_LOCK                 __kmp_test_user_lock_with_checks
01587 #define TEST_NESTED_LOCK          __kmp_test_nested_user_lock_with_checks
01588 #define DESTROY_LOCK              __kmp_destroy_user_lock_with_checks
01589 #define DESTROY_NESTED_LOCK       __kmp_destroy_nested_user_lock_with_checks
01590 
01591 
01592 /*
01593  * TODO: Make check abort messages use location info & pass it
01594  * into with_checks routines
01595  */
01596 
01597 /* initialize the lock */
01598 void
01599 __kmpc_init_lock( ident_t * loc, kmp_int32 gtid,  void ** user_lock ) {
01600     static char const * const func = "omp_init_lock";
01601     kmp_user_lock_p lck;
01602     KMP_DEBUG_ASSERT( __kmp_init_serial );
01603 
01604     if ( __kmp_env_consistency_check ) {
01605         if ( user_lock == NULL ) {
01606             KMP_FATAL( LockIsUninitialized, func );
01607         }
01608     }
01609 
01610     KMP_CHECK_USER_LOCK_INIT();
01611 
01612     if ( ( __kmp_user_lock_kind == lk_tas )
01613       && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
01614         lck = (kmp_user_lock_p)user_lock;
01615     }
01616 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
01617     else if ( ( __kmp_user_lock_kind == lk_futex )
01618       && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
01619         lck = (kmp_user_lock_p)user_lock;
01620     }
01621 #endif
01622     else {
01623         lck = __kmp_user_lock_allocate( user_lock, gtid );
01624     }
01625     INIT_LOCK( lck );
01626     __kmp_set_user_lock_location( lck, loc );
01627 
01628 } // __kmpc_init_lock
01629 
01630 /* initialize the lock */
01631 void
01632 __kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
01633     static char const * const func = "omp_init_nest_lock";
01634     kmp_user_lock_p lck;
01635     KMP_DEBUG_ASSERT( __kmp_init_serial );
01636 
01637     if ( __kmp_env_consistency_check ) {
01638         if ( user_lock == NULL ) {
01639             KMP_FATAL( LockIsUninitialized, func );
01640         }
01641     }
01642 
01643     KMP_CHECK_USER_LOCK_INIT();
01644 
01645     if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
01646       + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
01647         lck = (kmp_user_lock_p)user_lock;
01648     }
01649 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
01650     else if ( ( __kmp_user_lock_kind == lk_futex )
01651      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
01652      <= OMP_NEST_LOCK_T_SIZE ) ) {
01653         lck = (kmp_user_lock_p)user_lock;
01654     }
01655 #endif
01656     else {
01657         lck = __kmp_user_lock_allocate( user_lock, gtid );
01658     }
01659 
01660     INIT_NESTED_LOCK( lck );
01661     __kmp_set_user_lock_location( lck, loc );
01662 
01663 } // __kmpc_init_nest_lock
01664 
01665 void
01666 __kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
01667 
01668     kmp_user_lock_p lck;
01669 
01670     if ( ( __kmp_user_lock_kind == lk_tas )
01671       && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
01672         lck = (kmp_user_lock_p)user_lock;
01673     }
01674 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
01675     else if ( ( __kmp_user_lock_kind == lk_futex )
01676       && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
01677         lck = (kmp_user_lock_p)user_lock;
01678     }
01679 #endif
01680     else {
01681         lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" );
01682     }
01683 
01684     DESTROY_LOCK( lck );
01685 
01686     if ( ( __kmp_user_lock_kind == lk_tas )
01687       && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
01688         ;
01689     }
01690 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
01691     else if ( ( __kmp_user_lock_kind == lk_futex )
01692       && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
01693         ;
01694     }
01695 #endif
01696     else {
01697         __kmp_user_lock_free( user_lock, gtid, lck );
01698     }
01699 } // __kmpc_destroy_lock
01700 
01701 /* destroy the nested lock */
01702 void
01703 __kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
01704 
01705     kmp_user_lock_p lck;
01706 
01707     if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
01708       + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
01709         lck = (kmp_user_lock_p)user_lock;
01710     }
01711 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
01712     else if ( ( __kmp_user_lock_kind == lk_futex )
01713      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
01714      <= OMP_NEST_LOCK_T_SIZE ) ) {
01715         lck = (kmp_user_lock_p)user_lock;
01716     }
01717 #endif
01718     else {
01719         lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" );
01720     }
01721 
01722 
01723     DESTROY_NESTED_LOCK( lck );
01724 
01725     if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
01726      + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
01727         ;
01728     }
01729 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
01730     else if ( ( __kmp_user_lock_kind == lk_futex )
01731      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
01732      <= OMP_NEST_LOCK_T_SIZE ) ) {
01733         ;
01734     }
01735 #endif
01736     else {
01737         __kmp_user_lock_free( user_lock, gtid, lck );
01738     }
01739 } // __kmpc_destroy_nest_lock
01740 
01741 void
01742 __kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
01743     kmp_user_lock_p lck;
01744 
01745     if ( ( __kmp_user_lock_kind == lk_tas )
01746       && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
01747         lck = (kmp_user_lock_p)user_lock;
01748     }
01749 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
01750     else if ( ( __kmp_user_lock_kind == lk_futex )
01751       && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
01752         lck = (kmp_user_lock_p)user_lock;
01753     }
01754 #endif
01755     else {
01756         lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
01757     }
01758 
01759 
01760     ACQUIRE_LOCK( lck, gtid );
01761 
01762 }
01763 
01764 
01765 void
01766 __kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
01767     kmp_user_lock_p lck;
01768 
01769     if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
01770       + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
01771         lck = (kmp_user_lock_p)user_lock;
01772     }
01773 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
01774     else if ( ( __kmp_user_lock_kind == lk_futex )
01775      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
01776      <= OMP_NEST_LOCK_T_SIZE ) ) {
01777         lck = (kmp_user_lock_p)user_lock;
01778     }
01779 #endif
01780     else {
01781         lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" );
01782     }
01783 
01784 
01785     ACQUIRE_NESTED_LOCK( lck, gtid );
01786 
01787 }
01788 
01789 void
01790 __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
01791 {
01792     kmp_user_lock_p lck;
01793 
01794     /* Can't use serial interval since not block structured */
01795     /* release the lock */
01796 
01797     if ( ( __kmp_user_lock_kind == lk_tas )
01798       && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
01799 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
01800         // "fast" path implemented to fix customer performance issue
01801         TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
01802         KMP_MB();
01803         return;
01804 #else
01805         lck = (kmp_user_lock_p)user_lock;
01806 #endif
01807     }
01808 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
01809     else if ( ( __kmp_user_lock_kind == lk_futex )
01810       && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
01811         lck = (kmp_user_lock_p)user_lock;
01812     }
01813 #endif
01814     else {
01815         lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" );
01816     }
01817 
01818     RELEASE_LOCK( lck, gtid );
01819 
01820 #if OMPT_SUPPORT
01821     if ((ompt_status == ompt_status_track_callback) &&
01822     (ompt_callbacks.ompt_callback(ompt_event_release_lock))) {
01823       ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t) lck);
01824     }
01825 #endif
01826 }
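/*
 * Illustration only: a rough C11 rendering of the TAS fast path above, assuming
 * lk.poll is the 32-bit word that is non-zero while the lock is held.  In the
 * original code TCW_4() is the runtime's ordered 4-byte store and KMP_MB() its
 * full memory fence; the single release store below is a simplification, not a
 * claim about the exact ordering the runtime relies on.
 */
#include <stdatomic.h>
#include <stdint.h>

static inline void tas_unlock_sketch(_Atomic uint32_t *poll)
{
    /* publish the writes made inside the critical section, then mark unlocked */
    atomic_store_explicit(poll, 0u, memory_order_release);
}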
01827 
01828 /* release the nested lock */
01829 void
01830 __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
01831 {
01832     kmp_user_lock_p lck;
01833 
01834     /* Can't use serial interval since not block structured */
01835 
01836     if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
01837       + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
01838 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
01839         // "fast" path implemented to fix customer performance issue
01840         kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;
01841         if ( --(tl->lk.depth_locked) == 0 ) {
01842             TCW_4(tl->lk.poll, 0);
01843         }
01844         KMP_MB();
01845         return;
01846 #else
01847         lck = (kmp_user_lock_p)user_lock;
01848 #endif
01849     }
01850 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
01851     else if ( ( __kmp_user_lock_kind == lk_futex )
01852      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
01853      <= OMP_NEST_LOCK_T_SIZE ) ) {
01854         lck = (kmp_user_lock_p)user_lock;
01855     }
01856 #endif
01857     else {
01858         lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" );
01859     }
01860 
01861     int release_status = RELEASE_NESTED_LOCK( lck, gtid );
01862 
01863 #if OMPT_SUPPORT
01864     if ((release_status == KMP_NESTED_LOCK_RELEASED) &&
01865     (ompt_status == ompt_status_track_callback) &&
01866     (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last))) {
01867       ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)((uint64_t) lck);
01868     }
01869 #endif
01870 }
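/*
 * Illustration only: the shape of the nested fast path above in plain C11,
 * assuming depth_locked counts how many times the owner has acquired the lock
 * and poll is cleared only when the outermost acquisition is released -- which
 * is also the only case in which the OMPT "release_nest_lock_last" callback
 * fires.  The struct and function are hypothetical stand-ins, not runtime types.
 */
struct nest_tas_sketch {
    _Atomic uint32_t poll;          /* 0 while unlocked                       */
    int32_t          depth_locked;  /* nesting level, touched only by owner   */
};

static inline int nest_tas_unlock_sketch(struct nest_tas_sketch *l)
{
    if (--l->depth_locked == 0) {
        atomic_store_explicit(&l->poll, 0u, memory_order_release);
        return 1;   /* outermost release: the lock is now free       */
    }
    return 0;       /* still held by the same thread at a lower depth */
}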
01871 
01872 /* try to acquire the lock */
01873 int
01874 __kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
01875 {
01876     kmp_user_lock_p lck;
01877     int          rc;
01878 
01879     if ( ( __kmp_user_lock_kind == lk_tas )
01880       && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
01881         lck = (kmp_user_lock_p)user_lock;
01882     }
01883 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
01884     else if ( ( __kmp_user_lock_kind == lk_futex )
01885       && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
01886         lck = (kmp_user_lock_p)user_lock;
01887     }
01888 #endif
01889     else {
01890         lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" );
01891     }
01892 
01893 
01894     rc = TEST_LOCK( lck, gtid );
01895     return ( rc ? FTN_TRUE : FTN_FALSE );
01896 
01897     /* Can't use serial interval since not block structured */
01898 }
01899 
01900 /* try to acquire the nested lock */
01901 int
01902 __kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
01903 {
01904     kmp_user_lock_p lck;
01905     int          rc;
01906 
01907     if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
01908       + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
01909         lck = (kmp_user_lock_p)user_lock;
01910     }
01911 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
01912     else if ( ( __kmp_user_lock_kind == lk_futex )
01913      && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
01914      <= OMP_NEST_LOCK_T_SIZE ) ) {
01915         lck = (kmp_user_lock_p)user_lock;
01916     }
01917 #endif
01918     else {
01919         lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" );
01920     }
01921 
01922 
01923     rc = TEST_NESTED_LOCK( lck, gtid );
01924     return rc;
01925 
01926     /* Can't use serial interval since not block structured */
01927 }
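/*
 * Illustration only: the user-level try-lock idiom behind the two routines
 * above.  Per the OpenMP API, omp_test_lock() returns non-zero on success and
 * omp_test_nest_lock() returns the new nesting count (0 on failure), matching
 * the FTN_TRUE/FTN_FALSE mapping and the raw count returned above.  The helper
 * below is hypothetical and assumes omp_test_lock() reaches __kmpc_test_lock.
 */
static int try_do_work_sketch(omp_lock_t *lock)
{
    if (!omp_test_lock(lock))   /* lock busy: do not block, let the caller retry */
        return 0;
    /* ... work protected by the lock ... */
    omp_unset_lock(lock);
    return 1;
}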
01928 
01929 
01930 /*--------------------------------------------------------------------------------------------------------------------*/
01931 
01932 /*
01933  * Interface to fast scalable reduce methods routines
01934  */
01935 
01936 // Keep the selected method in a thread-local structure for cross-function use: it is read back in the __kmpc_end_reduce* functions.
01937 // An alternative would be to re-determine the method in the __kmpc_end_reduce* functions (which would require a new prototype).
01938 // AT: which solution is better?
01939 #define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \
01940                    ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) )
01941 
01942 #define __KMP_GET_REDUCTION_METHOD(gtid) \
01943                    ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method )
01944 
01945 // description of the packed_reduction_method variable: look at the macros in kmp.h
01946 
01947 
01948 // used in a critical section reduce block
01949 static __forceinline void
01950 __kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
01951 
01952     // this lock was visible to a customer and to the thread profiler as a serial overhead span
01953     //            (although it is used for an internal purpose only)
01954     //            why was it visible in the previous implementation?
01955     //            should we keep it visible in the new reduce block?
01956     kmp_user_lock_p lck;
01957 
01958     // We know that the fast reduction code is only emitted by Intel compilers
01959     // with 32 byte critical sections. If there isn't enough space, then we
01960     // have to use a pointer.
01961     if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) {
01962         lck = (kmp_user_lock_p)crit;
01963     }
01964     else {
01965         lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
01966     }
01967     KMP_DEBUG_ASSERT( lck != NULL );
01968 
01969     if ( __kmp_env_consistency_check )
01970         __kmp_push_sync( global_tid, ct_critical, loc, lck );
01971 
01972     __kmp_acquire_user_lock_with_checks( lck, global_tid );
01973 }
01974 
01975 // used in a critical section reduce block
01976 static __forceinline void
01977 __kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
01978 
01979     kmp_user_lock_p lck;
01980 
01981     // We know that the fast reduction code is only emitted by Intel compilers with 32 byte critical
01982     // sections. If there isn't enough space, then we have to use a pointer.
01983     if ( __kmp_base_user_lock_size > 32 ) {
01984         lck = *( (kmp_user_lock_p *) crit );
01985         KMP_ASSERT( lck != NULL );
01986     } else {
01987         lck = (kmp_user_lock_p) crit;
01988     }
01989 
01990     if ( __kmp_env_consistency_check )
01991         __kmp_pop_sync( global_tid, ct_critical, loc );
01992 
01993     __kmp_release_user_lock_with_checks( lck, global_tid );
01994 
01995 } // __kmp_end_critical_section_reduce_block
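/*
 * Illustration only: the storage convention the two helpers above share.
 * kmp_critical_name is a small per-construct block (the comments above describe
 * 32-byte critical sections); when the selected lock type fits, the lock lives
 * inside the block, otherwise the block's first word holds a pointer to a
 * separately allocated lock installed by __kmp_get_critical_section_ptr().
 * The helper below is hypothetical and mirrors the enter helper's
 * INTEL_CRITICAL_SIZE test; the end helper hard-codes the same 32-byte threshold.
 */
static inline kmp_user_lock_p
crit_block_lock_sketch(kmp_critical_name *crit)
{
    if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
        return (kmp_user_lock_p)crit;       /* lock object stored in place   */
    }
    return *((kmp_user_lock_p *)crit);      /* block stores a pointer        */
}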
01996 
01997 
01998 /* 2.a.i. Reduce Block without a terminating barrier */
02012 kmp_int32
02013 __kmpc_reduce_nowait(
02014     ident_t *loc, kmp_int32 global_tid,
02015     kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
02016     kmp_critical_name *lck ) {
02017 
02018     int retval;
02019     PACKED_REDUCTION_METHOD_T packed_reduction_method;
02020 
02021     KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) );
02022 
02023     // why do we need this initialization here at all?
02024     // A reduction clause cannot be used as a stand-alone directive.
02025 
02026     // do not call __kmp_serial_initialize(); it will be called by __kmp_parallel_initialize() if needed
02027     // possible detection of a false-positive race by the threadchecker ???
02028     if( ! TCR_4( __kmp_init_parallel ) )
02029         __kmp_parallel_initialize();
02030 
02031     // check correctness of reduce block nesting
02032     if ( __kmp_env_consistency_check )
02033         __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
02034 
02035     // it would be better to check an assertion here: ASSERT( thr_state == THR_WORK_STATE )
02036 
02037     // the packed_reduction_method value will be reused by the __kmp_end_reduce* functions, so it must be kept in a variable
02038     // the variable should be a construct-specific or thread-specific property, not a team-specific property
02039     //     (a thread can reach the next reduce block on the next construct, and the reduce method may differ on that construct)
02040     // the ident_t "loc" parameter could be used as a construct-specific property (but what if loc == 0?)
02041     //     (if a construct-specific or team-specific variable were shared, unnecessary extra syncs would be needed)
02042     // a thread-specific variable is better with regard to the two issues above (next construct and extra syncs)
02043     // a thread-specific "th_local.reduction_method" variable is used currently
02044     // each thread executes the 'determine' and 'set' lines (there is no need to restrict this to one thread; that avoids unnecessary extra syncs)
02045 
02046     packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
02047     __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
02048 
02049     if( packed_reduction_method == critical_reduce_block ) {
02050 
02051         __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
02052         retval = 1;
02053 
02054     } else if( packed_reduction_method == empty_reduce_block ) {
02055 
02056         // usage: if team size == 1, no synchronization is required ( Intel platforms only )
02057         retval = 1;
02058 
02059     } else if( packed_reduction_method == atomic_reduce_block ) {
02060 
02061         retval = 2;
02062 
02063         // all threads should do this pop here (because __kmpc_end_reduce_nowait() won't be called by the code gen)
02064         //     (this is not ideal, because the checking block has been closed by this 'pop',
02065         //      but the atomic operation has not been executed yet; it will execute slightly later, essentially on the next instruction)
02066         if ( __kmp_env_consistency_check )
02067             __kmp_pop_sync( global_tid, ct_reduce, loc );
02068 
02069     } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
02070 
02071         //AT: performance issue: a real barrier here
02072         //AT:     (if the master goes slowly, other threads are blocked here waiting for the master to come and release them)
02073         //AT:     (it's not what a customer might expect when specifying the NOWAIT clause)
02074         //AT:     (specifying NOWAIT won't improve performance, and it will be confusing to a customer)
02075         //AT: another implementation of *barrier_gather*nowait() (or some other design) might go faster
02076         //        and be more in line with the intent of NOWAIT
02077         //AT: TO DO: run the EPCC benchmark and compare times
02078 
02079         // this barrier should be invisible to a customer and to the thread profiler
02080         //              (it's neither a terminating barrier nor customer code; it's used for an internal purpose)
02081         retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func );
02082         retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
02083 
02084         // all workers except the master should do this pop here
02085         //     ( none of the other workers will get to __kmpc_end_reduce_nowait() )
02086         if ( __kmp_env_consistency_check ) {
02087             if( retval == 0 ) {
02088                 __kmp_pop_sync( global_tid, ct_reduce, loc );
02089             }
02090         }
02091 
02092     } else {
02093 
02094         // should never reach this block
02095         KMP_ASSERT( 0 ); // "unexpected method"
02096 
02097     }
02098 
02099     KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
02100 
02101     return retval;
02102 }
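/*
 * Illustration only: roughly the call pattern a compiler is expected to emit
 * around __kmpc_reduce_nowait().  Per the dispatch above, a return of 1 means
 * "combine the private copies here, then call __kmpc_end_reduce_nowait()",
 * 2 means "combine with atomic updates", and 0 means nothing more to do on
 * this thread.  The function below is a hypothetical sketch, not generated code.
 */
static void reduce_nowait_codegen_sketch(ident_t *loc, kmp_int32 gtid,
                                         double *priv_sum,
                                         void (*reduce_func)(void *, void *),
                                         kmp_critical_name *crit_name)
{
    int res = __kmpc_reduce_nowait(loc, gtid, /* num_vars */ 1,
                                   sizeof(double), priv_sum,
                                   reduce_func, crit_name);
    if (res == 1) {
        /* combine priv_sum into the shared reduction variable here ...      */
        __kmpc_end_reduce_nowait(loc, gtid, crit_name);
    } else if (res == 2) {
        /* ... or fold priv_sum in with a single atomic update instead       */
    }
    /* res == 0: a tree-reduction worker; its contribution was already taken */
}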
02103 
02112 void
02113 __kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
02114 
02115     PACKED_REDUCTION_METHOD_T packed_reduction_method;
02116 
02117     KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) );
02118 
02119     packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
02120 
02121     if( packed_reduction_method == critical_reduce_block ) {
02122 
02123         __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
02124 
02125     } else if( packed_reduction_method == empty_reduce_block ) {
02126 
02127         // usage: if team size == 1, no synchronization is required ( on Intel platforms only )
02128 
02129     } else if( packed_reduction_method == atomic_reduce_block ) {
02130 
02131         // neither the master nor the other workers should get here
02132         //     (the code gen does not generate this call in case 2: atomic reduce block)
02133         // actually it would be better to remove this else-if entirely;
02134         // after removal this value would be caught by the 'else' branch and would assert
02135 
02136     } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
02137 
02138         // only master gets here
02139 
02140     } else {
02141 
02142         // should never reach this block
02143         KMP_ASSERT( 0 ); // "unexpected method"
02144 
02145     }
02146 
02147     if ( __kmp_env_consistency_check )
02148         __kmp_pop_sync( global_tid, ct_reduce, loc );
02149 
02150     KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
02151 
02152     return;
02153 }
02154 
02155 /* 2.a.ii. Reduce Block with a terminating barrier */
02156 
02170 kmp_int32
02171 __kmpc_reduce(
02172     ident_t *loc, kmp_int32 global_tid,
02173     kmp_int32 num_vars, size_t reduce_size, void *reduce_data, 
02174     void (*reduce_func)(void *lhs_data, void *rhs_data), 
02175     kmp_critical_name *lck ) 
02176 {
02177     int retval;
02178     PACKED_REDUCTION_METHOD_T packed_reduction_method;
02179 
02180     KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) );
02181 
02182     // why do we need this initialization here at all?
02183     // A reduction clause cannot be a stand-alone directive.
02184 
02185     // do not call __kmp_serial_initialize(); it will be called by __kmp_parallel_initialize() if needed
02186     // possible detection of a false-positive race by the threadchecker ???
02187     if( ! TCR_4( __kmp_init_parallel ) )
02188         __kmp_parallel_initialize();
02189 
02190     // check correctness of reduce block nesting
02191     if ( __kmp_env_consistency_check )
02192         __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
02193 
02194     // it would be better to check an assertion here: ASSERT( thr_state == THR_WORK_STATE )
02195 
02196     packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
02197     __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
02198 
02199     if( packed_reduction_method == critical_reduce_block ) {
02200 
02201         __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
02202         retval = 1;
02203 
02204     } else if( packed_reduction_method == empty_reduce_block ) {
02205 
02206         // usage: if team size == 1, no synchronization is required ( Intel platforms only )
02207         retval = 1;
02208 
02209     } else if( packed_reduction_method == atomic_reduce_block ) {
02210 
02211         retval = 2;
02212 
02213     } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
02214 
02215         //case tree_reduce_block:
02216         // this barrier should be visible to a customer and to the thread profiler
02217         //              (it's a terminating barrier on constructs if NOWAIT not specified)
02218         retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func );
02219         retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
02220 
02221         // all workers except the master should do this pop here
02222         //     ( none of the workers other than the master will enter __kmpc_end_reduce() )
02223         if ( __kmp_env_consistency_check ) {
02224             if( retval == 0 ) { // 0: all other workers; 1: master
02225                 __kmp_pop_sync( global_tid, ct_reduce, loc );
02226             }
02227         }
02228 
02229     } else {
02230 
02231         // should never reach this block
02232         KMP_ASSERT( 0 ); // "unexpected method"
02233 
02234     }
02235 
02236     KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
02237 
02238     return retval;
02239 }
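/*
 * Illustration only: the blocking form follows the same dispatch as the nowait
 * form, except that __kmpc_end_reduce() also provides the terminating barrier,
 * so it is expected to be called for both the res == 1 and res == 2 cases.
 * Threads that receive 0 (tree-reduction workers) skip it and are released by
 * the master's __kmp_end_split_barrier() call instead.  Hypothetical sketch,
 * not generated code.
 */
static void reduce_codegen_sketch(ident_t *loc, kmp_int32 gtid,
                                  double *priv_sum,
                                  void (*reduce_func)(void *, void *),
                                  kmp_critical_name *crit_name)
{
    int res = __kmpc_reduce(loc, gtid, /* num_vars */ 1, sizeof(double),
                            priv_sum, reduce_func, crit_name);
    if (res == 1) {
        /* combine priv_sum into the shared reduction variable here ... */
        __kmpc_end_reduce(loc, gtid, crit_name);
    } else if (res == 2) {
        /* ... or fold it in with an atomic update ... */
        __kmpc_end_reduce(loc, gtid, crit_name);
    }
}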
02240 
02250 void
02251 __kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
02252 
02253     PACKED_REDUCTION_METHOD_T packed_reduction_method;
02254 
02255     KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) );
02256 
02257     packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
02258 
02259     // this barrier should be visible to a customer and to the thread profiler
02260     //              (it's a terminating barrier on constructs if NOWAIT not specified)
02261 
02262     if( packed_reduction_method == critical_reduce_block ) {
02263 
02264         __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
02265 
02266         // TODO: implicit barrier: should be exposed
02267         __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
02268 
02269     } else if( packed_reduction_method == empty_reduce_block ) {
02270 
02271         // usage: if team size == 1, no synchronization is required ( Intel platforms only )
02272 
02273         // TODO: implicit barrier: should be exposed
02274         __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
02275 
02276     } else if( packed_reduction_method == atomic_reduce_block ) {
02277 
02278         // TODO: implicit barrier: should be exposed
02279         __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
02280 
02281     } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
02282 
02283         // only master executes here (master releases all other workers)
02284         __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid );
02285 
02286     } else {
02287 
02288         // should never reach this block
02289         KMP_ASSERT( 0 ); // "unexpected method"
02290 
02291     }
02292 
02293     if ( __kmp_env_consistency_check )
02294         __kmp_pop_sync( global_tid, ct_reduce, loc );
02295 
02296     KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
02297 
02298     return;
02299 }
02300 
02301 #undef __KMP_GET_REDUCTION_METHOD
02302 #undef __KMP_SET_REDUCTION_METHOD
02303 
02304 /*-- end of interface to fast scalable reduce routines ---------------------------------------------------------------*/
02305 
02306 kmp_uint64
02307 __kmpc_get_taskid() {
02308 
02309     #if OMP_30_ENABLED
02310 
02311         kmp_int32    gtid;
02312         kmp_info_t * thread;
02313 
02314         gtid = __kmp_get_gtid();
02315         if ( gtid < 0 ) {
02316             return 0;
02317         }; // if
02318         thread = __kmp_thread_from_gtid( gtid );
02319         return thread->th.th_current_task->td_task_id;
02320 
02321     #else
02322 
02323         return 0;
02324 
02325     #endif
02326 
02327 } // __kmpc_get_taskid
02328 
02329 
02330 kmp_uint64
02331 __kmpc_get_parent_taskid() {
02332 
02333     #if OMP_30_ENABLED
02334 
02335         kmp_int32        gtid;
02336         kmp_info_t *     thread;
02337         kmp_taskdata_t * parent_task;
02338 
02339         gtid = __kmp_get_gtid();
02340         if ( gtid < 0 ) {
02341             return 0;
02342         }; // if
02343         thread      = __kmp_thread_from_gtid( gtid );
02344         parent_task = thread->th.th_current_task->td_parent;
02345         return ( parent_task == NULL ? 0 : parent_task->td_task_id );
02346 
02347     #else
02348 
02349         return 0;
02350 
02351     #endif
02352 
02353 } // __kmpc_get_parent_taskid
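/*
 * Illustration only: both queries above take no arguments and return 0 when no
 * task information is available (gtid < 0, or OMP_30_ENABLED is off), so a
 * tool could tag a trace record with a (parent, task) pair like this.  The
 * function is a hypothetical example, not runtime code.
 */
static void trace_current_task_sketch(void)
{
    kmp_uint64 task_id   = __kmpc_get_taskid();
    kmp_uint64 parent_id = __kmpc_get_parent_taskid();
    /* e.g. record (parent_id, task_id) in a tool-side trace buffer */
    (void)task_id;
    (void)parent_id;
}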
02354 
02355 void __kmpc_place_threads(int nC, int nT, int nO)
02356 {
02357 #if KMP_MIC
02358     if ( ! __kmp_init_serial ) {
02359         __kmp_serial_initialize();
02360     }
02361     __kmp_place_num_cores = nC;
02362     __kmp_place_num_threads_per_core = nT;
02363     __kmp_place_core_offset = nO;
02364 #endif
02365 }
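/*
 * Illustration only: on a KMP_MIC build the call below would request placement
 * on 60 cores with 4 threads per core and a core offset of 1 (presumably
 * skipping the first core); on other builds the routine compiles to a no-op.
 * The numbers are arbitrary example values, not runtime defaults.
 */
static void place_threads_sketch(void)
{
    __kmpc_place_threads(/* nC */ 60, /* nT */ 4, /* nO */ 1);
}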
02366 
02367 // end of file //
02368 
