kmp_tasking.c

00001 /*
00002  * kmp_tasking.c -- OpenMP 3.0 tasking support.
00003  * $Revision: 42181 $
00004  * $Date: 2013-03-26 15:04:45 -0500 (Tue, 26 Mar 2013) $
00005  */
00006 
00007 /* <copyright>
00008     Copyright (c) 1997-2013 Intel Corporation.  All Rights Reserved.
00009 
00010     Redistribution and use in source and binary forms, with or without
00011     modification, are permitted provided that the following conditions
00012     are met:
00013 
00014       * Redistributions of source code must retain the above copyright
00015         notice, this list of conditions and the following disclaimer.
00016       * Redistributions in binary form must reproduce the above copyright
00017         notice, this list of conditions and the following disclaimer in the
00018         documentation and/or other materials provided with the distribution.
00019       * Neither the name of Intel Corporation nor the names of its
00020         contributors may be used to endorse or promote products derived
00021         from this software without specific prior written permission.
00022 
00023     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00024     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00025     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00026     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00027     HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00028     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00029     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00030     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00031     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00032     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00033     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00034 
00035 
00036 ------------------------------------------------------------------------
00037 
00038     Portions of this software are protected under the following patents:
00039         U.S. Patent 5,812,852
00040         U.S. Patent 6,792,599
00041         U.S. Patent 7,069,556
00042         U.S. Patent 7,328,433
00043         U.S. Patent 7,500,242
00044 
00045 </copyright> */
00046 
00047 #include "kmp.h"
00048 #include "kmp_i18n.h"
00049 
00050 
00051 #if OMP_30_ENABLED
00052 
00053 /* ------------------------------------------------------------------------ */
00054 /* ------------------------------------------------------------------------ */
00055 
00056 
00057 /* forward declaration */
00058 static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
00059 static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
00060 static int  __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
00061 
00062 #ifndef KMP_DEBUG
00063 # define __kmp_static_delay( arg )     /* nothing to do */
00064 #else
00065 
00066 static void
00067 __kmp_static_delay( int arg )
00068 {
00069 /* Work around weird code-gen bug that causes assert to trip */
00070 # if KMP_ARCH_X86_64 && KMP_OS_LINUX
00071     KMP_ASSERT( arg != 0 );
00072 # else
00073     KMP_ASSERT( arg >= 0 );
00074 # endif
00075 }
00076 #endif /* KMP_DEBUG */
00077 
00078 static void
00079 __kmp_static_yield( int arg )
00080 {
00081     __kmp_yield( arg );
00082 }
00083 
00084 #ifdef BUILD_TIED_TASK_STACK
00085 
00086 //---------------------------------------------------------------------------
00087 //  __kmp_trace_task_stack: print the tied tasks from the task stack in order
00088 //     from top to bottom
00089 //
00090 //  gtid: global thread identifier for thread containing stack
00091 //  thread_data: thread data for task team thread containing stack
00092 //  threshold: value above which the trace statement triggers
00093 //  location: string identifying call site of this function (for trace)
00094 
00095 static void
00096 __kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
00097 {
00098     kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
00099     kmp_taskdata_t **stack_top = task_stack -> ts_top;
00100     kmp_int32 entries = task_stack -> ts_entries;
00101     kmp_taskdata_t *tied_task;
00102 
00103     KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
00104                          "first_block = %p, stack_top = %p \n",
00105                          location, gtid, entries, & task_stack->ts_first_block, stack_top ) );
00106 
00107     KMP_DEBUG_ASSERT( stack_top != NULL );
00108     KMP_DEBUG_ASSERT( entries > 0 );
00109 
00110     while ( entries != 0 )
00111     {
00112         KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
00113         // fix up ts_top if we need to pop from previous block
00114         if ( ( entries & TASK_STACK_INDEX_MASK ) == 0 )
00115         {
00116             kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
00117 
00118             stack_block = stack_block -> sb_prev;
00119             stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
00120         }
00121 
00122         // finish bookkeeping
00123         stack_top--;
00124         entries--;
00125 
00126         tied_task = * stack_top;
00127 
00128         KMP_DEBUG_ASSERT( tied_task != NULL );
00129         KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
00130 
00131         KA_TRACE(threshold, ("__kmp_trace_task_stack(%s):             gtid=%d, entry=%d, "
00132                              "stack_top=%p, tied_task=%p\n",
00133                              location, gtid, entries, stack_top, tied_task ) );
00134     }
00135     KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
00136 
00137     KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
00138                          location, gtid ) );
00139 }
00140 
00141 //---------------------------------------------------------------------------
00142 //  __kmp_init_task_stack: initialize the task stack for the first time
00143 //    after a thread_data structure is created.
00144 //    It should not be necessary to do this again (assuming the stack works).
00145 //
00146 //  gtid: global thread identifier of calling thread
00147 //  thread_data: thread data for task team thread containing stack
00148 
00149 static void
00150 __kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
00151 {
00152     kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
00153     kmp_stack_block_t *first_block;
00154 
00155     // set up the first block of the stack
00156     first_block = & task_stack -> ts_first_block;
00157     task_stack -> ts_top = (kmp_taskdata_t **) first_block;
00158     memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
00159 
00160     // initialize the stack to be empty
00161     task_stack  -> ts_entries = TASK_STACK_EMPTY;
00162     first_block -> sb_next = NULL;
00163     first_block -> sb_prev = NULL;
00164 }
00165 
00166 
00167 //---------------------------------------------------------------------------
00168 //  __kmp_free_task_stack: free the task stack when thread_data is destroyed.
00169 //
00170 //  gtid: global thread identifier for calling thread
00171 //  thread_data: thread info for thread containing stack
00172 
00173 static void
00174 __kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
00175 {
00176     kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
00177     kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
00178 
00179     KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
00180     // free from the second block of the stack
00181     while ( stack_block != NULL ) {
00182         kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
00183 
00184         stack_block -> sb_next = NULL;
00185         stack_block -> sb_prev = NULL;
00186         if (stack_block != & task_stack -> ts_first_block) {
00187             __kmp_thread_free( __kmp_threads[ gtid ], stack_block );  // free the block, if not the first
00188         }
00189         stack_block = next_block;
00190     }
00191     // initialize the stack to be empty
00192     task_stack -> ts_entries = 0;
00193     task_stack -> ts_top = NULL;
00194 }
00195 
00196 
00197 //---------------------------------------------------------------------------
00198 //  __kmp_push_task_stack: Push the tied task onto the task stack.
00199 //     Grow the stack if necessary by allocating another block.
00200 //
00201 //  gtid: global thread identifier for calling thread
00202 //  thread: thread info for thread containing stack
00203 //  tied_task: the task to push on the stack
00204 
00205 static void
00206 __kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
00207 {
00208     // GEH - need to consider what to do if tt_threads_data not allocated yet
00209     kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
00210                                         tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
00211     kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
00212 
00213     if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
00214         return;  // Don't push anything on stack if team or team tasks are serialized
00215     }
00216 
00217     KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
00218     KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
00219 
00220     KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
00221                   gtid, thread, tied_task ) );
00222     // Store entry
00223     * (task_stack -> ts_top) = tied_task;
00224 
00225     // Do bookkeeping for next push
00226     task_stack -> ts_top++;
00227     task_stack -> ts_entries++;
00228 
00229     if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
00230     {
00231         // Find beginning of this task block
00232         kmp_stack_block_t *stack_block =
00233              (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
00234 
00235         // Check if we already have a block
00236         if ( stack_block -> sb_next != NULL )
00237         {    // reset ts_top to beginning of next block
00238             task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
00239         }
00240         else
00241         {   // Alloc new block and link it up
00242             kmp_stack_block_t *new_block = (kmp_stack_block_t *)
00243               __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
00244 
00245             task_stack -> ts_top  = & new_block -> sb_block[0];
00246             stack_block -> sb_next = new_block;
00247             new_block  -> sb_prev = stack_block;
00248             new_block  -> sb_next = NULL;
00249 
00250             KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
00251                           gtid, tied_task, new_block ) );
00252         }
00253     }
00254     KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
00255 }
00256 
00257 //---------------------------------------------------------------------------
00258 //  __kmp_pop_task_stack: Pop the tied task from the task stack.  Don't return
00259 //     the task, just check to make sure it matches the ending task passed in.
00260 //
00261 //  gtid: global thread identifier for the calling thread
00262 //  thread: thread info structure containing stack
00263 //  tied_task: the task popped off the stack
00264 //  ending_task: the task that is ending (should match popped task)
00265 
00266 static void
00267 __kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
00268 {
00269     // GEH - need to consider what to do if tt_threads_data not allocated yet
00270     kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
00271     kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
00272     kmp_taskdata_t *tied_task;
00273 
00274     if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
00275         return;  // Don't pop anything from stack if team or team tasks are serialized
00276     }
00277 
00278     KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
00279     KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
00280 
00281     KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
00282 
00283     // fix up ts_top if we need to pop from previous block
00284     if ( ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK ) == 0 )
00285     {
00286         kmp_stack_block_t *stack_block =
00287            (kmp_stack_block_t *) (task_stack -> ts_top) ;
00288 
00289         stack_block = stack_block -> sb_prev;
00290         task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
00291     }
00292 
00293     // finish bookkeeping
00294     task_stack -> ts_top--;
00295     task_stack -> ts_entries--;
00296 
00297     tied_task = * (task_stack -> ts_top );
00298 
00299     KMP_DEBUG_ASSERT( tied_task != NULL );
00300     KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
00301     KMP_DEBUG_ASSERT( tied_task == ending_task );  // If we built the stack correctly
00302 
00303     KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
00304     return;
00305 }
00306 #endif /* BUILD_TIED_TASK_STACK */
00307 
00308 //---------------------------------------------------
00309 //  __kmp_push_task: Add a task to the thread's deque
00310 
00311 static kmp_int32
00312 __kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
00313 {
00314     kmp_info_t *        thread = __kmp_threads[ gtid ];
00315     kmp_taskdata_t *    taskdata = KMP_TASK_TO_TASKDATA(task);
00316     kmp_task_team_t *   task_team = thread->th.th_task_team;
00317     kmp_int32           tid = __kmp_tid_from_gtid( gtid );
00318     kmp_thread_data_t * thread_data;
00319 
00320     KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
00321 
00322     // The first check avoids building task_team thread data if serialized
00323     if ( taskdata->td_flags.task_serial ) {
00324         KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
00325                        gtid, taskdata ) );
00326         return TASK_NOT_PUSHED;
00327     }
00328 
00329     // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
00330     KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
00331     if ( ! KMP_TASKING_ENABLED( task_team, thread->th.th_task_state ) ) {
00332          __kmp_enable_tasking( task_team, thread );
00333     }
00334     KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
00335     KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
00336 
00337     // Find tasking deque specific to encountering thread
00338     thread_data = & task_team -> tt.tt_threads_data[ tid ];
00339 
00340     // No lock needed since only owner can allocate
00341     if (thread_data -> td.td_deque == NULL ) {
00342         __kmp_alloc_task_deque( thread, thread_data );
00343     }
00344 
00345     // Check if deque is full
00346     if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
00347     {
00348         KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
00349                        gtid, taskdata ) );
00350         return TASK_NOT_PUSHED;
00351     }
00352 
00353     // Lock the deque for the task push operation
00354     __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
00355 
00356     // Must have room since only the calling thread can add tasks to this deque
00357     KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
00358 
00359     thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;  // Push taskdata
00360     // Wrap index.
00361     thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
00362     TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);             // Adjust task count
00363 
00364     __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
00365 
00366     KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
00367                   "task=%p ntasks=%d head=%u tail=%u\n",
00368                   gtid, taskdata, thread_data->td.td_deque_ntasks,
00369                   thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );
00370 
00371     return TASK_SUCCESSFULLY_PUSHED;
00372 }
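/*
 * Note on the deque indexing above: the head/tail updates assume the deque
 * macros in kmp.h form a power-of-two ring buffer, i.e. TASK_DEQUE_MASK ==
 * TASK_DEQUE_SIZE - 1, so that "( tail + 1 ) & TASK_DEQUE_MASK" wraps the
 * index.  For example, with TASK_DEQUE_SIZE == 256 a tail of 255 wraps back
 * to 0 on the next push.
 */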
00373 
00374 
00375 //-----------------------------------------------------------------------------------------
00376 // __kmp_pop_current_task_from_thread: restore the current task of the given thread to its parent when the team ends
00377 // this_thr: thread structure to set current_task in.
00378 
00379 void
00380 __kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
00381 {
00382     KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
00383                    "curtask_parent=%p\n",
00384                    0, this_thr, this_thr -> th.th_current_task,
00385                    this_thr -> th.th_current_task -> td_parent ) );
00386 
00387     this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
00388 
00389     KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
00390                    "curtask_parent=%p\n",
00391                    0, this_thr, this_thr -> th.th_current_task,
00392                    this_thr -> th.th_current_task -> td_parent ) );
00393 }
00394 
00395 
00396 //---------------------------------------------------------------------------------------
00397 // __kmp_push_current_task_to_thread: set up the current task in the given thread for a new team
00398 // this_thr: thread structure to set up
00399 // team: team for implicit task data
00400 // tid: thread within team to set up
00401 
00402 void
00403 __kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
00404 {
00405     // The current task of this thread is the parent of the newly created implicit tasks of the new team
00406     KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
00407                     "parent_task=%p\n",
00408                     tid, this_thr, this_thr->th.th_current_task,
00409                     team->t.t_implicit_task_taskdata[tid].td_parent ) );
00410 
00411     KMP_DEBUG_ASSERT (this_thr != NULL);
00412 
00413     if( tid == 0 ) {
00414         if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
00415             team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
00416             this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
00417         }
00418     } else {
00419         team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
00420         this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
00421     }
00422 
00423     KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
00424                     "parent_task=%p\n",
00425                     tid, this_thr, this_thr->th.th_current_task,
00426                     team->t.t_implicit_task_taskdata[tid].td_parent ) );
00427 }
00428 
00429 
00430 //----------------------------------------------------------------------
00431 // __kmp_task_start: bookkeeping for a task starting execution
00432 // GTID: global thread id of calling thread
00433 // task: task starting execution
00434 // current_task: task suspending
00435 
00436 static void
00437 __kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
00438 {
00439     kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
00440     kmp_info_t * thread = __kmp_threads[ gtid ];
00441 
00442     KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
00443                   gtid, taskdata, current_task) );
00444 
00445     KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
00446 
00447     // mark currently executing task as suspended
00448     // TODO: GEH - make sure root team implicit task is initialized properly.
00449     // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
00450     current_task -> td_flags.executing = 0;
00451 
00452     // Add task to stack if tied
00453 #ifdef BUILD_TIED_TASK_STACK
00454     if ( taskdata -> td_flags.tiedness == TASK_TIED )
00455     {
00456         __kmp_push_task_stack( gtid, thread, taskdata );
00457     }
00458 #endif /* BUILD_TIED_TASK_STACK */
00459 
00460     // mark starting task as executing and as current task
00461     thread -> th.th_current_task = taskdata;
00462 
00463     KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
00464     KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
00465     taskdata -> td_flags.started = 1;
00466     taskdata -> td_flags.executing = 1;
00467     KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
00468     KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
00469 
00470     // GEH TODO: shouldn't we pass some sort of location identifier here?
00471     // APT: yes, we will pass location here.
00472     // need to store current thread state (in a thread or taskdata structure)
00473     // before setting work_state, otherwise wrong state is set after end of task
00474 
00475     KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
00476                   gtid, taskdata ) );
00477 
00478 #if OMPT_SUPPORT
00479     if ( ompt_status == ompt_status_track_callback ) {
00480         if ( ompt_callbacks.ompt_callback(ompt_event_task_create) ) {
00481             kmp_taskdata_t *parent = current_task->td_parent;
00482             ompt_callbacks.ompt_callback(ompt_event_task_create)(
00483                 parent ? parent->ompt_task_info.task_id : ompt_task_id_none,
00484                 parent ? &(parent->ompt_task_info.frame) : NULL,
00485                 current_task->ompt_task_info.task_id,
00486                 (void *) task->routine );
00487         }
00488     }
00489 #endif
00490 
00491     return;
00492 }
00493 
00494 
00495 //----------------------------------------------------------------------
00496 // __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
00497 // loc_ref: source location information; points to beginning of task block.
00498 // gtid: global thread number.
00499 // task: task thunk for the started task.
00500 
00501 void
00502 __kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
00503 {
00504     kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
00505     kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
00506 
00507     KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
00508                   gtid, loc_ref, taskdata, current_task ) );
00509 
00510     taskdata -> td_flags.task_serial = 1;  // Execute this task immediately, not deferred.
00511     __kmp_task_start( gtid, task, current_task );
00512 
00513     KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
00514                   gtid, loc_ref, taskdata ) );
00515 
00516     return;
00517 }
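/*
 * Illustrative sketch (an assumption about compiler lowering, not code taken
 * from this runtime): an undeferred task such as "#pragma omp task if(0)" is
 * expected to pair the two if0 entry points around an inline invocation of
 * the task body.  "loc", "gtid", "flags", the size arguments and "task_entry"
 * are placeholders.
 *
 *     kmp_task_t *task = __kmpc_omp_task_alloc( &loc, gtid, flags,
 *                            sizeof_kmp_task_t, sizeof_shareds, &task_entry );
 *     __kmpc_omp_task_begin_if0( &loc, gtid, task );     // start bookkeeping
 *     task_entry( gtid, task );                          // run the body inline
 *     __kmpc_omp_task_complete_if0( &loc, gtid, task );  // finish and free
 */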
00518 
00519 #ifdef TASK_UNUSED
00520 //----------------------------------------------------------------------
00521 // __kmpc_omp_task_begin: report that a given task has started execution
00522 // NEVER GENERATED BY COMPILER, DEPRECATED!!!
00523 
00524 void
00525 __kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
00526 {
00527     kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
00528 
00529     KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
00530                   gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
00531 
00532     __kmp_task_start( gtid, task, current_task );
00533 
00534     KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
00535                   gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
00536 
00537     return;
00538 }
00539 #endif // TASK_UNUSED
00540 
00541 
00542 //-------------------------------------------------------------------------------------
00543 // __kmp_free_task: free the current task space and the space for shareds
00544 // gtid: Global thread ID of calling thread
00545 // taskdata: task to free
00546 // thread: thread data structure of caller
00547 
00548 static void
00549 __kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
00550 {
00551     KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
00552                   gtid, taskdata) );
00553 
00554     // Check to make sure all flags and counters have the correct values
00555     KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
00556     KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
00557     KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
00558     KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
00559     KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0  || taskdata->td_flags.task_serial == 1);
00560     KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
00561 
00562     taskdata->td_flags.freed = 1;
00563     // deallocate the taskdata and shared variable blocks associated with this task
00564     #if USE_FAST_MEMORY
00565         __kmp_fast_free( thread, taskdata );
00566     #else /* ! USE_FAST_MEMORY */
00567         __kmp_thread_free( thread, taskdata );
00568     #endif
00569 
00570     KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
00571                   gtid, taskdata) );
00572 }
00573 
00574 //-------------------------------------------------------------------------------------
00575 // __kmp_free_task_and_ancestors: free the current task and ancestors without children
00576 //
00577 // gtid: Global thread ID of calling thread
00578 // taskdata: task to free
00579 // thread: thread data structure of caller
00580 
00581 static void
00582 __kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
00583 {
00584     kmp_int32 children = 0;
00585     kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
00586 
00587     KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
00588 
00589     if ( !team_or_tasking_serialized ) {
00590         children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
00591         KMP_DEBUG_ASSERT( children >= 0 );
00592     }
00593 
00594     // Now, go up the ancestor tree to see if any ancestors can now be freed.
00595     while ( children == 0 )
00596     {
00597         kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
00598 
00599         KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
00600                       "and freeing itself\n", gtid, taskdata) );
00601 
00602         // --- Deallocate my ancestor task ---
00603         __kmp_free_task( gtid, taskdata, thread );
00604 
00605         taskdata = parent_taskdata;
00606 
00607         // Stop checking ancestors at the implicit task (or if tasking is serialized)
00608         // rather than walking further up the ancestor tree, to avoid premature deallocation of ancestors.
00609         if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
00610             return;
00611 
00612         if ( !team_or_tasking_serialized ) {
00613             // Predecrement simulated by "- 1" calculation
00614             children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
00615             KMP_DEBUG_ASSERT( children >= 0 );
00616         }
00617     }
00618 
00619     KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
00620                   "not freeing it yet\n", gtid, taskdata, children) );
00621 }
00622 
00623 //---------------------------------------------------------------------
00624 // __kmp_task_finish: bookkeeping to do when a task finishes execution
00625 // gtid: global thread ID for calling thread
00626 // task: task to be finished
00627 // resumed_task: task to be resumed.  (may be NULL if task is serialized)
00628 
00629 static void
00630 __kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
00631 {
00632     kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
00633     kmp_info_t * thread = __kmp_threads[ gtid ];
00634     kmp_int32 children = 0;
00635 
00636 #if OMPT_SUPPORT
00637     if ( ompt_status == ompt_status_track_callback ) {
00638         if ( ompt_callbacks.ompt_callback(ompt_event_task_exit) ) {
00639             ompt_callbacks.ompt_callback(ompt_event_task_exit)( taskdata->ompt_task_info.task_id );
00640         }
00641     }
00642 #endif
00643 
00644     KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
00645                   gtid, taskdata, resumed_task) );
00646 
00647     KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
00648 
00649     // Pop task from stack if tied
00650 #ifdef BUILD_TIED_TASK_STACK
00651     if ( taskdata -> td_flags.tiedness == TASK_TIED )
00652     {
00653         __kmp_pop_task_stack( gtid, thread, taskdata );
00654     }
00655 #endif /* BUILD_TIED_TASK_STACK */
00656 
00657     KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
00658     KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
00659     taskdata -> td_flags.executing = 0;  // suspend the finishing task
00660     taskdata -> td_flags.complete = 1;   // mark the task as completed
00661     KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
00662     KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
00663 
00664     // Only need to keep track of count if team parallel and tasking not serialized
00665     if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
00666         // Predecrement simulated by "- 1" calculation
00667         children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
00668         KMP_DEBUG_ASSERT( children >= 0 );
00669 #if OMP_40_ENABLED
00670         if ( taskdata->td_taskgroup )
00671             KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
00672 #endif
00673     }
00674 
00675     KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
00676                   gtid, taskdata, children) );
00677 
00678     // bookkeeping for resuming task:
00679     // GEH - note tasking_ser => task_serial
00680     KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
00681                        taskdata->td_flags.task_serial);
00682     if ( taskdata->td_flags.task_serial )
00683     {
00684         if (resumed_task == NULL) {
00685             resumed_task = taskdata->td_parent;  // In a serialized task, the resumed task is the parent
00686         }
00687         else {
00688             // verify resumed task passed in points to parent
00689             KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
00690         }
00691     }
00692     else {
00693         KMP_DEBUG_ASSERT( resumed_task != NULL );        // verify that resumed task is passed as argument
00694     }
00695 
00696     // Free this task and then ancestor tasks if they have no children.
00697     __kmp_free_task_and_ancestors(gtid, taskdata, thread);
00698 
00699     __kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task
00700 
00701     // TODO: GEH - make sure root team implicit task is initialized properly.
00702     // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
00703     resumed_task->td_flags.executing = 1;  // resume previous task
00704 
00705     KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
00706                   gtid, taskdata, resumed_task) );
00707 
00708     return;
00709 }
00710 
00711 //---------------------------------------------------------------------
00712 // __kmpc_omp_task_complete_if0: report that a task has completed execution
00713 // loc_ref: source location information; points to end of task block.
00714 // gtid: global thread number.
00715 // task: task thunk for the completed task.
00716 
00717 void
00718 __kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
00719 {
00720     KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
00721                   gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
00722 
00723     __kmp_task_finish( gtid, task, NULL );  // this routine will provide task to resume
00724 
00725     KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
00726                   gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
00727 
00728     return;
00729 }
00730 
00731 #ifdef TASK_UNUSED
00732 //---------------------------------------------------------------------
00733 // __kmpc_omp_task_complete: report that a task has completed execution
00734 // NEVER GENERATED BY COMPILER, DEPRECATED!!!
00735 
00736 void
00737 __kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
00738 {
00739     KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
00740                   gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
00741 
00742     __kmp_task_finish( gtid, task, NULL );  // Not sure how to find task to resume
00743 
00744     KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
00745                   gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
00746     return;
00747 }
00748 #endif // TASK_UNUSED
00749 
00750 
00751 //----------------------------------------------------------------------------------------------------
00752 // __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
00753 //
00754 // loc_ref:  reference to source location of parallel region
00755 // this_thr:  thread data structure corresponding to implicit task
00756 // team: team for this_thr
00757 // tid: thread id of given thread within team
00758 // set_curr_task: TRUE if need to push current task to thread
00759 // NOTE: Routine does not set up the implicit task ICVs.  This is assumed to have already been done elsewhere.
00760 // TODO: Get better loc_ref.  Value passed in may be NULL
00761 
00762 void
00763 __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
00764 {
00765     kmp_taskdata_t * task   = & team->t.t_implicit_task_taskdata[ tid ];
00766 
00767     KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
00768                   tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
00769 
00770     task->td_task_id  = KMP_GEN_TASK_ID();
00771     task->td_team     = team;
00772 //    task->td_parent   = NULL;  // fix for CQ230101 (broken parent task info in debugger)
00773     task->td_ident    = loc_ref;
00774     task->td_taskwait_ident   = NULL;
00775     task->td_taskwait_counter = 0;
00776     task->td_taskwait_thread  = 0;
00777 
00778     task->td_flags.tiedness    = TASK_TIED;
00779     task->td_flags.tasktype    = TASK_IMPLICIT;
00780     // All implicit tasks are executed immediately, not deferred
00781     task->td_flags.task_serial = 1;
00782     task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
00783     task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
00784 
00785     task->td_flags.started     = 1;
00786     task->td_flags.executing   = 1;
00787     task->td_flags.complete    = 0;
00788     task->td_flags.freed       = 0;
00789 
00790     if (set_curr_task) {  // only do this initialization the first time a thread is created
00791         task->td_incomplete_child_tasks = 0;
00792         task->td_allocated_child_tasks  = 0; // Not used because do not need to deallocate implicit task
00793 #if OMP_40_ENABLED
00794         task->td_taskgroup = NULL;           // An implicit task does not have taskgroup
00795 #endif
00796         __kmp_push_current_task_to_thread( this_thr, team, tid );
00797     } else {
00798         KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
00799         KMP_DEBUG_ASSERT(task->td_allocated_child_tasks  == 0);
00800     }
00801 
00802     KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
00803                   tid, team, task ) );
00804 }
00805 
00806 // Round up a size to a multiple of val, where val is a power of two
00807 // Used to insert padding between structures co-allocated using a single malloc() call
00808 static size_t
00809 __kmp_round_up_to_val( size_t size, size_t val ) {
00810     if ( size & ( val - 1 ) ) {
00811         size &= ~ ( val - 1 );
00812         if ( size <= KMP_SIZE_T_MAX - val ) {
00813             size += val;    // Round up if there is no overflow.
00814         }; // if
00815     }; // if
00816     return size;
00817 } // __kmp_round_up_to_val
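// For example, __kmp_round_up_to_val( 13, sizeof( void * ) ) yields 16 on a
// 64-bit target: 13 & 7 is nonzero, so 13 is truncated to 8 and then bumped
// by val to 16.  A size already a multiple of val is returned unchanged.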
00818 
00819 
00820 //---------------------------------------------------------------------------------
00821 // __kmp_task_alloc: Allocate the taskdata and task data structures for a task
00822 //
00823 // loc_ref: source location information
00824 // gtid: global thread number.
00825 // flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
00826 //        Converted from kmp_int32 to kmp_tasking_flags_t in routine.
00827 // sizeof_kmp_task_t:  Size in bytes of kmp_task_t data structure including private vars accessed in task.
00828 // sizeof_shareds:  Size in bytes of array of pointers to shared vars accessed in task.
00829 // task_entry: Pointer to task code entry point generated by compiler.
00830 // returns: a pointer to the allocated kmp_task_t structure (task).
00831 
00832 kmp_task_t *
00833 __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
00834                   size_t sizeof_kmp_task_t, size_t sizeof_shareds,
00835                   kmp_routine_entry_t task_entry )
00836 {
00837     kmp_task_t *task;
00838     kmp_taskdata_t *taskdata;
00839     kmp_info_t *thread = __kmp_threads[ gtid ];
00840     kmp_team_t *team = thread->th.th_team;
00841     kmp_taskdata_t *parent_task = thread->th.th_current_task;
00842     size_t shareds_offset;
00843 
00844     KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
00845                   "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
00846                   gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
00847                   sizeof_shareds, task_entry) );
00848 
00849     if ( parent_task->td_flags.final ) {
00850         if (flags->merged_if0) {
00851         }
00852         flags->final = 1;
00853     }
00854 
00855     // Calculate shared structure offset including padding after kmp_task_t struct
00856     // to align pointers in shared struct
00857     shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
00858     shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
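    // The single allocation below is laid out as (offsets from taskdata):
    //   [ 0 ]                      kmp_taskdata_t (runtime bookkeeping)
    //   [ sizeof(kmp_taskdata_t) ] kmp_task_t plus compiler-private data (sizeof_kmp_task_t bytes),
    //                              returned via KMP_TASKDATA_TO_TASK()
    //   [ shareds_offset ]         pointers to shared vars (sizeof_shareds bytes), pointer-aligned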
00859 
00860     // Allocate a kmp_taskdata_t block and a kmp_task_t block.
00861     KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
00862                   gtid, shareds_offset) );
00863     KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
00864                   gtid, sizeof_shareds) );
00865 
00866     // Avoid double allocation here by combining shareds with taskdata
00867     #if USE_FAST_MEMORY
00868     taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
00869     #else /* ! USE_FAST_MEMORY */
00870     taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
00871     #endif /* USE_FAST_MEMORY */
00872 
00873     task                      = KMP_TASKDATA_TO_TASK(taskdata);
00874 
00875     // Make sure task & taskdata are aligned appropriately
00876 #if KMP_ARCH_X86
00877     KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
00878     KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
00879 #else
00880     KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
00881     KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
00882 #endif
00883     if (sizeof_shareds > 0) {
00884         // Avoid double allocation here by combining shareds with taskdata
00885         task->shareds         = & ((char *) taskdata)[ shareds_offset ];
00886         // Make sure shareds struct is aligned to pointer size
00887         KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
00888     } else {
00889         task->shareds         = NULL;
00890     }
00891     task->routine             = task_entry;
00892     task->part_id             = 0;      // AC: Always start with 0 part id
00893 
00894     taskdata->td_task_id      = KMP_GEN_TASK_ID();
00895     taskdata->td_team         = team;
00896     taskdata->td_alloc_thread = thread; 
00897     taskdata->td_parent       = parent_task;
00898     taskdata->td_level        = parent_task->td_level + 1; // increment nesting level
00899     taskdata->td_ident        = loc_ref;
00900     taskdata->td_taskwait_ident   = NULL;
00901     taskdata->td_taskwait_counter = 0;
00902     taskdata->td_taskwait_thread  = 0;
00903     KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
00904     copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
00905 
00906     taskdata->td_flags.tiedness    = flags->tiedness;
00907     taskdata->td_flags.final       = flags->final;
00908     taskdata->td_flags.merged_if0  = flags->merged_if0;
00909     taskdata->td_flags.tasktype    = TASK_EXPLICIT;
00910 
00911     // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
00912     taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
00913 
00914     // GEH - TODO: fix this to copy parent task's value of team_serial flag
00915     taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
00916 
00917     // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
00918     //       tasks are not left until program termination to execute.  Also, it helps locality to execute
00919     //       immediately.
00920     taskdata->td_flags.task_serial = ( taskdata->td_flags.final
00921       || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
00922 
00923     taskdata->td_flags.started     = 0;
00924     taskdata->td_flags.executing   = 0;
00925     taskdata->td_flags.complete    = 0;
00926     taskdata->td_flags.freed       = 0;
00927 
00928     taskdata->td_flags.native      = flags->native;
00929 
00930     taskdata->td_incomplete_child_tasks = 0;
00931     taskdata->td_allocated_child_tasks  = 1; // start at one because counts current task and children
00932 #if OMP_40_ENABLED
00933     taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
00934 #endif
00935     // Only need to keep track of child task counts if team parallel and tasking not serialized
00936     if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
00937         KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
00938 #if OMP_40_ENABLED
00939         if ( parent_task->td_taskgroup )
00940             KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
00941 #endif
00942         // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
00943         if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
00944             KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
00945         }
00946     }
00947 
00948     KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
00949                   gtid, taskdata, taskdata->td_parent) );
00950 
00951     return task;
00952 }
00953 
00954 
00955 kmp_task_t *
00956 __kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
00957                        size_t sizeof_kmp_task_t, size_t sizeof_shareds,
00958                        kmp_routine_entry_t task_entry )
00959 {
00960     kmp_task_t *retval;
00961     kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
00962 
00963     input_flags->native = FALSE;
00964     // __kmp_task_alloc() sets up all other runtime flags
00965 
00966     KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
00967                   "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
00968                   gtid, loc_ref, input_flags->tiedness ? "tied  " : "untied",
00969                   sizeof_kmp_task_t, sizeof_shareds, task_entry) );
00970 
00971     retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
00972                                sizeof_shareds, task_entry );
00973 
00974     KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
00975 
00976     return retval;
00977 }
00978 
00979 //-----------------------------------------------------------
00980 //  __kmp_invoke_task: invoke the specified task
00981 //
00982 // gtid: global thread ID of caller
00983 // task: the task to invoke
00984 // current_task: the task to resume after task invocation
00985 
00986 static void
00987 __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
00988 {
00989     kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
00990     KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
00991                   gtid, taskdata, current_task) );
00992 
00993     __kmp_task_start( gtid, task, current_task );
00994 
00995     //
00996     // Invoke the task routine and pass in relevant data.
00997     // Thunks generated by gcc take a different argument list.
00998     //
00999 #ifdef KMP_GOMP_COMPAT
01000     if (taskdata->td_flags.native) {
01001         ((void (*)(void *))(*(task->routine)))(task->shareds);
01002     }
01003     else
01004 #endif /* KMP_GOMP_COMPAT */
01005     {
01006         (*(task->routine))(gtid, task);
01007     }
01008 
01009     __kmp_task_finish( gtid, task, current_task );
01010 
01011     KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n",
01012                   gtid, taskdata, current_task) );
01013     return;
01014 }
01015 
01016 //-----------------------------------------------------------------------
01017 // __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
01018 //
01019 // loc_ref: location of original task pragma (ignored)
01020 // gtid: Global Thread ID of encountering thread
01021 // new_task: task thunk allocated by __kmpc_omp_task_alloc() for the ''new task''
01022 // Returns:
01023 //    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
01024 //    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
01025 
01026 kmp_int32
01027 __kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
01028 {
01029     kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
01030 
01031     KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
01032                   gtid, loc_ref, new_taskdata ) );
01033 
01034     /* Should we execute the new task or queue it?   For now, let's just always try to
01035        queue it.  If the queue fills up, then we'll execute it.  */
01036 
01037     if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
01038     {                                                           // Execute this task immediately
01039         kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
01040         new_taskdata->td_flags.task_serial = 1;
01041         __kmp_invoke_task( gtid, new_task, current_task );
01042     }
01043 
01044     KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
01045                   "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
01046                   new_taskdata ) );
01047 
01048     return TASK_CURRENT_NOT_QUEUED;
01049 }
01050 
01051 
01052 //---------------------------------------------------------------------
01053 // __kmpc_omp_task: Schedule a non-thread-switchable task for execution
01054 // loc_ref: location of original task pragma (ignored)
01055 // gtid: Global Thread ID of encountering thread
01056 // new_task: non-thread-switchable task thunk allocated by __kmpc_omp_task_alloc()
01057 // returns:
01058 //
01059 //    TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
01060 //    TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
01061 
01062 kmp_int32
01063 __kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
01064 {
01065     kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
01066     kmp_int32 rc;
01067 
01068     KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
01069                   gtid, loc_ref, new_taskdata ) );
01070 
01071     /* Should we execute the new task or queue it?   For now, let's just always try to
01072        queue it.  If the queue fills up, then we'll execute it.  */
01073 
01074     if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
01075     {                                                           // Execute this task immediately
01076         kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
01077         new_taskdata -> td_flags.task_serial = 1;
01078         __kmp_invoke_task( gtid, new_task, current_task );
01079     }
01080 
01081     KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
01082                   gtid, loc_ref, new_taskdata ) );
01083 
01084     return TASK_CURRENT_NOT_QUEUED;
01085 }
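/*
 * Illustrative sketch (an assumption about how a compiler may lower a plain
 * "#pragma omp task" onto the entry points above; "loc", "gtid", "flags",
 * "task_entry", "my_shareds_t" and "x" are placeholders):
 *
 *     kmp_task_t *t = __kmpc_omp_task_alloc( &loc, gtid, flags,
 *                         sizeof_kmp_task_t, sizeof(my_shareds_t), &task_entry );
 *     ((my_shareds_t *) t->shareds)->x_ptr = &x;   // wire up shared variables
 *     __kmpc_omp_task( &loc, gtid, t );            // deferred if the deque has room,
 *                                                  // otherwise invoked immediately
 */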
01086 
01087 
01088 //-------------------------------------------------------------------------------------
01089 // __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
01090 
01091 kmp_int32
01092 __kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
01093 {
01094     kmp_taskdata_t * taskdata;
01095     kmp_info_t * thread;
01096     int thread_finished = FALSE;
01097 
01098     KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n",
01099                   gtid, loc_ref) );
01100 
01101     if ( __kmp_tasking_mode != tskm_immediate_exec ) {
01102         // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
01103 
01104         thread = __kmp_threads[ gtid ];
01105         taskdata = thread -> th.th_current_task;
01106         taskdata->td_taskwait_counter += 1;
01107         taskdata->td_taskwait_ident    = loc_ref;
01108         taskdata->td_taskwait_thread   = gtid + 1;
01109 
01110 
01111         if ( ! taskdata->td_flags.team_serial ) {
01112             // GEH: if team serialized, avoid reading the volatile variable below.
01113             while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
01114                 __kmp_execute_tasks( thread, gtid, &(taskdata->td_incomplete_child_tasks),
01115                                      0, FALSE, &thread_finished, 
01116                                      __kmp_task_stealing_constraint );
01117             }
01118         }
01119 
01120         // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
01121         taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
01122     }
01123 
01124     KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
01125                   "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
01126 
01127     return TASK_CURRENT_NOT_QUEUED;
01128 }
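/*
 * Presumably emitted as a single call at "#pragma omp taskwait":
 *
 *     __kmpc_omp_taskwait( &loc, gtid );
 *
 * While waiting, the calling thread helps drain its own children through
 * __kmp_execute_tasks() until td_incomplete_child_tasks reaches zero.
 */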
01129 
01130 
01131 //-------------------------------------------------
01132 // __kmpc_omp_taskyield: switch to a different task
01133 
01134 kmp_int32
01135 __kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
01136 {
01137     kmp_taskdata_t * taskdata;
01138     kmp_info_t * thread;
01139     int thread_finished = FALSE;
01140 
01141     KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
01142                   gtid, loc_ref, end_part) );
01143 
01144     if ( __kmp_tasking_mode != tskm_immediate_exec ) {
01145         // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
01146 
01147         thread = __kmp_threads[ gtid ];
01148         taskdata = thread -> th.th_current_task;
01149         // Should we model this as a task wait or not?
01150         taskdata->td_taskwait_counter += 1;
01151         taskdata->td_taskwait_ident    = loc_ref;
01152         taskdata->td_taskwait_thread   = gtid + 1;
01153 
01154         if ( ! taskdata->td_flags.team_serial ) {
01155             __kmp_execute_tasks( thread, gtid, NULL, 0, FALSE, &thread_finished,
01156                                  __kmp_task_stealing_constraint );
01157         }
01158 
01159 
01160         // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
01161         taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
01162     }
01163 
01164     KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
01165                   "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
01166 
01167     return TASK_CURRENT_NOT_QUEUED;
01168 }
01169 
01170 
01171 #if OMP_40_ENABLED
01172 //-------------------------------------------------------------------------------------
01173 // __kmpc_taskgroup: Start a new taskgroup
01174 
01175 void
01176 __kmpc_taskgroup( ident_t* loc, int gtid )
01177 {
01178     kmp_info_t      * thread = __kmp_threads[ gtid ];
01179     kmp_taskdata_t  * taskdata = thread->th.th_current_task;
01180     kmp_taskgroup_t * tg_new =
01181         (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
01182     KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
01183     tg_new->count = 0;
01184     tg_new->parent = taskdata->td_taskgroup;
01185     taskdata->td_taskgroup = tg_new;
01186 }
01187 
01188 
01189 //-------------------------------------------------------------------------------------
01190 // __kmpc_end_taskgroup: Wait until all tasks generated by the current task
01191 //                       and its descendants are complete
01192 
01193 void
01194 __kmpc_end_taskgroup( ident_t* loc, int gtid )
01195 {
01196     kmp_info_t      * thread = __kmp_threads[ gtid ];
01197     kmp_taskdata_t  * taskdata = thread->th.th_current_task;
01198     kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
01199     int thread_finished = FALSE;
01200 
01201     KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
01202     KMP_DEBUG_ASSERT( taskgroup != NULL );
01203 
01204     if ( __kmp_tasking_mode != tskm_immediate_exec ) {
01205 
01206         if ( ! taskdata->td_flags.team_serial ) {
01207             while ( TCR_4(taskgroup->count) != 0 ) {
01208                 __kmp_execute_tasks( thread, gtid, &(taskgroup->count),
01209                                      0, FALSE, &thread_finished, 
01210                                      __kmp_task_stealing_constraint );
01211             }
01212         }
01213 
01214     }
01215     KMP_DEBUG_ASSERT( taskgroup->count == 0 );
01216 
01217     // Restore parent taskgroup for the current task
01218     taskdata->td_taskgroup = taskgroup->parent;
01219     __kmp_thread_free( thread, taskgroup );
01220 
01221     KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
01222 }
01223 #endif
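/*
 * Illustrative pairing (an assumption about compiler lowering of
 * "#pragma omp taskgroup"; "loc" and "gtid" are placeholders):
 *
 *     __kmpc_taskgroup( &loc, gtid );        // push a new kmp_taskgroup_t
 *     ... child tasks generated here ...
 *     __kmpc_end_taskgroup( &loc, gtid );    // help execute tasks until count == 0, then pop
 */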
01224 
01225 
01226 //------------------------------------------------------
01227 // __kmp_remove_my_task: remove a task from my own deque
01228 
01229 static kmp_task_t *
01230 __kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
01231                       kmp_int32 is_constrained )
01232 {
01233     kmp_task_t * task;
01234     kmp_taskdata_t * taskdata;
01235     kmp_thread_data_t *thread_data;
01236     kmp_uint32 tail;
01237 
01238     KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
01239     KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
01240 
01241     thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
01242 
01243     KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
01244                   gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
01245                   thread_data->td.td_deque_tail) );
01246 
01247     if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
01248         KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
01249                       gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
01250                       thread_data->td.td_deque_tail) );
01251         return NULL;
01252     }
01253 
01254     __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
01255 
01256     if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
01257         __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
01258         KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
01259                       gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
01260                       thread_data->td.td_deque_tail) );
01261         return NULL;
01262     }
01263 
01264     tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK;  // Wrap index.
01265     taskdata = thread_data -> td.td_deque[ tail ];
01266 
01267     if (is_constrained) {
01268         // We need to check whether the candidate obeys the task scheduling constraint:
01269         // only a child of the current task can be scheduled.
01270         kmp_taskdata_t * current = thread->th.th_current_task;
01271         kmp_int32        level = current->td_level;
01272         kmp_taskdata_t * parent = taskdata->td_parent;
01273         while ( parent != current && parent->td_level > level ) {
01274             parent = parent->td_parent;  // check generation up to the level of the current task
01275             KMP_DEBUG_ASSERT(parent != NULL);
01276         }
01277         if ( parent != current ) {
01278             // If the tail task is not a child, then no other children can appear in the deque.
01279             __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
01280             KA_TRACE(10, ("__kmp_remove_my_task(exit #3): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
01281                           gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
01282                           thread_data->td.td_deque_tail) );
01283             return NULL;
01284         }
01285     }
01286 
01287     thread_data -> td.td_deque_tail = tail;
01288     TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
01289 
01290     __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
01291 
01292     KA_TRACE(10, ("__kmp_remove_my_task(exit #4): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
01293                   gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
01294                   thread_data->td.td_deque_tail) );
01295 
01296     task = KMP_TASKDATA_TO_TASK( taskdata );
01297     return task;
01298 }
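
/*
 * For illustration (a standalone sketch, not part of the original sources):
 * the power-of-two index wrapping used by the deque above.  The DEMO_* and
 * demo_* names are hypothetical stand-ins for TASK_DEQUE_SIZE / TASK_DEQUE_MASK.
 */
#include <stdint.h>

#define DEMO_DEQUE_SIZE 256u                    /* must be a power of two */
#define DEMO_DEQUE_MASK (DEMO_DEQUE_SIZE - 1u)

/* Owner pops from the tail: step back one slot, wrapping 0 -> SIZE-1,
   like "(td_deque_tail - 1) & TASK_DEQUE_MASK" above. */
static uint32_t demo_pop_tail_index( uint32_t tail )  { return (tail - 1u) & DEMO_DEQUE_MASK; }

/* A thief advances the head: step forward one slot, wrapping SIZE-1 -> 0,
   like "(td_deque_head + 1) & TASK_DEQUE_MASK" in __kmp_steal_task() below. */
static uint32_t demo_bump_head_index( uint32_t head ) { return (head + 1u) & DEMO_DEQUE_MASK; }
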
01299 
01300 
01301 //-----------------------------------------------------------
01302 // __kmp_steal_task: remove a task from another thread's deque
01303 // Assumes that the calling thread has already checked that the task_team's
01304 // thread_data exists before calling this routine.
01305 
01306 static kmp_task_t *
01307 __kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
01308                   volatile kmp_uint32 *unfinished_threads, int *thread_finished,
01309                   kmp_int32 is_constrained )
01310 {
01311     kmp_task_t * task;
01312     kmp_taskdata_t * taskdata;
01313     kmp_thread_data_t *victim_td, *threads_data;
01314     kmp_int32 victim_tid, thread_tid;
01315 
01316     KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
01317 
01318     threads_data = task_team -> tt.tt_threads_data;
01319     KMP_DEBUG_ASSERT( threads_data != NULL );  // Caller should check this condition
01320 
01321     victim_tid = victim->th.th_info.ds.ds_tid;
01322     victim_td = & threads_data[ victim_tid ];
01323 
01324     KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
01325                   "head=%u tail=%u\n",
01326                   gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
01327                   victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
01328 
01329     if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
01330          (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
01331     {
01332         KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
01333                       "ntasks=%d head=%u tail=%u\n",
01334                       gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
01335                       victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
01336         return NULL;
01337     }
01338 
01339     __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
01340 
01341     // Check again after we acquire the lock
01342     if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
01343          (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
01344     {
01345         __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
01346         KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
01347                       "ntasks=%d head=%u tail=%u\n",
01348                       gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
01349                       victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
01350         return NULL;
01351     }
01352 
01353     KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
01354 
01355     if ( !is_constrained ) {
01356         taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
01357         // Bump head pointer and Wrap.
01358         victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
01359     } else {
01360         // When stealing is constrained, take from the tail of the deque (the most recently pushed, presumably smaller tasks)
01361         kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK;  // Wrap index.
01362         taskdata = victim_td -> td.td_deque[ tail ];
01363         // We need to check whether the candidate obeys the task scheduling constraint:
01364         // only a child of the current task can be scheduled.
01365         kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
01366         kmp_int32        level = current->td_level;
01367         kmp_taskdata_t * parent = taskdata->td_parent;
01368         while ( parent != current && parent->td_level > level ) {
01369             parent = parent->td_parent;  // check generation up to the level of the current task
01370             KMP_DEBUG_ASSERT(parent != NULL);
01371         }
01372         if ( parent != current ) {
01373             // If the tail task is not a child, then no other children can appear in the deque (?).
01374             __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
01375             KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d could not steal from T#%d: task_team=%p "
01376                           "ntasks=%d head=%u tail=%u\n",
01377                           gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
01378                           task_team, victim_td->td.td_deque_ntasks,
01379                           victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
01380             return NULL;
01381         }
01382         victim_td -> td.td_deque_tail = tail;
01383     }
01384     if (*thread_finished) {
01385         // We need to un-mark this victim as a finished victim.  This must be done before
01386         // releasing the lock, or else other threads (starting with the master victim)
01387         // might be prematurely released from the barrier!!!
01388         kmp_uint32 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
01389 
01390         KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
01391                       gtid, count + 1, task_team) );
01392 
01393         *thread_finished = FALSE;
01394     }
01395     TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
01396 
01397     __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
01398 
01399     KA_TRACE(10, ("__kmp_steal_task(exit #4): T#%d stole task %p from T#%d: task_team=%p "
01400                   "ntasks=%d head=%u tail=%u\n",
01401                   gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
01402                   victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
01403                   victim_td->td.td_deque_tail) );
01404 
01405     task = KMP_TASKDATA_TO_TASK( taskdata );
01406     return task;
01407 }
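
/*
 * For illustration (a simplified standalone sketch, not part of the original
 * sources): the scheduling-constraint test used by both __kmp_remove_my_task()
 * and __kmp_steal_task() above.  demo_task_t is a hypothetical stand-in for
 * kmp_taskdata_t; only a descendant of the current task passes the test.
 */
#include <stddef.h>

typedef struct demo_task {
    struct demo_task *parent;       /* td_parent analogue */
    int               level;        /* td_level analogue: nesting depth */
} demo_task_t;

static int demo_obeys_constraint( const demo_task_t *candidate,
                                  const demo_task_t *current )
{
    const demo_task_t *p = candidate->parent;
    /* Walk up the ancestry until we meet the current task, or climb above
       its nesting level (in which case the candidate cannot be its descendant). */
    while ( p != current && p != NULL && p->level > current->level ) {
        p = p->parent;
    }
    return p == current;
}
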
01408 
01409 
01410 //-----------------------------------------------------------------------------
01411 // __kmp_execute_tasks: Choose and execute tasks until either the condition
01412 // is satisfied (return true) or there are none left (return false).
01413 // final_spin is TRUE if this is the spin at the release barrier.
01414 // thread_finished indicates whether the thread is finished executing all
01415 // the tasks it has on its deque, and is at the release barrier.
01416 // spinner is the location on which to spin.
01417 // spinner == NULL means only execute a single task and return.
01418 // checker is the value to check to terminate the spin.
01419 
01420 int
01421 __kmp_execute_tasks( kmp_info_t *thread, 
01422                      kmp_int32 gtid, 
01423                      volatile kmp_uint *spinner,
01424                      kmp_uint checker,
01425                      int final_spin, 
01426                      int *thread_finished, 
01427                      kmp_int32 is_constrained )
01428 {
01429     kmp_task_team_t *     task_team;
01430     kmp_team_t *          team;
01431     kmp_thread_data_t *   threads_data;
01432     kmp_task_t *          task;
01433     kmp_taskdata_t *      current_task = thread -> th.th_current_task;
01434     volatile kmp_uint32 * unfinished_threads;
01435     kmp_int32             nthreads, last_stolen, k, tid;
01436 
01437     KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
01438     KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
01439 
01440     task_team = thread -> th.th_task_team;
01441     KMP_DEBUG_ASSERT( task_team != NULL );
01442 
01443     KA_TRACE(15, ("__kmp_execute_tasks(enter): T#%d final_spin=%d *thread_finished=%d\n",
01444                   gtid, final_spin, *thread_finished) );
01445 
01446     threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
01447     KMP_DEBUG_ASSERT( threads_data != NULL );
01448 
01449     nthreads = task_team -> tt.tt_nproc;
01450     unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
01451     KMP_DEBUG_ASSERT( nthreads > 1 );
01452     KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
01453 
01454     // Choose tasks from our own work queue.
01455     start:
01456     while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
01457         __kmp_invoke_task( gtid, task, current_task );
01458 
01459         // If this thread is only partway through the barrier and the condition
01460         // is met, then return now, so that the barrier gather/release pattern can proceed.
01461         // If this thread is in the last spin loop in the barrier, waiting to be
01462         // released, we know that the termination condition will not be satisfied,
01463         // so don't waste any cycles checking it.
01464         if ((spinner == NULL) || ((!final_spin) && (TCR_4(*spinner) == checker))) {
01465             KA_TRACE(15, ("__kmp_execute_tasks(exit #1): T#%d spin condition satisfied\n", gtid) );
01466             return TRUE;
01467         }
01468         KMP_YIELD( __kmp_library == library_throughput );   // Yield before executing next task
01469     }
01470 
01471     // This thread's work queue is empty.  If we are in the final spin loop
01472     // of the barrier, check and see if the termination condition is satisfied.
01473     if (final_spin) {
01474         // First, decrement the #unfinished threads, if that has not already
01475         // been done.  This decrement might be to the spin location, and
01476         // result in the termination condition being satisfied.
01477         if (! *thread_finished) {
01478             kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
01479             KA_TRACE(20, ("__kmp_execute_tasks(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
01480                           gtid, count, task_team) );
01481             *thread_finished = TRUE;
01482         }
01483 
01484         // It is now unsafe to reference thread->th.th_team !!!
01485         // Decrementing task_team->tt.tt_unfinished_threads can allow the master
01486         // thread to pass through the barrier, where it might reset each thread's
01487         // th.th_team field for the next parallel region.
01488         // If we can steal more work, we know that this has not happened yet.
01489         if ((spinner != NULL) && (TCR_4(*spinner) == checker)) {
01490             KA_TRACE(15, ("__kmp_execute_tasks(exit #2): T#%d spin condition satisfied\n", gtid) );
01491             return TRUE;
01492         }
01493     }
01494 
01495     // Try to steal from the last place I stole from successfully.
01496     tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
01497     last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
01498 
01499     if (last_stolen != -1) {
01500         kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
01501 
01502         while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
01503                                          thread_finished, is_constrained )) != NULL)
01504         {
01505             __kmp_invoke_task( gtid, task, current_task );
01506 
01507             // Check to see if this thread can proceed.
01508             if ((spinner == NULL) || ((!final_spin) && (TCR_4(*spinner) == checker))) {
01509                 KA_TRACE(15, ("__kmp_execute_tasks(exit #3): T#%d spin condition satisfied\n",
01510                               gtid) );
01511                 return TRUE;
01512             }
01513 
01514             KMP_YIELD( __kmp_library == library_throughput );   // Yield before executing next task
01515             // If the execution of the stolen task resulted in more tasks being
01516             // placed on our run queue, then restart the whole process.
01517             if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
01518                 KA_TRACE(20, ("__kmp_execute_tasks: T#%d stolen task spawned other tasks, restart\n",
01519                               gtid) );
01520                 goto start;
01521             }
01522         }
01523 
01524         // Don't give priority to stealing from this thread anymore.
01525         threads_data[ tid ].td.td_deque_last_stolen = -1;
01526 
01527         // The victim's work queue is empty.  If we are in the final spin loop
01528         // of the barrier, check and see if the termination condition is satisfied.
01529         if (final_spin) {
01530             // First, decrement the #unfinished threads, if that has not already
01531             // been done.  This decrement might be to the spin location, and
01532             // result in the termination condition being satisfied.
01533             if (! *thread_finished) {
01534                 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
01535                 KA_TRACE(20, ("__kmp_execute_tasks(dec #2): T#%d dec unfinished_threads to %d "
01536                               "task_team=%p\n", gtid, count, task_team) );
01537                 *thread_finished = TRUE;
01538             }
01539 
01540             // If __kmp_tasking_mode != tskm_immediate_exec
01541             // then it is now unsafe to reference thread->th.th_team !!!
01542             // Decrementing task_team->tt.tt_unfinished_threads can allow the master
01543             // thread to pass through the barrier, where it might reset each thread's
01544             // th.th_team field for the next parallel region.
01545             // If we can steal more work, we know that this has not happened yet.
01546             if ((spinner != NULL) && (TCR_4(*spinner) == checker)) {
01547                 KA_TRACE(15, ("__kmp_execute_tasks(exit #4): T#%d spin condition satisfied\n",
01548                               gtid) );
01549                 return TRUE;
01550             }
01551         }
01552     }
01553 
01554     // Find a different thread to steal work from.  Pick a random thread.
01555     // My initial plan was to cycle through all the threads, and only return
01556     // if we tried to steal from every thread, and failed.  Arch says that's
01557     // not such a great idea.
01558     // GEH - need yield code in this loop for throughput library mode?
01559     new_victim:
01560     k = __kmp_get_random( thread ) % (nthreads - 1);
01561     if ( k >= thread -> th.th_info.ds.ds_tid ) {
01562         ++k;               // Adjusts random distribution to exclude self
01563     }
01564     {
01565         kmp_info_t *other_thread = threads_data[k].td.td_thr;
01566         int first;
01567 
01568         // There is a slight chance that __kmp_enable_tasking() did not wake up
01569         // all threads waiting at the barrier.  If this thread is sleeping, then
01570         // wake it up.  Since we were going to pay the cache miss penalty
01571         // for referencing another thread's kmp_info_t struct anyway, the check
01572         // shouldn't cost too much performance at this point.
01573         // In extra barrier mode, threads do not sleep at the separate tasking
01574         // barrier, so this isn't a problem.
01575         if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
01576              (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
01577              (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
01578         {
01579             __kmp_resume( __kmp_gtid_from_thread( other_thread ), NULL );
01580 
01581             // A sleeping thread should not have any tasks on its queue.
01582             // There is a slight possibility that it resumes, steals a task from
01583             // another thread, which spawns more tasks, all in the time that it takes
01584             // this thread to check => don't write an assertion that the victim's
01585             // queue is empty.  Try stealing from a different thread.
01586             goto new_victim;
01587         }
01588 
01589         // Now try to steal work from the selected thread
01590         first = TRUE;
01591         while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
01592                                          thread_finished, is_constrained )) != NULL)
01593         {
01594             __kmp_invoke_task( gtid, task, current_task );
01595 
01596             // Try stealing from this victim again, in the future.
01597             if (first) {
01598                 threads_data[ tid ].td.td_deque_last_stolen = k;
01599                 first = FALSE;
01600             }
01601 
01602             // Check to see if this thread can proceed.
01603             if ((spinner == NULL) || ((!final_spin) && (TCR_4(*spinner) == checker))) {
01604                 KA_TRACE(15, ("__kmp_execute_tasks(exit #5): T#%d spin condition satisfied\n",
01605                               gtid) );
01606                 return TRUE;
01607             }
01608             KMP_YIELD( __kmp_library == library_throughput );   // Yield before executing next task
01609 
01610             // If the execution of the stolen task resulted in more tasks being
01611             // placed on our run queue, then restart the whole process.
01612             if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
01613                 KA_TRACE(20, ("__kmp_execute_tasks: T#%d stolen task spawned other tasks, restart\n",
01614                               gtid) );
01615                 goto start;
01616             }
01617         }
01618 
01619         // The victim's work queue is empty.  If we are in the final spin loop
01620         // of the barrier, check and see if the termination condition is satisfied.
01621         // Going on and finding a new victim to steal from is expensive, as it
01622         // involves a lot of cache misses, so we definitely want to re-check the
01623         // termination condition before doing that.
01624         if (final_spin) {
01625             // First, decrement the #unfinished threads, if that has not already
01626             // been done.  This decrement might be to the spin location, and
01627             // result in the termination condition being satisfied.
01628             if (! *thread_finished) {
01629                 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
01630                 KA_TRACE(20, ("__kmp_execute_tasks(dec #3): T#%d dec unfinished_threads to %d; "
01631                               "task_team=%p\n",
01632                               gtid, count, task_team) );
01633                 *thread_finished = TRUE;
01634             }
01635 
01636             // If __kmp_tasking_mode != tskm_immediate_exec,
01637             // then it is now unsafe to reference thread->th.th_team !!!
01638             // Decrementing task_team->tt.tt_unfinished_threads can allow the master
01639             // thread to pass through the barrier, where it might reset each thread's
01640             // th.th_team field for the next parallel region.
01641             // If we can steal more work, we know that this has not happened yet.
01642             if ((spinner != NULL) && (TCR_4(*spinner) == checker)) {
01643                 KA_TRACE(15, ("__kmp_execute_tasks(exit #6): T#%d spin condition satisfied\n",
01644                               gtid) );
01645                 return TRUE;
01646             }
01647         }
01648     }
01649 
01650     KA_TRACE(15, ("__kmp_execute_tasks(exit #7): T#%d can't find work\n", gtid) );
01651     return FALSE;
01652 }
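
/*
 * For illustration (a sketch, not compiled into the library): the three call
 * shapes used at the real call sites in this file.  "thread", "gtid", "count"
 * and "spin" are assumed to be set up the same way those call sites set them
 * up; the example_execute_tasks_modes() name is hypothetical.
 */
static void
example_execute_tasks_modes( kmp_info_t *thread, kmp_int32 gtid,
                             volatile kmp_uint32 *count, volatile kmp_uint32 *spin )
{
    int flag = FALSE;

    // taskwait/taskyield style: spinner == NULL, execute at most one task.
    __kmp_execute_tasks( thread, gtid, NULL, 0, FALSE, &flag,
                         __kmp_task_stealing_constraint );

    // taskgroup style: keep helping until a child-task counter drains to zero.
    while ( TCR_4(*count) != 0 ) {
        __kmp_execute_tasks( thread, gtid, count, 0, FALSE, &flag,
                             __kmp_task_stealing_constraint );
    }

    // barrier style: final_spin == TRUE, spin on the unfinished-thread count
    // (see __kmp_tasking_barrier() below).
    while ( ! __kmp_execute_tasks( thread, gtid, spin, 0, TRUE, &flag, NULL ) ) {
        KMP_YIELD( TRUE );
    }
}
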
01653 
01654 
01655 //-----------------------------------------------------------------------------
01656 // __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
01657 // next barrier so they can assist in executing enqueued tasks.
01658 // First thread in allocates the task team atomically.
01659 
01660 static void
01661 __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
01662 {
01663     kmp_team_t *team = this_thr->th.th_team;
01664     kmp_thread_data_t *threads_data;
01665     int nthreads, i, is_init_thread;
01666 
01667     KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
01668                     __kmp_gtid_from_thread( this_thr ) ) );
01669 
01670     KMP_DEBUG_ASSERT(task_team != NULL);
01671     KMP_DEBUG_ASSERT(team != NULL);
01672 
01673     nthreads = task_team->tt.tt_nproc;
01674     KMP_DEBUG_ASSERT(nthreads > 0);
01675     KMP_DEBUG_ASSERT(nthreads == team->t.t_nproc);
01676 
01677     // Allocate or increase the size of threads_data if necessary
01678     is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
01679 
01680     if (!is_init_thread) {
01681         // Some other thread already set up the array.
01682         KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
01683                         __kmp_gtid_from_thread( this_thr ) ) );
01684         return;
01685     }
01686     threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
01687     KMP_DEBUG_ASSERT( threads_data != NULL );
01688 
01689     if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
01690          ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
01691     {
01692         // Release any threads sleeping at the barrier, so that they can steal
01693         // tasks and execute them.  In extra barrier mode, threads do not sleep
01694         // at the separate tasking barrier, so this isn't a problem.
01695         for (i = 0; i < nthreads; i++) {
01696             volatile kmp_uint *sleep_loc;
01697             kmp_info_t *thread = threads_data[i].td.td_thr;
01698 
01699             if (i == this_thr->th.th_info.ds.ds_tid) {
01700                 continue;
01701             }
01702             // Since we haven't locked the thread's suspend mutex lock at this
01703             // point, there is a small window where a thread might be putting
01704             // itself to sleep, but hasn't set the th_sleep_loc field yet.
01705             // To work around this, __kmp_execute_tasks() periodically checks
01706             // see if other threads are sleeping (using the same random
01707             // mechanism that is used for task stealing) and awakens them if
01708             // they are.
01709             if ( ( sleep_loc =  (volatile kmp_uint *)
01710                                 TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
01711             {
01712                 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
01713                                  __kmp_gtid_from_thread( this_thr ),
01714                                  __kmp_gtid_from_thread( thread ) ) );
01715                 __kmp_resume( __kmp_gtid_from_thread( thread ), sleep_loc );
01716             }
01717             else {
01718                 KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
01719                                  __kmp_gtid_from_thread( this_thr ),
01720                                  __kmp_gtid_from_thread( thread ) ) );
01721             }
01722         }
01723     }
01724 
01725     KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
01726                     __kmp_gtid_from_thread( this_thr ) ) );
01727 }
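
/*
 * For illustration (a generic standalone sketch, not part of the original
 * sources): the "wake sleeping workers so they can help with tasks" idea
 * above, using a plain pthread condition variable as a stand-in for the
 * runtime's th_sleep_loc / __kmp_resume() machinery.  All demo_* names are
 * hypothetical.
 */
#include <pthread.h>

static pthread_mutex_t demo_sleep_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  demo_sleep_cv   = PTHREAD_COND_INITIALIZER;
static int             demo_tasks_available = 0;

static void demo_wake_sleeping_workers( void )
{
    pthread_mutex_lock( &demo_sleep_lock );
    demo_tasks_available = 1;                   /* publish that tasks exist */
    pthread_cond_broadcast( &demo_sleep_cv );   /* wake every sleeping worker */
    pthread_mutex_unlock( &demo_sleep_lock );
}
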
01728 
01729 
01730 /* ------------------------------------------------------------------------ */
01731 /*
01732  * Utility routines for "task teams".  A task team (kmp_task_team_t) acts as
01733  * a shadow of the kmp_team_t data struct, with a different lifetime.
01734  * After a child thread checks into a barrier and calls __kmp_release() from
01735  * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
01736  * longer assume that the kmp_team_t structure is intact (at any moment, the
01737  * master thread may exit the barrier code and free the team data structure,
01738  * and return the threads to the thread pool).
01739  *
01740  * This does not work with the tasking code, as the thread is still
01741  * expected to participate in the execution of any tasks that may have been
01742  * spawned by a member of the team, and the thread still needs access to
01743  * each thread in the team, so that it can steal work from them.
01744  *
01745  * Enter the existence of the kmp_task_team_t struct.  It employs a reference
01746  * counting mechanism, and is allocated by the master thread before calling
01747  * __kmp_<barrier_kind>_release, and then is released by the last thread to
01748  * exit __kmp_<barrier_kind>_release at the next barrier.  I.e. the lifetimes
01749  * of the kmp_task_team_t structs for consecutive barriers can overlap
01750  * (and will, unless the master thread is the last thread to exit the barrier
01751  * release phase, which is not typical).
01752  *
01753  * The existence of such a struct is useful outside the context of tasking,
01754  * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
01755  * so that any performance differences show up when comparing the 2.5 vs. 3.0
01756  * libraries.
01757  *
01758  * We currently use the existence of the threads array as an indicator that
01759  * tasks were spawned since the last barrier.  If the structure is to be
01760  * useful outside the context of tasking, then this will have to change, but
01761  * not setting the field minimizes the performance impact of tasking on
01762  * barriers, when no explicit tasks were spawned (pushed, actually).
01763  */
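
/*
 * For illustration (a minimal standalone sketch, not part of the original
 * sources): the reference-counting idea described above.  The demo_* names
 * are hypothetical, a plain decrement stands in for the atomic
 * fetch-and-decrement (KMP_TEST_THEN_DEC32) used by the real code, and
 * free() stands in for returning the struct to the free list below.
 */
#include <stdlib.h>

typedef struct demo_task_team {
    volatile int ref_ct;            /* threads still referencing this struct */
} demo_task_team_t;

static void demo_unref_task_team( demo_task_team_t *tt )
{
    if ( --tt->ref_ct == 0 ) {
        free( tt );                 /* last thread out releases the struct */
    }
}
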
01764 
01765 static kmp_task_team_t *__kmp_free_task_teams = NULL;           // Free list for task_team data structures
01766 // Lock for task team data structures
01767 static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
01768 
01769 
01770 //------------------------------------------------------------------------------
01771 // __kmp_alloc_task_deque:
01772 // Allocates a task deque for a particular thread, and initializes the necessary
01773 // data structures relating to the deque.  This only happens once per thread
01774 // per task team since task teams are recycled.
01775 // No lock is needed during allocation since each thread allocates its own
01776 // deque.
01777 
01778 static void
01779 __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
01780 {
01781     __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
01782     KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
01783 
01784     // Initialize last stolen task field to "none"
01785     thread_data -> td.td_deque_last_stolen = -1;
01786 
01787     KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
01788     KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
01789     KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
01790 
01791     KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
01792                    __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
01793     // Allocate space for task deque, and zero the deque
01794     // Cannot use __kmp_thread_calloc() because threads not around for
01795     // kmp_reap_task_team( ).
01796     thread_data -> td.td_deque = (kmp_taskdata_t **)
01797             __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
01798 }
01799 
01800 
01801 //------------------------------------------------------------------------------
01802 // __kmp_free_task_deque:
01803 // Deallocates a task deque for a particular thread.
01804 // Happens at library deallocation so don't need to reset all thread data fields.
01805 
01806 static void
01807 __kmp_free_task_deque( kmp_thread_data_t *thread_data )
01808 {
01809     __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
01810 
01811     if ( thread_data -> td.td_deque != NULL ) {
01812         TCW_4(thread_data -> td.td_deque_ntasks, 0);
01813          __kmp_free( thread_data -> td.td_deque );
01814         thread_data -> td.td_deque = NULL;
01815     }
01816     __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
01817 
01818 #ifdef BUILD_TIED_TASK_STACK
01819     // GEH: Figure out what to do here for td_susp_tied_tasks
01820     if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
01821         __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
01822     }
01823 #endif // BUILD_TIED_TASK_STACK
01824 }
01825 
01826 
01827 //------------------------------------------------------------------------------
01828 // __kmp_realloc_task_threads_data:
01829 // Allocates a threads_data array for a task team, either by allocating an initial
01830 // array or enlarging an existing array.  Only the first thread to get the lock
01831 // allocates or enlarges the array and re-initializes the array elements.
01832 // That thread returns "TRUE", the rest return "FALSE".
01833 // Assumes that the new array size is given by task_team -> tt.tt_nproc.
01834 // The current size is given by task_team -> tt.tt_max_threads.
01835 
01836 static int
01837 __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
01838 {
01839     kmp_thread_data_t ** threads_data_p;
01840     kmp_int32            nthreads, maxthreads;
01841     int                  is_init_thread = FALSE;
01842 
01843     if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
01844         // Already reallocated and initialized.
01845         return FALSE;
01846     }
01847 
01848     threads_data_p = & task_team -> tt.tt_threads_data;
01849     nthreads   = task_team -> tt.tt_nproc;
01850     maxthreads = task_team -> tt.tt_max_threads;
01851 
01852     // All threads must lock when they encounter the first task of the implicit task
01853     // region to make sure threads_data fields are (re)initialized before they are used.
01854     __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
01855 
01856     if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
01857         // first thread to enable tasking
01858         kmp_team_t *team = thread -> th.th_team;
01859         int i;
01860 
01861         is_init_thread = TRUE;
01862         if ( maxthreads < nthreads ) {
01863 
01864             if ( *threads_data_p != NULL ) {
01865                 kmp_thread_data_t *old_data = *threads_data_p;
01866                 kmp_thread_data_t *new_data = NULL;
01867 
01868                 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
01869                                "threads data for task_team %p, new_size = %d, old_size = %d\n",
01870                                __kmp_gtid_from_thread( thread ), task_team,
01871                                nthreads, maxthreads ) );
01872                 // Reallocate threads_data to have more elements than current array
01873                 // Cannot use __kmp_thread_realloc() because threads not around for
01874                 // kmp_reap_task_team( ).  Note all new array entries are initialized
01875                 // to zero by __kmp_allocate().
01876                 new_data = (kmp_thread_data_t *)
01877                             __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
01878                 // copy old data to new data
01879                 memcpy( (void *) new_data, (void *) old_data,
01880                         maxthreads * sizeof(kmp_taskdata_t *) );
01881 
01882 #ifdef BUILD_TIED_TASK_STACK
01883                 // GEH: Figure out if this is the right thing to do
01884                 for (i = maxthreads; i < nthreads; i++) {
01885                     kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
01886                     __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
01887                 }
01888 #endif // BUILD_TIED_TASK_STACK
01889                 // Install the new data and free the old data
01890                 (*threads_data_p) = new_data;
01891                 __kmp_free( old_data );
01892             }
01893             else {
01894                 KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
01895                                "threads data for task_team %p, size = %d\n",
01896                                __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
01897                 // Make the initial allocate for threads_data array, and zero entries
01898                 // Cannot use __kmp_thread_calloc() because threads not around for
01899                 // kmp_reap_task_team( ).
01900                 *threads_data_p = (kmp_thread_data_t *)
01901                                   __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
01902 #ifdef BUILD_TIED_TASK_STACK
01903                 // GEH: Figure out if this is the right thing to do
01904                 for (i = 0; i < nthreads; i++) {
01905                     kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
01906                     __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
01907                 }
01908 #endif // BUILD_TIED_TASK_STACK
01909             }
01910             task_team -> tt.tt_max_threads = nthreads;
01911         }
01912         else {
01913             // If array has (more than) enough elements, go ahead and use it
01914             KMP_DEBUG_ASSERT( *threads_data_p != NULL );
01915         }
01916 
01917         // initialize threads_data pointers back to thread_info structures
01918         for (i = 0; i < nthreads; i++) {
01919             kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
01920             thread_data -> td.td_thr = team -> t.t_threads[i];
01921 
01922             if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
01923                 // The last stolen field survives across teams / barrier, and the number
01924                 // of threads may have changed.  It's possible (likely?) that a new
01925                 // parallel region will exhibit the same behavior as the previous region.
01926                 thread_data -> td.td_deque_last_stolen = -1;
01927             }
01928         }
01929 
01930         KMP_MB();
01931         TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
01932     }
01933 
01934     __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
01935     return is_init_thread;
01936 }
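
/*
 * For illustration (a standalone sketch, not part of the original sources):
 * the "first thread in initializes" pattern used above: check the flag, take
 * the lock, check again.  The demo_* names are hypothetical, a pthread mutex
 * stands in for the bootstrap lock, and the real code additionally uses
 * volatile TCR_4/TCW_SYNC_4 accesses plus a KMP_MB() fence when publishing.
 */
#include <pthread.h>

static pthread_mutex_t demo_init_lock   = PTHREAD_MUTEX_INITIALIZER;
static volatile int    demo_initialized = 0;

static int demo_init_once( void )       /* returns 1 only for the initializer */
{
    int is_init_thread = 0;

    if ( demo_initialized ) {
        return 0;                       /* fast path: someone already did it */
    }
    pthread_mutex_lock( &demo_init_lock );
    if ( ! demo_initialized ) {
        /* ... allocate or enlarge the shared array here ... */
        is_init_thread = 1;
        demo_initialized = 1;           /* publish only after initializing */
    }
    pthread_mutex_unlock( &demo_init_lock );
    return is_init_thread;
}
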
01937 
01938 
01939 //------------------------------------------------------------------------------
01940 // __kmp_free_task_threads_data:
01941 // Deallocates a threads_data array for a task team, including any attached
01942 // tasking deques.  Only occurs at library shutdown.
01943 
01944 static void
01945 __kmp_free_task_threads_data( kmp_task_team_t *task_team )
01946 {
01947     __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
01948     if ( task_team -> tt.tt_threads_data != NULL ) {
01949         int i;
01950         for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
01951             __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
01952         }
01953         __kmp_free( task_team -> tt.tt_threads_data );
01954         task_team -> tt.tt_threads_data = NULL;
01955     }
01956     __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
01957 }
01958 
01959 
01960 //------------------------------------------------------------------------------
01961 // __kmp_allocate_task_team:
01962 // Allocates a task team associated with a specific team, taking it from
01963 // the global task team free list if possible.  Also initializes data structures.
01964 
01965 static kmp_task_team_t *
01966 __kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
01967 {
01968     kmp_task_team_t *task_team = NULL;
01969     int nthreads;
01970 
01971     KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
01972                     (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
01973 
01974     if (TCR_PTR(__kmp_free_task_teams) != NULL) {
01975         // Take a task team from the task team pool
01976         __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
01977         if (__kmp_free_task_teams != NULL) {
01978             task_team = __kmp_free_task_teams;
01979             TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
01980             task_team -> tt.tt_next = NULL;
01981         }
01982         __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
01983     }
01984 
01985     if (task_team == NULL) {
01986         KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
01987                        "task team for team %p\n",
01988                        __kmp_gtid_from_thread( thread ), team ) );
01989         // Allocate a new task team if one is not available.
01990         // Cannot use __kmp_thread_malloc() because threads not around for
01991         // kmp_reap_task_team( ).
01992         task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
01993         __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
01994         //task_team -> tt.tt_threads_data = NULL;   // AC: __kmp_allocate zeroes returned memory
01995         //task_team -> tt.tt_max_threads = 0;
01996         //task_team -> tt.tt_next = NULL;
01997     }
01998 
01999     TCW_4(task_team -> tt.tt_found_tasks, FALSE);
02000     task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
02001 
02002     task_team -> tt.tt_state = 0;
02003     TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
02004     TCW_4( task_team -> tt.tt_active, TRUE );
02005     TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
02006 
02007     KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
02008                     (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
02009     return task_team;
02010 }
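
/*
 * For illustration (a standalone sketch, not part of the original sources):
 * the free-list reuse pattern above.  The demo_* names are hypothetical and a
 * pthread mutex stands in for __kmp_task_team_lock: pop a recycled node under
 * the lock if one is available, otherwise allocate a fresh one.
 */
#include <stdlib.h>
#include <pthread.h>

typedef struct demo_node {
    struct demo_node *next;
} demo_node_t;

static demo_node_t     *demo_free_list = NULL;
static pthread_mutex_t  demo_free_list_lock = PTHREAD_MUTEX_INITIALIZER;

static demo_node_t *demo_acquire_node( void )
{
    demo_node_t *node = NULL;

    if ( demo_free_list != NULL ) {        /* racy peek, re-checked under the lock */
        pthread_mutex_lock( &demo_free_list_lock );
        if ( demo_free_list != NULL ) {
            node = demo_free_list;         /* pop the head of the free list */
            demo_free_list = node->next;
            node->next = NULL;
        }
        pthread_mutex_unlock( &demo_free_list_lock );
    }
    if ( node == NULL ) {
        node = (demo_node_t *) calloc( 1, sizeof( *node ) );   /* none to recycle */
    }
    return node;
}
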
02011 
02012 
02013 //------------------------------------------------------------------------------
02014 // __kmp_free_task_team:
02015 // Frees the task team associated with a specific thread, and adds it
02016 // to the global task team free list.
02017 //
02018 
02019 static void
02020 __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
02021 {
02022     KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
02023                     thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
02024 
02025     KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );
02026 
02027     // Put task team back on free list
02028     __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
02029 
02030     KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
02031     task_team -> tt.tt_next = __kmp_free_task_teams;
02032     TCW_4(task_team -> tt.tt_found_tasks, FALSE);
02033     TCW_PTR(__kmp_free_task_teams, task_team);
02034 
02035     __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
02036 }
02037 
02038 
02039 //------------------------------------------------------------------------------
02040 // __kmp_reap_task_teams:
02041 // Free all the task teams on the task team free list.
02042 // Should only be done during library shutdown.
02043 // Cannot do anything that needs a thread structure or gtid since they are already gone.
02044 
02045 void
02046 __kmp_reap_task_teams( void )
02047 {
02048     kmp_task_team_t   *task_team;
02049 
02050     if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
02051         // Free all task_teams on the free list
02052         __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
02053         while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
02054             __kmp_free_task_teams = task_team -> tt.tt_next;
02055             task_team -> tt.tt_next = NULL;
02056 
02057             // Free threads_data if necessary
02058             if ( task_team -> tt.tt_threads_data != NULL ) {
02059                 __kmp_free_task_threads_data( task_team );
02060             }
02061             __kmp_free( task_team );
02062         }
02063         __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
02064     }
02065 }
02066 
02067 
02068 //------------------------------------------------------------------------------
02069 // __kmp_unref_task_teams:
02070 // Remove one thread from referencing the task team structure by
02071 // decreasing the reference count, and deallocates the task team if there are
02072 // no more references to it.
02073 //
02074 void
02075 __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
02076 {
02077     kmp_uint ref_ct;
02078 
02079     ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;
02080 
02081     KA_TRACE( 20, ( "__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
02082                     __kmp_gtid_from_thread( thread ), task_team, ref_ct ) );
02083 
02084 
02085     if ( ref_ct == 0 ) {
02086         __kmp_free_task_team( thread, task_team );
02087     }
02088 
02089     TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
02090 }
02091 
02092 
02093 //------------------------------------------------------------------------------
02094 // __kmp_wait_to_unref_task_teams:
02095 // Some threads could still be in the fork barrier release code, possibly
02096 // trying to steal tasks.  Wait for each thread to unreference its task team.
02097 //
02098 void
02099 __kmp_wait_to_unref_task_teams(void)
02100 {
02101     kmp_info_t *thread;
02102     kmp_uint32 spins;
02103     int done;
02104 
02105     KMP_INIT_YIELD( spins );
02106 
02107 
02108     for (;;) {
02109         done = TRUE;
02110 
02111         // TODO: GEH - this may be wrong because some sync would be necessary
02112         //             in case threads are added to the pool during the traversal.
02113         //             Need to verify that lock for thread pool is held when calling
02114         //             this routine.
02115         for (thread = (kmp_info_t *)__kmp_thread_pool;
02116              thread != NULL;
02117              thread = thread->th.th_next_pool)
02118         {
02119             volatile kmp_uint *sleep_loc;
02120 #if KMP_OS_WINDOWS
02121             DWORD exit_val;
02122 #endif
02123             if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
02124                 KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
02125                                __kmp_gtid_from_thread( thread ) ) );
02126                 continue;
02127             }
02128 #if KMP_OS_WINDOWS
02129             // TODO: GEH - add this check for Linux* OS / OS X* as well?
02130             if (!__kmp_is_thread_alive(thread, &exit_val)) {
02131                 if (TCR_PTR(thread->th.th_task_team) != NULL) {
02132                     __kmp_unref_task_team( thread->th.th_task_team, thread );
02133                 }
02134                 continue;
02135             }
02136 #endif
02137 
02138             done = FALSE;  // Because th_task_team pointer is not NULL for this thread
02139 
02140             KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
02141                            __kmp_gtid_from_thread( thread ) ) );
02142 
02143             if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
02144                 // If the thread is sleeping, awaken it.
02145                 if ( ( sleep_loc = (volatile kmp_uint *) TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
02146                     KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
02147                                     __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
02148                     __kmp_resume( __kmp_gtid_from_thread( thread ), sleep_loc );
02149                 }
02150             }
02151         }
02152         if (done) {
02153             break;
02154         }
02155 
02156         // If we are oversubscribed,
02157         // or have waited a bit (and library mode is throughput), yield.
02158         // Pause is in the following code.
02159         KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
02160         KMP_YIELD_SPIN( spins );        // Yields only if KMP_LIBRARY=throughput
02161     }
02162 
02163 
02164 }
02165 
02166 
02167 //------------------------------------------------------------------------------
02168 // __kmp_task_team_setup:  Create a task_team for the current team, but use
02169 // an already created, unused one if it already exists.
02170 // This may be called by any thread, but only for teams with # threads >1.
02171 
02172 void
02173 __kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team )
02174 {
02175     KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
02176 
02177     if ( ( team->t.t_task_team == NULL ) && ( team->t.t_nproc > 1 ) ) {
02178         // Allocate a new task team, which will be propagated to
02179         // all of the worker threads after the barrier.  As they
02180         // spin in the barrier release phase, they will continue
02181         // to use the previous task team struct, until they receive
02182         // the signal to stop checking for tasks (they can't safely
02183         // reference the kmp_team_t struct, which could be reallocated
02184         // by the master thread).
02185         team->t.t_task_team = __kmp_allocate_task_team( this_thr, team );
02186         KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new "
02187                         "task_team %p for team %d\n",
02188                         __kmp_gtid_from_thread( this_thr ), team->t.t_task_team,
02189                         ((team != NULL) ? team->t.t_id : -1)) );
02190     }
02191     else {
02192         // All threads have reported in, and no tasks were spawned
02193         // for this release->gather region.  Leave the old task
02194         // team struct in place for the upcoming region.  No task
02195         // teams are formed for serialized teams.
02196     }
02197     if ( team->t.t_task_team != NULL ) {
02198         // Toggle the state flag so that we can tell which side of
02199         // the barrier we are on.
02200         team->t.t_task_team->tt.tt_state = 1 - this_thr->th.th_task_state;
02201     }
02202 }
02203 
02204 
02205 //------------------------------------------------------------------------------
02206 // __kmp_task_team_sync: Propagation of task team data from team to threads
02207 // which happens just after the release phase of a team barrier.  This may be
02208 // called by any thread, but only for teams with # threads > 1.
02209 
02210 void
02211 __kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
02212 {
02213     KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
02214 
02215     // In the rare case that this thread never saw that the task
02216     // team was no longer active, unref/deallocate it now.
02217     if ( this_thr->th.th_task_team != NULL ) {
02218         if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
02219             KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
02220             __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
02221         } else {
02222             //
02223             // We are re-using a task team that was never enabled.
02224             //
02225             KMP_DEBUG_ASSERT( this_thr->th.th_task_team == team->t.t_task_team );
02226         }
02227     }
02228 
02229     //
02230     // It is now safe to propagate the task team pointer from the
02231     // team struct to the current thread.
02232     //
02233     TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team);
02234     if ( this_thr->th.th_task_team != NULL ) {
02235         //
02236         // Toggle the th_task_state field, instead of reading it from
02237         // the task team.  Reading the tt_state field at this point
02238         // causes a 30% regression on EPCC parallel - toggling it
02239         // is much cheaper.
02240         //
02241         this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
02242         KMP_DEBUG_ASSERT( this_thr->th.th_task_state == TCR_4(team->t.t_task_team->tt.tt_state) );
02243     }
02244     KA_TRACE( 20, ( "__kmp_task_team_sync: Thread T#%d task team assigned pointer (%p) from Team #%d task team\n",
02245                     __kmp_gtid_from_thread( this_thr ), &this_thr->th.th_task_team,
02246                     this_thr->th.th_task_team, ((team != NULL) ? (team->t.t_id) : -1) ) );
02247 }
02248 
02249 
02250 //------------------------------------------------------------------------------
02251 // __kmp_task_team_wait: Master thread waits for outstanding tasks after
02252 // the barrier gather phase.  Only called by master thread if #threads
02253 // in team > 1 !
02254 
02255 void
02256 __kmp_task_team_wait( kmp_info_t *this_thr, 
02257                       kmp_team_t *team
02258                       )
02259 {
02260     kmp_task_team_t *task_team = team->t.t_task_team;
02261 
02262     KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
02263     KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
02264 
02265     if ( ( task_team != NULL ) && KMP_TASKING_ENABLED( task_team, this_thr->th.th_task_state ) ) {
02266         KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d waiting for all tasks: task_team = %p\n",
02267                           __kmp_gtid_from_thread( this_thr ), task_team ) );
02268         //
02269         // All worker threads might have dropped through to the
02270         // release phase, but could still be executing tasks.
02271         // Wait here for all tasks to complete.  To avoid memory
02272         // contention, only the master thread checks for the
02273         // termination condition.
02274         //
02275         __kmp_wait_sleep( this_thr, &task_team->tt.tt_unfinished_threads, 0, TRUE
02276                           );
02277 
02278         //
02279         // Kill the old task team, so that the worker threads will
02280         // stop referencing it while spinning.  They will
02281         // deallocate it when the reference count reaches zero.
02282         // The master thread is not included in the ref count.
02283         //
02284         KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d deactivating task_team %p\n",
02285                           __kmp_gtid_from_thread( this_thr ), task_team ) );
02286         KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
02287         TCW_SYNC_4( task_team->tt.tt_active, FALSE );
02288         KMP_MB();
02289 
02290         TCW_PTR(this_thr->th.th_task_team, NULL);
02291         team->t.t_task_team = NULL;
02292     }
02293 }
02294 
02295 
02296 //------------------------------------------------------------------------------
02297 // __kmp_tasking_barrier:
02298 // Internal function to execute all tasks prior to a regular barrier or a
02299 // join barrier.  It is a full barrier itself, which unfortunately turns
02300 // regular barriers into double barriers and join barriers into 1 1/2
02301 // barriers.
02302 // This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
02303 
02304 void
02305 __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
02306 {
02307     volatile kmp_uint32 *spin = &team->t.t_task_team->tt.tt_unfinished_threads;
02308     int flag = FALSE;
02309     KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
02310 
02311     while (! __kmp_execute_tasks( thread, gtid, spin, 0, TRUE, &flag, NULL ) ) {
02312 
02313         if( TCR_4(__kmp_global.g.g_done) ) {
02314             if( __kmp_global.g.g_abort )
02315                 __kmp_abort_thread( );
02316             break;
02317         }
02318         KMP_YIELD( TRUE );       // GH: We always yield here
02319     }
02320 }
02321 
02322 #endif // OMP_30_ENABLED
02323 
