kmp_taskq.c

00001 /*
00002  * kmp_taskq.c -- TASKQ support for OpenMP.
00003  * $Revision: 42099 $
00004  * $Date: 2013-03-08 15:25:21 -0600 (Fri, 08 Mar 2013) $
00005  */
00006 
00007 /* <copyright>
00008     Copyright (c) 1997-2013 Intel Corporation.  All Rights Reserved.
00009 
00010     Redistribution and use in source and binary forms, with or without
00011     modification, are permitted provided that the following conditions
00012     are met:
00013 
00014       * Redistributions of source code must retain the above copyright
00015         notice, this list of conditions and the following disclaimer.
00016       * Redistributions in binary form must reproduce the above copyright
00017         notice, this list of conditions and the following disclaimer in the
00018         documentation and/or other materials provided with the distribution.
00019       * Neither the name of Intel Corporation nor the names of its
00020         contributors may be used to endorse or promote products derived
00021         from this software without specific prior written permission.
00022 
00023     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00024     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00025     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00026     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00027     HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00028     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00029     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00030     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00031     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00032     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00033     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00034 
00035 
00036 ------------------------------------------------------------------------
00037 
00038     Portions of this software are protected under the following patents:
00039         U.S. Patent 5,812,852
00040         U.S. Patent 6,792,599
00041         U.S. Patent 7,069,556
00042         U.S. Patent 7,328,433
00043         U.S. Patent 7,500,242
00044 
00045 </copyright> */
00046 
00047 #include "kmp.h"
00048 #include "kmp_i18n.h"
00049 #include "kmp_io.h"
00050 #include "kmp_error.h"
00051 
00052 #define MAX_MESSAGE 512
00053 
00054 /* ------------------------------------------------------------------------ */
00055 /* ------------------------------------------------------------------------ */
00056 
00057 /*
00058  * Taskq routines and global variables
00059  */
00060 
00061 #define KMP_DEBUG_REF_CTS(x)    KF_TRACE(1, x);
00062 
00063 #define THREAD_ALLOC_FOR_TASKQ
00064 
00065 static void
00066 __kmp_static_delay( int arg )
00067 {
00068 /* Work around weird code-gen bug that causes assert to trip */
00069 #if KMP_ARCH_X86_64 && KMP_OS_LINUX
00070     KMP_ASSERT( arg != 0 );
00071 #else
00072     KMP_ASSERT( arg >= 0 );
00073 #endif
00074 }
00075 
00076 static void
00077 __kmp_static_yield( int arg )
00078 {
00079     __kmp_yield( arg );
00080 }
00081 
00082 static int
00083 in_parallel_context( kmp_team_t *team )
00084 {
00085     return ! team -> t.t_serialized;
00086 }
00087 
00088 static void
00089 __kmp_taskq_eo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
00090 {
00091     int                gtid = *gtid_ref;
00092     int                tid  = __kmp_tid_from_gtid( gtid );
00093     kmp_uint32         spins;
00094     kmp_uint32         my_token;
00095     kmpc_task_queue_t *taskq;
00096     kmp_taskq_t       *tq   = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq;
00097 
00098     if ( __kmp_env_consistency_check )
00099         __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL );
00100 
00101     if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) {
00102         KMP_MB();       /* Flush all pending memory write invalidates.  */
00103 
00104         /* GEH - need check here under stats to make sure   */
00105         /*       inside task (curr_thunk[*tid_ref] != NULL) */
00106 
00107         my_token = tq->tq_curr_thunk[ tid ]-> th_tasknum;
00108 
00109         taskq = tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue;
00110 
00111         KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
00112         KMP_MB();
00113     }
00114 }
00115 
00116 static void
00117 __kmp_taskq_xo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
00118 {
00119     int           gtid = *gtid_ref;
00120     int           tid  = __kmp_tid_from_gtid( gtid );
00121     kmp_uint32    my_token;
00122     kmp_taskq_t  *tq   = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq;
00123 
00124     if ( __kmp_env_consistency_check )
00125         __kmp_pop_sync( gtid, ct_ordered_in_taskq, loc_ref );
00126 
00127     if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) {
00128         KMP_MB();       /* Flush all pending memory write invalidates.  */
00129 
00130         /* GEH - need check here under stats to make sure */
00131         /*       inside task (curr_thunk[tid] != NULL)    */
00132 
00133         my_token = tq->tq_curr_thunk[ tid ]->th_tasknum;
00134 
00135         KMP_MB();       /* Flush all pending memory write invalidates.  */
00136 
00137         tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue -> tq_tasknum_serving = my_token + 1;
00138 
00139         KMP_MB();       /* Flush all pending memory write invalidates.  */
00140     }
00141 }
00142 
00143 static void
00144 __kmp_taskq_check_ordered( kmp_int32 gtid, kmpc_thunk_t *thunk )
00145 {
00146     kmp_uint32 spins;
00147     kmp_uint32 my_token;
00148     kmpc_task_queue_t *taskq;
00149 
00150     /* assume we are always called from an active parallel context */
00151 
00152     KMP_MB();       /* Flush all pending memory write invalidates.  */
00153 
00154     my_token =  thunk -> th_tasknum;
00155 
00156     taskq =  thunk -> th.th_shareds -> sv_queue;
00157 
00158     if(taskq->tq_tasknum_serving <= my_token) {
00159         KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
00160         KMP_MB();
00161         taskq->tq_tasknum_serving = my_token + 1;
00162         KMP_MB();
00163     }
00164 }
00165 
00166 static void
00167 __kmp_dump_TQF(kmp_int32 flags)
00168 {
00169     if (flags & TQF_IS_ORDERED)
00170         __kmp_printf("ORDERED ");
00171     if (flags & TQF_IS_LASTPRIVATE)
00172         __kmp_printf("LAST_PRIV ");
00173     if (flags & TQF_IS_NOWAIT)
00174         __kmp_printf("NOWAIT ");
00175     if (flags & TQF_HEURISTICS)
00176         __kmp_printf("HEURIST ");
00177     if (flags & TQF_INTERFACE_RESERVED1)
00178         __kmp_printf("RESERV1 ");
00179     if (flags & TQF_INTERFACE_RESERVED2)
00180         __kmp_printf("RESERV2 ");
00181     if (flags & TQF_INTERFACE_RESERVED3)
00182         __kmp_printf("RESERV3 ");
00183     if (flags & TQF_INTERFACE_RESERVED4)
00184         __kmp_printf("RESERV4 ");
00185     if (flags & TQF_IS_LAST_TASK)
00186         __kmp_printf("LAST_TASK ");
00187     if (flags & TQF_TASKQ_TASK)
00188         __kmp_printf("TASKQ_TASK ");
00189     if (flags & TQF_RELEASE_WORKERS)
00190         __kmp_printf("RELEASE ");
00191     if (flags & TQF_ALL_TASKS_QUEUED)
00192         __kmp_printf("ALL_QUEUED ");
00193     if (flags & TQF_PARALLEL_CONTEXT)
00194         __kmp_printf("PARALLEL ");
00195     if (flags & TQF_DEALLOCATED)
00196         __kmp_printf("DEALLOC ");
00197     if (!(flags & (TQF_INTERNAL_FLAGS|TQF_INTERFACE_FLAGS)))
00198         __kmp_printf("(NONE)");
00199 }
00200 
00201 static void
00202 __kmp_dump_thunk( kmp_taskq_t *tq, kmpc_thunk_t *thunk, kmp_int32 global_tid )
00203 {
00204     int i;
00205     int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;
00206 
00207     __kmp_printf("\tThunk at %p on (%d):  ", thunk, global_tid);
00208 
00209     if (thunk != NULL) {
00210         for (i = 0; i < nproc; i++) {
00211             if( tq->tq_curr_thunk[i] == thunk ) {
00212                 __kmp_printf("[%i] ", i);
00213             }
00214         }
00215         __kmp_printf("th_shareds=%p, ", thunk->th.th_shareds);
00216         __kmp_printf("th_task=%p, ", thunk->th_task);
00217         __kmp_printf("th_encl_thunk=%p, ", thunk->th_encl_thunk);
00218         __kmp_printf("th_status=%d, ", thunk->th_status);
00219         __kmp_printf("th_tasknum=%u, ", thunk->th_tasknum);
00220         __kmp_printf("th_flags="); __kmp_dump_TQF(thunk->th_flags);
00221     }
00222 
00223     __kmp_printf("\n");
00224 }
00225 
00226 static void
00227 __kmp_dump_thunk_stack(kmpc_thunk_t *thunk, kmp_int32 thread_num)
00228 {
00229     kmpc_thunk_t *th;
00230 
00231     __kmp_printf("    Thunk stack for T#%d:  ", thread_num);
00232 
00233     for (th = thunk; th != NULL; th = th->th_encl_thunk )
00234         __kmp_printf("%p ", th);
00235 
00236     __kmp_printf("\n");
00237 }
00238 
00239 static void
00240 __kmp_dump_task_queue( kmp_taskq_t *tq, kmpc_task_queue_t *queue, kmp_int32 global_tid )
00241 {
00242     int                  qs, count, i;
00243     kmpc_thunk_t        *thunk;
00244     kmpc_task_queue_t   *taskq;
00245 
00246     __kmp_printf("Task Queue at %p on (%d):\n", queue, global_tid);
00247 
00248     if (queue != NULL) {
00249         int in_parallel = queue->tq_flags & TQF_PARALLEL_CONTEXT;
00250 
00251         if ( __kmp_env_consistency_check ) {
00252             __kmp_printf("    tq_loc             : ");
00253         }
00254         if (in_parallel) {
00255 
00256             //if (queue->tq.tq_parent != 0)
00257                 //__kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
00258 
00259             //__kmp_acquire_lock(& queue->tq_link_lck, global_tid);
00260 
00261             KMP_MB();  /* make sure data structures are in consistent state before querying them */
00262                        /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
00263 
00264             __kmp_printf("    tq_parent          : %p\n", queue->tq.tq_parent);
00265             __kmp_printf("    tq_first_child     : %p\n", queue->tq_first_child);
00266             __kmp_printf("    tq_next_child      : %p\n", queue->tq_next_child);
00267             __kmp_printf("    tq_prev_child      : %p\n", queue->tq_prev_child);
00268             __kmp_printf("    tq_ref_count       : %d\n", queue->tq_ref_count);
00269 
00270             //__kmp_release_lock(& queue->tq_link_lck, global_tid);
00271 
00272             //if (queue->tq.tq_parent != 0)
00273                 //__kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
00274 
00275             //__kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);
00276             //__kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
00277 
00278             KMP_MB();  /* make sure data structures are in consistent state before querying them */
00279                        /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
00280         }
00281 
00282         __kmp_printf("    tq_shareds         : ");
00283         for (i=0; i<((queue == tq->tq_root) ? queue->tq_nproc : 1); i++)
00284             __kmp_printf("%p ", queue->tq_shareds[i].ai_data);
00285         __kmp_printf("\n");
00286 
00287         if (in_parallel) {
00288             __kmp_printf("    tq_tasknum_queuing : %u\n", queue->tq_tasknum_queuing);
00289             __kmp_printf("    tq_tasknum_serving : %u\n", queue->tq_tasknum_serving);
00290         }
00291 
00292         __kmp_printf("    tq_queue           : %p\n", queue->tq_queue);
00293         __kmp_printf("    tq_thunk_space     : %p\n", queue->tq_thunk_space);
00294         __kmp_printf("    tq_taskq_slot      : %p\n", queue->tq_taskq_slot);
00295 
00296         __kmp_printf("    tq_free_thunks     : ");
00297         for (thunk = queue->tq_free_thunks; thunk != NULL; thunk = thunk->th.th_next_free )
00298             __kmp_printf("%p ", thunk);
00299         __kmp_printf("\n");
00300 
00301         __kmp_printf("    tq_nslots          : %d\n", queue->tq_nslots);
00302         __kmp_printf("    tq_head            : %d\n", queue->tq_head);
00303         __kmp_printf("    tq_tail            : %d\n", queue->tq_tail);
00304         __kmp_printf("    tq_nfull           : %d\n", queue->tq_nfull);
00305         __kmp_printf("    tq_hiwat           : %d\n", queue->tq_hiwat);
00306         __kmp_printf("    tq_flags           : "); __kmp_dump_TQF(queue->tq_flags);
00307         __kmp_printf("\n");
00308 
00309         if (in_parallel) {
00310             __kmp_printf("    tq_th_thunks       : ");
00311             for (i = 0; i < queue->tq_nproc; i++) {
00312                 __kmp_printf("%d ", queue->tq_th_thunks[i].ai_data);
00313             }
00314             __kmp_printf("\n");
00315         }
00316 
00317         __kmp_printf("\n");
00318         __kmp_printf("    Queue slots:\n");
00319 
00320 
00321         qs = queue->tq_tail;
00322         for ( count = 0; count < queue->tq_nfull; ++count ) {
00323             __kmp_printf("(%d)", qs);
00324             __kmp_dump_thunk( tq, queue->tq_queue[qs].qs_thunk, global_tid );
00325             qs = (qs+1) % queue->tq_nslots;
00326         }
00327 
00328         __kmp_printf("\n");
00329 
00330         if (in_parallel) {
00331             if (queue->tq_taskq_slot != NULL) {
00332                 __kmp_printf("    TaskQ slot:\n");
00333                 __kmp_dump_thunk( tq, (kmpc_thunk_t *) queue->tq_taskq_slot, global_tid );
00334                 __kmp_printf("\n");
00335             }
00336             //__kmp_release_lock(& queue->tq_queue_lck, global_tid);
00337             //__kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
00338         }
00339     }
00340 
00341     __kmp_printf("    Taskq freelist: ");
00342 
00343     //__kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
00344 
00345     KMP_MB();  /* make sure data structures are in consistent state before querying them */
00346                /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
00347 
00348     for( taskq = tq->tq_freelist; taskq != NULL; taskq = taskq->tq.tq_next_free )
00349         __kmp_printf("%p ", taskq);
00350 
00351     //__kmp_release_lock( & tq->tq_freelist_lck, global_tid );
00352 
00353     __kmp_printf("\n\n");
00354 }
00355 
00356 static void
00357 __kmp_aux_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *curr_queue, kmp_int32 level, kmp_int32 global_tid )
00358 {
00359     int i, count, qs;
00360     int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;
00361     kmpc_task_queue_t *queue = curr_queue;
00362 
00363     if (curr_queue == NULL)
00364         return;
00365 
00366     __kmp_printf("    ");
00367 
00368     for (i=0; i<level; i++)
00369         __kmp_printf("  ");
00370 
00371     __kmp_printf("%p", curr_queue);
00372 
00373     for (i = 0; i < nproc; i++) {
00374         if( tq->tq_curr_thunk[i] && tq->tq_curr_thunk[i]->th.th_shareds->sv_queue == curr_queue ) {
00375             __kmp_printf(" [%i]", i);
00376         }
00377     }
00378 
00379     __kmp_printf(":");
00380 
00381     //__kmp_acquire_lock(& curr_queue->tq_queue_lck, global_tid);
00382 
00383     KMP_MB();  /* make sure data structures are in consistent state before querying them */
00384                /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
00385 
00386     qs = curr_queue->tq_tail;
00387 
00388     for ( count = 0; count < curr_queue->tq_nfull; ++count ) {
00389         __kmp_printf("%p ", curr_queue->tq_queue[qs].qs_thunk);
00390          qs = (qs+1) % curr_queue->tq_nslots;
00391     }
00392 
00393     //__kmp_release_lock(& curr_queue->tq_queue_lck, global_tid);
00394 
00395     __kmp_printf("\n");
00396 
00397     if (curr_queue->tq_first_child) {
00398         //__kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
00399 
00400         KMP_MB();  /* make sure data structures are in consistent state before querying them */
00401                    /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
00402 
00403         if (curr_queue->tq_first_child) {
00404             for(queue = (kmpc_task_queue_t *)curr_queue->tq_first_child;
00405                 queue != NULL;
00406                 queue = queue->tq_next_child) {
00407                 __kmp_aux_dump_task_queue_tree( tq, queue, level+1, global_tid );
00408             }
00409         }
00410 
00411         //__kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
00412     }
00413 }
00414 
00415 static void
00416 __kmp_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *tqroot, kmp_int32 global_tid)
00417 {
00418     __kmp_printf("TaskQ Tree at root %p on (%d):\n", tqroot, global_tid);
00419 
00420     __kmp_aux_dump_task_queue_tree( tq, tqroot, 0, global_tid );
00421 
00422     __kmp_printf("\n");
00423 }
00424 
00425 /* --------------------------------------------------------------------------- */
00426 
00427 /*
00428     New taskq storage routines that try to minimize overhead of mallocs but
00429     still provide cache line alignment.
00430 */
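
/*
    A minimal sketch (illustrative only) of the scheme used by
    __kmp_taskq_allocate() / __kmpc_taskq_free() below: sizeof(void *) +
    CACHE_LINE extra bytes are requested, the block address is rounded up to a
    cache-line boundary, the original pointer is stashed in the word at that
    boundary, and the caller receives the address just past the stashed word:

        orig_addr         addr (cache-line aligned)
        |                 |
        v                 v
        +--- padding ----+------------------+--------------------------+
        |  < CACHE_LINE  |  void *orig_addr |  size bytes of payload   |
        +----------------+------------------+--------------------------+
                                             ^
                                             pointer returned to caller

    __kmpc_taskq_free(p, global_tid) recovers the original pointer with
    *(((void **) p) - 1) and hands it back to the underlying allocator.
*/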
00431 
00432 
00433 static void *
00434 __kmp_taskq_allocate(size_t size, kmp_int32 global_tid)
00435 {
00436     void *addr, *orig_addr;
00437     size_t bytes;
00438 
00439     KB_TRACE( 5, ("__kmp_taskq_allocate: called size=%d, gtid=%d\n", (int) size, global_tid ) );
00440 
00441     bytes = sizeof(void *) + CACHE_LINE + size;
00442 
00443 #ifdef THREAD_ALLOC_FOR_TASKQ
00444     orig_addr = (void *) __kmp_thread_malloc( __kmp_thread_from_gtid(global_tid), bytes );
00445 #else
00446     KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", bytes ) );
00447     orig_addr = (void *) KMP_INTERNAL_MALLOC( bytes );
00448 #endif /* THREAD_ALLOC_FOR_TASKQ */
00449 
00450     if (orig_addr == 0)
00451         KMP_FATAL( OutOfHeapMemory );
00452 
00453     addr = orig_addr;
00454 
00455     if (((kmp_uintptr_t) addr & ( CACHE_LINE - 1 )) != 0) {
00456         KB_TRACE( 50, ("__kmp_taskq_allocate:  adjust for cache alignment\n" ) );
00457         addr = (void *) (((kmp_uintptr_t) addr + CACHE_LINE) & ~( CACHE_LINE - 1 ));
00458     }
00459 
00460     (* (void **) addr) = orig_addr;
00461 
00462     KB_TRACE( 10, ("__kmp_taskq_allocate:  allocate: %p, use: %p - %p, size: %d, gtid: %d\n",
00463              orig_addr, ((void **) addr) + 1, ((char *)(((void **) addr) + 1)) + size-1,
00464              (int) size, global_tid ));
00465 
00466     return ( ((void **) addr) + 1 );
00467 }
00468 
00469 static void
00470 __kmpc_taskq_free(void *p, kmp_int32 global_tid)
00471 {
00472     KB_TRACE( 5, ("__kmpc_taskq_free: called addr=%p, gtid=%d\n", p, global_tid ) );
00473 
00474     KB_TRACE(10, ("__kmpc_taskq_free:  freeing: %p, gtid: %d\n", (*( ((void **) p)-1)), global_tid ));
00475 
00476 #ifdef THREAD_ALLOC_FOR_TASKQ
00477     __kmp_thread_free( __kmp_thread_from_gtid(global_tid), *( ((void **) p)-1) );
00478 #else
00479     KMP_INTERNAL_FREE( *( ((void **) p)-1) );
00480 #endif /* THREAD_ALLOC_FOR_TASKQ */
00481 }
00482 
00483 /* --------------------------------------------------------------------------- */
00484 
00485 /*
00486  *      Keep freed kmpc_task_queue_t on an internal freelist and recycle since
00487  *      they're of constant size.
00488  */
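
/*
 *      Roughly (illustrative pseudo-C; the real paths below also handle flag
 *      bookkeeping and first-time allocation):
 *
 *          // __kmp_alloc_taskq:  pop a recycled queue if one is available
 *          __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
 *          new_queue       = tq->tq_freelist;
 *          tq->tq_freelist = new_queue->tq.tq_next_free;
 *          __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
 *
 *          // __kmp_free_taskq:  push the queue back for reuse
 *          __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
 *          p->tq.tq_next_free = tq->tq_freelist;
 *          tq->tq_freelist    = p;
 *          __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
 */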
00489 
00490 static kmpc_task_queue_t *
00491 __kmp_alloc_taskq ( kmp_taskq_t *tq, int in_parallel, kmp_int32 nslots, kmp_int32 nthunks,
00492                     kmp_int32 nshareds, kmp_int32 nproc, size_t sizeof_thunk,
00493                     size_t sizeof_shareds, kmpc_thunk_t **new_taskq_thunk, kmp_int32 global_tid )
00494 {
00495     kmp_int32                  i;
00496     size_t                     bytes;
00497     kmpc_task_queue_t          *new_queue;
00498     kmpc_aligned_shared_vars_t *shared_var_array;
00499     char                       *shared_var_storage;
00500     char                       *pt; /* for doing byte-adjusted address computations */
00501 
00502     __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
00503 
00504     KMP_MB();  /* make sure data structures are in consistent state before querying them */
00505                /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
00506 
00507     if( tq->tq_freelist ) {
00508         new_queue =  tq -> tq_freelist;
00509         tq -> tq_freelist =  tq -> tq_freelist -> tq.tq_next_free;
00510 
00511         KMP_DEBUG_ASSERT(new_queue->tq_flags & TQF_DEALLOCATED);
00512 
00513         new_queue->tq_flags = 0;
00514 
00515         __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
00516     }
00517     else {
00518         __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
00519 
00520         new_queue = (kmpc_task_queue_t *) __kmp_taskq_allocate (sizeof (kmpc_task_queue_t), global_tid);
00521         new_queue->tq_flags = 0;
00522     }
00523 
00524     /*  space in the task queue for queue slots (allocate as one big chunk */
00525     /* of storage including new_taskq_task space)                          */
00526 
00527     sizeof_thunk += (CACHE_LINE - (sizeof_thunk % CACHE_LINE));         /* pad to cache line size */
00528     pt = (char *) __kmp_taskq_allocate (nthunks * sizeof_thunk, global_tid);
00529     new_queue->tq_thunk_space = (kmpc_thunk_t *)pt;
00530     *new_taskq_thunk = (kmpc_thunk_t *)(pt + (nthunks - 1) * sizeof_thunk);
00531 
00532     /*  chain the allocated thunks into a freelist for this queue  */
00533 
00534     new_queue->tq_free_thunks = (kmpc_thunk_t *)pt;
00535 
00536     for (i = 0; i < (nthunks - 2); i++) {
00537         ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th.th_next_free = (kmpc_thunk_t *)(pt + (i+1)*sizeof_thunk);
00538 #ifdef KMP_DEBUG
00539         ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th_flags = TQF_DEALLOCATED;
00540 #endif
00541     }
00542 
00543     ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th.th_next_free = NULL;
00544 #ifdef KMP_DEBUG
00545     ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th_flags = TQF_DEALLOCATED;
00546 #endif
00547 
00548     /* initialize the locks */
00549 
00550     if (in_parallel) {
00551         __kmp_init_lock( & new_queue->tq_link_lck );
00552         __kmp_init_lock( & new_queue->tq_free_thunks_lck );
00553         __kmp_init_lock( & new_queue->tq_queue_lck );
00554     }
00555 
00556     /* now allocate the slots */
00557 
00558     bytes = nslots * sizeof (kmpc_aligned_queue_slot_t);
00559     new_queue->tq_queue = (kmpc_aligned_queue_slot_t *) __kmp_taskq_allocate( bytes, global_tid );
00560 
00561     /*  space for array of pointers to shared variable structures */
00562     sizeof_shareds += sizeof(kmpc_task_queue_t *);
00563     sizeof_shareds += (CACHE_LINE - (sizeof_shareds % CACHE_LINE));     /* pad to cache line size */
00564 
00565     bytes = nshareds * sizeof (kmpc_aligned_shared_vars_t);
00566     shared_var_array = (kmpc_aligned_shared_vars_t *) __kmp_taskq_allocate ( bytes, global_tid);
00567 
00568     bytes = nshareds * sizeof_shareds;
00569     shared_var_storage = (char *) __kmp_taskq_allocate ( bytes, global_tid);
00570 
00571     for (i=0; i<nshareds; i++) {
00572         shared_var_array[i].ai_data = (kmpc_shared_vars_t *) (shared_var_storage + i*sizeof_shareds);
00573         shared_var_array[i].ai_data->sv_queue = new_queue;
00574     }
00575     new_queue->tq_shareds = shared_var_array;
00576 
00577 
00578     /* array for number of outstanding thunks per thread */
00579 
00580     if (in_parallel) {
00581         bytes = nproc * sizeof(kmpc_aligned_int32_t);
00582         new_queue->tq_th_thunks = (kmpc_aligned_int32_t *) __kmp_taskq_allocate ( bytes, global_tid);
00583         new_queue->tq_nproc     = nproc;
00584 
00585         for (i=0; i<nproc; i++)
00586             new_queue->tq_th_thunks[i].ai_data = 0;
00587     }
00588 
00589     return new_queue;
00590 }
00591 
00592 static void
00593 __kmp_free_taskq (kmp_taskq_t *tq, kmpc_task_queue_t *p, int in_parallel, kmp_int32 global_tid)
00594 {
00595     __kmpc_taskq_free(p->tq_thunk_space, global_tid);
00596     __kmpc_taskq_free(p->tq_queue, global_tid);
00597 
00598     /* free shared var structure storage */
00599     __kmpc_taskq_free((void *) p->tq_shareds[0].ai_data, global_tid);
00600 
00601     /* free array of pointers to shared vars storage */
00602     __kmpc_taskq_free(p->tq_shareds, global_tid);
00603 
00604 #ifdef KMP_DEBUG
00605     p->tq_first_child = NULL;
00606     p->tq_next_child = NULL;
00607     p->tq_prev_child = NULL;
00608     p->tq_ref_count = -10;
00609     p->tq_shareds = NULL;
00610     p->tq_tasknum_queuing = 0;
00611     p->tq_tasknum_serving = 0;
00612     p->tq_queue = NULL;
00613     p->tq_thunk_space = NULL;
00614     p->tq_taskq_slot = NULL;
00615     p->tq_free_thunks = NULL;
00616     p->tq_nslots = 0;
00617     p->tq_head = 0;
00618     p->tq_tail = 0;
00619     p->tq_nfull = 0;
00620     p->tq_hiwat = 0;
00621 
00622     if (in_parallel) {
00623         int i;
00624 
00625         for (i=0; i<p->tq_nproc; i++)
00626             p->tq_th_thunks[i].ai_data = 0;
00627     }
00628     if ( __kmp_env_consistency_check )
00629         p->tq_loc = NULL;
00630     KMP_DEBUG_ASSERT( p->tq_flags & TQF_DEALLOCATED );
00631     p->tq_flags = TQF_DEALLOCATED;
00632 #endif /* KMP_DEBUG */
00633 
00634     if (in_parallel)  {
00635         __kmpc_taskq_free(p->tq_th_thunks, global_tid);
00636         __kmp_destroy_lock(& p->tq_link_lck);
00637         __kmp_destroy_lock(& p->tq_queue_lck);
00638         __kmp_destroy_lock(& p->tq_free_thunks_lck);
00639     }
00640 #ifdef KMP_DEBUG
00641     p->tq_th_thunks = NULL;
00642 #endif /* KMP_DEBUG */
00643 
00644     KMP_MB();  /* make sure data structures are in consistent state before querying them */
00645                /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
00646 
00647     __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
00648     p->tq.tq_next_free = tq->tq_freelist;
00649 
00650     tq->tq_freelist = p;
00651     __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
00652 }
00653 
00654 /*
00655  *    Once a group of thunks has been allocated for use in a particular queue,
00656  *    these are managed via a per-queue freelist.
00657  *    We force a check that there's always a thunk free if we need one.
00658  */
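
/*
 *    Sizing note (a sketch of why the assertion in __kmp_alloc_thunk() holds):
 *    each queue is created with enough thunks for every queue slot plus the
 *    per-thread limit on outstanding thunks, and the debug check in
 *    __kmp_remove_queue_from_tree() verifies that all
 *    tq_nslots + tq_nproc * __KMP_TASKQ_THUNKS_PER_TH thunks have been
 *    returned to the per-queue freelist before the queue is released.
 */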
00659 
00660 static kmpc_thunk_t *
00661 __kmp_alloc_thunk (kmpc_task_queue_t *queue, int in_parallel, kmp_int32 global_tid)
00662 {
00663     kmpc_thunk_t *fl;
00664 
00665     if (in_parallel) {
00666         __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);
00667 
00668         KMP_MB();  /* make sure data structures are in consistent state before querying them */
00669                    /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
00670     }
00671 
00672     fl = queue->tq_free_thunks;
00673 
00674     KMP_DEBUG_ASSERT (fl != NULL);
00675 
00676     queue->tq_free_thunks = fl->th.th_next_free;
00677     fl->th_flags = 0;
00678 
00679     if (in_parallel)
00680         __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
00681 
00682     return fl;
00683 }
00684 
00685 static void
00686 __kmp_free_thunk (kmpc_task_queue_t *queue, kmpc_thunk_t *p, int in_parallel, kmp_int32 global_tid)
00687 {
00688 #ifdef KMP_DEBUG
00689     p->th_task = 0;
00690     p->th_encl_thunk = 0;
00691     p->th_status = 0;
00692     p->th_tasknum = 0;
00693     /* Also could zero pointers to private vars */
00694 #endif
00695 
00696     if (in_parallel) {
00697         __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);
00698 
00699         KMP_MB();  /* make sure data structures are in consistent state before querying them */
00700                    /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
00701     }
00702 
00703     p->th.th_next_free = queue->tq_free_thunks;
00704     queue->tq_free_thunks = p;
00705 
00706 #ifdef KMP_DEBUG
00707     p->th_flags = TQF_DEALLOCATED;
00708 #endif
00709 
00710     if (in_parallel)
00711         __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
00712 }
00713 
00714 /* --------------------------------------------------------------------------- */
00715 
00716 /*  returns nonzero if the queue just became full after the enqueue  */
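
/*
    The queue proper is a fixed-size ring of tq_nslots entries.  Illustrative
    invariants, matching the enqueue/dequeue code below:

        tq_head  : index of the next free slot      -- advanced by enqueue
        tq_tail  : index of the oldest queued task  -- advanced by dequeue
        tq_nfull : number of occupied slots, 0 <= tq_nfull <= tq_nslots

    Both indices wrap modulo tq_nslots, so the occupied region is the
    tq_nfull slots starting at tq_tail:  tq_tail, tq_tail+1, ... (mod tq_nslots).
*/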
00717 
00718 static kmp_int32
00719 __kmp_enqueue_task ( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, kmpc_thunk_t *thunk, int in_parallel )
00720 {
00721     kmp_int32    ret;
00722 
00723     /*  dkp: can we get around the lock in the TQF_RELEASE_WORKERS case (only the master is executing then)  */
00724     if (in_parallel) {
00725         __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
00726 
00727         KMP_MB();  /* make sure data structures are in consistent state before querying them */
00728                    /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
00729     }
00730 
00731     KMP_DEBUG_ASSERT (queue->tq_nfull < queue->tq_nslots);  /*  check queue not full  */
00732 
00733     queue->tq_queue[(queue->tq_head)++].qs_thunk = thunk;
00734 
00735     if (queue->tq_head >= queue->tq_nslots)
00736         queue->tq_head = 0;
00737 
00738     (queue->tq_nfull)++;
00739 
00740     KMP_MB();   /* to assure that nfull is seen to increase before TQF_ALL_TASKS_QUEUED is set */
00741 
00742     ret = (in_parallel) ? (queue->tq_nfull == queue->tq_nslots) : FALSE;
00743 
00744     if (in_parallel) {
00745         /* don't need to wait until workers are released before unlocking */
00746         __kmp_release_lock(& queue->tq_queue_lck, global_tid);
00747 
00748         if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) {
00749             /* If just creating the root queue, the worker threads are waiting at */
00750             /* a join barrier until now, when there's something in the queue for  */
00751             /* them to do; release them now to do work.                           */
00752             /* This should only be done when this is the first task enqueued,     */
00753             /* so reset the flag here also.                                       */
00754 
00755             tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;  /* no lock needed, workers are still in spin mode */
00756 
00757             KMP_MB();   /* avoid releasing barrier twice if taskq_task switches threads */
00758 
00759             __kmpc_end_barrier_master( NULL, global_tid);
00760         }
00761     }
00762 
00763     return ret;
00764 }
00765 
00766 static kmpc_thunk_t *
00767 __kmp_dequeue_task (kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel)
00768 {
00769     kmpc_thunk_t *pt;
00770     int           tid = __kmp_tid_from_gtid( global_tid );
00771 
00772     KMP_DEBUG_ASSERT (queue->tq_nfull > 0);  /*  check queue not empty  */
00773 
00774     if (queue->tq.tq_parent != NULL && in_parallel) {
00775         int ct;
00776         __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
00777         ct = ++(queue->tq_ref_count);
00778         __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
00779         KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
00780           __LINE__, global_tid, queue, ct));
00781     }
00782 
00783     pt = queue->tq_queue[(queue->tq_tail)++].qs_thunk;
00784 
00785     if (queue->tq_tail >= queue->tq_nslots)
00786         queue->tq_tail = 0;
00787 
00788     if (in_parallel) {
00789         queue->tq_th_thunks[tid].ai_data++;
00790 
00791         KMP_MB(); /* necessary so ai_data increment is propagated to other threads immediately (digital) */
00792 
00793         KF_TRACE(200, ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding thunks from queue %p\n",
00794             global_tid, tid, queue->tq_th_thunks[tid].ai_data, queue));
00795     }
00796 
00797     (queue->tq_nfull)--;
00798 
00799 #ifdef KMP_DEBUG
00800     KMP_MB();
00801 
00802     /* necessary so (queue->tq_nfull > 0) above succeeds after tq_nfull is decremented */
00803 
00804     KMP_DEBUG_ASSERT(queue->tq_nfull >= 0);
00805 
00806     if (in_parallel) {
00807         KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data <= __KMP_TASKQ_THUNKS_PER_TH);
00808     }
00809 #endif
00810 
00811     return pt;
00812 }
00813 
00814 /*
00815  * Find the next (non-null) task to dequeue and return it.
00816  * This is never called unless in_parallel=TRUE
00817  *
00818  * Here are the rules for deciding which queue to take the task from:
00819  * 1.  Walk up the task queue tree from the current queue's parent and look
00820  *      on the way up (for loop, below).
00821  * 2.  Do a depth-first search back down the tree from the root and
00822  *      look (__kmp_find_task_in_descendant_queue()).
00823  *
00824  * Here are the rules for deciding which task to take from a queue
00825  * (__kmp_find_task_in_queue ()):
00826  * 1.  Never take the last task from a queue if TQF_IS_LASTPRIVATE; this task
00827  *     must be staged to make sure we execute the last one with
00828  *     TQF_IS_LAST_TASK at the end of task queue execution.
00829  * 2.  If the queue length is below some high water mark and the taskq task
00830  *     is enqueued, prefer running the taskq task.
00831  * 3.  Otherwise, take a (normal) task from the queue.
00832  *
00833  * If we do all this and return pt == NULL at the bottom of this routine,
00834  * this means there are no more tasks to execute (except possibly for
00835  * TQF_IS_LASTPRIVATE).
00836  */
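
/*
 * Condensed decision order applied in __kmp_find_task_in_queue() below
 * (an illustrative summary of the if-chain, not a separate mechanism):
 *
 *   1) taskq task staged in tq_taskq_slot and tq_nfull <= tq_hiwat
 *          -> run the taskq task to enqueue more work
 *   2) queue empty, or this thread already holds
 *      __KMP_TASKQ_THUNKS_PER_TH outstanding thunks   -> return NULL
 *   3) more than one task queued                      -> dequeue one
 *   4) exactly one task and !TQF_IS_LASTPRIVATE       -> dequeue it
 *   5) exactly one task, TQF_IS_LASTPRIVATE and TQF_IS_LAST_TASK set
 *          -> dequeue it with TQF_IS_LAST_TASK marked on the thunk
 *
 * Otherwise the single remaining TQF_IS_LASTPRIVATE task stays staged until
 * kmpc_end_taskq_task() marks it as the last task.
 */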
00837 
00838 static kmpc_thunk_t *
00839 __kmp_find_task_in_queue (kmp_int32 global_tid, kmpc_task_queue_t *queue)
00840 {
00841     kmpc_thunk_t *pt  = NULL;
00842     int           tid = __kmp_tid_from_gtid( global_tid );
00843 
00844     /* To prevent deadlock from tq_queue_lck if queue already deallocated */
00845     if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
00846 
00847         __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
00848 
00849         /* Check again to avoid race in __kmpc_end_taskq() */
00850         if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
00851 
00852             KMP_MB();  /* make sure data structures are in consistent state before querying them */
00853                        /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
00854 
00855             if ((queue->tq_taskq_slot != NULL) && (queue->tq_nfull <= queue->tq_hiwat)) {
00856                 /* if there's enough room in the queue and the dispatcher */
00857                 /* (taskq task) is available, schedule more tasks         */
00858                 pt = (kmpc_thunk_t *) queue->tq_taskq_slot;
00859                 queue->tq_taskq_slot = NULL;
00860             }
00861             else if (queue->tq_nfull == 0 ||
00862                      queue->tq_th_thunks[tid].ai_data >= __KMP_TASKQ_THUNKS_PER_TH) {
00863                 /* do nothing if no thunks available or this thread can't */
00864                 /* run any because it already is executing too many       */
00865 
00866                 pt = NULL;
00867             }
00868             else if (queue->tq_nfull > 1) {
00869                 /*  always safe to schedule a task even if TQF_IS_LASTPRIVATE  */
00870 
00871                 pt = __kmp_dequeue_task (global_tid, queue, TRUE);
00872             }
00873             else if (!(queue->tq_flags & TQF_IS_LASTPRIVATE)) {
00874                 /*  one thing in queue, always safe to schedule if !TQF_IS_LASTPRIVATE  */
00875 
00876                 pt = __kmp_dequeue_task (global_tid, queue, TRUE);
00877             }
00878             else if (queue->tq_flags & TQF_IS_LAST_TASK) {
00879                 /* TQF_IS_LASTPRIVATE, one thing in queue, kmpc_end_taskq_task()   */
00880                 /* has been run so this is last task, run with TQF_IS_LAST_TASK so */
00881                 /* instrumentation does copy-out.                                  */
00882 
00883                 pt = __kmp_dequeue_task (global_tid, queue, TRUE);
00884                 pt->th_flags |= TQF_IS_LAST_TASK;  /* don't need test_then_or since already locked */
00885             }
00886         }
00887 
00888         /* GEH - What happens here if is lastprivate, but not last task? */
00889         __kmp_release_lock(& queue->tq_queue_lck, global_tid);
00890     }
00891 
00892     return pt;
00893 }
00894 
00895 /*
00896  * Walk a tree of queues starting at queue's first child
00897  * and return a non-NULL thunk if one can be scheduled.
00898  * Must only be called when in_parallel=TRUE
00899  */
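
/*
 * Reference-count protocol used while walking (sketch of what the code below
 * does): while holding the parent's tq_link_lck, bump queue->tq_ref_count and
 * drop the lock; inspect the queue; then retake the lock, read tq_next_child
 * and decrement the count before moving on.  __kmp_remove_queue_from_tree()
 * waits for tq_ref_count to fall back to 1 before freeing a queue, so a queue
 * cannot vanish underneath a concurrent walker.
 */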
00900 
00901 static kmpc_thunk_t *
00902 __kmp_find_task_in_descendant_queue (kmp_int32 global_tid, kmpc_task_queue_t *curr_queue)
00903 {
00904     kmpc_thunk_t *pt = NULL;
00905     kmpc_task_queue_t *queue = curr_queue;
00906 
00907     if (curr_queue->tq_first_child != NULL) {
00908         __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
00909 
00910         KMP_MB();  /* make sure data structures are in consistent state before querying them */
00911                    /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
00912 
00913         queue = (kmpc_task_queue_t *) curr_queue->tq_first_child;
00914         if (queue == NULL) {
00915             __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
00916             return NULL;
00917         }
00918 
00919         while (queue != NULL)  {
00920             int ct;
00921             kmpc_task_queue_t *next;
00922 
00923             ct= ++(queue->tq_ref_count);
00924             __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
00925             KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
00926               __LINE__, global_tid, queue, ct));
00927 
00928             pt = __kmp_find_task_in_queue (global_tid, queue);
00929 
00930             if (pt != NULL) {
00931                 int ct;
00932 
00933                 __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
00934 
00935                 KMP_MB();  /* make sure data structures are in consistent state before querying them */
00936                            /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
00937 
00938                 ct = --(queue->tq_ref_count);
00939                 KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
00940                   __LINE__, global_tid, queue, ct));
00941                 KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 );
00942 
00943                 __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
00944 
00945                 return pt;
00946             }
00947 
00948             /* although reference count stays active during descendant walk, shouldn't matter  */
00949             /* since if children still exist, reference counts aren't being monitored anyway   */
00950 
00951             pt = __kmp_find_task_in_descendant_queue (global_tid, queue);
00952 
00953             if (pt != NULL) {
00954                 int ct;
00955 
00956                 __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
00957 
00958                 KMP_MB();  /* make sure data structures are in consistent state before querying them */
00959                            /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
00960 
00961                 ct = --(queue->tq_ref_count);
00962                 KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
00963                   __LINE__, global_tid, queue, ct));
00964                 KMP_DEBUG_ASSERT( ct >= 0 );
00965 
00966                 __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
00967 
00968                 return pt;
00969             }
00970 
00971             __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
00972 
00973             KMP_MB();  /* make sure data structures are in consistent state before querying them */
00974                        /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
00975 
00976             next = queue->tq_next_child;
00977 
00978             ct = --(queue->tq_ref_count);
00979             KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
00980               __LINE__, global_tid, queue, ct));
00981             KMP_DEBUG_ASSERT( ct >= 0 );
00982 
00983             queue = next;
00984         }
00985 
00986         __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
00987     }
00988 
00989     return pt;
00990 }
00991 
00992 /*
00993  * Walk up the taskq tree looking for a task to execute.
00994  * If we get to the root, search the tree for a descendent queue task.
00995  * Must only be called when in_parallel=TRUE
00996  */
00997 
00998 static kmpc_thunk_t *
00999 __kmp_find_task_in_ancestor_queue (kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue)
01000 {
01001     kmpc_task_queue_t *queue;
01002     kmpc_thunk_t      *pt;
01003 
01004     pt = NULL;
01005 
01006     if (curr_queue->tq.tq_parent != NULL) {
01007         queue = curr_queue->tq.tq_parent;
01008 
01009         while (queue != NULL) {
01010             if (queue->tq.tq_parent != NULL) {
01011                 int ct;
01012                 __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
01013 
01014                 KMP_MB();  /* make sure data structures are in consistent state before querying them */
01015                            /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
01016 
01017                 ct = ++(queue->tq_ref_count);
01018                 __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
01019                 KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
01020                   __LINE__, global_tid, queue, ct));
01021             }
01022 
01023             pt = __kmp_find_task_in_queue (global_tid, queue);
01024             if (pt != NULL) {
01025                 if (queue->tq.tq_parent != NULL) {
01026                     int ct;
01027                     __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
01028 
01029                     KMP_MB();  /* make sure data structures are in consistent state before querying them   */
01030                                /* Seems to work without this call for digital/alpha, needed for IBM/RS6000 */
01031 
01032                     ct = --(queue->tq_ref_count);
01033                     KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
01034                       __LINE__, global_tid, queue, ct));
01035                     KMP_DEBUG_ASSERT( ct >= 0 );
01036 
01037                     __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
01038                 }
01039 
01040                 return pt;
01041             }
01042 
01043             if (queue->tq.tq_parent != NULL) {
01044                 int ct;
01045                 __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
01046 
01047                 KMP_MB();  /* make sure data structures are in consistent state before querying them */
01048                            /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
01049 
01050                 ct = --(queue->tq_ref_count);
01051                 KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
01052                   __LINE__, global_tid, queue, ct));
01053                 KMP_DEBUG_ASSERT( ct >= 0 );
01054             }
01055             queue = queue->tq.tq_parent;
01056 
01057             if (queue != NULL)
01058                 __kmp_release_lock(& queue->tq_link_lck, global_tid);
01059         }
01060 
01061     }
01062 
01063     pt = __kmp_find_task_in_descendant_queue( global_tid, tq->tq_root );
01064 
01065     return pt;
01066 }
01067 
01068 static int
01069 __kmp_taskq_tasks_finished (kmpc_task_queue_t *queue)
01070 {
01071     int i;
01072 
01073     /* KMP_MB(); *//* is this really necessary? */
01074 
01075     for (i=0; i<queue->tq_nproc; i++) {
01076         if (queue->tq_th_thunks[i].ai_data != 0)
01077             return FALSE;
01078     }
01079 
01080     return TRUE;
01081 }
01082 
01083 static int
01084 __kmp_taskq_has_any_children (kmpc_task_queue_t *queue)
01085 {
01086     return (queue->tq_first_child != NULL);
01087 }
01088 
01089 static void
01090 __kmp_remove_queue_from_tree( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel )
01091 {
01092 #ifdef KMP_DEBUG
01093     kmp_int32     i;
01094     kmpc_thunk_t *thunk;
01095 #endif
01096 
01097     KF_TRACE(50, ("Before Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
01098     KF_DUMP(50, __kmp_dump_task_queue( tq, queue, global_tid ));
01099 
01100     /*  sub-queue in a recursion, not the root task queue  */
01101     KMP_DEBUG_ASSERT (queue->tq.tq_parent != NULL);
01102 
01103     if (in_parallel) {
01104         __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
01105 
01106         KMP_MB();  /* make sure data structures are in consistent state before querying them */
01107                    /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
01108     }
01109 
01110     KMP_DEBUG_ASSERT (queue->tq_first_child == NULL);
01111 
01112     /*  unlink queue from its siblings if any at this level  */
01113     if (queue->tq_prev_child != NULL)
01114         queue->tq_prev_child->tq_next_child = queue->tq_next_child;
01115     if (queue->tq_next_child != NULL)
01116         queue->tq_next_child->tq_prev_child = queue->tq_prev_child;
01117     if (queue->tq.tq_parent->tq_first_child == queue)
01118         queue->tq.tq_parent->tq_first_child = queue->tq_next_child;
01119 
01120     queue->tq_prev_child = NULL;
01121     queue->tq_next_child = NULL;
01122 
01123     if (in_parallel) {
01124         kmp_uint32 spins;
01125 
01126         KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n",
01127           __LINE__, global_tid, queue, queue->tq_ref_count));
01128 
01129         /* wait until all other threads have stopped accessing this queue */
01130         while (queue->tq_ref_count > 1) {
01131             __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
01132 
01133             KMP_WAIT_YIELD((volatile kmp_uint32*)&queue->tq_ref_count, 1, KMP_LE, NULL);
01134 
01135             __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
01136 
01137             KMP_MB();  /* make sure data structures are in consistent state before querying them */
01138                        /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
01139         }
01140 
01141         __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
01142     }
01143 
01144     KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p freeing queue\n",
01145       __LINE__, global_tid, queue));
01146 
01147 #ifdef KMP_DEBUG
01148     KMP_DEBUG_ASSERT(queue->tq_flags & TQF_ALL_TASKS_QUEUED);
01149     KMP_DEBUG_ASSERT(queue->tq_nfull == 0);
01150 
01151     for (i=0; i<queue->tq_nproc; i++) {
01152         KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
01153     }
01154 
01155     i = 0;
01156     for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free)
01157         ++i;
01158 
01159     KMP_ASSERT (i == queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH));
01160 #endif
01161 
01162     /*  release storage for queue entry  */
01163     __kmp_free_taskq ( tq, queue, TRUE, global_tid );
01164 
01165     KF_TRACE(50, ("After Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
01166     KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));
01167 }
01168 
01169 /*
01170  * Starting from indicated queue, proceed downward through tree and
01171  * remove all taskqs which are finished, but only go down to taskqs
01172  * which have the "nowait" clause present.  Assume this is only called
01173  * when in_parallel=TRUE.
01174  */
01175 
01176 static void
01177 __kmp_find_and_remove_finished_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue )
01178 {
01179     kmpc_task_queue_t *queue = curr_queue;
01180 
01181     if (curr_queue->tq_first_child != NULL) {
01182         __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
01183 
01184         KMP_MB();  /* make sure data structures are in consistent state before querying them */
01185                    /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
01186 
01187         queue = (kmpc_task_queue_t *) curr_queue->tq_first_child;
00188         if (queue == NULL) {
01189             __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
01190             return;
01191         }
01192 
01193         while (queue != NULL)  {
01194             kmpc_task_queue_t *next;
01195             int ct = ++(queue->tq_ref_count);
01196             KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
01197               __LINE__, global_tid, queue, ct));
01198 
01199 
01200             /* although reference count stays active during descendant walk, */
01201             /* shouldn't matter since if children still exist, reference     */
01202             /* counts aren't being monitored anyway                          */
01203 
01204             if (queue->tq_flags & TQF_IS_NOWAIT) {
01205                 __kmp_find_and_remove_finished_child_taskq ( tq, global_tid, queue );
01206 
01207                 if ((queue->tq_flags & TQF_ALL_TASKS_QUEUED) && (queue->tq_nfull == 0) &&
01208                     __kmp_taskq_tasks_finished(queue) && ! __kmp_taskq_has_any_children(queue)) {
01209 
01210                     /*
01211                      Only remove this if we have not already marked it for deallocation.
01212                      This should prevent multiple threads from trying to free this.
01213                      */
01214 
01215                     if ( __kmp_test_lock(& queue->tq_queue_lck, global_tid) ) {
01216                         if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
01217                             queue->tq_flags |= TQF_DEALLOCATED;
01218                             __kmp_release_lock(& queue->tq_queue_lck, global_tid);
01219 
01220                             __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE );
01221 
01222                             /* Can't do any more here since can't be sure where sibling queue is so just exit this level */
01223                             return;
01224                         }
01225                         else {
01226                             __kmp_release_lock(& queue->tq_queue_lck, global_tid);
01227                         }
01228                     }
01229                     /* otherwise, just fall through and decrement reference count */
01230                 }
01231             }
01232 
01233             __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
01234 
01235             KMP_MB();  /* make sure data structures are in consistent state before querying them */
01236                        /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
01237 
01238             next = queue->tq_next_child;
01239 
01240             ct = --(queue->tq_ref_count);
01241             KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
01242               __LINE__, global_tid, queue, ct));
01243             KMP_DEBUG_ASSERT( ct >= 0 );
01244 
01245             queue = next;
01246         }
01247 
01248         __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
01249     }
01250 }
01251 
01252 /*
01253  * Starting from indicated queue, proceed downward through tree and
01254  * remove all taskq's assuming all are finished and
01255  * assuming NO other threads are executing at this point.
01256  */
01257 
01258 static void
01259 __kmp_remove_all_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue )
01260 {
01261     kmpc_task_queue_t *next_child;
01262 
01263     queue = (kmpc_task_queue_t *) queue->tq_first_child;
01264 
01265     while (queue != NULL)  {
01266         __kmp_remove_all_child_taskq ( tq, global_tid, queue );
01267 
01268         next_child = queue->tq_next_child;
01269         queue->tq_flags |= TQF_DEALLOCATED;
01270         __kmp_remove_queue_from_tree ( tq, global_tid, queue, FALSE );
01271         queue = next_child;
01272     }
01273 }
01274 
01275 static void
01276 __kmp_execute_task_from_queue( kmp_taskq_t *tq, ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, int in_parallel )
01277 {
01278     kmpc_task_queue_t *queue = thunk->th.th_shareds->sv_queue;
01279     kmp_int32          tid   = __kmp_tid_from_gtid( global_tid );
01280 
01281     KF_TRACE(100, ("After dequeueing this Task on (%d):\n", global_tid));
01282     KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));
01283     KF_TRACE(100, ("Task Queue: %p looks like this (%d):\n", queue, global_tid));
01284     KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
01285 
01286     /*
01287      * For the taskq task, the curr_thunk pushes and pop pairs are set up as follows:
01288      *
01289      * happens exactly once:
01290      * 1) __kmpc_taskq             : push (if returning thunk only)
01291      * 4) __kmpc_end_taskq_task    : pop
01292      *
01293      * optionally happens *each* time taskq task is dequeued/enqueued:
01294      * 2) __kmpc_taskq_task        : pop
01295      * 3) __kmp_execute_task_from_queue  : push
01296      *
01297      * execution ordering:  1,(2,3)*,4
01298      */
01299 
01300     if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
01301         kmp_int32 index = (queue == tq->tq_root) ? tid : 0;
01302         thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[index].ai_data;
01303 
01304         if ( __kmp_env_consistency_check ) {
01305             __kmp_push_workshare( global_tid,
01306                     (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task,
01307                     queue->tq_loc );
01308         }
01309     }
01310     else {
01311         if ( __kmp_env_consistency_check )
01312             __kmp_push_workshare( global_tid, ct_taskq, queue->tq_loc );
01313     }
01314 
01315     if (in_parallel) {
01316         thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
01317         tq->tq_curr_thunk[tid] = thunk;
01318 
01319         KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
01320     }
01321 
01322     KF_TRACE( 50, ("Begin Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid));
01323     thunk->th_task (global_tid, thunk);
01324     KF_TRACE( 50, ("End Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid));
01325 
01326     if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
01327         if ( __kmp_env_consistency_check )
01328             __kmp_pop_workshare( global_tid, (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task,
01329                                  queue->tq_loc );
01330 
01331         if (in_parallel) {
01332             tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
01333             thunk->th_encl_thunk = NULL;
01334             KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
01335         }
01336 
01337         if ((thunk->th_flags & TQF_IS_ORDERED) && in_parallel) {
01338             __kmp_taskq_check_ordered(global_tid, thunk);
01339         }
01340 
01341         __kmp_free_thunk (queue, thunk, in_parallel, global_tid);
01342 
01343         KF_TRACE(100, ("T#%d After freeing thunk: %p, TaskQ looks like this:\n", global_tid, thunk));
01344         KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
01345 
01346         if (in_parallel) {
01347             KMP_MB();   /* needed so thunk put on free list before outstanding thunk count is decremented */
01348 
01349             KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data >= 1);
01350 
01351             KF_TRACE( 200, ("__kmp_execute_task_from_queue: T#%d has %d thunks in queue %p\n",
01352                 global_tid, queue->tq_th_thunks[tid].ai_data-1, queue));
01353 
01354             queue->tq_th_thunks[tid].ai_data--;
01355 
01356             /* KMP_MB(); */     /* is MB really necessary ? */
01357         }
01358 
01359         if (queue->tq.tq_parent != NULL && in_parallel) {
01360             int ct;
01361             __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
01362             ct = --(queue->tq_ref_count);
01363             __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
01364             KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
01365               __LINE__, global_tid, queue, ct));
01366             KMP_DEBUG_ASSERT( ct >= 0 );
01367         }
01368     }
01369 }
01370 
01371 /* --------------------------------------------------------------------------- */
01372 
01373 /* starts a taskq; creates and returns a thunk for the taskq_task        */
01374 /* also, returns pointer to shared vars for this thread in "shareds" arg */
01375 
01376 kmpc_thunk_t *
01377 __kmpc_taskq( ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task,
01378               size_t sizeof_thunk, size_t sizeof_shareds,
01379               kmp_int32 flags, kmpc_shared_vars_t **shareds )
01380 {
01381     int                  in_parallel;
01382     kmp_int32            nslots, nthunks, nshareds, nproc;
01383     kmpc_task_queue_t   *new_queue, *curr_queue;
01384     kmpc_thunk_t        *new_taskq_thunk;
01385     kmp_info_t          *th;
01386     kmp_team_t          *team;
01387     kmp_taskq_t         *tq;
01388     kmp_int32            tid;
01389 
01390     KE_TRACE( 10, ("__kmpc_taskq called (%d)\n", global_tid));
01391 
01392     th = __kmp_threads[ global_tid ];
01393     team = th -> th.th_team;
01394     tq = & team -> t.t_taskq;
01395     nproc = team -> t.t_nproc;
01396     tid = __kmp_tid_from_gtid( global_tid );
01397 
01398     /* find out whether this is a parallel taskq or serialized one. */
01399     in_parallel = in_parallel_context( team );
01400 
01401     if( ! tq->tq_root ) {
01402         if (in_parallel) {
01403             /* Vector ORDERED SECTION to taskq version */
01404             th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;
01405 
01406             /* Vector ORDERED SECTION to taskq version */
01407             th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
01408         }
01409 
01410         if (in_parallel) {
01411             /* This shouldn't be a barrier region boundary; it would confuse the user. */
01412             /* The boundary needs to be at the end of the taskq instead.               */
01413             if ( __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) {
01414                 /* We are creating the active root queue and this is not the master  */
01415                 /* thread.  The master thread (in the code below) created the queue,  */
01416                 /* tasks have been enqueued, and the master released this barrier.    */
01417                 /* This worker thread can now proceed and execute tasks.  See also    */
01418                 /* TQF_RELEASE_WORKERS, which is used to handle this case.            */
01419 
01420                 *shareds = (kmpc_shared_vars_t *) tq->tq_root->tq_shareds[tid].ai_data;
01421 
01422                 KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid));
01423 
01424                 return NULL;
01425             }
01426         }
01427 
01428         /* master thread only executes this code */
01429 
01430         if( tq->tq_curr_thunk_capacity < nproc ) {
01431             int i;
01432 
01433             if(tq->tq_curr_thunk)
01434                 __kmp_free(tq->tq_curr_thunk);
01435             else {
01436                 /* only need to do this once at outer level, i.e. when tq_curr_thunk is still NULL */
01437                 __kmp_init_lock( & tq->tq_freelist_lck );
01438             }
01439 
01440             tq->tq_curr_thunk = (kmpc_thunk_t **) __kmp_allocate( nproc * sizeof(kmpc_thunk_t *) );
01441             tq -> tq_curr_thunk_capacity = nproc;
01442         }
01443 
01444         if (in_parallel)
01445             tq->tq_global_flags = TQF_RELEASE_WORKERS;
01446     }
01447 
01448     /* dkp: in future, if flags & TQF_HEURISTICS, will choose nslots based */
01449     /*      on some heuristics (e.g., depth of queue nesting?).            */
01450 
01451     nslots = (in_parallel) ? (2 * nproc) : 1;
01452 
01453     /* There must be nproc * __KMP_TASKQ_THUNKS_PER_TH extra slots for pending */
01454     /* jobs being executed by other threads, and one extra for the taskq slot. */
01455 
01456     nthunks = (in_parallel) ? (nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH) + 1) : nslots + 2;
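    /* Worked example (illustrative numbers only): with nproc = 4 and           */
    /* __KMP_TASKQ_THUNKS_PER_TH = 2, a parallel taskq would get                */
    /* nslots = 2 * 4 = 8 and nthunks = 8 + (4 * 2) + 1 = 17, while a           */
    /* serialized taskq would get nslots = 1 and nthunks = 1 + 2 = 3.           */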
01457 
01458     /* Only the root taskq gets a per-thread array of shareds.       */
01459     /* The rest of the taskq's only get one copy of the shared vars. */
01460 
01461     nshareds = ( !tq->tq_root && in_parallel) ? nproc : 1;
01462 
01463     /*  create overall queue data structure and its components that require allocation */
01464 
01465     new_queue = __kmp_alloc_taskq ( tq, in_parallel, nslots, nthunks, nshareds, nproc,
01466         sizeof_thunk, sizeof_shareds, &new_taskq_thunk, global_tid );
01467 
01468     /*  rest of new_queue initializations  */
01469 
01470     new_queue->tq_flags           = flags & TQF_INTERFACE_FLAGS;
01471 
01472     if (in_parallel) {
01473         new_queue->tq_tasknum_queuing  = 0;
01474         new_queue->tq_tasknum_serving  = 0;
01475         new_queue->tq_flags           |= TQF_PARALLEL_CONTEXT;
01476     }
01477 
01478     new_queue->tq_taskq_slot   = NULL;
01479     new_queue->tq_nslots       = nslots;
01480     new_queue->tq_hiwat        = HIGH_WATER_MARK (nslots);
01481     new_queue->tq_nfull        = 0;
01482     new_queue->tq_head         = 0;
01483     new_queue->tq_tail         = 0;
01484     new_queue->tq_loc          = loc;
01485 
01486     if ((new_queue->tq_flags & TQF_IS_ORDERED) && in_parallel) {
01487         /* prepare to serve the first-queued task's ORDERED directive */
01488         new_queue->tq_tasknum_serving = 1;
01489 
01490         /* Vector ORDERED SECTION to taskq version */
01491         th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;
01492 
01493         /* Vector ORDERED SECTION to taskq version */
01494         th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
01495     }
01496 
01497     /*  create a new thunk for the taskq_task in the new_queue  */
01498     *shareds = (kmpc_shared_vars_t *) new_queue->tq_shareds[0].ai_data;
01499 
01500     new_taskq_thunk->th.th_shareds = *shareds;
01501     new_taskq_thunk->th_task       = taskq_task;
01502     new_taskq_thunk->th_flags      = new_queue->tq_flags | TQF_TASKQ_TASK;
01503     new_taskq_thunk->th_status     = 0;
01504 
01505     KMP_DEBUG_ASSERT (new_taskq_thunk->th_flags & TQF_TASKQ_TASK);
01506 
01507     /* KMP_MB(); */ /* make sure these inits complete before threads start using this queue (necessary?) */
01508 
01509     /* insert the new task queue into the tree, but only after all fields initialized */
01510 
01511     if (in_parallel) {
01512         if( ! tq->tq_root ) {
01513             new_queue->tq.tq_parent   = NULL;
01514             new_queue->tq_first_child = NULL;
01515             new_queue->tq_next_child  = NULL;
01516             new_queue->tq_prev_child  = NULL;
01517             new_queue->tq_ref_count   = 1;
01518             tq->tq_root = new_queue;
01519         }
01520         else {
01521             curr_queue = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;
01522             new_queue->tq.tq_parent   = curr_queue;
01523             new_queue->tq_first_child = NULL;
01524             new_queue->tq_prev_child  = NULL;
01525             new_queue->tq_ref_count   = 1;      /* for the thread that built the queue */
01526 
01527             KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p alloc %d\n",
01528               __LINE__, global_tid, new_queue, new_queue->tq_ref_count));
01529 
01530             __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);
01531 
01532             KMP_MB();  /* make sure data structures are in consistent state before querying them */
01533                        /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
01534 
01535             new_queue->tq_next_child = (struct kmpc_task_queue_t *) curr_queue->tq_first_child;
01536 
01537             if (curr_queue->tq_first_child != NULL)
01538                 curr_queue->tq_first_child->tq_prev_child = new_queue;
01539 
01540             curr_queue->tq_first_child = new_queue;
01541 
01542             __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
01543         }
01544 
01545         /* set up thunk stack only after code that determines curr_queue above */
01546         new_taskq_thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
01547         tq->tq_curr_thunk[tid] = new_taskq_thunk;
01548 
01549         KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
01550     }
01551     else {
01552         new_taskq_thunk->th_encl_thunk = 0;
01553         new_queue->tq.tq_parent   = NULL;
01554         new_queue->tq_first_child = NULL;
01555         new_queue->tq_next_child  = NULL;
01556         new_queue->tq_prev_child  = NULL;
01557         new_queue->tq_ref_count   = 1;
01558     }
01559 
01560 #ifdef KMP_DEBUG
01561     KF_TRACE(150, ("Creating TaskQ Task on (%d):\n", global_tid));
01562     KF_DUMP(150, __kmp_dump_thunk( tq, new_taskq_thunk, global_tid ));
01563 
01564     if (in_parallel) {
01565         KF_TRACE(25, ("After TaskQ at %p Creation on (%d):\n", new_queue, global_tid));
01566     } else {
01567         KF_TRACE(25, ("After Serial TaskQ at %p Creation on (%d):\n", new_queue, global_tid));
01568     }
01569 
01570     KF_DUMP(25, __kmp_dump_task_queue( tq, new_queue, global_tid ));
01571 
01572     if (in_parallel) {
01573         KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));
01574     }
01575 #endif /* KMP_DEBUG */
01576 
01577     if ( __kmp_env_consistency_check )
01578         __kmp_push_workshare( global_tid, ct_taskq, new_queue->tq_loc );
01579 
01580     KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid));
01581 
01582     return new_taskq_thunk;
01583 }
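
/*
 * Rough sketch (an illustration only, not actual compiler output) of how the
 * entry points in this file fit together for one taskq construct.  The names
 * taskq_task_body, task_body, more_tasks_to_generate, loc, and gtid are
 * hypothetical placeholders.
 *
 *     kmpc_shared_vars_t *shareds;
 *     kmpc_thunk_t *tq_thunk = __kmpc_taskq(loc, gtid, taskq_task_body,
 *                                           sizeof_thunk, sizeof_shareds,
 *                                           flags, &shareds);
 *     if (tq_thunk != NULL) {              // this thread generates the tasks
 *         // roughly what taskq_task_body would do:
 *         while (more_tasks_to_generate) {
 *             kmpc_thunk_t *t = __kmpc_task_buffer(loc, gtid, tq_thunk, task_body);
 *             // ... initialize the task's private variables in t ...
 *             __kmpc_task(loc, gtid, t);   // enqueue (may also execute the
 *                                          // preceding task when serialized)
 *         }
 *         __kmpc_end_taskq_task(loc, gtid, tq_thunk);  // done generating tasks
 *     }
 *     __kmpc_end_taskq(loc, gtid, tq_thunk);   // every thread: drain queues;
 *                                              // last one out destroys them
 *
 * __kmpc_taskq_task() (later in this file) additionally enqueues the taskq
 * task itself into the queue's dedicated slot so that generation of tasks can
 * continue later (parallel context only).
 */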
01584 
01585 
01586 /*  ends a taskq; last thread out destroys the queue  */
01587 
01588 void
01589 __kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk)
01590 {
01591 #ifdef KMP_DEBUG
01592     kmp_int32           i;
01593 #endif
01594     kmp_taskq_t        *tq;
01595     int                 in_parallel;
01596     kmp_info_t         *th;
01597     kmp_int32           is_outermost;
01598     kmpc_task_queue_t  *queue;
01599     kmpc_thunk_t       *thunk;
01600     int                 nproc;
01601 
01602     KE_TRACE( 10, ("__kmpc_end_taskq called (%d)\n", global_tid));
01603 
01604     tq = & __kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
01605     nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;
01606 
01607     /* For the outermost taskq only, all but one thread will have taskq_thunk == NULL */
01608     queue = (taskq_thunk == NULL) ? tq->tq_root : taskq_thunk->th.th_shareds->sv_queue;
01609 
01610     KE_TRACE( 50, ("__kmpc_end_taskq queue=%p (%d) \n", queue, global_tid));
01611     is_outermost = (queue == tq->tq_root);
01612     in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
01613 
01614     if (in_parallel) {
01615         kmp_uint32 spins;
01616 
01617         /* this is just a safeguard to release the waiting threads if */
01618         /* the outermost taskq never queues a task                    */
01619 
01620         if (is_outermost && (KMP_MASTER_GTID( global_tid ))) {
01621             if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) {
01622                 /* no lock needed, workers are still in spin mode */
01623                 tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;
01624 
01625                 __kmp_end_split_barrier( bs_plain_barrier, global_tid );
01626             }
01627         }
01628 
01629         /* keep dequeueing work until all tasks are queued and dequeued */
01630 
01631         do {
01632             /* wait until something is available to dequeue */
01633             KMP_INIT_YIELD(spins);
01634 
01635             while ( (queue->tq_nfull == 0)
01636                  && (queue->tq_taskq_slot == NULL)
01637                  && (! __kmp_taskq_has_any_children(queue) )
01638                  && (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED) )
01639                   ) {
01640                 __kmp_static_delay( 1 );
01641                 KMP_YIELD_WHEN( TRUE, spins );
01642             }
01643 
01644             /* check to see if we can execute tasks in the queue */
01645             while ( ( (queue->tq_nfull != 0) || (queue->tq_taskq_slot != NULL) )
01646                  && (thunk = __kmp_find_task_in_queue(global_tid, queue)) != NULL
01647                   ) {
01648                 KF_TRACE(50, ("Found thunk: %p in primary queue %p (%d)\n", thunk, queue, global_tid));
01649                 __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
01650             }
01651 
01652             /* see if work can be found in a descendant queue */
01653             if ( (__kmp_taskq_has_any_children(queue))
01654               && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL
01655                ) {
01656 
01657                 KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
01658                     thunk, thunk->th.th_shareds->sv_queue, queue, global_tid ));
01659 
01660                 __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
01661             }
01662 
01663         } while ( (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED))
01664                || (queue->tq_nfull != 0)
01665                 );
01666 
01667         KF_TRACE(50, ("All tasks queued and dequeued in queue: %p (%d)\n", queue, global_tid));
01668 
01669         /* while not all tasks are finished and more work can be found
01670            in descendant queues, keep executing it */
01671 
01672         while ( (!__kmp_taskq_tasks_finished(queue))
01673              && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL
01674               ) {
01675 
01676             KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
01677                 thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
01678 
01679             __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
01680         }
01681 
01682         KF_TRACE(50, ("No work found in descendant queues or all work finished in queue: %p (%d)\n", queue, global_tid));
01683 
01684         if (!is_outermost) {
01685             /* need to return if NOWAIT present and not outermost taskq */
01686 
01687             if (queue->tq_flags & TQF_IS_NOWAIT) {
01688                 __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
01689                 queue->tq_ref_count--;
01690                 KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 );
01691                 __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
01692 
01693                 KE_TRACE( 10, ("__kmpc_end_taskq return for nowait case (%d)\n", global_tid));
01694 
01695                 return;
01696             }
01697 
01698             __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue );
01699 
01700             /* WAIT until all tasks are finished and no child queues exist before proceeding */
01701             KMP_INIT_YIELD(spins);
01702 
01703             while (!__kmp_taskq_tasks_finished(queue) || __kmp_taskq_has_any_children(queue)) {
01704                 thunk = __kmp_find_task_in_ancestor_queue( tq, global_tid, queue );
01705 
01706                 if (thunk != NULL) {
01707                     KF_TRACE(50, ("Stole thunk: %p in ancestor queue: %p while waiting in queue: %p (%d)\n",
01708                                   thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
01709                     __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
01710                 }
01711 
01712                 KMP_YIELD_WHEN( thunk == NULL, spins );
01713 
01714                 __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue );
01715             }
01716 
01717             __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
01718             if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
01719                 queue->tq_flags |= TQF_DEALLOCATED;
01720             }
01721             __kmp_release_lock(& queue->tq_queue_lck, global_tid);
01722 
01723             /* only the allocating thread can deallocate the queue */
01724             if (taskq_thunk != NULL) {
01725                 __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE );
01726             }
01727 
01728             KE_TRACE( 10, ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n", global_tid));
01729 
01730             return;
01731         }
01732 
01733         /* Outermost Queue: steal work from descendants until all tasks are finished */
01734 
01735         KMP_INIT_YIELD(spins);
01736 
01737         while (!__kmp_taskq_tasks_finished(queue)) {
01738             thunk = __kmp_find_task_in_descendant_queue(global_tid, queue);
01739 
01740             if (thunk != NULL) {
01741                 KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
01742                     thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
01743 
01744                 __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
01745             }
01746 
01747             KMP_YIELD_WHEN( thunk == NULL, spins );
01748         }
01749 
01750         /* Need this barrier to prevent destruction of queue before threads have all executed above code */
01751         /* This may need to be done earlier when NOWAIT is implemented for the outermost level */
01752 
01753         if ( !__kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) {
01754             /* the queue->tq_flags & TQF_IS_NOWAIT case is not yet handled here;   */
01755             /* for right now, everybody waits, and the master thread destroys the  */
01756             /* remaining queues.                                                   */
01757 
01758             __kmp_remove_all_child_taskq( tq, global_tid, queue );
01759 
01760             /* Now destroy the root queue */
01761             KF_TRACE(100, ("T#%d Before Deletion of top-level TaskQ at %p:\n", global_tid, queue ));
01762             KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
01763 
01764 #ifdef KMP_DEBUG
01765             /*  the root queue entry  */
01766             KMP_DEBUG_ASSERT ((queue->tq.tq_parent == NULL) && (queue->tq_next_child == NULL));
01767 
01768             /*  children must all be gone by now because of barrier above */
01769             KMP_DEBUG_ASSERT (queue->tq_first_child == NULL);
01770 
01771             for (i=0; i<nproc; i++) {
01772                 KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
01773             }
01774 
01775             for (i=0, thunk=queue->tq_free_thunks; thunk != NULL; i++, thunk=thunk->th.th_next_free);
01776 
01777             KMP_DEBUG_ASSERT (i == queue->tq_nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH));
01778 
01779             for (i = 0; i < nproc; i++) {
01780                 KMP_DEBUG_ASSERT( ! tq->tq_curr_thunk[i] );
01781             }
01782 #endif
01783             /*  unlink the root queue entry  */
01784             tq -> tq_root =  NULL;
01785 
01786             /*  release storage for root queue entry  */
01787             KF_TRACE(50, ("After Deletion of top-level TaskQ at %p on (%d):\n", queue, global_tid));
01788 
01789             queue->tq_flags |= TQF_DEALLOCATED;
01790             __kmp_free_taskq ( tq, queue, in_parallel, global_tid );
01791 
01792             KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));
01793 
01794             /* release the workers now that the data structures are up to date */
01795             __kmp_end_split_barrier( bs_plain_barrier, global_tid );
01796         }
01797 
01798         th = __kmp_threads[ global_tid ];
01799 
01800         /* Reset ORDERED SECTION to parallel version */
01801         th->th.th_dispatch->th_deo_fcn = 0;
01802 
01803         /* Reset ORDERED SECTION to parallel version */
01804         th->th.th_dispatch->th_dxo_fcn = 0;
01805     }
01806     else {
01807         /* in serial execution context, dequeue the last task  */
01808         /* and execute it, if there were any tasks encountered */
01809 
01810         if (queue->tq_nfull > 0) {
01811             KMP_DEBUG_ASSERT(queue->tq_nfull == 1);
01812 
01813             thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);
01814 
01815             if (queue->tq_flags & TQF_IS_LAST_TASK) {
01816                 /* TQF_IS_LASTPRIVATE is set and only one task is in the queue;       */
01817                 /* __kmpc_end_taskq_task() has been run, so this is the last task.    */
01818                 /* Run it with TQF_IS_LAST_TASK so instrumentation does the copy-out. */
01819 
01820                 /* no need for test_then_or call since already locked */
01821                 thunk->th_flags |= TQF_IS_LAST_TASK;
01822             }
01823 
01824             KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, thunk, queue));
01825 
01826             __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
01827         }
01828 
01829         /* destroy the unattached serial queue now that there is no more work to do */
01830         KF_TRACE(100, ("Before Deletion of Serialized TaskQ at %p on (%d):\n", queue, global_tid));
01831         KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
01832 
01833 #ifdef KMP_DEBUG
01834         i = 0;
01835         for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free)
01836             ++i;
01837         KMP_DEBUG_ASSERT (i == queue->tq_nslots + 1);
01838 #endif
01839         /*  release storage for unattached serial queue  */
01840         KF_TRACE(50, ("Serialized TaskQ at %p deleted on (%d).\n", queue, global_tid));
01841 
01842         queue->tq_flags |= TQF_DEALLOCATED;
01843         __kmp_free_taskq ( tq, queue, in_parallel, global_tid );
01844     }
01845 
01846     KE_TRACE( 10, ("__kmpc_end_taskq return (%d)\n", global_tid));
01847 }
01848 
01849 /*  Enqueues a task for a thunk previously created by __kmpc_task_buffer. */
01850 /*  Returns nonzero if the queue was just filled up.                      */
01851 
01852 kmp_int32
01853 __kmpc_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk)
01854 {
01855     kmp_int32          ret;
01856     kmpc_task_queue_t *queue;
01857     int                in_parallel;
01858     kmp_taskq_t       *tq;
01859 
01860     KE_TRACE( 10, ("__kmpc_task called (%d)\n", global_tid));
01861 
01862     KMP_DEBUG_ASSERT (!(thunk->th_flags & TQF_TASKQ_TASK));  /*  thunk->th_task is a regular task  */
01863 
01864     tq          = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
01865     queue       = thunk->th.th_shareds->sv_queue;
01866     in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
01867 
01868     if (in_parallel && (thunk->th_flags & TQF_IS_ORDERED))
01869         thunk->th_tasknum = ++queue->tq_tasknum_queuing;
01870 
01871     /* For serial execution, dequeue the preceding task and execute it, if one exists. */
01872     /* It cannot be the last task -- that one is handled in __kmpc_end_taskq.          */
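    /* For illustration (numbers assumed): in a serialized taskq that generates */
    /* N tasks, tasks 1 .. N-1 each get executed here, inside the __kmpc_task() */
    /* call that enqueues their successor, and task N is executed later in      */
    /* __kmpc_end_taskq().                                                      */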
01873 
01874     if (!in_parallel && queue->tq_nfull > 0) {
01875         kmpc_thunk_t *prev_thunk;
01876 
01877         KMP_DEBUG_ASSERT(queue->tq_nfull == 1);
01878 
01879         prev_thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);
01880 
01881         KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, prev_thunk, queue));
01882 
01883         __kmp_execute_task_from_queue( tq, loc, global_tid, prev_thunk, in_parallel );
01884     }
01885 
01886     /* The instrumentation sequence is:  __kmpc_task_buffer(), initialize private    */
01887     /* variables, __kmpc_task().  The __kmpc_task_buffer routine checks that the     */
01888     /* task queue is not full and allocates a thunk (which is then passed to         */
01889     /* __kmpc_task()).  So, the enqueue below should never fail due to a full queue. */
01890 
01891     KF_TRACE(100, ("After enqueueing this Task on (%d):\n", global_tid));
01892     KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));
01893 
01894     ret = __kmp_enqueue_task ( tq, global_tid, queue, thunk, in_parallel );
01895 
01896     KF_TRACE(100, ("Task Queue looks like this on (%d):\n", global_tid));
01897     KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));
01898 
01899     KE_TRACE( 10, ("__kmpc_task return (%d)\n", global_tid));
01900 
01901     return ret;
01902 }
01903 
01904 /*  enqueues a taskq_task for thunk previously created by __kmpc_taskq  */
01905 /*  this should never be called unless in a parallel context            */
01906 
01907 void
01908 __kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status)
01909 {
01910     kmpc_task_queue_t *queue;
01911     kmp_taskq_t       *tq  = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
01912     int                tid = __kmp_tid_from_gtid( global_tid );
01913 
01914     KE_TRACE( 10, ("__kmpc_taskq_task called (%d)\n", global_tid));
01915     KF_TRACE(100, ("TaskQ Task argument thunk on (%d):\n", global_tid));
01916     KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));
01917 
01918     queue = thunk->th.th_shareds->sv_queue;
01919 
01920     if ( __kmp_env_consistency_check )
01921         __kmp_pop_workshare( global_tid, ct_taskq, loc );
01922 
01923     /*  thunk->th_task is the taskq_task  */
01924     KMP_DEBUG_ASSERT (thunk->th_flags & TQF_TASKQ_TASK);
01925 
01926     /*  not supposed to call __kmpc_taskq_task if it's already enqueued  */
01927     KMP_DEBUG_ASSERT (queue->tq_taskq_slot == NULL);
01928 
01929     /* dequeue taskq thunk from curr_thunk stack */
01930     tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
01931     thunk->th_encl_thunk = NULL;
01932 
01933     KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
01934 
01935     thunk->th_status = status;
01936 
01937     KMP_MB();  /*  flush thunk->th_status before taskq_task enqueued to avoid race condition  */
01938 
01939     /*  enqueue taskq_task in thunk into special slot in queue     */
01940     /* GEH - probably don't need to lock taskq slot since only one */
01941     /*       thread enqueues & already a lock set at dequeue point */
01942 
01943     queue->tq_taskq_slot = thunk;
01944 
01945     KE_TRACE( 10, ("__kmpc_taskq_task return (%d)\n", global_tid));
01946 }
01947 
01948 /*  ends a taskq_task; done generating tasks  */
01949 
01950 void
01951 __kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk)
01952 {
01953     kmp_taskq_t       *tq;
01954     kmpc_task_queue_t *queue;
01955     int                in_parallel;
01956     int                tid;
01957 
01958     KE_TRACE( 10, ("__kmpc_end_taskq_task called (%d)\n", global_tid));
01959 
01960     tq          = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
01961     queue       = thunk->th.th_shareds->sv_queue;
01962     in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
01963     tid         = __kmp_tid_from_gtid( global_tid );
01964 
01965     if ( __kmp_env_consistency_check )
01966         __kmp_pop_workshare( global_tid, ct_taskq, loc );
01967 
01968     if (in_parallel) {
01969 #if KMP_ARCH_X86 || \
01970     KMP_ARCH_X86_64
01971 
01972         __kmp_test_then_or32( &queue->tq_flags, (kmp_int32) TQF_ALL_TASKS_QUEUED );
01973 #else
01974         {
01975             __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
01976 
01977             KMP_MB();  /* make sure data structures are in consistent state before querying them */
01978                        /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */
01979 
01980             queue->tq_flags |= TQF_ALL_TASKS_QUEUED;
01981 
01982             __kmp_release_lock(& queue->tq_queue_lck, global_tid);
01983         }
01984 #endif
01985     }
01986 
01987     if (thunk->th_flags & TQF_IS_LASTPRIVATE) {
01988         /* Normally, __kmp_find_task_in_queue() refuses to schedule the last task in the */
01989         /* queue if TQF_IS_LASTPRIVATE so we can positively identify that last task      */
01990         /* and run it with its TQF_IS_LAST_TASK bit turned on in th_flags.  When         */
01991         /* __kmpc_end_taskq_task() is called we are done generating all the tasks, so    */
01992         /* we know the last one in the queue is the lastprivate task.  Mark the queue    */
01993         /* as having gotten to this state via tq_flags & TQF_IS_LAST_TASK; when that     */
01994         /* task actually executes mark it via th_flags & TQF_IS_LAST_TASK (this th_flags */
01995         /* bit signals the instrumented code to do copy-outs after execution).           */
01996 
01997         if (! in_parallel) {
01998             /* No synchronization needed for serial context */
01999             queue->tq_flags |= TQF_IS_LAST_TASK;
02000         }
02001         else {
02002 #if KMP_ARCH_X86 || \
02003     KMP_ARCH_X86_64
02004 
02005             __kmp_test_then_or32( &queue->tq_flags, (kmp_int32) TQF_IS_LAST_TASK );
02006 #else
02007             {
02008                 __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
02009 
02010                 KMP_MB();  /* make sure data structures are in consistent state before querying them */
02011                            /* Seems to work without this call for digital/alpha, needed for IBM/RS6000 */
02012 
02013                 queue->tq_flags |= TQF_IS_LAST_TASK;
02014 
02015                 __kmp_release_lock(& queue->tq_queue_lck, global_tid);
02016             }
02017 #endif
02018             /* to prevent race condition where last task is dequeued but */
02019             /* flag isn't visible yet (not sure about this)              */
02020             KMP_MB();
02021         }
02022     }
02023 
02024     /* dequeue taskq thunk from curr_thunk stack */
02025     if (in_parallel) {
02026         tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
02027         thunk->th_encl_thunk = NULL;
02028 
02029         KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
02030     }
02031 
02032     KE_TRACE( 10, ("__kmpc_end_taskq_task return (%d)\n", global_tid));
02033 }
02034 
02035 /* returns thunk for a regular task based on taskq_thunk              */
02036 /* (__kmpc_taskq_task does the analogous thing for a TQF_TASKQ_TASK)  */
02037 
02038 kmpc_thunk_t *
02039 __kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task)
02040 {
02041     kmp_taskq_t       *tq;
02042     kmpc_task_queue_t *queue;
02043     kmpc_thunk_t      *new_thunk;
02044     int                in_parallel;
02045 
02046     KE_TRACE( 10, ("__kmpc_task_buffer called (%d)\n", global_tid));
02047 
02048     KMP_DEBUG_ASSERT (taskq_thunk->th_flags & TQF_TASKQ_TASK);  /*  taskq_thunk->th_task is the taskq_task  */
02049 
02050     tq          = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
02051     queue       = taskq_thunk->th.th_shareds->sv_queue;
02052     in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
02053 
02054     /* The instrumentation sequence is:  __kmpc_task_buffer(), initialize private */
02055     /* variables, __kmpc_task().  The __kmpc_task_buffer routine checks that the  */
02056     /* task queue is not full and allocates a thunk (which is then passed to      */
02057     /* __kmpc_task()).  So, we can pre-allocate a thunk here assuming it will be  */
02058     /* the next to be enqueued in __kmpc_task().                                  */
02059 
02060     new_thunk = __kmp_alloc_thunk (queue, in_parallel, global_tid);
02061     new_thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[0].ai_data;
02062     new_thunk->th_encl_thunk = NULL;
02063     new_thunk->th_task       = task;
02064 
02065     /* GEH - shouldn't need to lock the read of tq_flags here */
02066     new_thunk->th_flags      = queue->tq_flags & TQF_INTERFACE_FLAGS;
02067 
02068     new_thunk->th_status     = 0;
02069 
02070     KMP_DEBUG_ASSERT (!(new_thunk->th_flags & TQF_TASKQ_TASK));
02071 
02072     KF_TRACE(100, ("Creating Regular Task on (%d):\n", global_tid));
02073     KF_DUMP(100, __kmp_dump_thunk( tq, new_thunk, global_tid ));
02074 
02075     KE_TRACE( 10, ("__kmpc_task_buffer return (%d)\n", global_tid));
02076 
02077     return new_thunk;
02078 }
02079 
02080 /* --------------------------------------------------------------------------- */
