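/*
 * kmp_taskq.c -- TASKQ support for OpenMP (the work-queuing "taskq"
 * extension).  Each taskq construct owns a queue of thunks (task closures);
 * nested taskqs form a tree of queues rooted at tq_root.  The __kmpc_*
 * routines below are the compiler interface.
 */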
#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_error.h"

#define MAX_MESSAGE 512

/* Trace wrapper used by the queue reference-count debugging output. */
#define KMP_DEBUG_REF_CTS(x)    KF_TRACE(1, x);

/* When defined, taskq storage is allocated from the per-thread heap
   instead of the process-wide internal malloc. */
#define THREAD_ALLOC_FOR_TASKQ
static void
__kmp_static_delay( int arg )
{
#if KMP_ARCH_X86_64 && KMP_OS_LINUX
    KMP_ASSERT( arg != 0 );
#else
    KMP_ASSERT( arg >= 0 );
#endif
}

static void
__kmp_static_yield( int arg )
{
    __kmp_yield( arg );
}

/* Returns nonzero when the team is executing a real (non-serialized)
   parallel region. */
static int
in_parallel_context( kmp_team_t *team )
{
    return ! team -> t.t_serialized;
}

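/*
 * __kmp_taskq_eo: ordered-entry hook (installed as th_deo_fcn).  Spins until
 * the calling thread's thunk holds the ordered token that its queue is
 * currently serving.
 */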
static void
__kmp_taskq_eo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
    int tid  = __kmp_tid_from_gtid( gtid );
    kmp_uint32 my_token;
    kmpc_task_queue_t *taskq;
    kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq;

    if ( __kmp_env_consistency_check )
        __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL );

    if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) {
        KMP_MB();   /* flush pending memory writes before reading the token */

        my_token = tq->tq_curr_thunk[ tid ]-> th_tasknum;

        taskq = tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue;

        KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL);
        KMP_MB();
    }
}

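/*
 * __kmp_taskq_xo: ordered-exit hook (installed as th_dxo_fcn).  Passes the
 * ordered token on to the next task by advancing tq_tasknum_serving.
 */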
static void
__kmp_taskq_xo( int *gtid_ref, int *cid_ref, ident_t *loc_ref )
{
    int gtid = *gtid_ref;
    int tid  = __kmp_tid_from_gtid( gtid );
    kmp_uint32 my_token;
    kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq;

    if ( __kmp_env_consistency_check )
        __kmp_pop_sync( gtid, ct_ordered_in_taskq, loc_ref );

    if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) {
        KMP_MB();

        my_token = tq->tq_curr_thunk[ tid ]->th_tasknum;

        KMP_MB();

        tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue -> tq_tasknum_serving = my_token + 1;

        KMP_MB();
    }
}

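/*
 * __kmp_taskq_check_ordered: called after executing an ordered thunk outside
 * the dispatcher.  Waits until the queue has served all earlier tokens, then
 * advances tq_tasknum_serving past this thunk's token.
 */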
static void
__kmp_taskq_check_ordered( kmp_int32 gtid, kmpc_thunk_t *thunk )
{
    kmp_uint32 my_token;
    kmpc_task_queue_t *taskq;

    KMP_MB();   /* flush pending memory writes before reading the token */

    my_token = thunk -> th_tasknum;

    taskq = thunk -> th.th_shareds -> sv_queue;

    if (taskq->tq_tasknum_serving <= my_token) {
        KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL);
        KMP_MB();
        taskq->tq_tasknum_serving = my_token + 1;
        KMP_MB();
    }
}

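/* ------------------------------------------------------------------------
 * Debugging dump routines (emitted through KF_DUMP at high trace levels).
 * ------------------------------------------------------------------------ */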
static void
__kmp_dump_TQF(kmp_int32 flags)
{
    if (flags & TQF_IS_ORDERED)
        __kmp_printf("ORDERED ");
    if (flags & TQF_IS_LASTPRIVATE)
        __kmp_printf("LAST_PRIV ");
    if (flags & TQF_IS_NOWAIT)
        __kmp_printf("NOWAIT ");
    if (flags & TQF_HEURISTICS)
        __kmp_printf("HEURIST ");
    if (flags & TQF_INTERFACE_RESERVED1)
        __kmp_printf("RESERV1 ");
    if (flags & TQF_INTERFACE_RESERVED2)
        __kmp_printf("RESERV2 ");
    if (flags & TQF_INTERFACE_RESERVED3)
        __kmp_printf("RESERV3 ");
    if (flags & TQF_INTERFACE_RESERVED4)
        __kmp_printf("RESERV4 ");
    if (flags & TQF_IS_LAST_TASK)
        __kmp_printf("LAST_TASK ");
    if (flags & TQF_TASKQ_TASK)
        __kmp_printf("TASKQ_TASK ");
    if (flags & TQF_RELEASE_WORKERS)
        __kmp_printf("RELEASE ");
    if (flags & TQF_ALL_TASKS_QUEUED)
        __kmp_printf("ALL_QUEUED ");
    if (flags & TQF_PARALLEL_CONTEXT)
        __kmp_printf("PARALLEL ");
    if (flags & TQF_DEALLOCATED)
        __kmp_printf("DEALLOC ");
    if (!(flags & (TQF_INTERNAL_FLAGS|TQF_INTERFACE_FLAGS)))
        __kmp_printf("(NONE)");
}

static void
__kmp_dump_thunk( kmp_taskq_t *tq, kmpc_thunk_t *thunk, kmp_int32 global_tid )
{
    int i;
    int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;

    __kmp_printf("\tThunk at %p on (%d):  ", thunk, global_tid);

    if (thunk != NULL) {
        for (i = 0; i < nproc; i++) {
            if( tq->tq_curr_thunk[i] == thunk ) {
                __kmp_printf("[%i] ", i);
            }
        }
        __kmp_printf("th_shareds=%p, ", thunk->th.th_shareds);
        __kmp_printf("th_task=%p, ", thunk->th_task);
        __kmp_printf("th_encl_thunk=%p, ", thunk->th_encl_thunk);
        __kmp_printf("th_status=%d, ", thunk->th_status);
        __kmp_printf("th_tasknum=%u, ", thunk->th_tasknum);
        __kmp_printf("th_flags=");  __kmp_dump_TQF(thunk->th_flags);
    }

    __kmp_printf("\n");
}

static void
__kmp_dump_thunk_stack(kmpc_thunk_t *thunk, kmp_int32 thread_num)
{
    kmpc_thunk_t *th;

    __kmp_printf("    Thunk stack for T#%d:  ", thread_num);

    for (th = thunk; th != NULL; th = th->th_encl_thunk )
        __kmp_printf("%p ", th);

    __kmp_printf("\n");
}

static void
__kmp_dump_task_queue( kmp_taskq_t *tq, kmpc_task_queue_t *queue, kmp_int32 global_tid )
{
    int qs, count, i;
    kmpc_thunk_t *thunk;
    kmpc_task_queue_t *taskq;

    __kmp_printf("Task Queue at %p on (%d):\n", queue, global_tid);

    if (queue != NULL) {
        int in_parallel = queue->tq_flags & TQF_PARALLEL_CONTEXT;

        if ( __kmp_env_consistency_check ) {
            __kmp_printf("    tq_loc             : ");
        }
        if (in_parallel) {
            KMP_MB();

            __kmp_printf("    tq_parent          : %p\n", queue->tq.tq_parent);
            __kmp_printf("    tq_first_child     : %p\n", queue->tq_first_child);
            __kmp_printf("    tq_next_child      : %p\n", queue->tq_next_child);
            __kmp_printf("    tq_prev_child      : %p\n", queue->tq_prev_child);
            __kmp_printf("    tq_ref_count       : %d\n", queue->tq_ref_count);

            KMP_MB();
        }

        __kmp_printf("    tq_shareds         : ");
        for (i=0; i<((queue == tq->tq_root) ? queue->tq_nproc : 1); i++)
            __kmp_printf("%p ", queue->tq_shareds[i].ai_data);
        __kmp_printf("\n");

        if (in_parallel) {
            __kmp_printf("    tq_tasknum_queuing : %u\n", queue->tq_tasknum_queuing);
            __kmp_printf("    tq_tasknum_serving : %u\n", queue->tq_tasknum_serving);
        }

        __kmp_printf("    tq_queue           : %p\n", queue->tq_queue);
        __kmp_printf("    tq_thunk_space     : %p\n", queue->tq_thunk_space);
        __kmp_printf("    tq_taskq_slot      : %p\n", queue->tq_taskq_slot);

        __kmp_printf("    tq_free_thunks     : ");
        for (thunk = queue->tq_free_thunks; thunk != NULL; thunk = thunk->th.th_next_free )
            __kmp_printf("%p ", thunk);
        __kmp_printf("\n");

        __kmp_printf("    tq_nslots          : %d\n", queue->tq_nslots);
        __kmp_printf("    tq_head            : %d\n", queue->tq_head);
        __kmp_printf("    tq_tail            : %d\n", queue->tq_tail);
        __kmp_printf("    tq_nfull           : %d\n", queue->tq_nfull);
        __kmp_printf("    tq_hiwat           : %d\n", queue->tq_hiwat);
        __kmp_printf("    tq_flags           : "); __kmp_dump_TQF(queue->tq_flags);
        __kmp_printf("\n");

        if (in_parallel) {
            __kmp_printf("    tq_th_thunks       : ");
            for (i = 0; i < queue->tq_nproc; i++) {
                __kmp_printf("%d ", queue->tq_th_thunks[i].ai_data);
            }
            __kmp_printf("\n");
        }

        __kmp_printf("\n");
        __kmp_printf("    Queue slots:\n");

        qs = queue->tq_tail;
        for ( count = 0; count < queue->tq_nfull; ++count ) {
            __kmp_printf("(%d)", qs);
            __kmp_dump_thunk( tq, queue->tq_queue[qs].qs_thunk, global_tid );
            qs = (qs+1) % queue->tq_nslots;
        }

        __kmp_printf("\n");

        if (in_parallel) {
            if (queue->tq_taskq_slot != NULL) {
                __kmp_printf("    TaskQ slot:\n");
                __kmp_dump_thunk( tq, (kmpc_thunk_t *) queue->tq_taskq_slot, global_tid );
                __kmp_printf("\n");
            }
        }
    }

    __kmp_printf("    Taskq freelist: ");

    KMP_MB();

    for( taskq = tq->tq_freelist; taskq != NULL; taskq = taskq->tq.tq_next_free )
        __kmp_printf("%p ", taskq);

    __kmp_printf("\n\n");
}

static void
__kmp_aux_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *curr_queue, kmp_int32 level, kmp_int32 global_tid )
{
    int i, count, qs;
    int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;
    kmpc_task_queue_t *queue = curr_queue;

    if (curr_queue == NULL)
        return;

    __kmp_printf("    ");

    for (i=0; i<level; i++)
        __kmp_printf("  ");

    __kmp_printf("%p", curr_queue);

    for (i = 0; i < nproc; i++) {
        if( tq->tq_curr_thunk[i] && tq->tq_curr_thunk[i]->th.th_shareds->sv_queue == curr_queue ) {
            __kmp_printf(" [%i]", i);
        }
    }

    __kmp_printf(":");

    KMP_MB();

    qs = curr_queue->tq_tail;

    for ( count = 0; count < curr_queue->tq_nfull; ++count ) {
        __kmp_printf("%p ", curr_queue->tq_queue[qs].qs_thunk);
        qs = (qs+1) % curr_queue->tq_nslots;
    }

    __kmp_printf("\n");

    if (curr_queue->tq_first_child) {
        KMP_MB();

        if (curr_queue->tq_first_child) {
            for(queue = (kmpc_task_queue_t *)curr_queue->tq_first_child;
                queue != NULL;
                queue = queue->tq_next_child) {
                __kmp_aux_dump_task_queue_tree( tq, queue, level+1, global_tid );
            }
        }
    }
}

static void
__kmp_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *tqroot, kmp_int32 global_tid)
{
    __kmp_printf("TaskQ Tree at root %p on (%d):\n", tqroot, global_tid);

    __kmp_aux_dump_task_queue_tree( tq, tqroot, 0, global_tid );

    __kmp_printf("\n");
}

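/*
 * __kmp_taskq_allocate: allocator with cache-line headroom.  It over-
 * allocates by sizeof(void *) + CACHE_LINE, rounds an internal pointer up to
 * a cache-line boundary, stashes the original malloc address at that spot,
 * and hands back the slot immediately after it; __kmpc_taskq_free reads the
 * stashed address back to free the block.
 */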
static void *
__kmp_taskq_allocate(size_t size, kmp_int32 global_tid)
{
    void *addr, *orig_addr;
    size_t bytes;

    KB_TRACE( 5, ("__kmp_taskq_allocate: called size=%d, gtid=%d\n", (int) size, global_tid ) );

    bytes = sizeof(void *) + CACHE_LINE + size;

#ifdef THREAD_ALLOC_FOR_TASKQ
    orig_addr = (void *) __kmp_thread_malloc( __kmp_thread_from_gtid(global_tid), bytes );
#else
    KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", bytes ) );
    orig_addr = (void *) KMP_INTERNAL_MALLOC( bytes );
#endif

    if (orig_addr == 0)
        KMP_FATAL( OutOfHeapMemory );

    addr = orig_addr;

    if (((kmp_uintptr_t) addr & ( CACHE_LINE - 1 )) != 0) {
        KB_TRACE( 50, ("__kmp_taskq_allocate: adjust for cache alignment\n" ) );
        addr = (void *) (((kmp_uintptr_t) addr + CACHE_LINE) & ~( CACHE_LINE - 1 ));
    }

    (* (void **) addr) = orig_addr;

    KB_TRACE( 10, ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, gtid: %d\n",
             orig_addr, ((void **) addr) + 1, ((char *)(((void **) addr) + 1)) + size-1,
             (int) size, global_tid ));

    return ( ((void **) addr) + 1 );
}

static void
__kmpc_taskq_free(void *p, kmp_int32 global_tid)
{
    KB_TRACE( 5, ("__kmpc_taskq_free: called addr=%p, gtid=%d\n", p, global_tid ) );

    KB_TRACE(10, ("__kmpc_taskq_free: freeing: %p, gtid: %d\n", (*( ((void **) p)-1)), global_tid ));

#ifdef THREAD_ALLOC_FOR_TASKQ
    __kmp_thread_free( __kmp_thread_from_gtid(global_tid), *( ((void **) p)-1) );
#else
    KMP_INTERNAL_FREE( *( ((void **) p)-1) );
#endif
}

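/*
 * __kmp_alloc_taskq: allocate and initialize a task queue.  A queue is
 * recycled from tq->tq_freelist when one is available; otherwise fresh
 * storage is obtained.  The routine also carves out nthunks thunks (the last
 * one, returned through *new_taskq_thunk, is reserved for the taskq task
 * itself and stays off the free list), the circular slot array, and the
 * shared-variable copies, and initializes the queue locks when in a parallel
 * context.
 */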
static kmpc_task_queue_t *
__kmp_alloc_taskq ( kmp_taskq_t *tq, int in_parallel, kmp_int32 nslots, kmp_int32 nthunks,
                    kmp_int32 nshareds, kmp_int32 nproc, size_t sizeof_thunk,
                    size_t sizeof_shareds, kmpc_thunk_t **new_taskq_thunk, kmp_int32 global_tid )
{
    kmp_int32 i;
    size_t bytes;
    kmpc_task_queue_t *new_queue;
    kmpc_aligned_shared_vars_t *shared_var_array;
    char *shared_var_storage;
    char *pt;

    __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );

    KMP_MB();   /* make sure data structures are in consistent state before querying them */

    if( tq->tq_freelist ) {
        new_queue = tq -> tq_freelist;
        tq -> tq_freelist = tq -> tq_freelist -> tq.tq_next_free;

        KMP_DEBUG_ASSERT(new_queue->tq_flags & TQF_DEALLOCATED);

        new_queue->tq_flags = 0;

        __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
    }
    else {
        __kmp_release_lock( & tq->tq_freelist_lck, global_tid );

        new_queue = (kmpc_task_queue_t *) __kmp_taskq_allocate (sizeof (kmpc_task_queue_t), global_tid);
        new_queue->tq_flags = 0;
    }

    /* allocate the thunk storage: nthunks thunks in one contiguous block,
       with sizeof_thunk rounded up to a cache-line multiple */
    sizeof_thunk += (CACHE_LINE - (sizeof_thunk % CACHE_LINE));
    pt = (char *) __kmp_taskq_allocate (nthunks * sizeof_thunk, global_tid);
    new_queue->tq_thunk_space = (kmpc_thunk_t *)pt;
    *new_taskq_thunk = (kmpc_thunk_t *)(pt + (nthunks - 1) * sizeof_thunk);

    /* chain the thunks into this queue's free list; the last thunk is
       reserved for the taskq task and is not put on the list */
    new_queue->tq_free_thunks = (kmpc_thunk_t *)pt;

    for (i = 0; i < (nthunks - 2); i++) {
        ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th.th_next_free = (kmpc_thunk_t *)(pt + (i+1)*sizeof_thunk);
#ifdef KMP_DEBUG
        ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th_flags = TQF_DEALLOCATED;
#endif
    }

    ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th.th_next_free = NULL;
#ifdef KMP_DEBUG
    ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th_flags = TQF_DEALLOCATED;
#endif

    /* initialize the locks */

    if (in_parallel) {
        __kmp_init_lock( & new_queue->tq_link_lck );
        __kmp_init_lock( & new_queue->tq_free_thunks_lck );
        __kmp_init_lock( & new_queue->tq_queue_lck );
    }

    /* now allocate the slots */

    bytes = nslots * sizeof (kmpc_aligned_queue_slot_t);
    new_queue->tq_queue = (kmpc_aligned_queue_slot_t *) __kmp_taskq_allocate( bytes, global_tid );

    /* space for the shared-variable structures: prepend a pointer back to the
       queue and round up to a cache-line multiple */
    sizeof_shareds += sizeof(kmpc_task_queue_t *);
    sizeof_shareds += (CACHE_LINE - (sizeof_shareds % CACHE_LINE));

    bytes = nshareds * sizeof (kmpc_aligned_shared_vars_t);
    shared_var_array = (kmpc_aligned_shared_vars_t *) __kmp_taskq_allocate ( bytes, global_tid);

    bytes = nshareds * sizeof_shareds;
    shared_var_storage = (char *) __kmp_taskq_allocate ( bytes, global_tid);

    for (i=0; i<nshareds; i++) {
        shared_var_array[i].ai_data = (kmpc_shared_vars_t *) (shared_var_storage + i*sizeof_shareds);
        shared_var_array[i].ai_data->sv_queue = new_queue;
    }
    new_queue->tq_shareds = shared_var_array;

    /* array for counting outstanding thunks per thread */

    if (in_parallel) {
        bytes = nproc * sizeof(kmpc_aligned_int32_t);
        new_queue->tq_th_thunks = (kmpc_aligned_int32_t *) __kmp_taskq_allocate ( bytes, global_tid);
        new_queue->tq_nproc     = nproc;

        for (i=0; i<nproc; i++)
            new_queue->tq_th_thunks[i].ai_data = 0;
    }

    return new_queue;
}

static void
__kmp_free_taskq (kmp_taskq_t *tq, kmpc_task_queue_t *p, int in_parallel, kmp_int32 global_tid)
{
    __kmpc_taskq_free(p->tq_thunk_space, global_tid);
    __kmpc_taskq_free(p->tq_queue, global_tid);

    /* free shared var structure storage */
    __kmpc_taskq_free((void *) p->tq_shareds[0].ai_data, global_tid);

    /* free array of pointers to shared vars storage */
    __kmpc_taskq_free(p->tq_shareds, global_tid);

#ifdef KMP_DEBUG
    p->tq_first_child = NULL;
    p->tq_next_child = NULL;
    p->tq_prev_child = NULL;
    p->tq_ref_count = -10;
    p->tq_shareds = NULL;
    p->tq_tasknum_queuing = 0;
    p->tq_tasknum_serving = 0;
    p->tq_queue = NULL;
    p->tq_thunk_space = NULL;
    p->tq_taskq_slot = NULL;
    p->tq_free_thunks = NULL;
    p->tq_nslots = 0;
    p->tq_head = 0;
    p->tq_tail = 0;
    p->tq_nfull = 0;
    p->tq_hiwat = 0;

    if (in_parallel) {
        int i;

        for (i=0; i<p->tq_nproc; i++)
            p->tq_th_thunks[i].ai_data = 0;
    }
    if ( __kmp_env_consistency_check )
        p->tq_loc = NULL;
    KMP_DEBUG_ASSERT( p->tq_flags & TQF_DEALLOCATED );
    p->tq_flags = TQF_DEALLOCATED;
#endif /* KMP_DEBUG */

    if (in_parallel) {
        __kmpc_taskq_free(p->tq_th_thunks, global_tid);
        __kmp_destroy_lock(& p->tq_link_lck);
        __kmp_destroy_lock(& p->tq_queue_lck);
        __kmp_destroy_lock(& p->tq_free_thunks_lck);
    }
#ifdef KMP_DEBUG
    p->tq_th_thunks = NULL;
#endif /* KMP_DEBUG */

    KMP_MB();

    /* return the queue entry to the global freelist */
    __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid );
    p->tq.tq_next_free = tq->tq_freelist;

    tq->tq_freelist = p;
    __kmp_release_lock( & tq->tq_freelist_lck, global_tid );
}

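/*
 * Thunks are recycled through a per-queue free list (tq_free_thunks); in a
 * parallel context the list is protected by tq_free_thunks_lck.
 */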
static kmpc_thunk_t *
__kmp_alloc_thunk (kmpc_task_queue_t *queue, int in_parallel, kmp_int32 global_tid)
{
    kmpc_thunk_t *fl;

    if (in_parallel) {
        __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);

        KMP_MB();
    }

    fl = queue->tq_free_thunks;

    KMP_DEBUG_ASSERT (fl != NULL);

    queue->tq_free_thunks = fl->th.th_next_free;
    fl->th_flags = 0;

    if (in_parallel)
        __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);

    return fl;
}

static void
__kmp_free_thunk (kmpc_task_queue_t *queue, kmpc_thunk_t *p, int in_parallel, kmp_int32 global_tid)
{
#ifdef KMP_DEBUG
    p->th_task = 0;
    p->th_encl_thunk = 0;
    p->th_status = 0;
    p->th_tasknum = 0;
#endif

    if (in_parallel) {
        __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid);

        KMP_MB();
    }

    p->th.th_next_free = queue->tq_free_thunks;
    queue->tq_free_thunks = p;

#ifdef KMP_DEBUG
    p->th_flags = TQF_DEALLOCATED;
#endif

    if (in_parallel)
        __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid);
}

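/*
 * __kmp_enqueue_task: append a thunk at tq_head of the circular slot array.
 * Returns TRUE (parallel case only) when this enqueue filled the queue, so
 * the enqueuing thread knows it must stop and service tasks.  The first
 * enqueue on the root queue also releases the workers parked at the taskq
 * barrier (TQF_RELEASE_WORKERS).
 */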
static kmp_int32
__kmp_enqueue_task ( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, kmpc_thunk_t *thunk, int in_parallel )
{
    kmp_int32 ret;

    if (in_parallel) {
        __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);

        KMP_MB();   /* make sure data structures are in consistent state before querying them */
    }

    KMP_DEBUG_ASSERT (queue->tq_nfull < queue->tq_nslots);  /* check queue not full */

    queue->tq_queue[(queue->tq_head)++].qs_thunk = thunk;

    if (queue->tq_head >= queue->tq_nslots)
        queue->tq_head = 0;

    (queue->tq_nfull)++;

    KMP_MB();   /* make the enqueued thunk visible before checking for fullness */

    ret = (in_parallel) ? (queue->tq_nfull == queue->tq_nslots) : FALSE;

    if (in_parallel) {
        __kmp_release_lock(& queue->tq_queue_lck, global_tid);

        if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) {
            /* Workers are still waiting at the barrier set up when the root
               queue was created; now that a task is available, release them
               so they can start executing tasks. */
            tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;

            KMP_MB();   /* make sure the flag update is visible before the release */

            __kmpc_end_barrier_master( NULL, global_tid);
        }
    }

    return ret;
}

static kmpc_thunk_t *
__kmp_dequeue_task (kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel)
{
    kmpc_thunk_t *pt;
    int tid = __kmp_tid_from_gtid( global_tid );

    KMP_DEBUG_ASSERT (queue->tq_nfull > 0);     /* check queue not empty */

    if (queue->tq.tq_parent != NULL && in_parallel) {
        /* pin the queue while the dequeued thunk is outstanding */
        int ct;
        __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
        ct = ++(queue->tq_ref_count);
        __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
          __LINE__, global_tid, queue, ct));
    }

    pt = queue->tq_queue[(queue->tq_tail)++].qs_thunk;

    if (queue->tq_tail >= queue->tq_nslots)
        queue->tq_tail = 0;

    if (in_parallel) {
        queue->tq_th_thunks[tid].ai_data++;

        KMP_MB();   /* make the updated outstanding count visible to other threads promptly */

        KF_TRACE(200, ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding thunks from queue %p\n",
            global_tid, tid, queue->tq_th_thunks[tid].ai_data, queue));
    }

    (queue->tq_nfull)--;

#ifdef KMP_DEBUG
    KMP_MB();

    /* necessary so the (queue->tq_nfull > 0) check above succeeds after tq_nfull is decremented */

    KMP_DEBUG_ASSERT(queue->tq_nfull >= 0);

    if (in_parallel) {
        KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data <= __KMP_TASKQ_THUNKS_PER_TH);
    }
#endif

    return pt;
}

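/*
 * __kmp_find_task_in_queue: dequeue the next thunk from the given queue, or
 * return NULL if nothing is eligible.  Selection rules, in order:
 *   - service the pending taskq task (tq_taskq_slot) once the queue has
 *     drained to its high-water mark or below;
 *   - return NULL if the queue is empty or this thread already holds its
 *     per-thread quota of outstanding thunks;
 *   - otherwise dequeue, deferring the final task of a lastprivate queue
 *     until TQF_IS_LAST_TASK has been set on the queue.
 */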
static kmpc_thunk_t *
__kmp_find_task_in_queue (kmp_int32 global_tid, kmpc_task_queue_t *queue)
{
    kmpc_thunk_t *pt  = NULL;
    int           tid = __kmp_tid_from_gtid( global_tid );

    /* avoid touching tq_queue_lck if the queue has already been deallocated */
    if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {

        __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);

        /* check again to guard against a race with queue deallocation */
        if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {

            KMP_MB();   /* make sure data structures are in consistent state before querying them */

            if ((queue->tq_taskq_slot != NULL) && (queue->tq_nfull <= queue->tq_hiwat)) {
                /* if there's enough room in the queue and the dispatcher */
                /* (taskq task) is available, schedule more tasks         */
                pt = (kmpc_thunk_t *) queue->tq_taskq_slot;
                queue->tq_taskq_slot = NULL;
            }
            else if (queue->tq_nfull == 0 ||
                     queue->tq_th_thunks[tid].ai_data >= __KMP_TASKQ_THUNKS_PER_TH) {
                /* do nothing if no thunks available or this thread already */
                /* has the maximum number of outstanding thunks             */
                pt = NULL;
            }
            else if (queue->tq_nfull > 1) {
                /* always safe to schedule a task when more than one is queued */
                pt = __kmp_dequeue_task (global_tid, queue, TRUE);
            }
            else if (!(queue->tq_flags & TQF_IS_LASTPRIVATE)) {
                /* one thing in the queue, always safe to schedule it if not lastprivate */
                pt = __kmp_dequeue_task (global_tid, queue, TRUE);
            }
            else if (queue->tq_flags & TQF_IS_LAST_TASK) {
                /* lastprivate queue with one task left and all tasks queued: */
                /* this is the last task, so run it with TQF_IS_LAST_TASK set */
                /* so that the generated code does the copy-out               */
                pt = __kmp_dequeue_task (global_tid, queue, TRUE);
                pt->th_flags |= TQF_IS_LAST_TASK;  /* no atomic needed, queue lock is held */
            }
        }

        __kmp_release_lock(& queue->tq_queue_lck, global_tid);
    }

    return pt;
}

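/*
 * __kmp_find_task_in_descendant_queue: walk the subtree rooted at curr_queue
 * (depth first) looking for work.  Each visited child's tq_ref_count is
 * bumped before the parent's link lock is dropped, so the child cannot be
 * freed out from under the search.
 */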
static kmpc_thunk_t *
__kmp_find_task_in_descendant_queue (kmp_int32 global_tid, kmpc_task_queue_t *curr_queue)
{
    kmpc_thunk_t *pt = NULL;
    kmpc_task_queue_t *queue = curr_queue;

    if (curr_queue->tq_first_child != NULL) {
        __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

        KMP_MB();   /* make sure data structures are in consistent state before querying them */

        queue = (kmpc_task_queue_t *) curr_queue->tq_first_child;
        if (queue == NULL) {
            __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
            return NULL;
        }

        while (queue != NULL) {
            int ct;
            kmpc_task_queue_t *next;

            ct = ++(queue->tq_ref_count);       /* pin the child across the search */
            __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
              __LINE__, global_tid, queue, ct));

            pt = __kmp_find_task_in_queue (global_tid, queue);

            if (pt != NULL) {
                int ct;

                __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

                KMP_MB();   /* make sure data structures are in consistent state before querying them */

                ct = --(queue->tq_ref_count);
                KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
                  __LINE__, global_tid, queue, ct));
                KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 );

                __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);

                return pt;
            }

            /* the reference count stays active during the descendant walk;   */
            /* this shouldn't matter since, while children still exist,       */
            /* reference counts aren't being monitored anyway                 */

            pt = __kmp_find_task_in_descendant_queue (global_tid, queue);

            if (pt != NULL) {
                int ct;

                __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

                KMP_MB();   /* make sure data structures are in consistent state before querying them */

                ct = --(queue->tq_ref_count);
                KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
                  __LINE__, global_tid, queue, ct));
                KMP_DEBUG_ASSERT( ct >= 0 );

                __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);

                return pt;
            }

            __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

            KMP_MB();   /* make sure data structures are in consistent state before querying them */

            next = queue->tq_next_child;

            ct = --(queue->tq_ref_count);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
              __LINE__, global_tid, queue, ct));
            KMP_DEBUG_ASSERT( ct >= 0 );

            queue = next;
        }

        __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
    }

    return pt;
}

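/*
 * __kmp_find_task_in_ancestor_queue: climb from curr_queue's parent toward
 * the root looking for work, then fall back to searching the whole tree
 * below the root.  Reference counts pin each queue while it is inspected
 * without its parent's link lock held.
 */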
static kmpc_thunk_t *
__kmp_find_task_in_ancestor_queue (kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue)
{
    kmpc_task_queue_t *queue;
    kmpc_thunk_t *pt;

    pt = NULL;

    if (curr_queue->tq.tq_parent != NULL) {
        queue = curr_queue->tq.tq_parent;

        while (queue != NULL) {
            if (queue->tq.tq_parent != NULL) {
                int ct;
                __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

                KMP_MB();   /* make sure data structures are in consistent state before querying them */

                ct = ++(queue->tq_ref_count);
                __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
                KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
                  __LINE__, global_tid, queue, ct));
            }

            pt = __kmp_find_task_in_queue (global_tid, queue);
            if (pt != NULL) {
                if (queue->tq.tq_parent != NULL) {
                    int ct;
                    __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

                    KMP_MB();   /* make sure data structures are in consistent state before querying them */

                    ct = --(queue->tq_ref_count);
                    KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
                      __LINE__, global_tid, queue, ct));
                    KMP_DEBUG_ASSERT( ct >= 0 );

                    __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
                }

                return pt;
            }

            if (queue->tq.tq_parent != NULL) {
                int ct;
                __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

                KMP_MB();   /* make sure data structures are in consistent state before querying them */

                ct = --(queue->tq_ref_count);
                KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
                  __LINE__, global_tid, queue, ct));
                KMP_DEBUG_ASSERT( ct >= 0 );
            }
            queue = queue->tq.tq_parent;

            if (queue != NULL)
                __kmp_release_lock(& queue->tq_link_lck, global_tid);
        }
    }

    pt = __kmp_find_task_in_descendant_queue( global_tid, tq->tq_root );

    return pt;
}

/* TRUE when no thread still holds a dequeued-but-unfinished thunk from this queue */
static int
__kmp_taskq_tasks_finished (kmpc_task_queue_t *queue)
{
    int i;

    for (i=0; i<queue->tq_nproc; i++) {
        if (queue->tq_th_thunks[i].ai_data != 0)
            return FALSE;
    }

    return TRUE;
}

static int
__kmp_taskq_has_any_children (kmpc_task_queue_t *queue)
{
    return (queue->tq_first_child != NULL);
}

static void
__kmp_remove_queue_from_tree( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel )
{
#ifdef KMP_DEBUG
    kmp_int32     i;
    kmpc_thunk_t *thunk;
#endif

    KF_TRACE(50, ("Before Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
    KF_DUMP(50, __kmp_dump_task_queue( tq, queue, global_tid ));

    /* a nested queue, never the root */
    KMP_DEBUG_ASSERT (queue->tq.tq_parent != NULL);

    if (in_parallel) {
        __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

        KMP_MB();   /* make sure data structures are in consistent state before querying them */
    }

    KMP_DEBUG_ASSERT (queue->tq_first_child == NULL);

    /* unlink queue from its siblings, if any, at this level */
    if (queue->tq_prev_child != NULL)
        queue->tq_prev_child->tq_next_child = queue->tq_next_child;
    if (queue->tq_next_child != NULL)
        queue->tq_next_child->tq_prev_child = queue->tq_prev_child;
    if (queue->tq.tq_parent->tq_first_child == queue)
        queue->tq.tq_parent->tq_first_child = queue->tq_next_child;

    queue->tq_prev_child = NULL;
    queue->tq_next_child = NULL;

    if (in_parallel) {
        kmp_uint32 spins;

        KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n",
          __LINE__, global_tid, queue, queue->tq_ref_count));

        /* wait until all other uses of the queue have drained away */
        while (queue->tq_ref_count > 1) {
            __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

            KMP_WAIT_YIELD((volatile kmp_uint32*)&queue->tq_ref_count, 1, KMP_LE, NULL);

            __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

            KMP_MB();   /* make sure data structures are in consistent state before querying them */
        }

        __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
    }

    KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p freeing queue\n",
      __LINE__, global_tid, queue));

#ifdef KMP_DEBUG
    KMP_DEBUG_ASSERT(queue->tq_flags & TQF_ALL_TASKS_QUEUED);
    KMP_DEBUG_ASSERT(queue->tq_nfull == 0);

    for (i=0; i<queue->tq_nproc; i++) {
        KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
    }

    i = 0;
    for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free)
        ++i;

    KMP_ASSERT (i == queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH));
#endif

    /* release storage for queue entry */
    __kmp_free_taskq ( tq, queue, TRUE, global_tid );

    KF_TRACE(50, ("After Deletion of TaskQ at %p on (%d):\n", queue, global_tid));
    KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));
}

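/*
 * __kmp_find_and_remove_finished_child_taskq: scan the children of
 * curr_queue and deallocate any nowait child whose tasks have all been
 * queued and executed and which has no children of its own.
 */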
static void
__kmp_find_and_remove_finished_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue )
{
    kmpc_task_queue_t *queue = curr_queue;

    if (curr_queue->tq_first_child != NULL) {
        __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

        KMP_MB();   /* make sure data structures are in consistent state before querying them */

        /* the child may have been removed between the check above and taking the lock */
        queue = (kmpc_task_queue_t *) curr_queue->tq_first_child;
        if (queue == NULL) {
            __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
            return;
        }

        while (queue != NULL) {
            kmpc_task_queue_t *next;
            int ct = ++(queue->tq_ref_count);   /* pin the child across the scan */
            __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n",
              __LINE__, global_tid, queue, ct));

            if (queue->tq_flags & TQF_IS_NOWAIT) {
                __kmp_find_and_remove_finished_child_taskq ( tq, global_tid, queue );

                if ((queue->tq_flags & TQF_ALL_TASKS_QUEUED) && (queue->tq_nfull == 0) &&
                    __kmp_taskq_tasks_finished(queue) && ! __kmp_taskq_has_any_children(queue)) {

                    /* Only remove the queue if it has not already been marked for
                       deallocation; this prevents multiple threads from freeing it. */
                    if ( __kmp_test_lock(& queue->tq_queue_lck, global_tid) ) {
                        if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
                            queue->tq_flags |= TQF_DEALLOCATED;
                            __kmp_release_lock(& queue->tq_queue_lck, global_tid);

                            __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE );

                            /* can't do any more here: the queue may no longer be referenced */
                            return;
                        }
                        else {
                            __kmp_release_lock(& queue->tq_queue_lck, global_tid);
                        }
                    }
                }
            }

            __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

            KMP_MB();   /* make sure data structures are in consistent state before querying them */

            next = queue->tq_next_child;

            ct = --(queue->tq_ref_count);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
              __LINE__, global_tid, queue, ct));
            KMP_DEBUG_ASSERT( ct >= 0 );

            queue = next;
        }

        __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
    }
}

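/*
 * __kmp_remove_all_child_taskq: recursively deallocate the entire subtree
 * below `queue`; used on the serialized teardown path once all tasks have
 * completed.
 */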
static void
__kmp_remove_all_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue )
{
    kmpc_task_queue_t *next_child;

    queue = (kmpc_task_queue_t *) queue->tq_first_child;

    while (queue != NULL) {
        __kmp_remove_all_child_taskq ( tq, global_tid, queue );

        next_child = queue->tq_next_child;
        queue->tq_flags |= TQF_DEALLOCATED;
        __kmp_remove_queue_from_tree ( tq, global_tid, queue, FALSE );
        queue = next_child;
    }
}

static void
__kmp_execute_task_from_queue( kmp_taskq_t *tq, ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, int in_parallel )
{
    kmpc_task_queue_t *queue = thunk->th.th_shareds->sv_queue;
    kmp_int32 tid = __kmp_tid_from_gtid( global_tid );

    KF_TRACE(100, ("After dequeueing this Task on (%d):\n", global_tid));
    KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));
    KF_TRACE(100, ("Task Queue: %p looks like this (%d):\n", queue, global_tid));
    KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));

    if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
        /* the root queue keeps a per-thread copy of the shared variables; */
        /* nested queues have a single copy at index 0                     */
        kmp_int32 index = (queue == tq->tq_root) ? tid : 0;
        thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[index].ai_data;

        if ( __kmp_env_consistency_check ) {
            __kmp_push_workshare( global_tid,
                    (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task,
                    queue->tq_loc );
        }
    }
    else {
        if ( __kmp_env_consistency_check )
            __kmp_push_workshare( global_tid, ct_taskq, queue->tq_loc );
    }

    if (in_parallel) {
        thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
        tq->tq_curr_thunk[tid] = thunk;

        KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
    }

    KF_TRACE( 50, ("Begin Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid));
    thunk->th_task (global_tid, thunk);
    KF_TRACE( 50, ("End Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid));

    if (!(thunk->th_flags & TQF_TASKQ_TASK)) {
        if ( __kmp_env_consistency_check )
            __kmp_pop_workshare( global_tid, (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task,
                                 queue->tq_loc );

        if (in_parallel) {
            tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
            thunk->th_encl_thunk = NULL;
            KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
        }

        if ((thunk->th_flags & TQF_IS_ORDERED) && in_parallel) {
            __kmp_taskq_check_ordered(global_tid, thunk);
        }

        __kmp_free_thunk (queue, thunk, in_parallel, global_tid);

        KF_TRACE(100, ("T#%d After freeing thunk: %p, TaskQ looks like this:\n", global_tid, thunk));
        KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));

        if (in_parallel) {
            KMP_MB();   /* the thunk must be on the free list before the outstanding count is decremented */

            KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data >= 1);

            KF_TRACE( 200, ("__kmp_execute_task_from_queue: T#%d has %d thunks in queue %p\n",
                global_tid, queue->tq_th_thunks[tid].ai_data-1, queue));

            queue->tq_th_thunks[tid].ai_data--;
        }

        if (queue->tq.tq_parent != NULL && in_parallel) {
            int ct;
            __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
            ct = --(queue->tq_ref_count);
            __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n",
              __LINE__, global_tid, queue, ct));
            KMP_DEBUG_ASSERT( ct >= 0 );
        }
    }
}

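/*
 * __kmpc_taskq: compiler interface, called at the start of a taskq
 * construct.  Allocates the queue, links it into the tree of active queues,
 * and returns the thunk for the taskq (enqueuing) task itself.
 *
 * As a rough sketch (an assumption about typical compiler output, not a
 * literal transcription), the generated code drives these entry points as:
 *
 *     thunk = __kmpc_taskq(loc, gtid, taskq_task_fn, sizeof_thunk,
 *                          sizeof_shareds, flags, &shareds);
 *     if (thunk != NULL)              // NULL for the released worker threads
 *         taskq_task_fn(gtid, thunk); // enqueues via __kmpc_task_buffer() +
 *                                     // __kmpc_task(), then parks itself
 *                                     // with __kmpc_taskq_task()
 *     __kmpc_end_taskq(loc, gtid, thunk);
 */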
kmpc_thunk_t *
__kmpc_taskq( ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task,
              size_t sizeof_thunk, size_t sizeof_shareds,
              kmp_int32 flags, kmpc_shared_vars_t **shareds )
{
    int                  in_parallel;
    kmp_int32            nslots, nthunks, nshareds, nproc;
    kmpc_task_queue_t   *new_queue, *curr_queue;
    kmpc_thunk_t        *new_taskq_thunk;
    kmp_info_t          *th;
    kmp_team_t          *team;
    kmp_taskq_t         *tq;
    kmp_int32            tid;

    KE_TRACE( 10, ("__kmpc_taskq called (%d)\n", global_tid));

    th = __kmp_threads[ global_tid ];
    team = th -> th.th_team;
    tq = & team -> t.t_taskq;
    nproc = team -> t.t_nproc;
    tid = __kmp_tid_from_gtid( global_tid );

    /* find out whether this is a parallel taskq or a serialized one */
    in_parallel = in_parallel_context( team );

    if( ! tq->tq_root ) {
        if (in_parallel) {
            /* creating the active root queue: vector ORDERED SECTION to the taskq version */
            th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;

            th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
        }

        if (in_parallel) {
            /* All non-master threads block here until the master has created the
               root queue and enqueued tasks (the barrier is released from
               __kmp_enqueue_task via TQF_RELEASE_WORKERS).  They then pick up
               their copy of the shared variables and return NULL, which sends
               them off to service the queue via __kmpc_end_taskq. */
            if ( __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) {
                *shareds = (kmpc_shared_vars_t *) tq->tq_root->tq_shareds[tid].ai_data;

                KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid));

                return NULL;
            }
        }

        /* master thread only: set up the per-thread current-thunk array */
        if( tq->tq_curr_thunk_capacity < nproc ) {
            if(tq->tq_curr_thunk)
                __kmp_free(tq->tq_curr_thunk);
            else {
                /* only need to do this once at the outer level, i.e. when tq_curr_thunk is still NULL */
                __kmp_init_lock( & tq->tq_freelist_lck );
            }

            tq->tq_curr_thunk = (kmpc_thunk_t **) __kmp_allocate( nproc * sizeof(kmpc_thunk_t *) );
            tq -> tq_curr_thunk_capacity = nproc;
        }

        if (in_parallel)
            tq->tq_global_flags = TQF_RELEASE_WORKERS;
    }

    /* queue sizing: two slots per thread in parallel, a single slot when serialized */
    nslots = (in_parallel) ? (2 * nproc) : 1;

    /* thunks: enough for every slot plus each thread's outstanding quota plus
       the taskq thunk; the serialized case needs one extra spare for the
       dequeue-before-enqueue swap in __kmpc_task */
    nthunks = (in_parallel) ? (nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH) + 1) : nslots + 2;

    /* shared-variable copies: one per thread for the root queue, one otherwise */
    nshareds = ( !tq->tq_root && in_parallel) ? nproc : 1;

    /* create the overall queue data structure and the components that require allocation */
    new_queue = __kmp_alloc_taskq ( tq, in_parallel, nslots, nthunks, nshareds, nproc,
        sizeof_thunk, sizeof_shareds, &new_taskq_thunk, global_tid );

    /* rest of new_queue initializations */
    new_queue->tq_flags = flags & TQF_INTERFACE_FLAGS;

    if (in_parallel) {
        new_queue->tq_tasknum_queuing = 0;
        new_queue->tq_tasknum_serving = 0;
        new_queue->tq_flags |= TQF_PARALLEL_CONTEXT;
    }

    new_queue->tq_taskq_slot = NULL;
    new_queue->tq_nslots = nslots;
    new_queue->tq_hiwat = HIGH_WATER_MARK (nslots);
    new_queue->tq_nfull = 0;
    new_queue->tq_head = 0;
    new_queue->tq_tail = 0;
    new_queue->tq_loc = loc;

    if ((new_queue->tq_flags & TQF_IS_ORDERED) && in_parallel) {
        /* prepare to serve the first-queued task's ORDERED directive */
        new_queue->tq_tasknum_serving = 1;

        /* vector ORDERED SECTION to the taskq version */
        th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo;

        th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo;
    }

    /* create a new thunk for the taskq_task in the new_queue */
    *shareds = (kmpc_shared_vars_t *) new_queue->tq_shareds[0].ai_data;

    new_taskq_thunk->th.th_shareds = *shareds;
    new_taskq_thunk->th_task       = taskq_task;
    new_taskq_thunk->th_flags      = new_queue->tq_flags | TQF_TASKQ_TASK;
    new_taskq_thunk->th_status     = 0;

    KMP_DEBUG_ASSERT (new_taskq_thunk->th_flags & TQF_TASKQ_TASK);

    if (in_parallel) {
        if( ! tq->tq_root ) {
            /* first taskq in the team: this queue becomes the root */
            new_queue->tq.tq_parent   = NULL;
            new_queue->tq_first_child = NULL;
            new_queue->tq_next_child  = NULL;
            new_queue->tq_prev_child  = NULL;
            new_queue->tq_ref_count   = 1;      /* reference held by the creating thread */
            tq->tq_root = new_queue;
        }
        else {
            /* nested taskq: link the new queue in as a child of the current one */
            curr_queue = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue;
            new_queue->tq.tq_parent   = curr_queue;
            new_queue->tq_first_child = NULL;
            new_queue->tq_prev_child  = NULL;
            new_queue->tq_ref_count   = 1;      /* reference held by the creating thread */

            KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p alloc %d\n",
              __LINE__, global_tid, new_queue, new_queue->tq_ref_count));

            __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid);

            KMP_MB();   /* make sure data structures are in consistent state before querying them */

            new_queue->tq_next_child = (struct kmpc_task_queue_t *) curr_queue->tq_first_child;

            if (curr_queue->tq_first_child != NULL)
                curr_queue->tq_first_child->tq_prev_child = new_queue;

            curr_queue->tq_first_child = new_queue;

            __kmp_release_lock(& curr_queue->tq_link_lck, global_tid);
        }

        /* push the taskq thunk onto this thread's thunk stack */
        new_taskq_thunk->th_encl_thunk = tq->tq_curr_thunk[tid];
        tq->tq_curr_thunk[tid] = new_taskq_thunk;

        KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
    }
    else {
        new_taskq_thunk->th_encl_thunk = 0;
        new_queue->tq.tq_parent   = NULL;
        new_queue->tq_first_child = NULL;
        new_queue->tq_next_child  = NULL;
        new_queue->tq_prev_child  = NULL;
        new_queue->tq_ref_count   = 1;
    }

#ifdef KMP_DEBUG
    KF_TRACE(150, ("Creating TaskQ Task on (%d):\n", global_tid));
    KF_DUMP(150, __kmp_dump_thunk( tq, new_taskq_thunk, global_tid ));

    if (in_parallel) {
        KF_TRACE(25, ("After TaskQ at %p Creation on (%d):\n", new_queue, global_tid));
    } else {
        KF_TRACE(25, ("After Serial TaskQ at %p Creation on (%d):\n", new_queue, global_tid));
    }

    KF_DUMP(25, __kmp_dump_task_queue( tq, new_queue, global_tid ));

    if (in_parallel) {
        KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));
    }
#endif /* KMP_DEBUG */

    if ( __kmp_env_consistency_check )
        __kmp_push_workshare( global_tid, ct_taskq, new_queue->tq_loc );

    KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid));

    return new_taskq_thunk;
}

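/*
 * __kmpc_end_taskq: compiler interface, called at the end of a taskq
 * construct.  All threads service tasks (stealing from descendant and
 * ancestor queues as needed) until everything has been queued and executed;
 * the queue is then torn down, behind a barrier for the outermost case.
 */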
void
__kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk)
{
#ifdef KMP_DEBUG
    kmp_int32           i;
#endif
    kmp_taskq_t        *tq;
    int                 in_parallel;
    kmp_info_t         *th;
    kmp_int32           is_outermost;
    kmpc_task_queue_t  *queue;
    kmpc_thunk_t       *thunk;
    int                 nproc;

    KE_TRACE( 10, ("__kmpc_end_taskq called (%d)\n", global_tid));

    tq = & __kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
    nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc;

    /* For the outermost taskq, all threads except the one that created it pass taskq_thunk == NULL */
    queue = (taskq_thunk == NULL) ? tq->tq_root : taskq_thunk->th.th_shareds->sv_queue;

    KE_TRACE( 50, ("__kmpc_end_taskq queue=%p (%d) \n", queue, global_tid));
    is_outermost = (queue == tq->tq_root);
    in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

    if (in_parallel) {
        kmp_uint32 spins;

        /* safeguard: release the waiting threads even if the outermost */
        /* taskq never queued a task                                    */
        if (is_outermost && (KMP_MASTER_GTID( global_tid ))) {
            if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) {
                /* no lock needed, workers are still in spin mode */
                tq->tq_global_flags &= ~TQF_RELEASE_WORKERS;

                __kmp_end_split_barrier( bs_plain_barrier, global_tid );
            }
        }

        /* keep executing tasks until the queue has drained and all tasks have been enqueued */
        do {
            /* wait until something is available to dequeue */
            KMP_INIT_YIELD(spins);

            while ( (queue->tq_nfull == 0)
                 && (queue->tq_taskq_slot == NULL)
                 && (! __kmp_taskq_has_any_children(queue) )
                 && (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED) )
                  ) {
                __kmp_static_delay( 1 );
                KMP_YIELD_WHEN( TRUE, spins );
            }

            /* execute anything available in this queue */
            while ( ( (queue->tq_nfull != 0) || (queue->tq_taskq_slot != NULL) )
                 && (thunk = __kmp_find_task_in_queue(global_tid, queue)) != NULL
                  ) {
                KF_TRACE(50, ("Found thunk: %p in primary queue %p (%d)\n", thunk, queue, global_tid));
                __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
            }

            /* see if work can be found in a descendant queue */
            if ( (__kmp_taskq_has_any_children(queue))
              && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL
               ) {
                KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
                    thunk, thunk->th.th_shareds->sv_queue, queue, global_tid ));

                __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
            }

        } while ( (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED))
               || (queue->tq_nfull != 0)
                );

        KF_TRACE(50, ("All tasks queued and dequeued in queue: %p (%d)\n", queue, global_tid));

        /* while tasks are still running, keep stealing from descendant queues */
        while ( (!__kmp_taskq_tasks_finished(queue))
             && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL
              ) {

            KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
                thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));

            __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
        }

        KF_TRACE(50, ("No work found in descendent queues or all work finished in queue: %p (%d)\n", queue, global_tid));

        if (!is_outermost) {
            /* need to return if NOWAIT present and not outermost taskq */

            if (queue->tq_flags & TQF_IS_NOWAIT) {
                __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);
                queue->tq_ref_count--;
                KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 );
                __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid);

                KE_TRACE( 10, ("__kmpc_end_taskq return for nowait case (%d)\n", global_tid));

                return;
            }

            __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue );

            /* WAIT until all tasks are finished and no child queues exist before proceeding */
            KMP_INIT_YIELD(spins);

            while (!__kmp_taskq_tasks_finished(queue) || __kmp_taskq_has_any_children(queue)) {
                thunk = __kmp_find_task_in_ancestor_queue( tq, global_tid, queue );

                if (thunk != NULL) {
                    KF_TRACE(50, ("Stole thunk: %p in ancestor queue: %p while waiting in queue: %p (%d)\n",
                        thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));
                    __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
                }

                KMP_YIELD_WHEN( thunk == NULL, spins );

                __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue );
            }

            __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);
            if ( !(queue->tq_flags & TQF_DEALLOCATED) ) {
                queue->tq_flags |= TQF_DEALLOCATED;
            }
            __kmp_release_lock(& queue->tq_queue_lck, global_tid);

            /* only the allocating thread can deallocate the queue */
            if (taskq_thunk != NULL) {
                __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE );
            }

            KE_TRACE( 10, ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n", global_tid));

            return;
        }

        /* outermost queue: steal work from descendants until all tasks are finished */

        KMP_INIT_YIELD(spins);

        while (!__kmp_taskq_tasks_finished(queue)) {
            thunk = __kmp_find_task_in_descendant_queue(global_tid, queue);

            if (thunk != NULL) {
                KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n",
                    thunk, thunk->th.th_shareds->sv_queue, queue, global_tid));

                __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
            }

            KMP_YIELD_WHEN( thunk == NULL, spins );
        }

        /* this barrier prevents destruction of the queue while threads are still executing above */
        if ( !__kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) {
            /* the thread for which the barrier returns zero performs the teardown */

            /* release the root queue's children, if any exist */
            __kmp_remove_all_child_taskq( tq, global_tid, queue );

            /* now destroy the root queue */
            KF_TRACE(100, ("T#%d Before Deletion of top-level TaskQ at %p:\n", global_tid, queue ));
            KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));

#ifdef KMP_DEBUG
            /* the root queue entry */
            KMP_DEBUG_ASSERT ((queue->tq.tq_parent == NULL) && (queue->tq_next_child == NULL));

            /* children must all be gone by now because of the barrier above */
            KMP_DEBUG_ASSERT (queue->tq_first_child == NULL);

            for (i=0; i<nproc; i++) {
                KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0);
            }

            for (i=0, thunk=queue->tq_free_thunks; thunk != NULL; i++, thunk=thunk->th.th_next_free);

            KMP_DEBUG_ASSERT (i == queue->tq_nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH));

            for (i = 0; i < nproc; i++) {
                KMP_DEBUG_ASSERT( ! tq->tq_curr_thunk[i] );
            }
#endif
            /* unlink the root queue entry */
            tq -> tq_root = NULL;

            /* release storage for the root queue entry */
            KF_TRACE(50, ("After Deletion of top-level TaskQ at %p on (%d):\n", queue, global_tid));

            queue->tq_flags |= TQF_DEALLOCATED;
            __kmp_free_taskq ( tq, queue, in_parallel, global_tid );

            KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid ));

            /* release the remaining threads now that the data structures are up to date */
            __kmp_end_split_barrier( bs_plain_barrier, global_tid );
        }

        th = __kmp_threads[ global_tid ];

        /* reset ORDERED SECTION to the parallel version */
        th->th.th_dispatch->th_deo_fcn = 0;

        th->th.th_dispatch->th_dxo_fcn = 0;
    }
    else {
        /* in serial execution context, dequeue the last task  */
        /* and execute it, if there were any tasks encountered */

        if (queue->tq_nfull > 0) {
            KMP_DEBUG_ASSERT(queue->tq_nfull == 1);

            thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);

            if (queue->tq_flags & TQF_IS_LAST_TASK) {
                /* lastprivate queue with one task left and all tasks queued: */
                /* run the final task with TQF_IS_LAST_TASK set so that the   */
                /* generated code does the copy-out                           */
                thunk->th_flags |= TQF_IS_LAST_TASK;
            }

            KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, thunk, queue));

            __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel );
        }

        /* destroy the unattached serial queue now that there is no more work to do */
        KF_TRACE(100, ("Before Deletion of Serialized TaskQ at %p on (%d):\n", queue, global_tid));
        KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));

#ifdef KMP_DEBUG
        i = 0;
        for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free)
            ++i;
        KMP_DEBUG_ASSERT (i == queue->tq_nslots + 1);
#endif
        /* release storage for the unattached serial queue */
        KF_TRACE(50, ("Serialized TaskQ at %p deleted on (%d).\n", queue, global_tid));

        queue->tq_flags |= TQF_DEALLOCATED;
        __kmp_free_taskq ( tq, queue, in_parallel, global_tid );
    }

    KE_TRACE( 10, ("__kmpc_end_taskq return (%d)\n", global_tid));
}

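/*
 * __kmpc_task: compiler interface, called once per task to enqueue the thunk
 * built by __kmpc_task_buffer.  Returns nonzero when the enqueue filled the
 * queue, i.e. when the caller should start servicing it.
 */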
kmp_int32
__kmpc_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk)
{
    kmp_int32          ret;
    kmpc_task_queue_t *queue;
    int                in_parallel;
    kmp_taskq_t       *tq;

    KE_TRACE( 10, ("__kmpc_task called (%d)\n", global_tid));

    KMP_DEBUG_ASSERT (!(thunk->th_flags & TQF_TASKQ_TASK));  /* thunk->th_task is a regular task */

    tq          = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
    queue       = thunk->th.th_shareds->sv_queue;
    in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

    if (in_parallel && (thunk->th_flags & TQF_IS_ORDERED))
        thunk->th_tasknum = ++queue->tq_tasknum_queuing;

    /* For serial execution, dequeue the preceding task and execute it, if one exists. */
    /* This cannot be the last task; that one is handled in __kmpc_end_taskq.          */

    if (!in_parallel && queue->tq_nfull > 0) {
        kmpc_thunk_t *prev_thunk;

        KMP_DEBUG_ASSERT(queue->tq_nfull == 1);

        prev_thunk = __kmp_dequeue_task(global_tid, queue, in_parallel);

        KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, prev_thunk, queue));

        __kmp_execute_task_from_queue( tq, loc, global_tid, prev_thunk, in_parallel );
    }

    /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private     */
    /* variables, __kmpc_task().  __kmpc_task_buffer() allocates the thunk from a    */
    /* pool sized so the queue cannot be full here, so the enqueue below never fails. */

    KF_TRACE(100, ("After enqueueing this Task on (%d):\n", global_tid));
    KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));

    ret = __kmp_enqueue_task ( tq, global_tid, queue, thunk, in_parallel );

    KF_TRACE(100, ("Task Queue looks like this on (%d):\n", global_tid));
    KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid ));

    KE_TRACE( 10, ("__kmpc_task return (%d)\n", global_tid));

    return ret;
}

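/*
 * __kmpc_taskq_task: compiler interface, called when the taskq (enqueuing)
 * task yields; parks its thunk in tq_taskq_slot so it can be resumed later
 * by __kmp_find_task_in_queue.
 */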
void
__kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status)
{
    kmpc_task_queue_t *queue;
    kmp_taskq_t       *tq  = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
    int                tid = __kmp_tid_from_gtid( global_tid );

    KE_TRACE( 10, ("__kmpc_taskq_task called (%d)\n", global_tid));
    KF_TRACE(100, ("TaskQ Task argument thunk on (%d):\n", global_tid));
    KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid ));

    queue = thunk->th.th_shareds->sv_queue;

    if ( __kmp_env_consistency_check )
        __kmp_pop_workshare( global_tid, ct_taskq, loc );

    /* thunk->th_task is the taskq_task */
    KMP_DEBUG_ASSERT (thunk->th_flags & TQF_TASKQ_TASK);

    /* not supposed to call __kmpc_taskq_task if it's already enqueued */
    KMP_DEBUG_ASSERT (queue->tq_taskq_slot == NULL);

    /* dequeue taskq thunk from curr_thunk stack */
    tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
    thunk->th_encl_thunk = NULL;

    KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));

    thunk->th_status = status;

    KMP_MB();   /* flush thunk->th_status before the taskq thunk is enqueued to avoid a race */

    /* enqueue the taskq thunk; it is taken off by __kmp_find_task_in_queue */
    /* once the queue has drained to the high-water mark                    */
    queue->tq_taskq_slot = thunk;

    KE_TRACE( 10, ("__kmpc_taskq_task return (%d)\n", global_tid));
}

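/*
 * __kmpc_end_taskq_task: compiler interface, called when the taskq task has
 * finished enqueuing; sets TQF_ALL_TASKS_QUEUED (and TQF_IS_LAST_TASK for
 * lastprivate queues), atomically where the architecture allows it and under
 * the queue lock otherwise.
 */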
void
__kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk)
{
    kmp_taskq_t       *tq;
    kmpc_task_queue_t *queue;
    int                in_parallel;
    int                tid;

    KE_TRACE( 10, ("__kmpc_end_taskq_task called (%d)\n", global_tid));

    tq          = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
    queue       = thunk->th.th_shareds->sv_queue;
    in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);
    tid         = __kmp_tid_from_gtid( global_tid );

    if ( __kmp_env_consistency_check )
        __kmp_pop_workshare( global_tid, ct_taskq, loc );

    if (in_parallel) {
#if KMP_ARCH_X86 || \
    KMP_ARCH_X86_64

        __kmp_test_then_or32( &queue->tq_flags, (kmp_int32) TQF_ALL_TASKS_QUEUED );
#else
        {
            __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);

            KMP_MB();   /* make sure data structures are in consistent state before querying them */

            queue->tq_flags |= TQF_ALL_TASKS_QUEUED;

            __kmp_release_lock(& queue->tq_queue_lck, global_tid);
        }
#endif
    }

    if (thunk->th_flags & TQF_IS_LASTPRIVATE) {
        /* Mark the queue as holding its last task now that enqueuing is done; */
        /* __kmp_find_task_in_queue uses this to tag the final dequeued thunk  */
        /* with TQF_IS_LAST_TASK.  In the serialized case the last task runs   */
        /* from __kmpc_end_taskq, so only the queue flag is needed.            */

        if (! in_parallel) {
            /* no synchronization needed for serial context */
            queue->tq_flags |= TQF_IS_LAST_TASK;
        }
        else {
#if KMP_ARCH_X86 || \
    KMP_ARCH_X86_64

            __kmp_test_then_or32( &queue->tq_flags, (kmp_int32) TQF_IS_LAST_TASK );
#else
            {
                __kmp_acquire_lock(& queue->tq_queue_lck, global_tid);

                KMP_MB();   /* make sure data structures are in consistent state before querying them */

                queue->tq_flags |= TQF_IS_LAST_TASK;

                __kmp_release_lock(& queue->tq_queue_lck, global_tid);
            }
#endif
            /* make the flag update visible before any subsequent dequeue */
            KMP_MB();
        }
    }

    /* dequeue taskq thunk from curr_thunk stack */
    if (in_parallel) {
        tq->tq_curr_thunk[tid] = thunk->th_encl_thunk;
        thunk->th_encl_thunk = NULL;

        KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid ));
    }

    KE_TRACE( 10, ("__kmpc_end_taskq_task return (%d)\n", global_tid));
}

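/*
 * __kmpc_task_buffer: compiler interface, allocates and initializes a thunk
 * for one task; the generated code fills in the private data and then passes
 * the thunk to __kmpc_task for enqueuing.
 */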
kmpc_thunk_t *
__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task)
{
    kmp_taskq_t       *tq;
    kmpc_task_queue_t *queue;
    kmpc_thunk_t      *new_thunk;
    int                in_parallel;

    KE_TRACE( 10, ("__kmpc_task_buffer called (%d)\n", global_tid));

    KMP_DEBUG_ASSERT (taskq_thunk->th_flags & TQF_TASKQ_TASK);  /* taskq_thunk->th_task is the taskq_task */

    tq          = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq;
    queue       = taskq_thunk->th.th_shareds->sv_queue;
    in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT);

    /* The thunk initially points at the queue-level (index 0) copy of the  */
    /* shared variables; __kmp_execute_task_from_queue substitutes the      */
    /* per-thread copy when the task comes from the root queue.             */
    new_thunk = __kmp_alloc_thunk (queue, in_parallel, global_tid);
    new_thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[0].ai_data;
    new_thunk->th_encl_thunk = NULL;
    new_thunk->th_task       = task;

    /* pass the queue's interface flags along to the new thunk */
    new_thunk->th_flags = queue->tq_flags & TQF_INTERFACE_FLAGS;

    new_thunk->th_status = 0;

    KMP_DEBUG_ASSERT (!(new_thunk->th_flags & TQF_TASKQ_TASK));

    KF_TRACE(100, ("Creating Regular Task on (%d):\n", global_tid));
    KF_DUMP(100, __kmp_dump_thunk( tq, new_thunk, global_tid ));

    KE_TRACE( 10, ("__kmpc_task_buffer return (%d)\n", global_tid));

    return new_thunk;
}