team.c

Go to the documentation of this file.
00001 /* Copyright (C) 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
00002    Contributed by Richard Henderson <rth@redhat.com>.
00003 
00004    This file is part of the GNU OpenMP Library (libgomp).
00005 
00006    Libgomp is free software; you can redistribute it and/or modify it
00007    under the terms of the GNU General Public License as published by
00008    the Free Software Foundation; either version 3, or (at your option)
00009    any later version.
00010 
00011    Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
00012    WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00013    FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
00014    more details.
00015 
00016    Under Section 7 of GPL version 3, you are granted additional
00017    permissions described in the GCC Runtime Library Exception, version
00018    3.1, as published by the Free Software Foundation.
00019 
00020    You should have received a copy of the GNU General Public License and
00021    a copy of the GCC Runtime Library Exception along with this program;
00022    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023    <http://www.gnu.org/licenses/>.  */
00024 
00025 /* This file handles the maintainence of threads in response to team
00026    creation and termination.  */
00027 
00028 #include "libgomp.h"
00029 #include <stdlib.h>
00030 #include <string.h>
00031 
/* This attribute contains PTHREAD_CREATE_DETACHED.  Passed to every
   pthread_create in this file; team threads are never joined.  */
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  Its destructor function is
   gomp_free_thread (registered in initialize_team below).  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  With compiler TLS we
   use one __thread instance per thread; otherwise a pthread key holds a
   pointer to per-thread storage (see gomp_thread_start).  */
#ifdef HAVE_TLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif
00045 
00046 
/* This structure is used to communicate across pthread_create.  The
   master fills one entry per launched thread in an alloca'd array (see
   gomp_team_start); gomp_thread_start copies the fields out before its
   first barrier, after which the master's stack frame may go away.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);			/* Parallel region body.  */
  void *fn_data;			/* Argument passed to fn.  */
  struct gomp_team_state ts;		/* Initial team state for the thread.  */
  struct gomp_task *task;		/* Implicit task slot in the team.  */
  struct gomp_thread_pool *thread_pool;	/* Pool the thread belongs to.  */
  bool nested;				/* True for nested-team members.  */
};
00058 
/*
 * ----------------------------------------------------------------- start
 * HPCToolkit addition for correctness tool
 * ----------------------------------------------------------------- start
 */

/* Weak declarations of tool hooks.  A correctness tool may provide
   strong definitions that interpose on team begin/end; with no strong
   definition linked in, the symbols resolve to null and are unused.
   NOTE(review): `uint` is not a standard C type (presumably the
   sys/types.h typedef for unsigned int) -- confirm it is brought into
   scope via libgomp.h on all supported platforms.  Neither hook is
   called in this translation unit; callers presumably live elsewhere
   in the HPCToolkit patch -- verify before removing.  */

void 
ompc_team_start( uint loop_bounds )
__attribute__ ((weak)) ;

void 
ompc_team_end()
__attribute__ ((weak)); 
/*
 * ----------------------------------------------------------------- end
 * HPCToolkit addition for correctness tool
 * ----------------------------------------------------------------- end
 */
00077 
00078 
/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.
   XDATA points at a gomp_thread_start_data entry in the master's
   alloca'd array; all needed fields are copied out before the first
   barrier releases the master.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

  /* Establish this thread's gomp_thread() storage: real TLS when
     available, otherwise a stack-allocated block published through the
     pthread key.  */
#ifdef HAVE_TLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;

  /* Publish our release semaphore in the team so ordered-section
     hand-offs can wake this thread.  */
  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local. */
  pool = thr->thread_pool;

  if (data->nested)
    {
      /* Member of a nested team: run exactly one parallel region and
	 exit.  The final wait_last pairs with the master's barrier wait
	 in gomp_team_end so the team can be destroyed safely.  */
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      /* Pool thread: register ourselves, then loop running one region
	 per release of the threads_dock barrier.  The master stores the
	 next fn/data into *thr before undocking us; the loop ends when
	 fn is left NULL (pool teardown instead goes through the
	 trampoline installed by gomp_free_thread).  */
      pool->threads[thr->ts.team_id] = thr;

      gomp_barrier_wait (&pool->threads_dock);
      do
	{
	  struct gomp_team *team = thr->ts.team;
	  struct gomp_task *task = thr->task;

	  local_fn (local_data);
	  gomp_team_barrier_wait (&team->barrier);
	  gomp_finish_task (task);

	  /* Dock until the master launches the next team.  */
	  gomp_barrier_wait (&pool->threads_dock);

	  local_fn = thr->fn;
	  local_data = thr->data;
	  thr->fn = NULL;
	}
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  return NULL;
}
00150 
00151 
/* Create a new team data structure sized for NTHREADS members.  One
   allocation holds the struct proper followed by the implicit-task
   array and then the ordered_release pointer array.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  size_t size;
  int i;

  /* Trailing per-thread arrays share the team's allocation.  */
  size = sizeof (*team) + nthreads * (sizeof (team->ordered_release[0])
				      + sizeof (team->implicit_task[0]));
  team = gomp_malloc (size);

  /* 8 work shares are embedded in the team; init slot 0 as the first
     active one and chain slots 1..7 onto the allocation free list.  */
  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#else
  gomp_mutex_init (&team->work_share_list_free_lock);
#endif
  gomp_init_work_share (&team->work_shares[0], false, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  team->nthreads = nthreads;
  gomp_barrier_init (&team->barrier, nthreads);

  /* ordered_release lives just past the implicit-task array; slot 0
     is the master's semaphore.  */
  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  gomp_mutex_init (&team->task_lock);
  team->task_queue = NULL;
  team->task_count = 0;
  team->task_running_count = 0;

  return team;
}
00193 
00194 
00195 /* Free a team data structure.  */
00196 
00197 static void
00198 free_team (struct gomp_team *team)
00199 {
00200   gomp_barrier_destroy (&team->barrier);
00201   gomp_mutex_destroy (&team->task_lock);
00202   free (team);
00203 }
00204 
00205 /* Allocate and initialize a thread pool. */
00206 
00207 static struct gomp_thread_pool *gomp_new_thread_pool (void)
00208 {
00209   struct gomp_thread_pool *pool
00210     = gomp_malloc (sizeof(struct gomp_thread_pool));
00211   pool->threads = NULL;
00212   pool->threads_size = 0;
00213   pool->threads_used = 0;
00214   pool->last_team = NULL;
00215   return pool;
00216 }
00217 
/* Exit trampoline installed into each docked thread by gomp_free_thread
   when the pool is being torn down.  The wait_last lets the master know
   this thread is past any use of the pool before the master destroys
   the threads_dock barrier and frees the pool.  */

static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&gomp_thread ()->release);
  pthread_exit (NULL);
}
00227 
/* Free a thread pool and release its threads.  Registered as the
   destructor of gomp_thread_destructor, so it runs when the thread that
   owns the pool exits; also frees the thread's dangling task.  */

static void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
	{
	  int i;
	  /* Point every docked thread (slots 1..used-1) at the exit
	     trampoline before waking them.  */
	  for (i = 1; i < pool->threads_used; i++)
	    {
	      struct gomp_thread *nthr = pool->threads[i];
	      nthr->fn = gomp_free_pool_helper;
	      nthr->data = pool;
	    }
	  /* This barrier undocks threads docked on pool->threads_dock.  */
	  gomp_barrier_wait (&pool->threads_dock);
	  /* And this waits till all threads have called gomp_barrier_wait_last
	     in gomp_free_pool_helper.  */
	  gomp_barrier_wait (&pool->threads_dock);
	  /* Now it is safe to destroy the barrier and free the pool.  */
	  gomp_barrier_destroy (&pool->threads_dock);
	}
      free (pool->threads);
      if (pool->last_team)
	free_team (pool->last_team);
      free (pool);
      thr->thread_pool = NULL;
    }
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}
00267 
00268 /* Launch a team.  */
00269 
00270 void
00271 gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
00272          struct gomp_team *team)
00273 {
00274   struct gomp_thread_start_data *start_data;
00275   struct gomp_thread *thr, *nthr;
00276   struct gomp_task *task;
00277   struct gomp_task_icv *icv;
00278   bool nested;
00279   struct gomp_thread_pool *pool;
00280   unsigned i, n, old_threads_used = 0;
00281   pthread_attr_t thread_attr, *attr;
00282 
00283   thr = gomp_thread ();
00284   nested = thr->ts.team != NULL;
00285   if (__builtin_expect (thr->thread_pool == NULL, 0))
00286     {
00287       thr->thread_pool = gomp_new_thread_pool ();
00288       pthread_setspecific (gomp_thread_destructor, thr);
00289     }
00290   pool = thr->thread_pool;
00291   task = thr->task;
00292   icv = task ? &task->icv : &gomp_global_icv;
00293 
00294   /* Always save the previous state, even if this isn't a nested team.
00295      In particular, we should save any work share state from an outer
00296      orphaned work share construct.  */
00297   team->prev_ts = thr->ts;
00298 
00299   thr->ts.team = team;
00300   thr->ts.team_id = 0;
00301   ++thr->ts.level;
00302   if (nthreads > 1)
00303     ++thr->ts.active_level;
00304   thr->ts.work_share = &team->work_shares[0];
00305   thr->ts.last_work_share = NULL;
00306 #ifdef HAVE_SYNC_BUILTINS
00307   thr->ts.single_count = 0;
00308 #endif
00309   thr->ts.static_trip = 0;
00310   thr->task = &team->implicit_task[0];
00311   gomp_init_task (thr->task, task, icv);
00312 
00313   if (nthreads == 1)
00314     return;
00315 
00316   i = 1;
00317 
00318   /* We only allow the reuse of idle threads for non-nested PARALLEL
00319      regions.  This appears to be implied by the semantics of
00320      threadprivate variables, but perhaps that's reading too much into
00321      things.  Certainly it does prevent any locking problems, since
00322      only the initial program thread will modify gomp_threads.  */
00323   if (!nested)
00324     {
00325       old_threads_used = pool->threads_used;
00326 
00327       if (nthreads <= old_threads_used)
00328     n = nthreads;
00329       else if (old_threads_used == 0)
00330     {
00331       n = 0;
00332       gomp_barrier_init (&pool->threads_dock, nthreads);
00333     }
00334       else
00335     {
00336       n = old_threads_used;
00337 
00338       /* Increase the barrier threshold to make sure all new
00339          threads arrive before the team is released.  */
00340       gomp_barrier_reinit (&pool->threads_dock, nthreads);
00341     }
00342 
00343       /* Not true yet, but soon will be.  We're going to release all
00344      threads from the dock, and those that aren't part of the
00345      team will exit.  */
00346       pool->threads_used = nthreads;
00347 
00348       /* Release existing idle threads.  */
00349       for (; i < n; ++i)
00350     {
00351       nthr = pool->threads[i];
00352       nthr->ts.team = team;
00353       nthr->ts.work_share = &team->work_shares[0];
00354       nthr->ts.last_work_share = NULL;
00355       nthr->ts.team_id = i;
00356       nthr->ts.level = team->prev_ts.level + 1;
00357       nthr->ts.active_level = thr->ts.active_level;
00358 #ifdef HAVE_SYNC_BUILTINS
00359       nthr->ts.single_count = 0;
00360 #endif
00361       nthr->ts.static_trip = 0;
00362       nthr->task = &team->implicit_task[i];
00363       gomp_init_task (nthr->task, task, icv);
00364       nthr->fn = fn;
00365       nthr->data = data;
00366       team->ordered_release[i] = &nthr->release;
00367     }
00368 
00369       if (i == nthreads)
00370     goto do_release;
00371 
00372       /* If necessary, expand the size of the gomp_threads array.  It is
00373      expected that changes in the number of threads are rare, thus we
00374      make no effort to expand gomp_threads_size geometrically.  */
00375       if (nthreads >= pool->threads_size)
00376     {
00377       pool->threads_size = nthreads + 1;
00378       pool->threads
00379         = gomp_realloc (pool->threads,
00380                 pool->threads_size
00381                 * sizeof (struct gomp_thread_data *));
00382     }
00383     }
00384 
00385   if (__builtin_expect (nthreads > old_threads_used, 0))
00386     {
00387       long diff = (long) nthreads - (long) old_threads_used;
00388 
00389       if (old_threads_used == 0)
00390     --diff;
00391 
00392 #ifdef HAVE_SYNC_BUILTINS
00393       __sync_fetch_and_add (&gomp_managed_threads, diff);
00394 #else
00395       gomp_mutex_lock (&gomp_remaining_threads_lock);
00396       gomp_managed_threads += diff;
00397       gomp_mutex_unlock (&gomp_remaining_threads_lock);
00398 #endif
00399     }
00400 
00401   attr = &gomp_thread_attr;
00402   if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
00403     {
00404       size_t stacksize;
00405       pthread_attr_init (&thread_attr);
00406       pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
00407       if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
00408     pthread_attr_setstacksize (&thread_attr, stacksize);
00409       attr = &thread_attr;
00410     }
00411 
00412   start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
00413                 * (nthreads-i));
00414 
00415   /* Launch new threads.  */
00416   for (; i < nthreads; ++i, ++start_data)
00417     {
00418       pthread_t pt;
00419       int err;
00420 
00421       start_data->fn = fn;
00422       start_data->fn_data = data;
00423       start_data->ts.team = team;
00424       start_data->ts.work_share = &team->work_shares[0];
00425       start_data->ts.last_work_share = NULL;
00426       start_data->ts.team_id = i;
00427       start_data->ts.level = team->prev_ts.level + 1;
00428       start_data->ts.active_level = thr->ts.active_level;
00429 #ifdef HAVE_SYNC_BUILTINS
00430       start_data->ts.single_count = 0;
00431 #endif
00432       start_data->ts.static_trip = 0;
00433       start_data->task = &team->implicit_task[i];
00434       gomp_init_task (start_data->task, task, icv);
00435       start_data->thread_pool = pool;
00436       start_data->nested = nested;
00437 
00438       if (gomp_cpu_affinity != NULL)
00439     gomp_init_thread_affinity (attr);
00440 
00441       err = pthread_create (&pt, attr, gomp_thread_start, start_data);
00442       if (err != 0)
00443     gomp_fatal ("Thread creation failed: %s", strerror (err));
00444     }
00445 
00446   if (__builtin_expect (gomp_cpu_affinity != NULL, 0))
00447     pthread_attr_destroy (&thread_attr);
00448 
00449  do_release:
00450   gomp_barrier_wait (nested ? &team->barrier : &pool->threads_dock);
00451 
00452   /* Decrease the barrier threshold to match the number of threads
00453      that should arrive back at the end of this team.  The extra
00454      threads should be exiting.  Note that we arrange for this test
00455      to never be true for nested teams.  */
00456   if (__builtin_expect (nthreads < old_threads_used, 0))
00457     {
00458       long diff = (long) nthreads - (long) old_threads_used;
00459 
00460       gomp_barrier_reinit (&pool->threads_dock, nthreads);
00461 
00462 #ifdef HAVE_SYNC_BUILTINS
00463       __sync_fetch_and_add (&gomp_managed_threads, diff);
00464 #else
00465       gomp_mutex_lock (&gomp_remaining_threads_lock);
00466       gomp_managed_threads += diff;
00467       gomp_mutex_unlock (&gomp_remaining_threads_lock);
00468 #endif
00469     }
00470 }
00471 
00472 
/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.  */
  gomp_team_barrier_wait (&team->barrier);
  gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  /* Pop back to the state saved in gomp_team_start.  */
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.team != NULL, 0))
    {
      /* Still inside an enclosing team, so this one was nested: its
	 members are exiting now -- adjust the managed-thread count.  */
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_remaining_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_remaining_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
	 and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  /* Free any work shares chained off the first embedded one.  */
  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
	{
	  struct gomp_work_share *next_ws = ws->next_alloc;
	  free (ws);
	  ws = next_ws;
	}
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif

  /* Nested or single-thread teams are freed immediately; otherwise
     cache the team in the pool for reuse by the next region, freeing
     the previously cached one.  */
  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
	free_team (pool->last_team);
      pool->last_team = team;
    }
}
00530 
00531 
/* Constructors for this file.  */

/* Runs at library load: set up TLS (or its pthread-key emulation for
   the initial thread), register the thread-pool destructor key, and
   initialize the initial thread's release semaphore.  */

static void __attribute__((constructor))
initialize_team (void)
{
  struct gomp_thread *thr;

#ifndef HAVE_TLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");

#ifdef HAVE_TLS
  thr = &gomp_tls_data;
#else
  thr = &initial_thread_tls_data;
#endif
  gomp_sem_init (&thr->release, 0);
}
00556 
/* Runs at library unload (e.g. via dlclose).  */

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}
00564 
00565 struct gomp_task_icv *
00566 gomp_new_icv (void)
00567 {
00568   struct gomp_thread *thr = gomp_thread ();
00569   struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
00570   gomp_init_task (task, NULL, &gomp_global_icv);
00571   thr->task = task;
00572   pthread_setspecific (gomp_thread_destructor, thr);
00573   return &task->icv;
00574 }

Generated on Fri Apr 5 05:38:10 2013 for Libgomp by  doxygen 1.4.7