kmp.h

/*
 * kmp.h -- KPTS runtime header file.
 * $Revision: 42263 $
 * $Date: 2013-04-04 11:03:19 -0500 (Thu, 04 Apr 2013) $
 */

/* <copyright>
    Copyright (c) 1997-2013 Intel Corporation.  All Rights Reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

      * Redistributions of source code must retain the above copyright
        notice, this list of conditions and the following disclaimer.
      * Redistributions in binary form must reproduce the above copyright
        notice, this list of conditions and the following disclaimer in the
        documentation and/or other materials provided with the distribution.
      * Neither the name of Intel Corporation nor the names of its
        contributors may be used to endorse or promote products derived
        from this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


------------------------------------------------------------------------

    Portions of this software are protected under the following patents:
        U.S. Patent 5,812,852
        U.S. Patent 6,792,599
        U.S. Patent 7,069,556
        U.S. Patent 7,328,433
        U.S. Patent 7,500,242

</copyright> */

#ifndef KMP_H
#define KMP_H

/* #define BUILD_PARALLEL_ORDERED 1 */

/* This fix replaces gettimeofday with clock_gettime for better scalability on
   the Altix.  Requires user code to be linked with -lrt.
*/
//#define FIX_SGI_CLOCK

#if defined( __GNUC__ ) && !defined( __INTEL_COMPILER )
typedef __float128 _Quad;
#endif

/* Defines for OpenMP 3.0 tasking and auto scheduling */

#if OMP_30_ENABLED

# ifndef KMP_STATIC_STEAL_ENABLED
#  define KMP_STATIC_STEAL_ENABLED 1
# endif

#define TASK_CURRENT_NOT_QUEUED  0
#define TASK_CURRENT_QUEUED      1

#define TASK_DEQUE_BITS          8  // Used solely to define TASK_DEQUE_SIZE and TASK_DEQUE_MASK.
#define TASK_DEQUE_SIZE          ( 1 << TASK_DEQUE_BITS )
#define TASK_DEQUE_MASK          ( TASK_DEQUE_SIZE - 1 )
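// With the default TASK_DEQUE_BITS == 8 this gives TASK_DEQUE_SIZE == 256
// entries and TASK_DEQUE_MASK == 0xFF, so deque indices wrap with a single AND.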

#ifdef BUILD_TIED_TASK_STACK
#define TASK_STACK_EMPTY         0  // entries when the stack is empty

#define TASK_STACK_BLOCK_BITS    5  // Used to define TASK_STACK_SIZE and TASK_STACK_MASK
#define TASK_STACK_BLOCK_SIZE    ( 1 << TASK_STACK_BLOCK_BITS ) // Number of entries in each task stack array
#define TASK_STACK_INDEX_MASK    ( TASK_STACK_BLOCK_SIZE - 1 )  // Mask for determining index into stack block
#endif // BUILD_TIED_TASK_STACK

#define TASK_NOT_PUSHED          1
#define TASK_SUCCESSFULLY_PUSHED 0
#define TASK_TIED                1
#define TASK_UNTIED              0
#define TASK_EXPLICIT            1
#define TASK_IMPLICIT            0

#endif  // OMP_30_ENABLED

#define KMP_CANCEL_THREADS
#define KMP_THREAD_ATTR
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <stdarg.h>
#include <string.h>
#include <signal.h>
/*  Don't include <ctype.h>; it causes problems with /MD on Windows* OS due to a bad Microsoft library.  */
/*  Some macros are provided below to replace some of its functions.  */
#ifndef __ABSOFT_WIN
#include <sys/types.h>
#endif
#include <limits.h>
#include <time.h>

#include <errno.h>

#include "kmp_os.h"
#include "kmp_version.h"
#include "kmp_debug.h"
#include "kmp_lock.h"
#include "kmp_i18n.h"

#define KMP_HANDLE_SIGNALS (KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_DARWIN)

#ifdef KMP_SETVERSION
/*  from factory/Include, to get VERSION_STRING embedded for 'what'  */
#include "kaiconfig.h"
#include "eye.h"
#include "own.h"
#include "setversion.h"
#endif

#include "kmp_wrapper_malloc.h"
#if KMP_OS_UNIX
# include <unistd.h>
# if !defined NSIG && defined _NSIG
#  define NSIG _NSIG
# endif
#endif

#if KMP_OS_LINUX
# pragma weak clock_gettime
#endif

#if KMP_MIC1
#include <lmmintrin.h>
#endif // KMP_MIC1   AC: no lmmintrin.h in KNC compiler

#ifndef OMPT_DISABLED
#define OMPT_SUPPORT 1
#include "ompt-internal.h"
#endif

/* Select data placement in NUMA memory */
#define NO_FIRST_TOUCH 0
#define FIRST_TOUCH 1       /* Exploit SGI's first touch page placement algo */

/* If not specified on compile command line, assume no first touch */
#ifndef BUILD_MEMORY
#define BUILD_MEMORY NO_FIRST_TOUCH
#endif

// 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64.
// 3 - fast allocation using sync, non-sync free lists of any size, non-self free lists of limited size.
#ifndef USE_FAST_MEMORY
#define USE_FAST_MEMORY 3
#endif

// By default, assume BGET uses a compare_exchange instruction instead of a lock.
#ifndef USE_CMP_XCHG_FOR_BGET
#define USE_CMP_XCHG_FOR_BGET 1
#endif

// Test to see if the queuing lock is better than the bootstrap lock for bget
// #ifndef USE_QUEUING_LOCK_FOR_BGET
// #define USE_QUEUING_LOCK_FOR_BGET
// #endif

#ifndef NSEC_PER_SEC
# define NSEC_PER_SEC 1000000000L
#endif

#ifndef USEC_PER_SEC
# define USEC_PER_SEC 1000000L
#endif

// For error messages
#define KMP_IOMP_NAME       "Intel(R) OMP"

// FIXME DOXYGEN... need to group these flags somehow (Making them an anonymous enum would do it...)
/*! Use trampoline for internal microtasks */
#define KMP_IDENT_IMB             0x01

/*! Use c-style ident structure */
#define KMP_IDENT_KMPC            0x02
/* 0x04 is no longer used */
/*! Entry point generated by auto-parallelization */
#define KMP_IDENT_AUTOPAR         0x08

/*! Compiler generates atomic reduction option for kmpc_reduce* */
#define KMP_IDENT_ATOMIC_REDUCE   0x10

/*! To mark a 'barrier' directive in user code */
#define KMP_IDENT_BARRIER_EXPL    0x20

/*! To mark implicit barriers. */
#define KMP_IDENT_BARRIER_IMPL           0x0040
#define KMP_IDENT_BARRIER_IMPL_MASK      0x01C0
#define KMP_IDENT_BARRIER_IMPL_FOR       0x0040
#define KMP_IDENT_BARRIER_IMPL_SECTIONS  0x00C0

#define KMP_IDENT_BARRIER_IMPL_SINGLE    0x0140
#define KMP_IDENT_BARRIER_IMPL_WORKSHARE 0x01C0

/*!
 * The ident structure that describes a source location.
 */
typedef struct ident {
    kmp_int32 reserved_1;   /**< might be used in Fortran */
    kmp_int32 flags;        /**< also f.flags; KMP_IDENT_xxx flags */
    kmp_int32 reserved_2;   /**< not really used in Fortran any more */
    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for C++ */
    char     *psource;      /**< String describing the source location.
                                 The string is composed of semi-colon separated fields
                                 which describe the source file, the function and a pair
                                 of line numbers that delimit the construct. */
} ident_t;
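/* Illustrative example (assumed layout, not normative): a psource string
   generally has the shape ";file;func;line;col;;"; for instance the runtime's
   default idents carry ";unknown;unknown;0;0;;". */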
// Some forward declarations.

typedef union  kmp_team      kmp_team_t;
typedef struct kmp_taskdata  kmp_taskdata_t;
typedef union  kmp_task_team kmp_task_team_t;
typedef union  kmp_team      kmp_team_p;
typedef union  kmp_info      kmp_info_p;
typedef union  kmp_root      kmp_root_p;


#ifdef __cplusplus
extern "C" {
#endif

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Pack two 32-bit signed integers into a 64-bit signed integer */
/* ToDo: Fix word ordering for big-endian machines. */
#define KMP_PACK_64(HIGH_32,LOW_32) \
    ( (kmp_int64) ((((kmp_uint64)(HIGH_32))<<32) | (kmp_uint64)(LOW_32)) )
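/* Worked example (illustrative only): KMP_PACK_64( 1, 2 ) evaluates to
   0x0000000100000002; the first argument occupies the numerically high
   32 bits and the second the low 32 bits. */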


/*
 * Generic string manipulation macros.
 * Assume that _x is of type char *
 */
#define SKIP_WS(_x)     { while (*(_x) == ' ' || *(_x) == '\t') (_x)++; }
#define SKIP_DIGITS(_x) { while (*(_x) >= '0' && *(_x) <= '9') (_x)++; }
#define SKIP_TO(_x,_c)  { while (*(_x) != '\0' && *(_x) != (_c)) (_x)++; }
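/* Example (illustrative only): stepping a cursor through "  42,foo".

       char *p = buf;       // buf points at "  42,foo"
       SKIP_WS( p );        // p now points at "42,foo"
       SKIP_DIGITS( p );    // p now points at ",foo"
       SKIP_TO( p, 'f' );   // p now points at "foo"
*/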

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/* Enumeration types */

enum kmp_state_timer {
    ts_stop,
    ts_start,
    ts_pause,

    ts_last_state
};

enum dynamic_mode {
    dynamic_default,
#ifdef USE_LOAD_BALANCE
    dynamic_load_balance,
#endif /* USE_LOAD_BALANCE */
    dynamic_random,
    dynamic_thread_limit,
    dynamic_max
};

/* external schedule constants, duplicate enum omp_sched in omp.h in order to not include it here */
#ifndef KMP_SCHED_TYPE_DEFINED
#define KMP_SCHED_TYPE_DEFINED
typedef enum kmp_sched {
    kmp_sched_lower             = 0,     // lower and upper bounds are for routine parameter check
    // Note: need to adjust __kmp_sch_map global array in case this enum is changed
    kmp_sched_static            = 1,     // mapped to kmp_sch_static_chunked           (33)
    kmp_sched_dynamic           = 2,     // mapped to kmp_sch_dynamic_chunked          (35)
    kmp_sched_guided            = 3,     // mapped to kmp_sch_guided_chunked           (36)
    kmp_sched_auto              = 4,     // mapped to kmp_sch_auto                     (38)
    kmp_sched_upper_std         = 5,     // upper bound for standard schedules
    kmp_sched_lower_ext         = 100,   // lower bound of Intel extension schedules
    kmp_sched_trapezoidal       = 101,   // mapped to kmp_sch_trapezoidal              (39)
//  kmp_sched_static_steal      = 102,   // mapped to kmp_sch_static_steal             (44)
    kmp_sched_upper             = 102,
    kmp_sched_default = kmp_sched_static // default scheduling
} kmp_sched_t;
#endif

/*!
 * Describes the loop schedule to be used for a parallel for loop.
 */
enum sched_type {
    kmp_sch_lower                     = 32,   /**< lower bound for unordered values */
    kmp_sch_static_chunked            = 33,
    kmp_sch_static                    = 34,   /**< static unspecialized */
    kmp_sch_dynamic_chunked           = 35,
    kmp_sch_guided_chunked            = 36,   /**< guided unspecialized */
    kmp_sch_runtime                   = 37,
    kmp_sch_auto                      = 38,   /**< auto */
    kmp_sch_trapezoidal               = 39,

    /* accessible only through KMP_SCHEDULE environment variable */
    kmp_sch_static_greedy             = 40,
    kmp_sch_static_balanced           = 41,
    /* accessible only through KMP_SCHEDULE environment variable */
    kmp_sch_guided_iterative_chunked  = 42,
    kmp_sch_guided_analytical_chunked = 43,

    kmp_sch_static_steal              = 44,

    /* accessible only through KMP_SCHEDULE environment variable */
    kmp_sch_upper                     = 45,   /**< upper bound for unordered values */

    kmp_ord_lower                     = 64,   /**< lower bound for ordered values, must be power of 2 */
    kmp_ord_static_chunked            = 65,
    kmp_ord_static                    = 66,   /**< ordered static unspecialized */
    kmp_ord_dynamic_chunked           = 67,
    kmp_ord_guided_chunked            = 68,
    kmp_ord_runtime                   = 69,
    kmp_ord_auto                      = 70,   /**< ordered auto */
    kmp_ord_trapezoidal               = 71,
    kmp_ord_upper                     = 72,   /**< upper bound for ordered values */

    /*
     * For the "nomerge" versions, kmp_dispatch_next*() will always return
     * a single iteration/chunk, even if the loop is serialized.  For the
     * schedule types listed above, the entire iteration vector is returned
     * if the loop is serialized.  This doesn't work for gcc/gcomp sections.
     */
    kmp_nm_lower                      = 160,  /**< lower bound for nomerge values */

    kmp_nm_static_chunked             = 161,
    kmp_nm_static                     = 162,  /**< static unspecialized */
    kmp_nm_dynamic_chunked            = 163,
    kmp_nm_guided_chunked             = 164,  /**< guided unspecialized */
    kmp_nm_runtime                    = 165,
    kmp_nm_auto                       = 166,  /**< auto */
    kmp_nm_trapezoidal                = 167,

    /* accessible only through KMP_SCHEDULE environment variable */
    kmp_nm_static_greedy              = 168,
    kmp_nm_static_balanced            = 169,
    /* accessible only through KMP_SCHEDULE environment variable */
    kmp_nm_guided_iterative_chunked   = 170,
    kmp_nm_guided_analytical_chunked  = 171,
    kmp_nm_static_steal               = 172,  /* accessible only through OMP_SCHEDULE environment variable */

    kmp_nm_ord_static_chunked         = 193,
    kmp_nm_ord_static                 = 194,  /**< ordered static unspecialized */
    kmp_nm_ord_dynamic_chunked        = 195,
    kmp_nm_ord_guided_chunked         = 196,
    kmp_nm_ord_runtime                = 197,
    kmp_nm_ord_auto                   = 198,  /**< ordered auto */
    kmp_nm_ord_trapezoidal            = 199,
    kmp_nm_upper                      = 200,  /**< upper bound for nomerge values */

    kmp_sch_default = kmp_sch_static  /**< default scheduling algorithm */
};

/* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */
typedef struct kmp_r_sched {
    enum sched_type r_sched_type;
    int             chunk;
} kmp_r_sched_t;

extern enum sched_type __kmp_sch_map[]; // map OMP 3.0 schedule types with our internal schedule types

enum library_type {
    library_none,
    library_serial,
    library_turnaround,
    library_throughput
};

#if KMP_OS_LINUX
enum clock_function_type {
    clock_function_gettimeofday,
    clock_function_clock_gettime
};
#endif /* KMP_OS_LINUX */

/* ------------------------------------------------------------------------ */
/* -- fast reduction stuff ------------------------------------------------ */

#undef KMP_FAST_REDUCTION_BARRIER
#define KMP_FAST_REDUCTION_BARRIER 1

#undef KMP_FAST_REDUCTION_CORE_DUO
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    #define KMP_FAST_REDUCTION_CORE_DUO 1
#endif

enum _reduction_method {
    reduction_method_not_defined = 0,
    critical_reduce_block        = ( 1 << 8 ),
    atomic_reduce_block          = ( 2 << 8 ),
    tree_reduce_block            = ( 3 << 8 ),
    empty_reduce_block           = ( 4 << 8 )
};

// Description of the packed_reduction_method variable:
// it consists of two enum values packed together into byte 0 and byte 1.
// Byte 0: ( packed_reduction_method & 0x000000FF ) is an 'enum barrier_type' value, the barrier
//         that will be used in fast reduction: bs_plain_barrier or bs_reduction_barrier.
// Byte 1: ( packed_reduction_method & 0x0000FF00 ) is the reduction method that will be used in
//         fast reduction; it is of 'enum _reduction_method' type and is defined so that the bits
//         of byte 0 are empty, so no shift instruction is needed while packing/unpacking.

#if KMP_FAST_REDUCTION_BARRIER
    #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method,barrier_type) \
            ( ( reduction_method ) | ( barrier_type ) )

    #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
            ( ( enum _reduction_method )( ( packed_reduction_method ) & ( 0x0000FF00 ) ) )

    #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \
            ( ( enum barrier_type )(      ( packed_reduction_method ) & ( 0x000000FF ) ) )
#else
    #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method,barrier_type) \
            ( reduction_method )

    #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \
            ( packed_reduction_method )

    #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \
            ( bs_plain_barrier )
#endif

#define TEST_REDUCTION_METHOD(packed_reduction_method,which_reduction_block) \
            ( ( UNPACK_REDUCTION_METHOD( packed_reduction_method ) ) == ( which_reduction_block ) )

#if KMP_FAST_REDUCTION_BARRIER
    #define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER \
            ( PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_reduction_barrier ) )

    #define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER \
            ( PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_plain_barrier ) )
#endif

typedef int PACKED_REDUCTION_METHOD_T;
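
/* Example (illustrative only): packing a reduction method with a barrier type
   and testing it later; bs_plain_barrier comes from enum barrier_type, which
   is defined elsewhere in the runtime.

       PACKED_REDUCTION_METHOD_T prm =
           PACK_REDUCTION_METHOD_AND_BARRIER( atomic_reduce_block, bs_plain_barrier );
       // UNPACK_REDUCTION_METHOD( prm )  == atomic_reduce_block
       // UNPACK_REDUCTION_BARRIER( prm ) == bs_plain_barrier
       if ( TEST_REDUCTION_METHOD( prm, atomic_reduce_block ) ) {
           // take the atomic reduction path
       }
*/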

/* -- end of fast reduction stuff ----------------------------------------- */

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

#if KMP_OS_WINDOWS
# define USE_CBLKDATA
# pragma warning( push )
# pragma warning( disable: 271 310 )
# include <windows.h>
# pragma warning( pop )
#endif

#if KMP_OS_UNIX
# include <pthread.h>
# include <dlfcn.h>
#endif

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

/*
 * Only Linux* OS and Windows* OS support thread affinity.
 */
#if KMP_OS_LINUX || KMP_OS_WINDOWS

extern size_t __kmp_affin_mask_size;
# define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0)
# define KMP_CPU_SETSIZE        (__kmp_affin_mask_size * CHAR_BIT)

# if KMP_OS_LINUX
//
// On Linux* OS, the mask is actually a vector of length __kmp_affin_mask_size
// (in bytes).  It should be allocated on a word boundary.
//
// WARNING!!!  We have made the base type of the affinity mask unsigned char,
// in order to eliminate a lot of checks that the true system mask size is
// really a multiple of 4 bytes (on Linux* OS).
//
// THESE MACROS WON'T WORK PROPERLY ON BIG ENDIAN MACHINES!!!
//

typedef unsigned char kmp_affin_mask_t;

#  define _KMP_CPU_SET(i,mask)   (mask[i/CHAR_BIT] |= (((kmp_affin_mask_t)1) << (i % CHAR_BIT)))
#  define KMP_CPU_SET(i,mask)    _KMP_CPU_SET((i), ((kmp_affin_mask_t *)(mask)))
#  define _KMP_CPU_ISSET(i,mask) (!!(mask[i/CHAR_BIT] & (((kmp_affin_mask_t)1) << (i % CHAR_BIT))))
#  define KMP_CPU_ISSET(i,mask)  _KMP_CPU_ISSET((i), ((kmp_affin_mask_t *)(mask)))
#  define _KMP_CPU_CLR(i,mask)   (mask[i/CHAR_BIT] &= ~(((kmp_affin_mask_t)1) << (i % CHAR_BIT)))
#  define KMP_CPU_CLR(i,mask)    _KMP_CPU_CLR((i), ((kmp_affin_mask_t *)(mask)))

#  define KMP_CPU_ZERO(mask) \
        {                                                                    \
            int __i;                                                         \
            for (__i = 0; __i < __kmp_affin_mask_size; __i++) {              \
                ((kmp_affin_mask_t *)(mask))[__i] = 0;                       \
            }                                                                \
        }

#  define KMP_CPU_COPY(dest, src) \
        {                                                                    \
            int __i;                                                         \
            for (__i = 0; __i < __kmp_affin_mask_size; __i++) {              \
                ((kmp_affin_mask_t *)(dest))[__i]                            \
                  = ((kmp_affin_mask_t *)(src))[__i];                        \
            }                                                                \
        }

#  define KMP_CPU_COMPLEMENT(mask) \
        {                                                                    \
            int __i;                                                         \
            for (__i = 0; __i < __kmp_affin_mask_size; __i++) {              \
                ((kmp_affin_mask_t *)(mask))[__i]                            \
                  = ~((kmp_affin_mask_t *)(mask))[__i];                      \
            }                                                                \
        }

#  define KMP_CPU_UNION(dest, src) \
        {                                                                    \
            int __i;                                                         \
            for (__i = 0; __i < __kmp_affin_mask_size; __i++) {              \
                ((kmp_affin_mask_t *)(dest))[__i]                            \
                  |= ((kmp_affin_mask_t *)(src))[__i];                       \
            }                                                                \
        }
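
/* Example (illustrative only): building a mask with CPUs 0 and 2 set, using
   KMP_CPU_ALLOC/KMP_CPU_FREE defined later in this file.

       kmp_affin_mask_t *mask;
       KMP_CPU_ALLOC( mask );
       KMP_CPU_ZERO( mask );
       KMP_CPU_SET( 0, mask );
       KMP_CPU_SET( 2, mask );
       // KMP_CPU_ISSET( 1, mask ) == 0 and KMP_CPU_ISSET( 2, mask ) != 0
       KMP_CPU_FREE( mask );
*/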

# endif /* KMP_OS_LINUX */

# if KMP_OS_WINDOWS
//
// On Windows* OS, the mask size is 4 bytes for IA-32 architecture, and on
// Intel(R) 64 it is 8 bytes times the number of processor groups.
//

#  if KMP_ARCH_X86_64

typedef struct GROUP_AFFINITY {
    KAFFINITY mask;
    WORD group;
    WORD reserved[3];
} GROUP_AFFINITY;

typedef DWORD_PTR kmp_affin_mask_t;

extern int __kmp_num_proc_groups;

#   define _KMP_CPU_SET(i,mask) \
        (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] |=                    \
        (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t)))))

#   define KMP_CPU_SET(i,mask) \
        _KMP_CPU_SET((i), ((kmp_affin_mask_t *)(mask)))

#   define _KMP_CPU_ISSET(i,mask) \
        (!!(mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] &                  \
        (((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t))))))

#   define KMP_CPU_ISSET(i,mask) \
        _KMP_CPU_ISSET((i), ((kmp_affin_mask_t *)(mask)))

#   define _KMP_CPU_CLR(i,mask) \
        (mask[i/(CHAR_BIT * sizeof(kmp_affin_mask_t))] &=                    \
        ~(((kmp_affin_mask_t)1) << (i % (CHAR_BIT * sizeof(kmp_affin_mask_t)))))

#   define KMP_CPU_CLR(i,mask) \
        _KMP_CPU_CLR((i), ((kmp_affin_mask_t *)(mask)))

#   define KMP_CPU_ZERO(mask) \
        {                                                                    \
            int __i;                                                         \
            for (__i = 0; __i < __kmp_num_proc_groups; __i++) {              \
                ((kmp_affin_mask_t *)(mask))[__i] = 0;                       \
            }                                                                \
        }

#   define KMP_CPU_COPY(dest, src) \
        {                                                                    \
            int __i;                                                         \
            for (__i = 0; __i < __kmp_num_proc_groups; __i++) {              \
                ((kmp_affin_mask_t *)(dest))[__i]                            \
                  = ((kmp_affin_mask_t *)(src))[__i];                        \
            }                                                                \
        }

#   define KMP_CPU_COMPLEMENT(mask) \
        {                                                                    \
            int __i;                                                         \
            for (__i = 0; __i < __kmp_num_proc_groups; __i++) {              \
                ((kmp_affin_mask_t *)(mask))[__i]                            \
                  = ~((kmp_affin_mask_t *)(mask))[__i];                      \
            }                                                                \
        }

#   define KMP_CPU_UNION(dest, src) \
        {                                                                    \
            int __i;                                                         \
            for (__i = 0; __i < __kmp_num_proc_groups; __i++) {              \
                ((kmp_affin_mask_t *)(dest))[__i]                            \
                  |= ((kmp_affin_mask_t *)(src))[__i];                       \
            }                                                                \
        }

typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);
extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;

typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void);
extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount;

typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *);
extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity;

typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, GROUP_AFFINITY *);
extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;

extern int __kmp_get_proc_group(kmp_affin_mask_t const *mask);

#  else

typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */

#   define KMP_CPU_SET(i,mask)      (*(mask) |= (((kmp_affin_mask_t)1) << (i)))
#   define KMP_CPU_ISSET(i,mask)    (!!(*(mask) & (((kmp_affin_mask_t)1) << (i))))
#   define KMP_CPU_CLR(i,mask)      (*(mask) &= ~(((kmp_affin_mask_t)1) << (i)))
#   define KMP_CPU_ZERO(mask)       (*(mask) = 0)
#   define KMP_CPU_COPY(dest, src)  (*(dest) = *(src))
#   define KMP_CPU_COMPLEMENT(mask) (*(mask) = ~*(mask))
#   define KMP_CPU_UNION(dest, src) (*(dest) |= *(src))

#  endif /* KMP_ARCH_X86_64 */

# endif /* KMP_OS_WINDOWS */

//
// __kmp_allocate() will return memory allocated on a 4-byte boundary,
// after zeroing it; this takes care of the assumptions stated above.
//
# define KMP_CPU_ALLOC(ptr) \
        (ptr = ((kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size)))
# define KMP_CPU_FREE(ptr) __kmp_free(ptr)

//
// The following macro should be used to index an array of masks.
// The array should be declared as "kmp_affin_mask_t *" and allocated with
// size "__kmp_affin_mask_size * len".  The macro takes care of the fact
// that on Windows* OS, sizeof(kmp_affin_mask_t) is really the size of the
// mask, but on Linux* OS, sizeof(kmp_affin_mask_t) is 1.
//
# define KMP_CPU_INDEX(array,i) \
        ((kmp_affin_mask_t *)(((char *)(array)) + (i) * __kmp_affin_mask_size))
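
/* Example (illustrative only; len is a hypothetical element count): zeroing
   every mask in an array allocated as described above.

       kmp_affin_mask_t *masks =
           (kmp_affin_mask_t *)__kmp_allocate( __kmp_affin_mask_size * len );
       unsigned int i;
       for ( i = 0; i < len; i++ ) {
           KMP_CPU_ZERO( KMP_CPU_INDEX( masks, i ) );
       }
*/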

//
// Declare local char buffers with this size for printing debug and info
// messages, using __kmp_affinity_print_mask().
//
#define KMP_AFFIN_MASK_PRINT_LEN        1024

enum affinity_type {
    affinity_none = 0,
    affinity_physical,
    affinity_logical,
    affinity_compact,
    affinity_scatter,
    affinity_explicit,
#if KMP_MIC
    affinity_balanced,
#endif
    affinity_disabled,  // not used outside the env var parser
    affinity_default
};

enum affinity_gran {
    affinity_gran_fine = 0,
    affinity_gran_thread,
    affinity_gran_core,
    affinity_gran_package,
    affinity_gran_node,
#if KMP_OS_WINDOWS && KMP_ARCH_X86_64
    //
    // The "group" granularity isn't necessarily coarser than all of the
    // other levels, but we put it last in the enum.
    //
    affinity_gran_group,
#endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */
    affinity_gran_default
};

enum affinity_top_method {
    affinity_top_method_all = 0, // try all (supported) methods, in order
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    affinity_top_method_apicid,
    affinity_top_method_x2apicid,
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
    affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too
#if KMP_OS_WINDOWS && KMP_ARCH_X86_64
    affinity_top_method_group,
#endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */
    affinity_top_method_flat,
    affinity_top_method_default
};

#define affinity_respect_mask_default   (-1)

extern enum affinity_type __kmp_affinity_type; /* Affinity type */
extern enum affinity_gran __kmp_affinity_gran; /* Affinity granularity */
extern int __kmp_affinity_gran_levels; /* corresponding int value */
extern int __kmp_affinity_dups; /* Affinity duplicate masks */
extern enum affinity_top_method __kmp_affinity_top_method;
extern int __kmp_affinity_compact; /* Affinity 'compact' value */
extern int __kmp_affinity_offset; /* Affinity offset value  */
extern int __kmp_affinity_verbose; /* Was verbose specified for KMP_AFFINITY? */
extern int __kmp_affinity_warnings; /* KMP_AFFINITY warnings enabled ? */
extern int __kmp_affinity_respect_mask; /* Respect process' initial affinity mask? */
extern char * __kmp_affinity_proclist; /* proc ID list */
extern kmp_affin_mask_t *__kmp_affinity_masks;
extern unsigned __kmp_affinity_num_masks;
extern int __kmp_get_system_affinity(kmp_affin_mask_t *mask, int abort_on_error);
extern int __kmp_set_system_affinity(kmp_affin_mask_t const *mask, int abort_on_error);
extern void __kmp_affinity_bind_thread(int which);

# if KMP_OS_LINUX
extern kmp_affin_mask_t *__kmp_affinity_get_fullMask();
# endif /* KMP_OS_LINUX */
extern char const * __kmp_cpuinfo_file;

#elif KMP_OS_DARWIN
    // affinity not supported
#else
    #error "Unknown or unsupported OS"
#endif /* KMP_OS_LINUX || KMP_OS_WINDOWS */

#if OMP_40_ENABLED

//
// This needs to be kept in sync with the values in omp.h !!!
//
typedef enum kmp_proc_bind_t {
    proc_bind_false = 0,
    proc_bind_true,
    proc_bind_master,
    proc_bind_close,
    proc_bind_spread,
    proc_bind_disabled,
    proc_bind_intel,    // use KMP_AFFINITY interface
    proc_bind_default
} kmp_proc_bind_t;

typedef struct kmp_nested_proc_bind_t {
    kmp_proc_bind_t *bind_types;
    int size;
    int used;
} kmp_nested_proc_bind_t;

extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;

# if (KMP_OS_WINDOWS || KMP_OS_LINUX)
#  define KMP_PLACE_ALL       (-1)
#  define KMP_PLACE_UNDEFINED (-2)
# endif /* (KMP_OS_WINDOWS || KMP_OS_LINUX) */

extern int __kmp_affinity_num_places;

#endif /* OMP_40_ENABLED */

#if KMP_MIC
extern unsigned int __kmp_place_num_cores;
extern unsigned int __kmp_place_num_threads_per_core;
extern unsigned int __kmp_place_core_offset;
#endif

/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

#define KMP_PAD(type, sz)     (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))
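/* KMP_PAD rounds sizeof(type) up to the next multiple of sz; e.g. with
   sizeof(type) == 20 and sz == 64 it yields 64, and with sizeof(type) == 64
   it stays 64 (useful for padding structures to cache-line boundaries). */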

//
// We need to avoid using -1 as a GTID as +1 is added to the gtid
// when storing it in a lock, and the value 0 is reserved.
//
#define KMP_GTID_DNE            (-2)    /* Does not exist */
#define KMP_GTID_SHUTDOWN       (-3)    /* Library is shutting down */
#define KMP_GTID_MONITOR        (-4)    /* Monitor thread ID */
#define KMP_GTID_UNKNOWN        (-5)    /* Is not known */
#define KMP_GTID_MIN            (-6)    /* Minimal gtid for low bound check in DEBUG */

#define __kmp_get_gtid()               __kmp_get_global_thread_id()
#define __kmp_entry_gtid()             __kmp_get_global_thread_id_reg()

#define __kmp_tid_from_gtid(gtid)     ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \
                                        __kmp_threads[ (gtid) ]->th.th_info.ds.ds_tid )

#define __kmp_get_tid()               ( __kmp_tid_from_gtid( __kmp_get_gtid() ) )
#define __kmp_gtid_from_tid(tid,team) ( KMP_DEBUG_ASSERT( (tid) >= 0 && (team) != NULL ), \
                                        team -> t.t_threads[ (tid) ] -> th.th_info.ds.ds_gtid )

#define __kmp_get_team()              ( __kmp_threads[ (__kmp_get_gtid()) ]-> th.th_team )
#define __kmp_team_from_gtid(gtid)    ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \
                                        __kmp_threads[ (gtid) ]-> th.th_team )

#define __kmp_thread_from_gtid(gtid)  ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), __kmp_threads[ (gtid) ] )
#define __kmp_get_thread()            ( __kmp_thread_from_gtid( __kmp_get_gtid() ) )

    // Returns the gtid of the given thread (a pointer to kmp_info_t). In contrast to
    // __kmp_get_gtid(), it works with registered and not-yet-registered threads.
#define __kmp_gtid_from_thread(thr)   ( KMP_DEBUG_ASSERT( (thr) != NULL ), \
                                        (thr)->th.th_info.ds.ds_gtid )

// AT: Which way is correct?
// AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc;
// AT: 2. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc;
#define __kmp_get_team_num_threads(gtid) ( __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc )


/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */

#define KMP_UINT64_MAX         (~((kmp_uint64)1<<((sizeof(kmp_uint64)*(1<<3))-1)))

#define KMP_MIN_NTH           1

#ifndef KMP_MAX_NTH
#  ifdef PTHREAD_THREADS_MAX
#    define KMP_MAX_NTH          PTHREAD_THREADS_MAX
#  else
#    define KMP_MAX_NTH          (32 * 1024)
#  endif
#endif /* KMP_MAX_NTH */

#ifdef PTHREAD_STACK_MIN
# define KMP_MIN_STKSIZE         PTHREAD_STACK_MIN
#else
# define KMP_MIN_STKSIZE         ((size_t)(32 * 1024))
#endif

#define KMP_MAX_STKSIZE          (~((size_t)1<<((sizeof(size_t)*(1<<3))-1)))

#if KMP_ARCH_X86
# define KMP_DEFAULT_STKSIZE     ((size_t)(2 * 1024 * 1024))
#elif KMP_ARCH_X86_64
# define KMP_DEFAULT_STKSIZE     ((size_t)(4 * 1024 * 1024))
# define KMP_BACKUP_STKSIZE      ((size_t)(2 * 1024 * 1024))
#else
# define KMP_DEFAULT_STKSIZE     ((size_t)(1024 * 1024))
#endif

#define KMP_DEFAULT_MONITOR_STKSIZE     ((size_t)(64 * 1024))

#define KMP_DEFAULT_MALLOC_POOL_INCR    ((size_t) (1024 * 1024))
#define KMP_MIN_MALLOC_POOL_INCR        ((size_t) (4 * 1024))
#define KMP_MAX_MALLOC_POOL_INCR        (~((size_t)1<<((sizeof(size_t)*(1<<3))-1)))

#define KMP_MIN_STKOFFSET       (0)
#define KMP_MAX_STKOFFSET       KMP_MAX_STKSIZE
#define KMP_DEFAULT_STKOFFSET   KMP_MIN_STKOFFSET

#define KMP_MIN_MONITOR_WAKEUPS      (1)       /* min number of times monitor wakes up per second */
#define KMP_MAX_MONITOR_WAKEUPS      (1000)    /* maximum number of times monitor can wake up per second */
#define KMP_BLOCKTIME_MULTIPLIER     (1000)    /* number of blocktime units per second */
#define KMP_MIN_BLOCKTIME            (0)
#define KMP_MAX_BLOCKTIME            (INT_MAX) /* Must be this for the "infinite" setting to work */
#define KMP_DEFAULT_BLOCKTIME        (200)     /*  __kmp_blocktime is in milliseconds  */
/* Calculate new number of monitor wakeups for a specific block time based on previous monitor_wakeups */
/* Only allow increasing number of wakeups */
#define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \
                                 ( ((blocktime) == KMP_MAX_BLOCKTIME) ? (monitor_wakeups) : \
                                   ((blocktime) == KMP_MIN_BLOCKTIME) ? KMP_MAX_MONITOR_WAKEUPS : \
                                   ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) ? (monitor_wakeups) : \
                                       (KMP_BLOCKTIME_MULTIPLIER) / (blocktime) )

/* Calculate number of intervals for a specific block time based on monitor_wakeups */
#define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups)  \
                                 ( ( (blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1 ) /  \
                                   (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) )
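
/* Worked example (illustrative only): with the default blocktime of 200 ms
   and KMP_BLOCKTIME_MULTIPLIER == 1000 units per second,
       KMP_WAKEUPS_FROM_BLOCKTIME( 200, 1 )   == 5   (monitor wakes 5 times/sec)
       KMP_INTERVALS_FROM_BLOCKTIME( 200, 5 ) == 1   (one 200 ms interval per blocktime)
*/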

#define KMP_MIN_STATSCOLS       40
#define KMP_MAX_STATSCOLS       4096
#define KMP_DEFAULT_STATSCOLS   80

#define KMP_MIN_INTERVAL        0
#define KMP_MAX_INTERVAL        (INT_MAX-1)
#define KMP_DEFAULT_INTERVAL    0

#define KMP_MIN_CHUNK           1
#define KMP_MAX_CHUNK           (INT_MAX-1)
#define KMP_DEFAULT_CHUNK       1

#define KMP_MIN_INIT_WAIT       1
#define KMP_MAX_INIT_WAIT       (INT_MAX/2)
#define KMP_DEFAULT_INIT_WAIT   2048U

#define KMP_MIN_NEXT_WAIT       1
#define KMP_MAX_NEXT_WAIT       (INT_MAX/2)
#define KMP_DEFAULT_NEXT_WAIT   1024U

// max possible dynamic loops in concurrent execution per team
#define KMP_MAX_DISP_BUF        7
#define KMP_MAX_ORDERED         8

#define KMP_MAX_FIELDS          32

#define KMP_MAX_BRANCH_BITS     31

#define KMP_MAX_ACTIVE_LEVELS_LIMIT INT_MAX

/* Minimum number of threads before switching to TLS gtid (experimentally determined) */
/* josh TODO: what about OS X* tuning? */
#if   KMP_ARCH_X86 || KMP_ARCH_X86_64
# define KMP_TLS_GTID_MIN     5
#else
# define KMP_TLS_GTID_MIN     INT_MAX
#endif

#define KMP_MASTER_TID(tid)      ( (tid) == 0 )
#define KMP_WORKER_TID(tid)      ( (tid) != 0 )

#define KMP_MASTER_GTID(gtid)    ( __kmp_tid_from_gtid((gtid)) == 0 )
#define KMP_WORKER_GTID(gtid)    ( __kmp_tid_from_gtid((gtid)) != 0 )
#define KMP_UBER_GTID(gtid)                                           \
    (                                                                 \
        KMP_DEBUG_ASSERT( (gtid) >= KMP_GTID_MIN ),                   \
        KMP_DEBUG_ASSERT( (gtid) < __kmp_threads_capacity ),          \
        (gtid) >= 0 && __kmp_root[(gtid)] && __kmp_threads[(gtid)] && \
        (__kmp_threads[(gtid)] == __kmp_root[(gtid)]->r.r_uber_thread)\
    )
#define KMP_INITIAL_GTID(gtid)   ( (gtid) == 0 )

#ifndef TRUE
#define FALSE   0
#define TRUE    (! FALSE)
#endif

/* NOTE: all of the following constants must be even */

#if KMP_OS_WINDOWS
#  define KMP_INIT_WAIT    64U          /* initial number of spin-tests   */
#  define KMP_NEXT_WAIT    32U          /* subsequent number of spin-tests */
#elif KMP_OS_LINUX
#  define KMP_INIT_WAIT  1024U          /* initial number of spin-tests   */
#  define KMP_NEXT_WAIT   512U          /* subsequent number of spin-tests */
#elif KMP_OS_DARWIN
/* TODO: tune for KMP_OS_DARWIN */
#  define KMP_INIT_WAIT  1024U          /* initial number of spin-tests   */
#  define KMP_NEXT_WAIT   512U          /* subsequent number of spin-tests */
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
struct kmp_cpuid {
    kmp_uint32  eax;
    kmp_uint32  ebx;
    kmp_uint32  ecx;
    kmp_uint32  edx;
};
extern void __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );
# if KMP_MIC
  static void __kmp_x86_pause( void ) { _mm_delay_32( 100 ); }
# else
  extern void __kmp_x86_pause( void );
# endif
# define KMP_CPU_PAUSE()        __kmp_x86_pause()
#else
# define KMP_CPU_PAUSE()        /* nothing to do */
#endif

#define KMP_INIT_YIELD(count)           { (count) = __kmp_yield_init; }

#define KMP_YIELD(cond)                 { KMP_CPU_PAUSE(); __kmp_static_yield( (cond) ); }

// Note the decrement of 2 in the following macros.  With KMP_LIBRARY=turnaround,
// there should be no yielding since the starting value from KMP_INIT_YIELD() is odd.

#define KMP_YIELD_WHEN(cond,count)      { KMP_CPU_PAUSE(); (count) -= 2; \
                                                if (!(count)) { KMP_YIELD(cond); (count) = __kmp_yield_next; } }
#define KMP_YIELD_SPIN(count)           { KMP_CPU_PAUSE(); (count) -= 2; \
                                                if (!(count)) { KMP_YIELD(1); (count) = __kmp_yield_next; } }

/* ------------------------------------------------------------------------ */
/* Support datatypes for the orphaned construct nesting checks.             */
/* ------------------------------------------------------------------------ */

enum cons_type {
    ct_none,
    ct_parallel,
    ct_pdo,
    ct_pdo_ordered,
    ct_psections,
    ct_psingle,

    /* the following must be left in order and not split up */
    ct_taskq,
    ct_task,                    /* really task inside non-ordered taskq, considered a worksharing type */
    ct_task_ordered,            /* really task inside ordered taskq, considered a worksharing type */
    /* the preceding must be left in order and not split up */

    ct_critical,
    ct_ordered_in_parallel,
    ct_ordered_in_pdo,
    ct_ordered_in_taskq,
    ct_master,
    ct_reduce,
    ct_barrier
};

/* test to see if we are in a taskq construct */
# define IS_CONS_TYPE_TASKQ( ct )       ( ((int)(ct)) >= ((int)ct_taskq) && ((int)(ct)) <= ((int)ct_task_ordered) )
# define IS_CONS_TYPE_ORDERED( ct )     ((ct) == ct_pdo_ordered || (ct) == ct_task_ordered)

struct cons_data {
    ident_t const     *ident;
    enum cons_type     type;
    int                prev;
    kmp_user_lock_p    name;    /* address exclusively for critical section name comparison */
};

struct cons_header {
    int                 p_top, w_top, s_top;
    int                 stack_size, stack_top;
    struct cons_data   *stack_data;
};

struct kmp_region_info {
    char                *text;
    int                 offset[KMP_MAX_FIELDS];
    int                 length[KMP_MAX_FIELDS];
};


/* ---------------------------------------------------------------------- */
/* ---------------------------------------------------------------------- */

#if KMP_OS_WINDOWS
    typedef HANDLE              kmp_thread_t;
    typedef DWORD               kmp_key_t;
#endif /* KMP_OS_WINDOWS */

#if KMP_OS_UNIX
    typedef pthread_t           kmp_thread_t;
    typedef pthread_key_t       kmp_key_t;
#endif

extern kmp_key_t  __kmp_gtid_threadprivate_key;

typedef struct kmp_sys_info {
    long maxrss;          /* the maximum resident set size utilized (in kilobytes)     */
    long minflt;          /* the number of page faults serviced without any I/O        */
    long majflt;          /* the number of page faults serviced that required I/O      */
    long nswap;           /* the number of times a process was "swapped" out of memory */
    long inblock;         /* the number of times the file system had to perform input  */
    long oublock;         /* the number of times the file system had to perform output */
    long nvcsw;           /* the number of times a context switch was performed voluntarily */
    long nivcsw;          /* the number of times a context switch was forced           */
} kmp_sys_info_t;

typedef struct kmp_cpuinfo {
    int        initialized;  // If 0, other fields are not initialized.
    int        signature;    // CPUID(1).EAX
    int        family;       // CPUID(1).EAX[27:20] + CPUID(1).EAX[11:8] ( Extended Family + Family )
    int        model;        // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended Model << 4 ) + Model)
    int        stepping;     // CPUID(1).EAX[3:0] ( Stepping )
    int        sse2;         // 0 if SSE2 instructions are not supported, 1 otherwise.

    int        cpu_stackoffset;
    int        apic_id;
    int        physical_id;
    int        logical_id;
    kmp_uint64 frequency;    // Nominal CPU frequency in Hz.
} kmp_cpuinfo_t;


#ifdef BUILD_TV

struct tv_threadprivate {
    /* Record type #1 */
    void        *global_addr;
    void        *thread_addr;
};

struct tv_data {
    struct tv_data      *next;
    void                *type;
    union tv_union {
        struct tv_threadprivate tp;
    } u;
};

extern kmp_key_t __kmp_tv_key;

#endif /* BUILD_TV */

/* ------------------------------------------------------------------------ */
// Some forward declarations.

typedef union  kmp_team      kmp_team_t;
typedef struct kmp_taskdata  kmp_taskdata_t;
typedef union  kmp_task_team kmp_task_team_t;
typedef union  kmp_team      kmp_team_p;
typedef union  kmp_info      kmp_info_p;
typedef union  kmp_root      kmp_root_p;


/* ------------------------------------------------------------------------ */

/*
 * Taskq data structures
 */

#define HIGH_WATER_MARK(nslots)         (((nslots) * 3) / 4)
#define __KMP_TASKQ_THUNKS_PER_TH        1      /* num thunks that each thread can simultaneously execute from a task queue */
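/* For example, HIGH_WATER_MARK( 256 ) == 192: a queue counts as nearly full
   once three-quarters of its slots are occupied (see tq_hiwat below). */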

/*  flags for taskq_global_flags, kmp_task_queue_t tq_flags, kmpc_thunk_t th_flags  */

#define TQF_IS_ORDERED          0x0001  /*  __kmpc_taskq interface, taskq ordered  */
#define TQF_IS_LASTPRIVATE      0x0002  /*  __kmpc_taskq interface, taskq with lastprivate list  */
#define TQF_IS_NOWAIT           0x0004  /*  __kmpc_taskq interface, end taskq nowait  */
#define TQF_HEURISTICS          0x0008  /*  __kmpc_taskq interface, use heuristics to decide task queue size  */
#define TQF_INTERFACE_RESERVED1 0x0010  /*  __kmpc_taskq interface, reserved for future use  */
#define TQF_INTERFACE_RESERVED2 0x0020  /*  __kmpc_taskq interface, reserved for future use  */
#define TQF_INTERFACE_RESERVED3 0x0040  /*  __kmpc_taskq interface, reserved for future use  */
#define TQF_INTERFACE_RESERVED4 0x0080  /*  __kmpc_taskq interface, reserved for future use  */

#define TQF_INTERFACE_FLAGS     0x00ff  /*  all the __kmpc_taskq interface flags  */

#define TQF_IS_LAST_TASK        0x0100  /*  internal/read by instrumentation; only used with TQF_IS_LASTPRIVATE  */
#define TQF_TASKQ_TASK          0x0200  /*  internal use only; this thunk->th_task is the taskq_task  */
#define TQF_RELEASE_WORKERS     0x0400  /*  internal use only; must release worker threads once ANY queued task exists (global) */
#define TQF_ALL_TASKS_QUEUED    0x0800  /*  internal use only; notify workers that master has finished enqueuing tasks */
#define TQF_PARALLEL_CONTEXT    0x1000  /*  internal use only: this queue encountered in a parallel context: not serialized */
#define TQF_DEALLOCATED         0x2000  /*  internal use only; this queue is on the freelist and not in use */

#define TQF_INTERNAL_FLAGS      0x3f00  /*  all the internal use only flags  */

typedef struct KMP_ALIGN_CACHE kmpc_aligned_int32_t {
    kmp_int32                      ai_data;
} kmpc_aligned_int32_t;

typedef struct KMP_ALIGN_CACHE kmpc_aligned_queue_slot_t {
    struct kmpc_thunk_t   *qs_thunk;
} kmpc_aligned_queue_slot_t;

typedef struct kmpc_task_queue_t {
        /* task queue linkage fields for n-ary tree of queues (locked with global taskq_tree_lck) */
    kmp_lock_t                    tq_link_lck;          /*  lock for child link, child next/prev links and child ref counts */
    union {
        struct kmpc_task_queue_t *tq_parent;            /*  pointer to parent taskq, not locked */
        struct kmpc_task_queue_t *tq_next_free;         /*  for taskq internal freelists, locked with global taskq_freelist_lck */
    } tq;
    volatile struct kmpc_task_queue_t *tq_first_child;  /*  pointer to linked-list of children, locked by tq's tq_link_lck */
    struct kmpc_task_queue_t     *tq_next_child;        /*  next child in linked-list, locked by parent tq's tq_link_lck */
    struct kmpc_task_queue_t     *tq_prev_child;        /*  previous child in linked-list, locked by parent tq's tq_link_lck */
    volatile kmp_int32            tq_ref_count;         /*  reference count of threads with access to this task queue */
                                                        /*  (other than the thread executing the kmpc_end_taskq call) */
                                                        /*  locked by parent tq's tq_link_lck */

        /* shared data for task queue */
    struct kmpc_aligned_shared_vars_t    *tq_shareds;   /*  per-thread array of pointers to shared variable structures */
                                                        /*  only one array element exists for all but outermost taskq */

        /* bookkeeping for ordered task queue */
    kmp_uint32                    tq_tasknum_queuing;   /*  ordered task number assigned while queuing tasks */
    volatile kmp_uint32           tq_tasknum_serving;   /*  ordered number of next task to be served (executed) */

        /* thunk storage management for task queue */
    kmp_lock_t                    tq_free_thunks_lck;   /*  lock for thunk freelist manipulation */
    struct kmpc_thunk_t          *tq_free_thunks;       /*  thunk freelist, chained via th.th_next_free  */
    struct kmpc_thunk_t          *tq_thunk_space;       /*  space allocated for thunks for this task queue  */

        /* data fields for queue itself */
    kmp_lock_t                    tq_queue_lck;         /*  lock for [de]enqueue operations: tq_queue, tq_head, tq_tail, tq_nfull */
    kmpc_aligned_queue_slot_t    *tq_queue;             /*  array of queue slots to hold thunks for tasks */
    volatile struct kmpc_thunk_t *tq_taskq_slot;        /*  special slot for taskq task thunk, occupied if not NULL  */
    kmp_int32                     tq_nslots;            /*  # of tq_thunk_space thunks alloc'd (not incl. tq_taskq_slot space)  */
    kmp_int32                     tq_head;              /*  enqueue puts next item in here (index into tq_queue array) */
    kmp_int32                     tq_tail;              /*  dequeue takes next item out of here (index into tq_queue array) */
    volatile kmp_int32            tq_nfull;             /*  # of occupied entries in task queue right now  */
    kmp_int32                     tq_hiwat;             /*  high-water mark for tq_nfull and queue scheduling  */
    volatile kmp_int32            tq_flags;             /*  TQF_xxx  */

        /* bookkeeping for outstanding thunks */
    struct kmpc_aligned_int32_t  *tq_th_thunks;         /*  per-thread array for # of regular thunks currently being executed */
    kmp_int32                     tq_nproc;             /*  number of thunks in the th_thunks array */

        /* statistics library bookkeeping */
    ident_t                       *tq_loc;              /*  source location information for taskq directive */
} kmpc_task_queue_t;

typedef void (*kmpc_task_t) (kmp_int32 global_tid, struct kmpc_thunk_t *thunk);

/*  sizeof_shareds passed as arg to __kmpc_taskq call  */
typedef struct kmpc_shared_vars_t {             /*  aligned during dynamic allocation */
    kmpc_task_queue_t         *sv_queue;
    /*  (pointers to) shared vars  */
} kmpc_shared_vars_t;

typedef struct KMP_ALIGN_CACHE kmpc_aligned_shared_vars_t {
    volatile struct kmpc_shared_vars_t     *ai_data;
} kmpc_aligned_shared_vars_t;

/*  sizeof_thunk passed as arg to kmpc_taskq call  */
typedef struct kmpc_thunk_t {                   /*  aligned during dynamic allocation */
    union {                                     /*  field used for internal freelists too  */
        kmpc_shared_vars_t  *th_shareds;
        struct kmpc_thunk_t *th_next_free;      /*  freelist of individual thunks within queue, head at tq_free_thunks  */
    } th;
    kmpc_task_t th_task;                        /*  taskq_task if flags & TQF_TASKQ_TASK  */
    struct kmpc_thunk_t *th_encl_thunk;         /*  pointer to dynamically enclosing thunk on this thread's call stack */
    kmp_int32 th_flags;                         /*  TQF_xxx (tq_flags interface plus possible internal flags)  */
    kmp_int32 th_status;
    kmp_uint32 th_tasknum;                      /*  task number assigned in order of queuing, used for ordered sections */
    /*  private vars  */
} kmpc_thunk_t;

typedef struct KMP_ALIGN_CACHE kmp_taskq {
    int                 tq_curr_thunk_capacity;

    kmpc_task_queue_t  *tq_root;
    kmp_int32           tq_global_flags;

    kmp_lock_t          tq_freelist_lck;
    kmpc_task_queue_t  *tq_freelist;

    kmpc_thunk_t      **tq_curr_thunk;
} kmp_taskq_t;

/* END Taskq data structures */
/* --------------------------------------------------------------------------- */

typedef kmp_int32 kmp_critical_name[8];

01253 typedef void (*kmpc_micro)              ( kmp_int32 * global_tid, kmp_int32 * bound_tid, ... );
01254 typedef void (*kmpc_micro_bound)        ( kmp_int32 * bound_tid, kmp_int32 * bound_nth, ... );
01255 
01260 /* --------------------------------------------------------------------------- */
01261 /* Threadprivate initialization/finalization function declarations */
01262 
01263 /*  for non-array objects:  __kmpc_threadprivate_register()  */
01264 
01269 typedef void *(*kmpc_ctor)    (void *);
01270 
01275 typedef void (*kmpc_dtor)     (void * /*, size_t */); /* 2nd arg: magic number for KCC; unused by the Intel compiler */
01280 typedef void *(*kmpc_cctor)   (void *, void *); 
01281 
01282 /*  for array objects: __kmpc_threadprivate_register_vec()  */
01283                                 /* First arg: "this" pointer */
01284                                 /* Last arg: number of array elements */
01290 typedef void *(*kmpc_ctor_vec)  (void *, size_t);
01296 typedef void (*kmpc_dtor_vec)   (void *, size_t);
01302 typedef void *(*kmpc_cctor_vec) (void *, void *, size_t); /* function unused by compiler */
01303 
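/*
    Editor's note: a hedged sketch (not part of this header) of functions with
    the constructor/copy-constructor/destructor signatures above, as a compiler
    might register them for a non-array threadprivate object.  my_type_t,
    my_ctor, my_cctor, and my_dtor are hypothetical names.
*/
#if 0 /* illustrative example only */
typedef struct { int x; } my_type_t;           /* hypothetical threadprivate type */

static void *my_ctor( void *obj )              /* kmpc_ctor: default-construct in place */
{
    ((my_type_t *) obj)->x = 0;
    return obj;
}

static void *my_cctor( void *dst, void *src )  /* kmpc_cctor: copy-construct */
{
    ((my_type_t *) dst)->x = ((my_type_t *) src)->x;
    return dst;
}

static void my_dtor( void *obj )               /* kmpc_dtor: release per-thread resources */
{
    (void) obj;                                /* nothing to free for a POD type */
}
#endif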
01309 /* ------------------------------------------------------------------------ */
01310 
01311 /* keeps track of threadprivate cache allocations for cleanup later */
01312 typedef struct kmp_cached_addr {
01313     void                      **addr;           /* address of allocated cache */
01314     struct kmp_cached_addr     *next;           /* pointer to next cached address */
01315 } kmp_cached_addr_t;
01316 
01317 struct private_data {
01318     struct private_data *next;          /* The next descriptor in the list      */
01319     void                *data;          /* The data buffer for this descriptor  */
01320     int                  more;          /* The repeat count for this descriptor */
01321     size_t               size;          /* The data size for this descriptor    */
01322 };
01323 
01324 struct private_common {
01325     struct private_common     *next;
01326     struct private_common     *link;
01327     void                      *gbl_addr;
01328     void                      *par_addr;        /* par_addr == gbl_addr for MASTER thread */
01329     size_t                     cmn_size;
01330 };
01331 
01332 struct shared_common
01333 {
01334     struct shared_common      *next;
01335     struct private_data       *pod_init;
01336     void                      *obj_init;
01337     void                      *gbl_addr;
01338     union {
01339         kmpc_ctor              ctor;
01340         kmpc_ctor_vec          ctorv;
01341     } ct;
01342     union {
01343         kmpc_cctor             cctor;
01344         kmpc_cctor_vec         cctorv;
01345     } cct;
01346     union {
01347         kmpc_dtor              dtor;
01348         kmpc_dtor_vec          dtorv;
01349     } dt;
01350     size_t                     vec_len;
01351     int                        is_vec;
01352     size_t                     cmn_size;
01353 };
01354 
01355 #define KMP_HASH_TABLE_LOG2     9                               /* log2 of the hash table size */
01356 #define KMP_HASH_TABLE_SIZE     (1 << KMP_HASH_TABLE_LOG2)      /* size of the hash table */
01357 #define KMP_HASH_SHIFT          3                               /* throw away this many low bits from the address */
01358 #define KMP_HASH(x)             ((((kmp_uintptr_t) x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE-1))
01359 
01360 struct common_table {
01361     struct  private_common      *data[ KMP_HASH_TABLE_SIZE ];
01362 };
01363 
01364 struct shared_table {
01365     struct  shared_common       *data[ KMP_HASH_TABLE_SIZE ];
01366 };
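/*
    Editor's note: a sketch (not part of this header) of how a threadprivate
    lookup might walk one hash chain; __kmp_example_find_private is a
    hypothetical name, but the bucketing follows KMP_HASH() above.
*/
#if 0 /* illustrative example only */
static struct private_common *
__kmp_example_find_private( struct common_table *tbl, void *gbl_addr )
{
    struct private_common *tn = tbl->data[ KMP_HASH( gbl_addr ) ];
    for ( ; tn != NULL; tn = tn->next )
        if ( tn->gbl_addr == gbl_addr )
            return tn;      /* descriptor for this thread's copy */
    return NULL;            /* not yet created for this thread */
}
#endif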
01367 /* ------------------------------------------------------------------------ */
01368 /* ------------------------------------------------------------------------ */
01369 
01370 #ifdef KMP_STATIC_STEAL_ENABLED
01371 typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
01372     kmp_int32 count;
01373     kmp_int32 ub;
01374     /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
01375     kmp_int32 lb;
01376     kmp_int32 st;
01377     kmp_int32 tc;
01378     kmp_int32 static_steal_counter; /* for static_steal only; maybe better to put after ub */
01379 
01380     // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
01381     //    a) parm3 is properly aligned and
01382     //    b) all parm1-4 are in the same cache line.
01383     // Because parm1-4 are used together, performance seems to be better
01384     // if they are in the same line (not measured, though).
01385 
01386     struct KMP_ALIGN( 32 ) { // AC: changed 16 to 32 in order to simplify template
01387         kmp_int32 parm1;     //     structures in kmp_dispatch.cpp. This should
01388         kmp_int32 parm2;     //     make no real change at least while padding is off.
01389         kmp_int32 parm3;
01390         kmp_int32 parm4;
01391     };
01392 
01393     kmp_uint32 ordered_lower;
01394     kmp_uint32 ordered_upper;
01395 #if KMP_OS_WINDOWS
01396     // This var can be placed in the hole between 'tc' and 'parm1', instead of 'static_steal_counter'.
01397     // It would be nice to measure execution times.
01398     // The conditional #if/#endif could be removed entirely.
01399     kmp_int32 last_upper;
01400 #endif /* KMP_OS_WINDOWS */
01401 } dispatch_private_info32_t;
01402 
01403 typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
01404     kmp_int64 count;   /* current chunk number for static and static-steal scheduling*/
01405     kmp_int64 ub;      /* upper-bound */
01406     /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
01407     kmp_int64 lb;      /* lower-bound */
01408     kmp_int64 st;      /* stride */
01409     kmp_int64 tc;      /* trip count (number of iterations) */
01410     kmp_int64 static_steal_counter; /* for static_steal only; maybe better to put after ub */
01411 
01412     /* parm[1-4] are used in different ways by different scheduling algorithms */
01413 
01414     // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
01415     //    a) parm3 is properly aligned and
01416     //    b) all parm1-4 are in the same cache line.
01417     // Because parm1-4 are used together, performance seems to be better
01418     // if they are in the same line (not measured, though).
01419 
01420     struct KMP_ALIGN( 32 ) {
01421         kmp_int64 parm1;
01422         kmp_int64 parm2;
01423         kmp_int64 parm3;
01424         kmp_int64 parm4;
01425     };
01426 
01427     kmp_uint64 ordered_lower;
01428     kmp_uint64 ordered_upper;
01429 #if KMP_OS_WINDOWS
01430     // This var can be placed in the hole between 'tc' and 'parm1', instead of 'static_steal_counter'.
01431     // It would be nice to measure execution times.
01432     // The conditional #if/#endif could be removed entirely.
01433     kmp_int64 last_upper;
01434 #endif /* KMP_OS_WINDOWS */
01435 } dispatch_private_info64_t;
01436 #else /* KMP_STATIC_STEAL_ENABLED */
01437 typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
01438     kmp_int32 lb;
01439     kmp_int32 ub;
01440     kmp_int32 st;
01441     kmp_int32 tc;
01442 
01443     kmp_int32 parm1;
01444     kmp_int32 parm2;
01445     kmp_int32 parm3;
01446     kmp_int32 parm4;
01447 
01448     kmp_int32 count;
01449 
01450     kmp_uint32 ordered_lower;
01451     kmp_uint32 ordered_upper;
01452 #if KMP_OS_WINDOWS
01453     kmp_int32 last_upper;
01454 #endif /* KMP_OS_WINDOWS */
01455 } dispatch_private_info32_t;
01456 
01457 typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
01458     kmp_int64 lb;      /* lower-bound */
01459     kmp_int64 ub;      /* upper-bound */
01460     kmp_int64 st;      /* stride */
01461     kmp_int64 tc;      /* trip count (number of iterations) */
01462 
01463     /* parm[1-4] are used in different ways by different scheduling algorithms */
01464     kmp_int64 parm1;
01465     kmp_int64 parm2;
01466     kmp_int64 parm3;
01467     kmp_int64 parm4;
01468 
01469     kmp_int64 count;   /* current chunk number for static scheduling */
01470 
01471     kmp_uint64 ordered_lower;
01472     kmp_uint64 ordered_upper;
01473 #if KMP_OS_WINDOWS
01474     kmp_int64 last_upper;
01475 #endif /* KMP_OS_WINDOWS */
01476 } dispatch_private_info64_t;
01477 #endif /* KMP_STATIC_STEAL_ENABLED */
01478 
01479 typedef struct KMP_ALIGN_CACHE dispatch_private_info {
01480     union private_info {
01481         dispatch_private_info32_t  p32;
01482         dispatch_private_info64_t  p64;
01483     } u;
01484     enum sched_type schedule;  /* scheduling algorithm */
01485     kmp_int32       ordered;   /* ordered clause specified */
01486     kmp_int32       ordered_bumped;
01487     kmp_int32   ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size after making ordered_iteration scalar
01488     struct dispatch_private_info * next; /* stack of buffers for nest of serial regions */
01489     kmp_int32       nomerge;   /* don't merge iters if serialized */
01490     kmp_int32       type_size; /* the size of types in private_info */
01491     enum cons_type  pushed_ws;
01492 } dispatch_private_info_t;
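/*
    Editor's note: a sketch (not part of this header) showing how the union is
    meant to be discriminated: type_size selects between the 32- and 64-bit
    private info.  The accessor name is hypothetical.
*/
#if 0 /* illustrative example only */
static kmp_int64
__kmp_example_trip_count( dispatch_private_info_t *pr )
{
    return ( pr->type_size == 4 ) ? (kmp_int64) pr->u.p32.tc
                                  : pr->u.p64.tc;
}
#endif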
01493 
01494 typedef struct dispatch_shared_info32 {
01495     /* chunk index under dynamic, number of idle threads under static-steal;
01496        iteration index otherwise */
01497     volatile kmp_uint32      iteration;
01498     volatile kmp_uint32      num_done;
01499     volatile kmp_uint32      ordered_iteration;
01500     kmp_int32   ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size after making ordered_iteration scalar
01501 } dispatch_shared_info32_t;
01502 
01503 typedef struct dispatch_shared_info64 {
01504     /* chunk index under dynamic, number of idle threads under static-steal;
01505        iteration index otherwise */
01506     volatile kmp_uint64      iteration;
01507     volatile kmp_uint64      num_done;
01508     volatile kmp_uint64      ordered_iteration;
01509     kmp_int64   ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size after making ordered_iteration scalar
01510 } dispatch_shared_info64_t;
01511 
01512 typedef struct dispatch_shared_info {
01513     union shared_info {
01514         dispatch_shared_info32_t  s32;
01515         dispatch_shared_info64_t  s64;
01516     } u;
01517 /*    volatile kmp_int32      dispatch_abort;  deprecated */
01518     volatile kmp_uint32     buffer_index;
01519 } dispatch_shared_info_t;
01520 
01521 typedef struct kmp_disp {
01522     /* Vector for ORDERED SECTION */
01523     void (*th_deo_fcn)( int * gtid, int * cid, ident_t *);
01524     /* Vector for END ORDERED SECTION */
01525     void (*th_dxo_fcn)( int * gtid, int * cid, ident_t *);
01526 
01527     dispatch_shared_info_t  *th_dispatch_sh_current;
01528     dispatch_private_info_t *th_dispatch_pr_current;
01529 
01530     dispatch_private_info_t *th_disp_buffer;
01531     kmp_int32                th_disp_index;
01532     void* dummy_padding[2]; // make it 64 bytes on Intel(R) 64
01533 } kmp_disp_t;
01534 
01535 /* ------------------------------------------------------------------------ */
01536 /* ------------------------------------------------------------------------ */
01537 
01538 /* Barrier stuff */
01539 
01540 /* constants for barrier state update */
01541 #define KMP_INIT_BARRIER_STATE  0       /* should probably start from zero */
01542 #define KMP_BARRIER_SLEEP_BIT   0       /* bit used for suspend/sleep part of state */
01543 #define KMP_BARRIER_UNUSED_BIT  1       /* bit that must never be set for valid state */
01544 #define KMP_BARRIER_BUMP_BIT    2       /* lsb used for bump of go/arrived state */
01545 
01546 #define KMP_BARRIER_SLEEP_STATE         ((kmp_uint) (1 << KMP_BARRIER_SLEEP_BIT))
01547 #define KMP_BARRIER_UNUSED_STATE        ((kmp_uint) (1 << KMP_BARRIER_UNUSED_BIT))
01548 #define KMP_BARRIER_STATE_BUMP          ((kmp_uint) (1 << KMP_BARRIER_BUMP_BIT))
01549 
01550 #if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT)
01551 # error "Barrier sleep bit must be smaller than barrier bump bit"
01552 #endif
01553 #if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT)
01554 # error "Barrier unused bit must be smaller than barrier bump bit"
01555 #endif
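/*
    Editor's note: a sketch (not part of this header) of how the bit macros
    above compose.  The generation count lives at and above
    KMP_BARRIER_BUMP_BIT, so a release strips the low flag bits and advances
    the state by KMP_BARRIER_STATE_BUMP.  The helper name is hypothetical.
*/
#if 0 /* illustrative example only */
static kmp_uint
__kmp_example_next_go( kmp_uint state )
{
    if ( state & KMP_BARRIER_SLEEP_STATE )
        state &= ~KMP_BARRIER_SLEEP_STATE;   /* waiter advertised it may sleep */
    return state + KMP_BARRIER_STATE_BUMP;   /* next go/arrived generation */
}
#endif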
01556 
01557 
01558 enum barrier_type {
01559     bs_plain_barrier = 0,       /* 0, All non-fork/join barriers (except reduction barriers if enabled) */
01560     bs_forkjoin_barrier,        /* 1, All fork/join (parallel region) barriers */
01561     #if KMP_FAST_REDUCTION_BARRIER
01562         bs_reduction_barrier,   /* 2, All barriers that are used in reduction */
01563     #endif // KMP_FAST_REDUCTION_BARRIER
01564     bs_last_barrier             /* Just a placeholder to mark the end */
01565 };
01566 
01567 // to work with reduction barriers just like with plain barriers
01568 #if !KMP_FAST_REDUCTION_BARRIER
01569     #define bs_reduction_barrier bs_plain_barrier
01570 #endif // KMP_FAST_REDUCTION_BARRIER
01571 
01572 typedef enum kmp_bar_pat {      /* Barrier communication patterns */
01573     bp_linear_bar = 0,          /* Single level (degenerate) tree */
01574     bp_tree_bar = 1,            /* Balanced tree with branching factor 2^n */
01575     bp_hyper_bar = 2,           /* Hypercube-embedded tree with min branching factor 2^n */
01576     bp_last_bar = 3             /* Placeholder to mark the end */
01577 } kmp_bar_pat_e;
01578 
01579 /* Thread barrier needs volatile barrier fields */
01580 typedef struct KMP_ALIGN_CACHE kmp_bstate {
01581     volatile kmp_uint   b_arrived;              /* STATE => task reached synch point. */
01582     #if (KMP_PERF_V19 == KMP_ON)
01583         KMP_ALIGN_CACHE
01584     #endif
01585     volatile kmp_uint   b_go;                   /* STATE => task should proceed.      */
01586 } kmp_bstate_t;
01587 
01588 union KMP_ALIGN_CACHE kmp_barrier_union {
01589     double       b_align;        /* use worst case alignment */
01590     char         b_pad[ KMP_PAD(kmp_bstate_t, CACHE_LINE) ];
01591     kmp_bstate_t bb;
01592 };
01593 
01594 typedef union kmp_barrier_union kmp_balign_t;
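/*
    Editor's note: the b_pad member sizes the union to a whole number of cache
    lines so adjacent kmp_balign_t objects never share a line.  Assuming
    KMP_PAD(t, sz) rounds sizeof(t) up to a multiple of sz, the property below
    holds; a sketch, not part of this header.
*/
#if 0 /* illustrative example only */
KMP_BUILD_ASSERT( sizeof( kmp_balign_t ) % CACHE_LINE == 0 );
#endif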
01595 
01596 /* Team barrier needs only non-volatile arrived counter */
01597 union KMP_ALIGN_CACHE kmp_barrier_team_union {
01598     double       b_align;        /* use worst case alignment */
01599     char         b_pad[ CACHE_LINE ];
01600     struct {
01601         kmp_uint     b_arrived;       /* STATE => task reached synch point. */
01602     };
01603 };
01604 
01605 typedef union kmp_barrier_team_union kmp_balign_team_t;
01606 
01607 /*
01608  * Padding for Linux* OS pthreads condition variables and mutexes used to signal
01609  * threads when a condition changes.  This works around an NPTL bug in which
01610  * padding was added to pthread_cond_t, causing the initialization routine to
01611  * write outside of the structure when compiled against pre-NPTL threads.
01612  */
01613 
01614 #if KMP_OS_WINDOWS
01615 typedef struct kmp_win32_mutex
01616 {
01617     /* The Lock */
01618     CRITICAL_SECTION cs;
01619 } kmp_win32_mutex_t;
01620 
01621 typedef struct kmp_win32_cond
01622 {
01623     /* Count of the number of waiters. */
01624     int waiters_count_;
01625 
01626     /* Serialize access to <waiters_count_> */
01627     kmp_win32_mutex_t waiters_count_lock_;
01628 
01629     /* Number of threads to release via a <cond_broadcast> or a */
01630     /* <cond_signal> */
01631     int release_count_;
01632 
01633     /* Keeps track of the current "generation" so that we don't allow */
01634     /* one thread to steal all the "releases" from the broadcast. */
01635     int wait_generation_count_;
01636 
01637     /* A manual-reset event that's used to block and release waiting */
01638     /* threads. */
01639     HANDLE event_;
01640 } kmp_win32_cond_t;
01641 #endif
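/*
    Editor's note: a schematic (not part of this header) of a broadcast over
    the emulated condition variable above: publish a release quota and a new
    generation, then set the manual-reset event so every current waiter runs
    while later generations keep blocking.  The function name is hypothetical,
    and real code would hold waiters_count_lock_ around the updates.
*/
#if 0 /* illustrative example only */
static void
__kmp_example_cond_broadcast( kmp_win32_cond_t *cv )
{
    if ( cv->waiters_count_ > 0 ) {
        cv->release_count_ = cv->waiters_count_;
        cv->wait_generation_count_++;
        SetEvent( cv->event_ );   /* wake all waiters of this generation */
    }
}
#endif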
01642 
01643 #if KMP_OS_UNIX
01644 
01645 union KMP_ALIGN_CACHE kmp_cond_union {
01646     double              c_align;
01647     char                c_pad[ CACHE_LINE ];
01648     pthread_cond_t      c_cond;
01649 };
01650 
01651 typedef union kmp_cond_union kmp_cond_align_t;
01652 
01653 union KMP_ALIGN_CACHE kmp_mutex_union {
01654     double              m_align;
01655     char                m_pad[ CACHE_LINE ];
01656     pthread_mutex_t     m_mutex;
01657 };
01658 
01659 typedef union kmp_mutex_union kmp_mutex_align_t;
01660 
01661 #endif /* KMP_OS_UNIX */
01662 
01663 typedef struct kmp_desc_base {
01664     void    *ds_stackbase;
01665     size_t            ds_stacksize;
01666     int               ds_stackgrow;
01667     kmp_thread_t      ds_thread;
01668     volatile int      ds_tid;
01669     int               ds_gtid;
01670 #if KMP_OS_WINDOWS
01671     volatile int      ds_alive;
01672     DWORD             ds_thread_id;
01673         /*
01674             ds_thread keeps the thread handle on Windows* OS. It is enough for RTL purposes.
01675             However, debugger support (libomp_db) cannot work with handles, because they are not
01676             comparable. For example, the debugger requests info about the thread with handle h.
01677             h is valid within the debugger process, but meaningless within the debuggee process.
01678             Even if h is duplicated via DuplicateHandle(), so that the result h' is valid within
01679             the debuggee process, h' is a *new* handle which does *not* equal any other handle
01680             in the debuggee... The only way to compare handles is to convert them to system-wide
01681             ids. The GetThreadId() function is available only in Longhorn and Server 2003. :-(
01682             In contrast, GetCurrentThreadId() is available on all Windows* OS flavours (including
01683             Windows* 95). Thus, we have to get the thread id by calling GetCurrentThreadId() from
01684             within the thread itself and save it to let libomp_db identify threads.
01685         */
01686 #endif /* KMP_OS_WINDOWS */
01687 } kmp_desc_base_t;
01688 
01689 typedef union KMP_ALIGN_CACHE kmp_desc {
01690     double           ds_align;        /* use worst case alignment */
01691     char             ds_pad[ KMP_PAD(kmp_desc_base_t, CACHE_LINE) ];
01692     kmp_desc_base_t  ds;
01693 } kmp_desc_t;
01694 
01695 
01696 typedef struct kmp_local {
01697     volatile int           this_construct; /* count of single's encountered by thread */
01698     volatile int           last_construct; /* cache for team's count used by old algorithm */
01699     void                  *reduce_data;
01700 #if KMP_USE_BGET
01701     void                  *bget_data;
01702     void                  *bget_list;
01703 #if ! USE_CMP_XCHG_FOR_BGET
01704 #ifdef USE_QUEUING_LOCK_FOR_BGET
01705     kmp_lock_t             bget_lock;      /* Lock for accessing bget free list */
01706 #else
01707     kmp_bootstrap_lock_t   bget_lock;      /* Lock for accessing bget free list */
01708                                            /* Must be bootstrap lock so we can use it at library shutdown */
01709 #endif /* USE_QUEUING_LOCK_FOR_BGET */
01710 #endif /* ! USE_CMP_XCHG_FOR_BGET */
01711 #endif /* KMP_USE_BGET */
01712 
01713 #ifdef BUILD_TV
01714     struct tv_data        *tv_data;
01715 #endif
01716 
01717     PACKED_REDUCTION_METHOD_T packed_reduction_method; /* stored by __kmpc_reduce*(), used by __kmpc_end_reduce*() */
01718 
01719 } kmp_local_t;
01720 
01721 /* Record for holding the values of the internal controls stack records */
01722 
01723 typedef struct kmp_internal_control {
01724     int           serial_nesting_level;  /* corresponds to the value of the th_team_serialized field */
01725     int           nested;                /* internal control for nested parallelism (per thread) */
01726     int           dynamic;               /* internal control for dynamic adjustment of threads (per thread) */
01727     int           nproc;                 /* internal control for # of threads for next parallel region (per thread) */
01728     int           blocktime;             /* internal control for blocktime */
01729     int           bt_intervals;          /* internal control for blocktime intervals */
01730     int           bt_set;                /* internal control for whether blocktime is explicitly set */
01731 #if OMP_30_ENABLED
01732     int           max_active_levels;     /* internal control for max_active_levels */
01733     kmp_r_sched_t sched;                 /* internal control for runtime schedule {sched,chunk} pair */
01734 #endif // OMP_30_ENABLED
01735 #if OMP_40_ENABLED
01736     kmp_proc_bind_t proc_bind;           /* internal control for affinity  */
01737 #endif // OMP_40_ENABLED
01738     struct kmp_internal_control *next;
01739 
01740 } kmp_internal_control_t;
01741 
01742 #if OMP_30_ENABLED
01743 static inline void
01744 copy_icvs( kmp_internal_control_t *dst, kmp_internal_control_t *src ) {
01745     // int serial_nesting_level;        // Skip. There was no copy of this field in the original code.
01746     dst->nested  = src->nested;
01747     dst->dynamic = src->dynamic;
01748     dst->nproc   = src->nproc;
01749     dst->blocktime = src->blocktime;
01750     dst->bt_intervals = src->bt_intervals;
01751     dst->bt_set = src->bt_set;
01752 #if OMP_30_ENABLED
01753     dst->max_active_levels = src->max_active_levels;
01754     dst->sched = src->sched;
01755 #endif
01756 #if OMP_40_ENABLED
01757     dst->proc_bind = src->proc_bind;
01758 #endif
01759     //struct kmp_internal_control *next; // Skip. There was no copy of this field in the original code.
01760 }
01761 #endif // OMP_30_ENABLED
01762 
01763 #if OMP_30_ENABLED
01764 
01765     #define get__blocktime( xteam, xtid )     ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime)
01766     #define get__bt_set( xteam, xtid )        ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set)
01767     #define get__bt_intervals( xteam, xtid )  ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals)
01768 
01769     #define get__nested_2(xteam,xtid)         ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nested)
01770     #define get__dynamic_2(xteam,xtid)        ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic)
01771     #define get__nproc_2(xteam,xtid)          ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc)
01772     #define get__sched_2(xteam,xtid)          ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched)
01773 
01774     #define set__blocktime_team( xteam, xtid, xval ) \
01775             ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime )    = (xval) )
01776 
01777     #define set__bt_intervals_team( xteam, xtid, xval ) \
01778             ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals ) = (xval) )
01779 
01780     #define set__bt_set_team( xteam, xtid, xval ) \
01781             ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set )       = (xval) )
01782 
01783 
01784 
01785     #define set__nested( xthread, xval )                            \
01786             ( ( (xthread)->th.th_serial_team->t.t_threads[0]                                ->th.th_current_task->td_icvs.nested ) = \
01787               ( (xthread)->th.th_team       ->t.t_threads[((xthread)->th.th_info.ds.ds_tid)]->th.th_current_task->td_icvs.nested ) = \
01788               (xval) )
01789     #define get__nested( xthread ) \
01790             ( ( (xthread)->th.th_team       ->t.t_threads[((xthread)->th.th_info.ds.ds_tid)]->th.th_current_task->td_icvs.nested ) \
01791             ? (FTN_TRUE) : (FTN_FALSE) )
01792 
01793     #define set__dynamic( xthread, xval )                            \
01794             ( ( (xthread)->th.th_serial_team->t.t_threads[0]                                ->th.th_current_task->td_icvs.dynamic ) = \
01795               ( (xthread)->th.th_team       ->t.t_threads[((xthread)->th.th_info.ds.ds_tid)]->th.th_current_task->td_icvs.dynamic ) = \
01796               (xval) )
01797     #define get__dynamic( xthread ) \
01798             ( ( (xthread)->th.th_team       ->t.t_threads[((xthread)->th.th_info.ds.ds_tid)]->th.th_current_task->td_icvs.dynamic ) \
01799             ? (FTN_TRUE) : (FTN_FALSE) )
01800 
01801     #define set__nproc( xthread, xval )                            \
01802             ( ( (xthread)->th.th_serial_team->t.t_threads[0]                                ->th.th_current_task->td_icvs.nproc ) = \
01803               ( (xthread)->th.th_team       ->t.t_threads[((xthread)->th.th_info.ds.ds_tid)]->th.th_current_task->td_icvs.nproc ) = \
01804               (xval) )
01805 
01806     #define set__nproc_p( xthread, xval )                            \
01807             (                                                        \
01808               ( (xthread)->th.th_team       ->t.t_threads[((xthread)->th.th_info.ds.ds_tid)]->th.th_current_task->td_icvs.nproc ) = \
01809               (xval) )
01810 
01811     #define set__max_active_levels( xthread, xval )                            \
01812             ( ( (xthread)->th.th_serial_team->t.t_threads[0]                                ->th.th_current_task->td_icvs.max_active_levels ) = \
01813               ( (xthread)->th.th_team       ->t.t_threads[((xthread)->th.th_info.ds.ds_tid)]->th.th_current_task->td_icvs.max_active_levels ) = \
01814               (xval) )
01815 
01816     #define set__sched( xthread, xval )                            \
01817             ( ( (xthread)->th.th_serial_team->t.t_threads[0]                                ->th.th_current_task->td_icvs.sched ) = \
01818               ( (xthread)->th.th_team       ->t.t_threads[((xthread)->th.th_info.ds.ds_tid)]->th.th_current_task->td_icvs.sched ) = \
01819               (xval) )
01820 
01821 #if OMP_40_ENABLED
01822 
01823     #define set__proc_bind( xthread, xval )                          \
01824             (                                                        \
01825               ( (xthread)->th.th_team       ->t.t_threads[((xthread)->th.th_info.ds.ds_tid)]->th.th_current_task->td_icvs.proc_bind ) = \
01826               (xval) )
01827 
01828     #define get__proc_bind( xthread ) \
01829             ( (xthread)->th.th_team       ->t.t_threads[((xthread)->th.th_info.ds.ds_tid)]->th.th_current_task->td_icvs.proc_bind )
01830 
01831 #endif /* OMP_40_ENABLED */
01832 
01833 #else
01834 
01835     #define get__blocktime( xteam, xtid )    ((xteam)->t.t_set_blocktime[   (xtid)])
01836     #define get__bt_set( xteam, xtid )       ((xteam)->t.t_set_bt_set[      (xtid)])
01837     #define get__bt_intervals( xteam, xtid ) ((xteam)->t.t_set_bt_intervals[(xtid)])
01838 
01839     #define set__nested( xthread, xval )                            \
01840             ( ( (xthread)->th.th_serial_team->t.t_set_nested[0] ) = \
01841               ( (xthread)->th.th_team->t.t_set_nested[((xthread)->th.th_info.ds.ds_tid)] ) = \
01842               (xval) )
01843     #define get__nested( xthread ) \
01844             ( ( (xthread)->th.th_team->t.t_set_nested[((xthread)->th.th_info.ds.ds_tid)] ) \
01845             ? (FTN_TRUE) : (FTN_FALSE) )
01846 
01847     #define set__dynamic( xthread, xval )                            \
01848             ( ( (xthread)->th.th_serial_team->t.t_set_dynamic[0] ) = \
01849               ( (xthread)->th.th_team->t.t_set_dynamic[((xthread)->th.th_info.ds.ds_tid)] ) = \
01850               (xval) )
01851     #define get__dynamic( xthread ) \
01852             ( ( (xthread)->th.th_team->t.t_set_dynamic[((xthread)->th.th_info.ds.ds_tid)] ) \
01853             ? (FTN_TRUE) : (FTN_FALSE) )
01854 
01855     #define set__nproc( xthread, xval )                            \
01856             ( ( (xthread)->th.th_serial_team->t.t_set_nproc[0] ) = \
01857               ( (xthread)->th.th_team->t.t_set_nproc[((xthread)->th.th_info.ds.ds_tid)] ) = \
01858               (xval) )
01859 
01860     #define set__nproc_p( xthread, xval )                                                   \
01861             ( ( (xthread)->th.th_team->t.t_set_nproc[((xthread)->th.th_info.ds.ds_tid)] ) = (xval) )
01862 
01863     #define set__blocktime_team( xteam, xtid, xval ) \
01864             ( ( (xteam)->t.t_set_blocktime[(xtid)] ) = (xval) )
01865 
01866     #define set__bt_intervals_team( xteam, xtid, xval ) \
01867             ( ( (xteam)->t.t_set_bt_intervals[(xtid)] ) = (xval) )
01868 
01869     #define set__bt_set_team( xteam, xtid, xval ) \
01870             ( ( (xteam)->t.t_set_bt_set[(xtid)] ) = (xval) )
01871 
01872     #define get__nested_2(xteam,xtid)  ( (xteam)->t.t_set_nested[(xtid)] )
01873     #define get__dynamic_2(xteam,xtid) ( (xteam)->t.t_set_dynamic[(xtid)] )
01874     #define get__nproc_2(xteam,xtid)   ( (xteam)->t.t_set_nproc[(xtid)] )
01875     #define get__sched_2(xteam,xtid)   ( (xteam)->t.t_set_sched[(xtid)] )
01876 
01877 
01878 #endif
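/*
    Editor's note: both branches above expose the same accessor interface; a
    sketch (not part of this header) of how a runtime entry point might sit on
    top of it.  The function name is hypothetical, and kmp_info_t is defined
    later in this file.
*/
#if 0 /* illustrative example only */
static void
__kmp_example_set_nested( kmp_info_t *thread, int flag )
{
    set__nested( thread, flag ? TRUE : FALSE );
}
#endif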
01879 
01880 #if OMP_30_ENABLED
01881 /* ------------------------------------------------------------------------ */
01882 // OpenMP tasking data structures
01883 //
01884 
01885 typedef enum kmp_tasking_mode {
01886     tskm_immediate_exec = 0,
01887     tskm_extra_barrier = 1,
01888     tskm_task_teams = 2,
01889     tskm_max = 2
01890 } kmp_tasking_mode_t;
01891 
01892 extern kmp_tasking_mode_t __kmp_tasking_mode;         /* determines how/when to execute tasks */
01893 extern kmp_int32 __kmp_task_stealing_constraint;
01894 
01895 /* NOTE: kmp_taskdata_t and kmp_task_t structures are allocated in a single block with taskdata first */
01896 #define KMP_TASK_TO_TASKDATA(task)     (((kmp_taskdata_t *) task) - 1)
01897 #define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *) (taskdata + 1)
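/*
    Editor's note: a sketch (not part of this header) of the allocation layout
    behind the two macros above.  One block holds the taskdata immediately
    followed by the task, so each conversion is a single pointer offset:

        [ kmp_taskdata_t | kmp_task_t | private vars ... ]

    The helper name is hypothetical.
*/
#if 0 /* illustrative example only */
static kmp_task_t *
__kmp_example_task_of( kmp_taskdata_t *taskdata )
{
    kmp_task_t *task = KMP_TASKDATA_TO_TASK( taskdata );
    /* and the round trip: KMP_TASK_TO_TASKDATA( task ) == taskdata */
    return task;
}
#endif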
01898 
01899 // The tt_found_tasks flag is a signal to all threads in the team that tasks were spawned and
01900 // queued since the previous barrier release.
01901 // State is used to alternate task teams for successive barriers
01902 #define KMP_TASKING_ENABLED(task_team,state) \
01903     ((TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE) && \
01904      (TCR_4((task_team)->tt.tt_state)       == (state)))
01905 
01912 typedef kmp_int32 (* kmp_routine_entry_t)( kmp_int32, void * );
01913 
01914 /*  sizeof_kmp_task_t passed as arg to kmpc_omp_task call  */
01917 typedef struct kmp_task {                   /* GEH: Shouldn't this be aligned somehow? */
01918     void *              shareds;            
01919     kmp_routine_entry_t routine;            
01920     kmp_int32           part_id;            
01921     /*  private vars  */
01922 } kmp_task_t;
01927 #if OMP_40_ENABLED
01928 typedef struct kmp_taskgroup {
01929     kmp_uint32            count;   // number of allocated and not yet complete tasks
01930     struct kmp_taskgroup *parent;  // parent taskgroup
01931 } kmp_taskgroup_t;
01932 #endif
01933 
01934 #ifdef BUILD_TIED_TASK_STACK
01935 
01936 /* Tied Task stack definitions */
01937 typedef struct kmp_stack_block {
01938     kmp_taskdata_t *          sb_block[ TASK_STACK_BLOCK_SIZE ];
01939     struct kmp_stack_block *  sb_next;
01940     struct kmp_stack_block *  sb_prev;
01941 } kmp_stack_block_t;
01942 
01943 typedef struct kmp_task_stack {
01944     kmp_stack_block_t         ts_first_block;  // first block of stack entries
01945     kmp_taskdata_t **         ts_top;          // pointer to the top of stack
01946     kmp_int32                 ts_entries;      // number of entries on the stack
01947 } kmp_task_stack_t;
01948 
01949 #endif // BUILD_TIED_TASK_STACK
01950 
01951 typedef struct kmp_tasking_flags {          /* Total struct must be exactly 32 bits */
01952     /* Compiler flags */                    /* Total compiler flags must be 16 bits */
01953     unsigned tiedness    : 1;               /* task is either tied (1) or untied (0) */
01954     unsigned final       : 1;               /* task is final(1) so execute immediately */
01955     unsigned merged_if0  : 1;               /* no __kmpc_task_{begin/complete}_if0 calls in if0 code path */
01956     unsigned reserved13  : 13;              /* reserved for compiler use */
01957 
01958     /* Library flags */                     /* Total library flags must be 16 bits */
01959     unsigned tasktype    : 1;               /* task is either explicit(1) or implicit (0) */
01960     unsigned task_serial : 1;               /* this task is executed immediately (1) or deferred (0) */
01961     unsigned tasking_ser : 1;               /* all tasks in team are either executed immediately (1) or may be deferred (0) */
01962     unsigned team_serial : 1;               /* entire team is serial (1) [1 thread] or parallel (0) [>= 2 threads] */
01963                                             /* If either team_serial or tasking_ser is set, task team may be NULL */
01964     /* Task State Flags: */
01965     unsigned started     : 1;               /* 1==started, 0==not started     */
01966     unsigned executing   : 1;               /* 1==executing, 0==not executing */
01967     unsigned complete    : 1;               /* 1==complete, 0==not complete   */
01968     unsigned freed       : 1;               /* 1==freed, 0==allocated         */
01969     unsigned native      : 1;               /* 1==gcc-compiled task, 0==intel */
01970     unsigned reserved31  : 7;               /* reserved for library use */
01971 
01972 } kmp_tasking_flags_t;
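/*
    Editor's note: the comments above require 16 compiler bits + 16 library
    bits, i.e. a 32-bit struct.  A compile-time check in the spirit of the
    KMP_BUILD_ASSERT used later in this file would be (a sketch, not part of
    the header):
*/
#if 0 /* illustrative example only */
KMP_BUILD_ASSERT( sizeof( kmp_tasking_flags_t ) == 4 );
#endif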
01973 
01974 
01975 struct kmp_taskdata {                                 /* aligned during dynamic allocation       */
01976     kmp_int32               td_task_id;               /* id, assigned by debugger                */
01977     kmp_tasking_flags_t     td_flags;                 /* task flags                              */
01978     kmp_team_t *            td_team;                  /* team for this task                      */
01979     kmp_info_p *            td_alloc_thread;          /* thread that allocated data structures   */
01980                                                       /* Currently not used except for perhaps IDB */
01981     kmp_taskdata_t *        td_parent;                /* parent task                             */
01982     kmp_int32               td_level;                 /* task nesting level                      */
01983     ident_t *               td_ident;                 /* task identifier                         */
01984                             // Taskwait data.
01985     ident_t *               td_taskwait_ident;
01986     kmp_uint32              td_taskwait_counter;
01987     kmp_int32               td_taskwait_thread;       /* gtid + 1 of thread encountered taskwait */
01988     kmp_internal_control_t  td_icvs;                  /* Internal control variables for the task */
01989     volatile kmp_uint32     td_allocated_child_tasks;  /* Child tasks (+ current task) not yet deallocated */
01990     volatile kmp_uint32     td_incomplete_child_tasks; /* Child tasks not yet complete */
01991 #if OMP_40_ENABLED
01992     kmp_taskgroup_t *       td_taskgroup;         // Each task keeps pointer to its current taskgroup
01993 #endif
01994 #if OMPT_SUPPORT
01995     ompt_task_info_t       ompt_task_info;
01996 #endif
01997     _Quad                   td_dummy;             // Align structure 16-byte size since allocated just before kmp_task_t
01998 }; // struct kmp_taskdata
01999 
02000 // Make sure padding above worked
02001 KMP_BUILD_ASSERT( sizeof(kmp_taskdata_t) % sizeof(void *) == 0 );
02002 
02003 // Data for task team but per thread
02004 typedef struct kmp_base_thread_data {
02005     kmp_info_p *            td_thr;                // Pointer back to thread info
02006                                                    // Used only in __kmp_execute_tasks; may not be available until the task is queued?
02007     kmp_bootstrap_lock_t    td_deque_lock;         // Lock for accessing deque
02008     kmp_taskdata_t **       td_deque;              // Deque of tasks encountered by td_thr, dynamically allocated
02009     kmp_uint32              td_deque_head;         // Head of deque (will wrap)
02010     kmp_uint32              td_deque_tail;         // Tail of deque (will wrap)
02011     kmp_int32               td_deque_ntasks;       // Number of tasks in deque
02012                                                    // GEH: shouldn't this be volatile since used in while-spin?
02013     kmp_int32               td_deque_last_stolen;  // Thread number of last successful steal
02014 #ifdef BUILD_TIED_TASK_STACK
02015     kmp_task_stack_t        td_susp_tied_tasks;    // Stack of suspended tied tasks for task scheduling constraint
02016 #endif // BUILD_TIED_TASK_STACK
02017 } kmp_base_thread_data_t;
02018 
02019 typedef union KMP_ALIGN_CACHE kmp_thread_data {
02020     kmp_base_thread_data_t  td;
02021     double                  td_align;       /* use worst case alignment */
02022     char                    td_pad[ KMP_PAD(kmp_base_thread_data_t, CACHE_LINE) ];
02023 } kmp_thread_data_t;
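/*
    Editor's note: td_deque_head and td_deque_tail wrap modulo TASK_DEQUE_SIZE;
    because the size is a power of two, the wrap is a mask.  A sketch (not part
    of this header) of a push at the tail of a thread's own deque, with the
    locking that real code takes on td_deque_lock elided.  The function name is
    hypothetical.
*/
#if 0 /* illustrative example only */
static void
__kmp_example_push_task( kmp_base_thread_data_t *td, kmp_taskdata_t *t )
{
    td->td_deque[ td->td_deque_tail ] = t;
    td->td_deque_tail = ( td->td_deque_tail + 1 ) & TASK_DEQUE_MASK;
    td->td_deque_ntasks++;    /* real code updates this under td_deque_lock */
}
#endif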
02024 
02025 
02026 // Data for task teams which are used when tasking is enabled for the team
02027 typedef struct kmp_base_task_team {
02028     kmp_bootstrap_lock_t    tt_threads_lock;       /* Lock used to allocate per-thread part of task team */
02029                                                    /* must be bootstrap lock since used at library shutdown*/
02030     kmp_task_team_t *       tt_next;               /* For linking the task team free list */
02031     kmp_thread_data_t *     tt_threads_data;       /* Array of per-thread structures for task team */
02032                                                    /* Data survives task team deallocation */
02033     kmp_int32               tt_found_tasks;        /* Have we found tasks and queued them while executing this team? */
02034                                                    /* TRUE means tt_threads_data is set up and initialized */
02035     kmp_int32               tt_nproc;              /* #threads in team           */
02036     kmp_int32               tt_max_threads;        /* number of entries allocated for threads_data array */
02037 
02038     KMP_ALIGN_CACHE
02039     volatile kmp_uint32     tt_unfinished_threads; /* #threads still active      */
02040 
02041     KMP_ALIGN_CACHE
02042     volatile kmp_uint32     tt_active;             /* is the team still actively executing tasks */
02043 
02044     KMP_ALIGN_CACHE
02045     volatile kmp_uint32     tt_ref_ct;             /* #threads accessing struct  */
02046                                                    /* (not incl. master)         */
02047     kmp_int32               tt_state;              /* alternating 0/1 for task team identification */
02048                                                    /* Note: VERY sensitive to padding! */
02049 } kmp_base_task_team_t;
02050 
02051 union KMP_ALIGN_CACHE kmp_task_team {
02052     kmp_base_task_team_t tt;
02053     double               tt_align;       /* use worst case alignment */
02054     char                 tt_pad[ KMP_PAD(kmp_base_task_team_t, CACHE_LINE) ];
02055 };
02056 
02057 #endif  // OMP_30_ENABLED
02058 
02059 #if ( USE_FAST_MEMORY == 3 ) || ( USE_FAST_MEMORY == 5 )
02060 // Free lists keep same-size free memory slots for fast memory allocation routines
02061 typedef struct kmp_free_list {
02062     void             *th_free_list_self;   // Self-allocated tasks free list
02063     void             *th_free_list_sync;   // Self-allocated tasks stolen/returned by other threads
02064     void             *th_free_list_other;  // Non-self free list (to be returned to owner's sync list)
02065 } kmp_free_list_t;
02066 #endif
02067 
02068 /* ------------------------------------------------------------------------ */
02069 // OpenMP thread data structures
02070 //
02071 
02072 typedef struct KMP_ALIGN_CACHE kmp_base_info {
02073 /*
02074  * Start with the readonly data which is cache aligned and padded.
02075  * This is written by the master before the thread starts working.
02076  * (uber masters may update themselves later)
02077  * (usage does not consider serialized regions)
02078  */
02079     kmp_desc_t        th_info;
02080     kmp_team_p       *th_team;       /* team we belong to */
02081     kmp_root_p       *th_root;       /* pointer to root of task hierarchy */
02082     kmp_info_p       *th_next_pool;  /* next available thread in the pool */
02083     kmp_disp_t       *th_dispatch;   /* thread's dispatch data */
02084     int               th_in_pool;    /* in thread pool (32 bits for TCR/TCW) */
02085 
02086     /* The following are cached from the team info structure */
02087     /* TODO use these in more places as determined to be needed via profiling */
02088     int               th_team_nproc;      /* number of threads in a team */
02089     kmp_info_p       *th_team_master;     /* the team's master thread */
02090     int               th_team_serialized; /* team is serialized */
02091 
02092     /* The blocktime info is copied from the team struct to the thread struct */
02093     /* at the start of a barrier, and the values stored in the team are used */
02094     /* at points in the code where the team struct is no longer guaranteed   */
02095     /* to exist (from the POV of worker threads).                            */
02096     int               th_team_bt_intervals;
02097     int               th_team_bt_set;
02098 
02099 
02100 #if KMP_OS_WINDOWS || KMP_OS_LINUX
02101     kmp_affin_mask_t  *th_affin_mask; /* thread's current affinity mask */
02102 #endif
02103 
02104 
02105 /*
02106  * The data set by the master at reinit, then R/W by the worker
02107  */
02108     KMP_ALIGN_CACHE int     th_set_nproc;  /* if > 0, then only use this request for the next fork */
02109 #if OMP_40_ENABLED
02110     kmp_proc_bind_t         th_set_proc_bind; /* if != proc_bind_default, use request for next fork */
02111 # if (KMP_OS_WINDOWS || KMP_OS_LINUX)
02112     int                     th_current_place; /* place currently bound to */
02113     int                     th_new_place;     /* place to bind to in par reg */
02114     int                     th_first_place;   /* first place in partition */
02115     int                     th_last_place;    /* last place in partition */
02116 # endif
02117 #endif
02118     kmp_local_t             th_local;
02119     struct private_common  *th_pri_head;
02120 
02121 /*
02122  * Now the data only used by the worker (after initial allocation)
02123  */
02124     /* TODO the first serial team should actually be stored in the info_t
02125      * structure.  this will help reduce initial allocation overhead */
02126     KMP_ALIGN_CACHE kmp_team_p *th_serial_team; /*serialized team held in reserve*/
02127 
02128 #if OMPT_SUPPORT
02129     ompt_thread_info_t     ompt_thread_info;
02130 #endif
02131 
02132 /* The following are also read by the master during reinit */
02133     struct common_table    *th_pri_common;
02134 
02135     volatile kmp_uint32     th_spin_here;   /* thread-local location for spinning */
02136                                             /* while awaiting queuing lock acquire */
02137 
02138     volatile kmp_uint32    *th_sleep_loc;
02139 
02140 /*
02141  * Two variables used for consistency checks, struct cons_header *th_cons and int th_first,
02142  * were moved below from here in order to avoid a performance regression
02143 */
02144     ident_t          *th_ident;
02145     unsigned         th_x;                     // Random number generator data
02146     unsigned         th_a;                     // Random number generator data
02147 
02148 #if OMP_30_ENABLED
02149 /*
02150  * Tasking-related data for the thread
02151  */
02152     kmp_task_team_t    * th_task_team;           // Task team struct
02153     kmp_taskdata_t     * th_current_task;        // Innermost Task being executed
02154     kmp_uint8            th_task_state;          // alternating 0/1 for task team identification
02155 #endif  // OMP_30_ENABLED
02156 
02157     /*
02158      * More stuff for keeping track of active/sleeping threads
02159      * (this part is written by the worker thread)
02160      */
02161     kmp_uint8            th_active_in_pool;      // included in count of
02162                                                  // #active threads in pool
02163     int                  th_active;              // ! sleeping
02164                                                  // 32 bits for TCR/TCW
02165 
02166 
02167     struct cons_header * th_cons;
02168     int                  th_first;
02169 
02170 /*
02171  * Add the synchronizing data which is cache aligned and padded.
02172  */
02173     KMP_ALIGN_CACHE kmp_balign_t      th_bar[ bs_last_barrier ];
02174 
02175     KMP_ALIGN_CACHE volatile     kmp_int32    th_next_waiting;  /* gtid+1 of next thread on lock wait queue, 0 if none */
02176 
02177 #if ( USE_FAST_MEMORY == 3 ) || ( USE_FAST_MEMORY == 5 )
02178     #define NUM_LISTS 4
02179     kmp_free_list_t   th_free_lists[NUM_LISTS];   // Free lists for fast memory allocation routines
02180 #endif
02181 
02182 #if KMP_OS_WINDOWS
02183     kmp_win32_cond_t  th_suspend_cv;
02184     kmp_win32_mutex_t th_suspend_mx;
02185     int               th_suspend_init;
02186 #endif
02187 #if KMP_OS_UNIX
02188     kmp_cond_align_t  th_suspend_cv;
02189     kmp_mutex_align_t th_suspend_mx;
02190     int               th_suspend_init_count;
02191 #endif
02192 
02193 } kmp_base_info_t;
02194 
02195 typedef union KMP_ALIGN_CACHE kmp_info {
02196     double          th_align;        /* use worst case alignment */
02197     char            th_pad[ KMP_PAD(kmp_base_info_t, CACHE_LINE) ];
02198     kmp_base_info_t th;
02199 } kmp_info_t;
02200 
02201 /* ------------------------------------------------------------------------ */
02202 // OpenMP thread team data structures
02203 //
02204 typedef struct kmp_base_data {
02205     volatile kmp_uint32 t_value;
02206 } kmp_base_data_t;
02207 
02208 typedef union KMP_ALIGN_CACHE kmp_sleep_team {
02209     double              dt_align;        /* use worst case alignment */
02210     char                dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ];
02211     kmp_base_data_t     dt;
02212 } kmp_sleep_team_t;
02213 
02214 typedef union KMP_ALIGN_CACHE kmp_ordered_team {
02215     double              dt_align;        /* use worst case alignment */
02216     char                dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ];
02217     kmp_base_data_t     dt;
02218 } kmp_ordered_team_t;
02219 
02220 typedef int     (*launch_t)( int gtid );
02221 
02222 /* Minimum number of ARGV entries to malloc if necessary */
02223 #define KMP_MIN_MALLOC_ARGV_ENTRIES     100
02224 
02225 #if KMP_MIC && OMP_30_ENABLED
02226 # define KMP_BARRIER_ICV_PULL   1
02227 #else
02228 # define KMP_BARRIER_ICV_PULL   0
02229 #endif
02230 
02231 #if (KMP_PERF_V106 == KMP_ON)
02232 //
02233 // Set up how many argv pointers will fit in cache lines containing
02234 // *t_inline_argv. Historically, we have supported at least 96 bytes.
02235 //
02236 // Using a larger value for more space between the master write/worker read
02237 // section and read/write by all section seems to buy more performance
02238 // on EPCC PARALLEL.
02239 //
02240 //# define KMP_INLINE_ARGV_BYTES          ( 2 * CACHE_LINE )
02241 # if KMP_BARRIER_ICV_PULL
02242 #  define KMP_INLINE_ARGV_BYTES          192
02243 //#  define KMP_INLINE_ARGV_BYTES         ( 2 * CACHE_LINE - ( ( 5 * KMP_PTR_SKIP + 10 * sizeof(int) + sizeof(kmp_int64) ) % CACHE_LINE ) )
02244 # elif KMP_ARCH_X86 || KMP_ARCH_X86_64
02245 #  define KMP_INLINE_ARGV_BYTES         ( 4 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + sizeof(kmp_int16) + sizeof(kmp_uint32) ) % CACHE_LINE ) )
02246 # else
02247 #  define KMP_INLINE_ARGV_BYTES         ( 2 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) ) % CACHE_LINE ) )
02248 # endif
02249 # define KMP_INLINE_ARGV_ENTRIES        ( KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP )
02250 #endif
02251 
02252 typedef struct KMP_ALIGN_CACHE kmp_base_team {
02253 /*
02254  * Synchronization Data
02255  */
02256     KMP_ALIGN_CACHE kmp_ordered_team_t       t_ordered;
02257     kmp_balign_team_t        t_bar[ bs_last_barrier ];
02258 
02259     /* count of single directive encountered by team */
02260     volatile int             t_construct;
02261     kmp_lock_t               t_single_lock;  /* team specific lock */
02262 
02263 /*
02264  * Master only
02265  */
02266     KMP_ALIGN_CACHE int      t_master_tid;   /* tid of master in parent team */
02267     int                      t_master_this_cons; /* "this_construct" single counter of master in parent team */
02268     int                      t_master_last_cons; /* "last_construct" single counter of master in parent team */
02269     ident_t                 *t_ident;        /* if volatile, have to change too much other crud to volatile too */
02270     kmp_team_p              *t_parent;       /* parent team */
02271     kmp_team_p              *t_next_pool;    /* next free team in the team pool */
02272     kmp_disp_t              *t_dispatch;     /* thread's dispatch data */
02273 #if OMP_30_ENABLED
02274     kmp_task_team_t         *t_task_team;    /* Task team struct */
02275 #endif /* OMP_30_ENABLED */
02276 #if OMP_40_ENABLED
02277     kmp_proc_bind_t          t_proc_bind;    /* bind type for par region */
02278 #endif // OMP_40_ENABLED
02279 
02280 /*
02281  * Master write, workers read
02282  */
02283     KMP_ALIGN_CACHE
02284     void                     **t_argv;
02285     int                      t_argc;
02286 #if (KMP_PERF_V106 == KMP_ON)
02287     /* swap cache lines  for t_nproc and t_max_argc */
02288     int                      t_nproc;        /* number of threads in team */
02289 #else
02290     int                      t_max_argc;
02291 #endif
02292     microtask_t              t_pkfn;
02293     launch_t                 t_invoke;       /* procedure to launch the microtask */
02294 
02295 #if OMPT_SUPPORT
02296     ompt_team_info_t        ompt_team_info;  
02297 #endif
02298 
02299 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
02300     kmp_int8                 t_fp_control_saved;
02301     kmp_int8                 t_pad2b;
02302     kmp_int16                t_x87_fpu_control_word; /* FP control regs */
02303     kmp_uint32               t_mxcsr;                
02304 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
02305 
02306 #if KMP_BARRIER_ICV_PULL
02307    //
02308    // Note: Putting ICV's before the fp control info causes a very slight
02309    // ~1% improvement for EPCC parallel on fxe256lin01 / 256 threads, but
02310    // causes a 17% regression on fxe64lin01 / 64 threads.
02311    //
02312    kmp_internal_control_t    t_initial_icvs;
02313 #endif // KMP_BARRIER_ICV_PULL
02314 
02315 #if (KMP_PERF_V106 == KMP_ON)
02316     void                    *t_inline_argv[ KMP_INLINE_ARGV_ENTRIES ];
02317 #endif
02318 
02319 #if (KMP_PERF_V19 == KMP_ON)
02320     KMP_ALIGN_CACHE
02321 #endif
02322     kmp_info_t             **t_threads;
02323 #if (KMP_PERF_V106 == KMP_ON)
02324     /* swap cache lines  for t_nproc and t_max_argc */
02325     int                      t_max_argc;
02326 #else
02327     int                      t_nproc;        /* number of threads in team */
02328 #endif
02329     int                      t_max_nproc;    /* maximum threads this team can handle (this is dynamically expandable) */
02330     int                      t_serialized;   /* levels deep of serialized teams */
02331     dispatch_shared_info_t  *t_disp_buffer;  /* buffers for dispatch system */
02332     int                      t_id;           // team's id, assigned by debugger.
02333 #if OMP_30_ENABLED
02334     int                      t_level;        /* nested parallel level */
02335     int                      t_active_level; /* nested active parallel level */
02336     kmp_r_sched_t            t_sched;        /* run-time schedule for the team */
02337 #endif // OMP_30_ENABLED
02338 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
02339     int                      t_first_place;  /* first & last place in      */
02340     int                      t_last_place;   /* parent thread's partition. */
02341                                              /* Restore these values to    */
02342                                              /* master after par region.   */
02343 #endif // OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
02344 #if KMP_MIC
02345     int                      t_size_changed; /* team size was changed?: 0 - no, 1 - yes, -1 - changed via omp_set_num_threads() call */
02346 #endif
02347 
02348 /*
02349  * Read/write by workers as well
02350  */
02351 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
02352     // Using CACHE_LINE=64 reduces memory footprint,
02353     //    but causes a big perf regression of epcc 'parallel' and 'barrier' on fxe256lin01.
02354     // This extra padding serves to fix the performance of epcc 'parallel' and 'barrier' when CACHE_LINE=64.
02355     // TODO: investigate more and get rid of this padding.
02356     char dummy_padding[1024];
02357 #endif
02358     KMP_ALIGN_CACHE
02359 #if OMP_30_ENABLED
02360     kmp_taskdata_t          *t_implicit_task_taskdata;  // Taskdata for the thread's implicit task
02361 #else
02362     // Internal control variables for current thread team
02363     // TODO  Convert these fields to an array of  kmp_internal_control_t which simplifies parameter passing
02364     //       and also prevents performance degradation due to false sharing when all threads set a control var
02365     int                     *t_set_nproc;    /* internal control for # of threads for next
02366                                                 parallel region (per thread) */
02367     int                     *t_set_nested;   /* internal control for nested parallelism (per thread) */
02368     int                     *t_set_dynamic;  /* internal control for dynamic adjustment of threads (per thread) */
02369     int                     *t_set_blocktime; /* internal control for blocktime */
02370     int                     *t_set_bt_intervals; /* internal control for blocktime intervals */
02371     int                     *t_set_bt_set;   /* internal control for whether blocktime is explicitly set */
02372 #endif // OMP_30_ENABLED
02373 
02374     kmp_internal_control_t  *t_control_stack_top;  /* internal control stack for additional nested teams.
02375                                                       for SERIALIZED teams nested 2 or more levels deep */
02376 
02377     int                      t_master_active;/* save on fork, restore on join */
02378     kmp_taskq_t              t_taskq;        /* this team's task queue */
02379     void                    *t_copypriv_data;  /* team specific pointer to copyprivate data array */
02380     kmp_uint32               t_copyin_counter; 
02381 } kmp_base_team_t;
02382 
02383 union KMP_ALIGN_CACHE kmp_team {
02384     kmp_base_team_t     t;
02385     double              t_align;       /* use worst case alignment */
02386     char                t_pad[ KMP_PAD(kmp_base_team_t, CACHE_LINE) ];
02387 };
02388 
02389 
02390 typedef union KMP_ALIGN_CACHE kmp_time_global {
02391     double              dt_align;        /* use worst case alignment */
02392     char                dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ];
02393     kmp_base_data_t     dt;
02394 } kmp_time_global_t;
02395 
02396 typedef struct kmp_base_global {
02397     /* cache-aligned */
02398     kmp_time_global_t   g_time;
02399 
02400     /* non cache-aligned */
02401     volatile int        g_abort;
02402     volatile int        g_done;
02403 
02404     int                 g_dynamic;
02405     enum dynamic_mode   g_dynamic_mode;
02406 
02407 } kmp_base_global_t;
02408 
02409 typedef union KMP_ALIGN_CACHE kmp_global {
02410     kmp_base_global_t   g;
02411     double              g_align;        /* use worst case alignment */
02412     char                g_pad[ KMP_PAD(kmp_base_global_t, CACHE_LINE) ];
02413 } kmp_global_t;
02414 
02415 
02416 typedef struct kmp_base_root {
02417     // TODO: GEH - combine r_active with r_in_parallel: then r_active == (r_in_parallel >= 0)
02418     // TODO: GEH - then replace r_active with t_active_levels if we can to reduce the synch
02419     //             overhead of keeping r_active
02420 
02421     volatile int        r_active;       /* TRUE if some region in a nest has > 1 thread */
02422                                         // GEH: This is misnamed, should be r_in_parallel
02423     volatile int        r_nested;       // TODO: GEH - This is unused, just remove it entirely.
02424     int                 r_in_parallel;  /* keeps a count of active parallel regions per root */
02425                                         // GEH: This is misnamed, should be r_active_levels
02426     kmp_team_t         *r_root_team;
02427     kmp_team_t         *r_hot_team;
02428     kmp_info_t         *r_uber_thread;
02429     kmp_lock_t          r_begin_lock;
02430     volatile int        r_begin;
02431     int                 r_blocktime; /* blocktime for this root and descendants */
02432 } kmp_base_root_t;
02433 
02434 typedef union KMP_ALIGN_CACHE kmp_root {
02435     kmp_base_root_t     r;
02436     double              r_align;        /* use worst case alignment */
02437     char                r_pad[ KMP_PAD(kmp_base_root_t, CACHE_LINE) ];
02438 } kmp_root_t;
02439 
02440 struct fortran_inx_info {
02441     kmp_int32   data;
02442 };
02443 
02444 /* ------------------------------------------------------------------------ */
02445 
02446 /* ------------------------------------------------------------------------ */
02447 /* ------------------------------------------------------------------------ */
02448 
02449 extern int      __kmp_settings;
02450 extern int      __kmp_duplicate_library_ok;
02451 extern int      __kmp_forkjoin_frames;
02452 extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method;
02453 extern int      __kmp_determ_red;
02454 
02455 #ifdef KMP_DEBUG
02456 extern int      kmp_a_debug;
02457 extern int      kmp_b_debug;
02458 extern int      kmp_c_debug;
02459 extern int      kmp_d_debug;
02460 extern int      kmp_e_debug;
02461 extern int      kmp_f_debug;
02462 #endif /* KMP_DEBUG */
02463 
02464 /* For debug information logging using rotating buffer */
02465 #define KMP_DEBUG_BUF_LINES_INIT        512
02466 #define KMP_DEBUG_BUF_LINES_MIN         1
02467 
02468 #define KMP_DEBUG_BUF_CHARS_INIT        128
02469 #define KMP_DEBUG_BUF_CHARS_MIN         2
02470 
02471 extern int     __kmp_debug_buf;            /* TRUE means use buffer, FALSE means print to stderr */
02472 extern int     __kmp_debug_buf_lines;      /* How many lines of debug stored in buffer */
02473 extern int     __kmp_debug_buf_chars;      /* How many characters allowed per line in buffer */
02474 extern int     __kmp_debug_buf_atomic;     /* TRUE means use atomic update of buffer entry pointer */
02475 
02476 extern char   *__kmp_debug_buffer;         /* Debug buffer itself */
02477 extern int     __kmp_debug_count;          /* Counter for number of lines printed in buffer so far */
02478 extern int     __kmp_debug_buf_warn_chars; /* Keep track of char increase recommended in warnings */
02479 /* end rotating debug buffer */
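
/* Example: the variables above describe a fixed-size rotating line buffer: a
   line counter is bumped (atomically when __kmp_debug_buf_atomic is set) and
   writes wrap modulo the line count. A minimal sketch under those assumptions,
   using the GCC __sync builtin; this is not the runtime's own code: */
#if 0   /* illustrative sketch only; not part of kmp.h */
#include <string.h>

#define EX_LINES 512                     /* cf. KMP_DEBUG_BUF_LINES_INIT */
#define EX_CHARS 128                     /* cf. KMP_DEBUG_BUF_CHARS_INIT */

static char         ex_buffer[EX_LINES * EX_CHARS];   /* cf. __kmp_debug_buffer */
static volatile int ex_count = 0;                     /* cf. __kmp_debug_count  */

static void ex_debug_log(const char *msg) {
    /* atomically claim the next line; slot i wraps to i % EX_LINES */
    int line = __sync_fetch_and_add(&ex_count, 1) % EX_LINES;
    char *dst = &ex_buffer[line * EX_CHARS];
    strncpy(dst, msg, EX_CHARS - 1);
    dst[EX_CHARS - 1] = '\0';
}
#endif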
02480 
02481 extern int      __kmp_par_range;           /* +1 => only go parallel for constructs within the range below */
02482 
02483 #define KMP_PAR_RANGE_ROUTINE_LEN       1024
02484 extern char     __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN];
02485 #define KMP_PAR_RANGE_FILENAME_LEN      1024
02486 extern char     __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN];
02487 extern int      __kmp_par_range_lb;
02488 extern int      __kmp_par_range_ub;
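
/* Example: together these controls describe a filter: parallelize a construct
   only if its routine name, file name, and line number fall in the configured
   range. A sketch of such a check (the matching rules here are assumptions;
   the real logic lives in the runtime sources, not in this header): */
#if 0   /* illustrative sketch only; not part of kmp.h */
#include <string.h>

static int ex_par_range_ok(const char *routine, const char *file, int line,
                           const char *want_routine, const char *want_file,
                           int lb, int ub) {
    if (want_routine[0] != '\0' && strcmp(routine, want_routine) != 0)
        return 0;                         /* wrong routine */
    if (want_file[0] != '\0' && strcmp(file, want_file) != 0)
        return 0;                         /* wrong file */
    return lb <= line && line <= ub;      /* in the line range? */
}
#endif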
02489 
02490 /* For printing out dynamic storage map for threads and teams */
02491 extern int      __kmp_storage_map;         /* True means print storage map for threads and teams */
02492 extern int      __kmp_storage_map_verbose; /* True means storage map includes placement info */
02493 extern int      __kmp_storage_map_verbose_specified;
02494 
02495 extern kmp_cpuinfo_t    __kmp_cpuinfo;
02496 extern kmp_uint64       __kmp_cpu_frequency;
02497     // CPU frequency, in Hz. Set by __kmp_runtime_initialize(). 0 means "not set yet",
02498     // ~0 signals an error.
02499 
02500 extern volatile int __kmp_init_serial;
02501 extern volatile int __kmp_init_gtid;
02502 extern volatile int __kmp_init_common;
02503 extern volatile int __kmp_init_middle;
02504 extern volatile int __kmp_init_parallel;
02505 extern volatile int __kmp_init_monitor;
02506 extern volatile int __kmp_init_user_locks;
02507 extern int __kmp_init_counter;
02508 extern int __kmp_root_counter;
02509 extern int __kmp_version;
02510 
02511 /* list of address of allocated caches for commons */
02512 extern kmp_cached_addr_t *__kmp_threadpriv_cache_list;
02513 
02514 /* Barrier algorithm types and options */
02515 extern kmp_uint32    __kmp_barrier_gather_bb_dflt;
02516 extern kmp_uint32    __kmp_barrier_release_bb_dflt;
02517 extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt;
02518 extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt;
02519 extern kmp_uint32    __kmp_barrier_gather_branch_bits  [ bs_last_barrier ];
02520 extern kmp_uint32    __kmp_barrier_release_branch_bits [ bs_last_barrier ];
02521 extern kmp_bar_pat_e __kmp_barrier_gather_pattern      [ bs_last_barrier ];
02522 extern kmp_bar_pat_e __kmp_barrier_release_pattern     [ bs_last_barrier ];
02523 extern char const   *__kmp_barrier_branch_bit_env_name [ bs_last_barrier ];
02524 extern char const   *__kmp_barrier_pattern_env_name    [ bs_last_barrier ];
02525 extern char const   *__kmp_barrier_type_name           [ bs_last_barrier ];
02526 extern char const   *__kmp_barrier_pattern_name        [ bp_last_bar ];
02527 
02528 /* Global Locks */
02529 extern kmp_bootstrap_lock_t __kmp_initz_lock;     /* control initialization */
02530 extern kmp_bootstrap_lock_t __kmp_forkjoin_lock;  /* control fork/join access and load calculation if rml is used */
02531 extern kmp_bootstrap_lock_t __kmp_exit_lock;      /* exit() is not always thread-safe */
02532 extern kmp_bootstrap_lock_t __kmp_monitor_lock;   /* control monitor thread creation */
02533 extern kmp_bootstrap_lock_t __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and __kmp_threads expansion to co-exist */
02534 
02535 extern kmp_lock_t __kmp_global_lock;    /* control OS/global access  */
02536 extern kmp_queuing_lock_t __kmp_dispatch_lock;  /* control dispatch access  */
02537 extern kmp_lock_t __kmp_debug_lock;     /* control I/O access for KMP_DEBUG */
02538 
02539 /* used for yielding spin-waits */
02540 extern unsigned int __kmp_init_wait;    /* initial number of spin-tests   */
02541 extern unsigned int __kmp_next_wait;    /* subsequent number of spin-tests */
02542 
02543 extern enum library_type __kmp_library;
02544 
02545 extern enum sched_type  __kmp_sched;    /* default runtime scheduling */
02546 extern enum sched_type  __kmp_static;   /* default static scheduling method */
02547 extern enum sched_type  __kmp_guided;   /* default guided scheduling method */
02548 #if OMP_30_ENABLED
02549 extern enum sched_type  __kmp_auto;     /* default auto scheduling method */
02550 #endif // OMP_30_ENABLED
02551 extern int              __kmp_chunk;    /* default runtime chunk size */
02552 
02553 extern size_t     __kmp_stksize;        /* stack size per thread         */
02554 extern size_t     __kmp_monitor_stksize;/* stack size for monitor thread */
02555 extern size_t     __kmp_stkoffset;      /* stack offset per thread       */
02556 
02557 extern size_t     __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */
02558 extern int        __kmp_env_chunk;      /* was KMP_CHUNK specified?     */
02559 extern int        __kmp_env_stksize;    /* was KMP_STACKSIZE specified? */
02560 extern int        __kmp_env_omp_stksize;/* was OMP_STACKSIZE specified? */
02561 extern int        __kmp_env_all_threads;    /* was KMP_ALL_THREADS or KMP_MAX_THREADS specified? */
02562 extern int        __kmp_env_omp_all_threads;/* was OMP_THREAD_LIMIT specified? */
02563 extern int        __kmp_env_blocktime;  /* was KMP_BLOCKTIME specified? */
02564 extern int        __kmp_env_checks;     /* was KMP_CHECKS specified?    */
02565 extern int        __kmp_env_consistency_check;     /* was KMP_CONSISTENCY_CHECK specified?    */
02566 extern int        __kmp_generate_warnings; /* should we issue warnings? */
02567 extern int        __kmp_reserve_warn;   /* have we issued reserve_threads warning? */
02568 
02569 #ifdef DEBUG_SUSPEND
02570 extern int        __kmp_suspend_count;  /* count inside __kmp_suspend() */
02571 #endif
02572 
02573 extern kmp_uint32 __kmp_yield_init;
02574 extern kmp_uint32 __kmp_yield_next;
02575 extern kmp_uint32 __kmp_yielding_on;
02576 extern kmp_uint32 __kmp_yield_cycle;
02577 extern kmp_int32  __kmp_yield_on_count;
02578 extern kmp_int32  __kmp_yield_off_count;
02579 
02580 
02581 /* ------------------------------------------------------------------------- */
02582 extern int        __kmp_allThreadsSpecified;
02583 
02584 extern size_t     __kmp_align_alloc;
02585 /* following data protected by initialization routines */
02586 extern int        __kmp_xproc;          /* number of processors in the system */
02587 extern int        __kmp_avail_proc;      /* number of processors available to the process */
02588 extern int        __kmp_sys_min_stksize; /* system-defined minimum stack size */
02589 extern int        __kmp_sys_max_nth;    /* system-imposed maximum number of threads */
02590 extern int        __kmp_max_nth;        /* maximum total number of concurrently-existing threads */
02591 extern int        __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and __kmp_root */
02592 extern int        __kmp_dflt_team_nth;  /* default number of threads in a parallel region a la OMP_NUM_THREADS */
02593 extern int        __kmp_dflt_team_nth_ub; /* upper bound on __kmp_dflt_team_nth, determined at serial initialization */
02594 extern int        __kmp_tp_capacity;    /* capacity of __kmp_threads if threadprivate is used (fixed) */
02595 extern int        __kmp_tp_cached;      /* whether threadprivate cache has been created (__kmpc_threadprivate_cached()) */
02596 extern int        __kmp_dflt_nested;    /* nested parallelism enabled by default a la OMP_NESTED */
02597 extern int        __kmp_dflt_blocktime; /* number of milliseconds to wait before blocking (env setting) */
02598 extern int        __kmp_monitor_wakeups;/* number of times monitor wakes up per second */
02599 extern int        __kmp_bt_intervals;   /* number of monitor timestamp intervals before blocking */
02600 #ifdef KMP_ADJUST_BLOCKTIME
02601 extern int        __kmp_zero_bt;        /* whether blocktime has been forced to zero */
02602 #endif /* KMP_ADJUST_BLOCKTIME */
02603 extern int        __kmp_ht_capable;     /* whether CPUs support Intel(R) Hyper-Threading Technology */
02604 extern int        __kmp_ht_enabled;     /* whether Intel(R) Hyper-Threading Technology is enabled in OS */
02605 extern int        __kmp_ncores;         /* Number of physical procs in HT machine */
02606 extern int        __kmp_ht_log_per_phy; /* Maximum possible number of logical processors per package */
02607 extern int        __kmp_nThreadsPerCore;/* Number of hyperthreads per core in HT machine. */
02608 extern int        __kmp_abort_delay;    /* Number of millisecs to delay on abort for VTune */
02609 
02610 extern int        __kmp_need_register_atfork_specified;
02611 extern int        __kmp_need_register_atfork;/* At initialization, call pthread_atfork to install fork handler */
02612 extern int        __kmp_gtid_mode;      /* Method of getting gtid, values:
02613                                            0 - not set, will be set at runtime
02614                                            1 - using stack search
02615                                            2 - dynamic TLS (pthread_getspecific(Linux* OS/OS X*) or TlsGetValue(Windows* OS))
02616                                            3 - static TLS (__declspec(thread) __kmp_gtid), Linux* OS .so only.
02617                                          */
02618 extern int        __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */
02619 #ifdef KMP_TDATA_GTID
02620 #if KMP_OS_WINDOWS
02621 extern __declspec(thread) int __kmp_gtid; /* This thread's gtid, if __kmp_gtid_mode == 3 */
02622 #else
02623 extern __thread int __kmp_gtid;
02624 #endif /* KMP_OS_WINDOWS - workaround because Intel(R) Many Integrated Core compiler 20110316 doesn't accept __declspec */
02625 #endif
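
/* Example: gtid mode 3 keeps the global thread id in static TLS, so the
   lookup is a single thread-local load. A sketch of that mode in miniature
   (the -1 "no gtid yet" sentinel is an assumption made for the example): */
#if 0   /* illustrative sketch only; not part of kmp.h */
static __thread int ex_gtid = -1;        /* one slot per OS thread */

static void ex_gtid_set(int gtid) { ex_gtid = gtid; }
static int  ex_gtid_get(void)     { return ex_gtid; }
/* modes 1 and 2 instead use a stack search or a
   pthread_getspecific()/TlsGetValue() lookup, per the comment above */
#endif
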
02626 extern int        __kmp_tls_gtid_min;   /* #threads below which use sp search for gtid */
02627 extern int        __kmp_foreign_tp;     /* If true, separate TP var for each foreign thread */
02628 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
02629 extern int        __kmp_inherit_fp_control; /* copy fp creg(s) parent->workers at fork */
02630 extern kmp_int16  __kmp_init_x87_fpu_control_word; /* init thread's FP control reg */
02631 extern kmp_uint32 __kmp_init_mxcsr;      /* init thread's mxcsr */
02632 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
02633 
02634 #if OMP_30_ENABLED
02635 extern int        __kmp_dflt_max_active_levels; /* max_active_levels for nested parallelism enabled by default a la OMP_MAX_ACTIVE_LEVELS */
02636 #endif // OMP_30_ENABLED
02637 
02638 # if KMP_OS_LINUX
02639 extern enum clock_function_type __kmp_clock_function;
02640 extern int __kmp_clock_function_param;
02641 # endif /* KMP_OS_LINUX */
02642 
02643 # ifdef USE_LOAD_BALANCE
02644 extern double      __kmp_load_balance_interval;   /* Interval for the load balance algorithm */
02645 # endif /* USE_LOAD_BALANCE */
02646 
02647 // OpenMP 3.1 - Nested num threads array
02648 struct kmp_nested_nthreads_t {
02649     int * nth;
02650     int   size;
02651     int   used;
02652 };
02653 
02654 extern struct kmp_nested_nthreads_t __kmp_nested_nth;
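
/* Example: a plausible population of this structure for
   OMP_NUM_THREADS="4,3,2", one entry per nesting level (the parsing itself
   lives in the runtime's settings code, so this is only an interpretation
   of the fields): */
#if 0   /* illustrative sketch only; not part of kmp.h */
static int ex_nth_list[] = { 4, 3, 2 };
static struct kmp_nested_nthreads_t ex_nested_nth = {
    ex_nth_list,   /* nth:  thread counts, outermost level first */
    3,             /* size: capacity of the nth array            */
    3              /* used: entries actually filled in           */
};
#endif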
02655 
02656 /* ------------------------------------------------------------------------- */
02657 
02658 /* --------------------------------------------------------------------------- */
02659 /* the following are protected by the fork/join lock */
02660 /* write: lock  read: anytime */
02661 extern          kmp_info_t **__kmp_threads;      /* Descriptors for the threads */
02662 /* read/write: lock */
02663 extern volatile kmp_team_t  *     __kmp_team_pool;
02664 extern volatile kmp_info_t  *     __kmp_thread_pool;
02665 
02666 /* total number of threads reachable from some root thread including all root threads*/
02667 extern volatile int __kmp_nth;
02668 /* total number of threads reachable from some root thread including all root threads,
02669    and those in the thread pool */
02670 extern volatile int __kmp_all_nth;
02671 extern int __kmp_thread_pool_nth;
02672 extern volatile int __kmp_thread_pool_active_nth;
02673 
02674 extern kmp_root_t **__kmp_root;         /* root of thread hierarchy */
02675 /* end data protected by fork/join lock */
02676 /* --------------------------------------------------------------------------- */
02677 
02678 extern kmp_global_t  __kmp_global;         /* global status */
02679 
02680 extern kmp_info_t __kmp_monitor;
02681 extern volatile kmp_uint32 __kmp_team_counter;      // Used by Debugging Support Library.
02682 extern volatile kmp_uint32 __kmp_task_counter;      // Used by Debugging Support Library.
02683 
02684 #define _KMP_GEN_ID( counter )                                         \
02685     (                                                                  \
02686         ~ 0                                                            \
02687     )
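
/* Note: in this configuration _KMP_GEN_ID ignores its counter argument and
   always yields ~0, so KMP_GEN_TASK_ID()/KMP_GEN_TEAM_ID() below return a
   constant sentinel instead of incrementing the debugger counters above. */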
02688 
02689 
02690 
02691 #define KMP_GEN_TASK_ID()    _KMP_GEN_ID( __kmp_task_counter )
02692 #define KMP_GEN_TEAM_ID()    _KMP_GEN_ID( __kmp_team_counter )
02693 
02694 /* ------------------------------------------------------------------------ */
02695 /* ------------------------------------------------------------------------ */
02696 
02697 extern void __kmp_print_storage_map_gtid( int gtid, void *p1, void* p2, size_t size, char const *format, ... );
02698 
02699 extern void __kmp_serial_initialize( void );
02700 extern void __kmp_middle_initialize( void );
02701 extern void __kmp_parallel_initialize( void );
02702 
02703 extern void __kmp_internal_begin( void );
02704 extern void __kmp_internal_end_library( int gtid );
02705 extern void __kmp_internal_end_thread( int gtid );
02706 extern void __kmp_internal_end_atexit( void );
02707 extern void __kmp_internal_end_fini( void );
02708 extern void __kmp_internal_end_dtor( void );
02709 extern void __kmp_internal_end_dest( void* );
02710 
02711 extern int  __kmp_register_root( int initial_thread );
02712 extern void __kmp_unregister_root( int gtid );
02713 
02714 extern int  __kmp_ignore_mppbeg( void );
02715 extern int  __kmp_ignore_mppend( void );
02716 
02717 extern int  __kmp_enter_single( int gtid, ident_t *id_ref, int push_ws );
02718 extern void __kmp_exit_single( int gtid );
02719 
02720 extern void __kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref );
02721 extern void __kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref );
02722 
02723 
02724 #ifdef USE_LOAD_BALANCE
02725 extern int  __kmp_get_load_balance( int );
02726 #endif
02727 
02728 #ifdef BUILD_TV
02729 extern void __kmp_tv_threadprivate_store( kmp_info_t *th, void *global_addr, void *thread_addr );
02730 #endif
02731 
02732 extern int  __kmp_get_global_thread_id( void );
02733 extern int  __kmp_get_global_thread_id_reg( void );
02734 extern void __kmp_exit_thread( int exit_status );
02735 extern void __kmp_abort( char const * format, ... );
02736 extern void __kmp_abort_thread( void );
02737 extern void __kmp_abort_process( void );
02738 extern void __kmp_warn( char const * format, ... );
02739 
02740 extern void __kmp_set_num_threads( int new_nth, int gtid );
02741 
02742 // Returns current thread (pointer to kmp_info_t). Current thread *must* be registered.
02743 inline kmp_info_t * __kmp_entry_thread()
02744 {
02745     int gtid = __kmp_entry_gtid();
02746 
02747     return __kmp_threads[gtid];
02748 }
02749 
02750 #if OMP_30_ENABLED
02751 
02752 extern void __kmp_set_max_active_levels( int gtid, int new_max_active_levels );
02753 extern int  __kmp_get_max_active_levels( int gtid );
02754 extern int  __kmp_get_ancestor_thread_num( int gtid, int level );
02755 extern int  __kmp_get_team_size( int gtid, int level );
02756 extern void __kmp_set_schedule( int gtid, kmp_sched_t new_sched, int chunk );
02757 extern void __kmp_get_schedule( int gtid, kmp_sched_t * sched, int * chunk );
02758 
02759 #endif // OMP_30_ENABLED
02760 
02761 extern unsigned short __kmp_get_random( kmp_info_t * thread );
02762 extern void __kmp_init_random( kmp_info_t * thread );
02763 
02764 extern kmp_r_sched_t __kmp_get_schedule_global( void );
02765 extern void __kmp_adjust_num_threads( int new_nproc );
02766 
02767 extern void * ___kmp_allocate( size_t size KMP_SRC_LOC_DECL );
02768 extern void * ___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL );
02769 extern void   ___kmp_free( void * ptr KMP_SRC_LOC_DECL );
02770 #define __kmp_allocate( size )      ___kmp_allocate( (size) KMP_SRC_LOC_CURR )
02771 #define __kmp_page_allocate( size ) ___kmp_page_allocate( (size) KMP_SRC_LOC_CURR )
02772 #define __kmp_free( ptr )           ___kmp_free( (ptr) KMP_SRC_LOC_CURR )
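
/* Example: the ___kmp_* workers take a trailing source-location argument list
   (KMP_SRC_LOC_DECL) and the __kmp_* macros append the caller's location
   (KMP_SRC_LOC_CURR). A sketch of the same pass-through pattern; the EX_
   expansions are assumptions, not KMP_SRC_LOC_*'s real bodies: */
#if 0   /* illustrative sketch only; not part of kmp.h */
#include <stdlib.h>

#define EX_SRC_LOC_DECL  , char const *file_, int line_   /* in declarations */
#define EX_SRC_LOC_CURR  , __FILE__, __LINE__             /* at call sites   */

static void *ex_allocate_worker(size_t size EX_SRC_LOC_DECL) {
    (void)file_; (void)line_;   /* identify the caller, e.g. for leak reports */
    return malloc(size);
}
#define ex_allocate(size)  ex_allocate_worker((size) EX_SRC_LOC_CURR)
#endif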
02773 
02774 #if USE_FAST_MEMORY
02775 extern void * ___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL );
02776 extern void   ___kmp_fast_free( kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL );
02777 extern void   __kmp_free_fast_memory( kmp_info_t *this_thr );
02778 extern void   __kmp_initialize_fast_memory( kmp_info_t *this_thr );
02779 #define __kmp_fast_allocate( this_thr, size ) ___kmp_fast_allocate( (this_thr), (size) KMP_SRC_LOC_CURR )
02780 #define __kmp_fast_free( this_thr, ptr )      ___kmp_fast_free( (this_thr), (ptr) KMP_SRC_LOC_CURR )
02781 #endif
02782 
02783 extern void * ___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL );
02784 extern void * ___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL );
02785 extern void * ___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL );
02786 extern void   ___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL );
02787 #define __kmp_thread_malloc(  th, size )          ___kmp_thread_malloc(  (th), (size)            KMP_SRC_LOC_CURR )
02788 #define __kmp_thread_calloc(  th, nelem, elsize ) ___kmp_thread_calloc(  (th), (nelem), (elsize) KMP_SRC_LOC_CURR )
02789 #define __kmp_thread_realloc( th, ptr, size )     ___kmp_thread_realloc( (th), (ptr), (size)     KMP_SRC_LOC_CURR )
02790 #define __kmp_thread_free(    th, ptr )           ___kmp_thread_free(    (th), (ptr)             KMP_SRC_LOC_CURR )
02791 
02792 #define KMP_INTERNAL_MALLOC(sz)    malloc(sz)
02793 #define KMP_INTERNAL_FREE(p)       free(p)
02794 #define KMP_INTERNAL_REALLOC(p,sz) realloc((p),(sz))
02795 #define KMP_INTERNAL_CALLOC(n,sz)  calloc((n),(sz))
02796 
02797 extern void __kmp_push_num_threads( ident_t *loc, int gtid, int num_threads );
02798 
02799 #if OMP_40_ENABLED
02800 extern void __kmp_push_proc_bind( ident_t *loc, int gtid, kmp_proc_bind_t proc_bind );
02801 #endif
02802 
02803 extern void __kmp_yield( int cond );
02804 extern void __kmp_release( kmp_info_t *target_thr, volatile kmp_uint *spin,
02805                            enum kmp_mem_fence_type fetchadd_fence );
02806 
02807 extern void __kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid,
02808     enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
02809     kmp_int32 chunk );
02810 extern void __kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid,
02811     enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
02812     kmp_int32 chunk );
02813 extern void __kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid,
02814     enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
02815     kmp_int64 chunk );
02816 extern void __kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid,
02817     enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
02818     kmp_int64 chunk );
02819 
02820 extern int __kmpc_dispatch_next_4( ident_t *loc, kmp_int32 gtid,
02821     kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st );
02822 extern int __kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid,
02823     kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st );
02824 extern int __kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid,
02825     kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st );
02826 extern int __kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid,
02827     kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st );
02828 
02829 extern void __kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid );
02830 extern void __kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid );
02831 extern void __kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid );
02832 extern void __kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid );
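
/* Example: a plausible compiler lowering of a dynamically scheduled loop onto
   the dispatch interface: initialize once, then pull chunks until the runtime
   reports the iteration space exhausted. The schedule constant and the exact
   termination/fini contract are assumptions here: */
#if 0   /* illustrative sketch only; not part of kmp.h */
static void ex_dynamic_loop(ident_t *loc, kmp_int32 gtid, kmp_int32 n) {
    kmp_int32 lb = 0, ub = n - 1, st = 1, last = 0;
    __kmpc_dispatch_init_4(loc, gtid, kmp_sch_dynamic_chunked, lb, ub, st, 1);
    while (__kmpc_dispatch_next_4(loc, gtid, &last, &lb, &ub, &st)) {
        for (kmp_int32 i = lb; i <= ub; i += st) {
            /* loop body for iteration i */
        }
    }
}
#endif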
02833 
02834 
02835 #ifdef KMP_GOMP_COMPAT
02836 
02837 extern void __kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid,
02838     enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
02839     kmp_int32 chunk, int push_ws );
02840 extern void __kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid,
02841     enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
02842     kmp_int32 chunk, int push_ws );
02843 extern void __kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid,
02844     enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
02845     kmp_int64 chunk, int push_ws );
02846 extern void __kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid,
02847     enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
02848     kmp_int64 chunk, int push_ws );
02849 extern void __kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid );
02850 extern void __kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid );
02851 extern void __kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid );
02852 extern void __kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid );
02853 
02854 #endif /* KMP_GOMP_COMPAT */
02855 
02856 
02857 extern kmp_uint32 __kmp_eq_4(  kmp_uint32 value, kmp_uint32 checker );
02858 extern kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker );
02859 extern kmp_uint32 __kmp_lt_4(  kmp_uint32 value, kmp_uint32 checker );
02860 extern kmp_uint32 __kmp_ge_4(  kmp_uint32 value, kmp_uint32 checker );
02861 extern kmp_uint32 __kmp_le_4(  kmp_uint32 value, kmp_uint32 checker );
02862 
02863 extern kmp_uint32 __kmp_eq_8(  kmp_uint64 value, kmp_uint64 checker );
02864 extern kmp_uint32 __kmp_neq_8( kmp_uint64 value, kmp_uint64 checker );
02865 extern kmp_uint32 __kmp_lt_8(  kmp_uint64 value, kmp_uint64 checker );
02866 extern kmp_uint32 __kmp_ge_8(  kmp_uint64 value, kmp_uint64 checker );
02867 extern kmp_uint32 __kmp_le_8(  kmp_uint64 value, kmp_uint64 checker );
02868 
02869 extern kmp_uint32 __kmp_wait_yield_4( kmp_uint32 volatile * spinner, kmp_uint32 checker, kmp_uint32 (*pred) (kmp_uint32, kmp_uint32), void * obj );
02870 extern kmp_uint64 __kmp_wait_yield_8( kmp_uint64 volatile * spinner, kmp_uint64 checker, kmp_uint32 (*pred) (kmp_uint64, kmp_uint64), void * obj );
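
/* Example: the __kmp_*_4/8 predicates plug into __kmp_wait_yield_* to form a
   polite spin-wait. A sketch, assuming the call returns once
   pred(*spinner, checker) becomes true: */
#if 0   /* illustrative sketch only; not part of kmp.h */
static volatile kmp_uint32 ex_flag = 0;

static void ex_wait_for_flag(void) {
    /* spin (yielding between tests) until ex_flag != 0 */
    __kmp_wait_yield_4(&ex_flag, 0, __kmp_neq_4, NULL);
}
#endif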
02871 
02872 extern void __kmp_wait_sleep( kmp_info_t *this_thr, volatile kmp_uint *spinner,
02873                               kmp_uint checker, kmp_int final_spin );
02874 extern void __kmp_infinite_loop( void );
02875 
02876 extern void __kmp_cleanup( void );
02877 
02878 #if KMP_HANDLE_SIGNALS
02879     extern int  __kmp_handle_signals;
02880     extern void __kmp_install_signals( int parallel_init );
02881     extern void __kmp_remove_signals( void );
02882 #endif
02883 
02884 extern void __kmp_clear_system_time( void );
02885 extern void __kmp_read_system_time( double *delta );
02886 
02887 extern void __kmp_check_stack_overlap( kmp_info_t *thr );
02888 
02889 extern void __kmp_expand_host_name( char *buffer, size_t size );
02890 extern void __kmp_expand_file_name( char *result, size_t rlen, char *pattern );
02891 
02892 #if KMP_OS_WINDOWS
02893 extern void __kmp_initialize_system_tick( void );  /* Initialize timer tick value */
02894 #endif
02895 
02896 extern void __kmp_runtime_initialize( void );  /* machine specific initialization */
02897 extern void __kmp_runtime_destroy( void );
02898 
02899 #if KMP_OS_LINUX || KMP_OS_WINDOWS
02900 extern char *__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask);
02901 extern void __kmp_affinity_initialize(void);
02902 extern void __kmp_affinity_uninitialize(void);
02903 extern void __kmp_affinity_set_init_mask(int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */
02904 #if OMP_40_ENABLED
02905 extern void __kmp_affinity_set_place(int gtid);
02906 #endif
02907 extern void __kmp_change_thread_affinity_mask( int gtid, kmp_affin_mask_t *new_mask,
02908                                                kmp_affin_mask_t *old_mask );
02909 extern void __kmp_affinity_determine_capable( const char *env_var );
02910 extern int __kmp_aux_set_affinity(void **mask);
02911 extern int __kmp_aux_get_affinity(void **mask);
02912 extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
02913 extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
02914 extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
02915 extern void __kmp_balanced_affinity( int tid, int team_size );
02916 
02917 #endif /* KMP_OS_LINUX || KMP_OS_WINDOWS */
02918 
02919 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
02920 
02921 extern int __kmp_futex_determine_capable( void );
02922 
02923 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
02924 
02925 extern void __kmp_gtid_set_specific( int gtid );
02926 extern int  __kmp_gtid_get_specific( void );
02927 
02928 extern double __kmp_read_cpu_time( void );
02929 
02930 extern int  __kmp_read_system_info( struct kmp_sys_info *info );
02931 
02932 extern void __kmp_create_monitor( kmp_info_t *th );
02933 
02934 extern void *__kmp_launch_thread( kmp_info_t *thr );
02935 
02936 extern void __kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size );
02937 
02938 #if KMP_OS_WINDOWS
02939 extern int  __kmp_still_running(kmp_info_t *th);
02940 extern int  __kmp_is_thread_alive( kmp_info_t * th, DWORD *exit_val );
02941 extern void __kmp_free_handle( kmp_thread_t tHandle );
02942 #endif
02943 
02944 extern void __kmp_reap_monitor( kmp_info_t *th );
02945 extern void __kmp_reap_worker( kmp_info_t *th );
02946 extern void __kmp_terminate_thread( int gtid );
02947 
02948 extern void __kmp_suspend( int th_gtid, volatile kmp_uint *spinner, kmp_uint checker );
02949 extern void __kmp_resume( int target_gtid, volatile kmp_uint *spinner );
02950 
02951 extern void __kmp_elapsed( double * );
02952 extern void __kmp_elapsed_tick( double * );
02953 
02954 extern void __kmp_enable( int old_state );
02955 extern void __kmp_disable( int *old_state );
02956 
02957 extern void __kmp_thread_sleep( int millis );
02958 
02959 extern void __kmp_common_initialize( void );
02960 extern void __kmp_common_destroy( void );
02961 extern void __kmp_common_destroy_gtid( int gtid );
02962 
02963 #if KMP_OS_UNIX
02964 extern void __kmp_register_atfork( void );
02965 #endif
02966 extern void __kmp_suspend_initialize( void );
02967 extern void __kmp_suspend_uninitialize_thread( kmp_info_t *th );
02968 
02969 extern kmp_info_t * __kmp_allocate_thread( kmp_root_t *root,
02970                                            kmp_team_t *team, int tid);
02971 #if OMP_40_ENABLED
02972 extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
02973 #if OMPT_SUPPORT
02974                                          ompt_parallel_id_t ompt_parallel_id,
02975 #endif
02976                                          kmp_proc_bind_t proc_bind,
02977                                          kmp_internal_control_t *new_icvs,
02978                                          int argc );
02979 #elif OMP_30_ENABLED
02980 extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
02981 #if OMPT_SUPPORT
02982                                          ompt_parallel_id_t ompt_parallel_id,
02983 #endif
02984                                          kmp_internal_control_t *new_icvs,
02985                                          int argc );
02986 #else
02987 extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
02988 #if OMPT_SUPPORT
02989                                          ompt_parallel_id_t ompt_parallel_id,
02990 #endif
02991                                          int new_set_nproc, int new_set_dynamic, int new_set_nested,
02992                                          int new_set_blocktime, int new_bt_intervals, int new_bt_set,
02993                                          int argc );
02994 #endif // OMP_30_ENABLED
02995 extern void __kmp_free_thread( kmp_info_t * );
02996 extern void __kmp_free_team( kmp_root_t *, kmp_team_t * );
02997 extern kmp_team_t * __kmp_reap_team( kmp_team_t * );
02998 
02999 /* ------------------------------------------------------------------------ */
03000 
03001 extern void __kmp_initialize_bget( kmp_info_t *th );
03002 extern void __kmp_finalize_bget( kmp_info_t *th );
03003 
03004 KMP_EXPORT void *kmpc_malloc( size_t size );
03005 KMP_EXPORT void *kmpc_calloc( size_t nelem, size_t elsize );
03006 KMP_EXPORT void *kmpc_realloc( void *ptr, size_t size );
03007 KMP_EXPORT void  kmpc_free( void *ptr );
03008 
03009 /* ------------------------------------------------------------------------ */
03010 /* declarations for internal use */
03011 
03012 extern int  __kmp_barrier( enum barrier_type bt, int gtid, int is_split,
03013                            size_t reduce_size, void *reduce_data, void (*reduce)(void *, void *) );
03014 extern void __kmp_end_split_barrier ( enum barrier_type bt, int gtid );
03015 
03016 extern int __kmp_fork_call( ident_t *loc, int gtid, int exec_master,
03017   kmp_int32 argc, microtask_t microtask, launch_t invoker,
03018 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
03019 #if KMP_ARCH_X86_64 && KMP_OS_LINUX
03020                              va_list *ap
03021 #else
03022                              va_list ap
03023 #endif
03024                              );
03025 
03026 extern void __kmp_join_call( ident_t *loc, int gtid );
03027 
03028 extern void __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team );
03029 extern void __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team );
03030 extern int __kmp_invoke_task_func( int gtid );
03031 extern void __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team );
03032 extern void __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team );
03033 
03034 // should never have been exported
03035 KMP_EXPORT int __kmpc_invoke_task_func( int gtid );
03036 
03037 extern void __kmp_save_internal_controls( kmp_info_t * thread );
03038 extern void __kmp_user_set_library (enum library_type arg);
03039 extern void __kmp_aux_set_library (enum library_type arg);
03040 extern void __kmp_aux_set_stacksize( size_t arg);
03041 extern void __kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid);
03042 extern void __kmp_aux_set_defaults( char const * str, int len );
03043 
03044 /* The functions below are placed here so they can be called from __kmp_aux_env_initialize() in kmp_settings.c */
03045 void kmpc_set_blocktime (int arg);
03046 void ompc_set_nested( int flag );
03047 void ompc_set_dynamic( int flag );
03048 void ompc_set_num_threads( int arg );
03049 
03050 #if OMP_30_ENABLED
03051 extern void __kmp_push_current_task_to_thread( kmp_info_t *this_thr,
03052                   kmp_team_t *team, int tid );
03053 extern void __kmp_pop_current_task_from_thread( kmp_info_t *this_thr );
03054 extern kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid,
03055   kmp_tasking_flags_t *flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
03056   kmp_routine_entry_t task_entry );
03057 extern void __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr,
03058                   kmp_team_t *team, int tid, int set_curr_task );
03059 
03060 extern int  __kmp_execute_tasks( kmp_info_t *thread, kmp_int32 gtid, volatile kmp_uint *spinner,
03061                                  kmp_uint checker, int final_spin, int *thread_finished, 
03062                                  int c = 0 );
03063 extern void __kmp_reap_task_teams( void );
03064 extern void __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread );
03065 extern void __kmp_wait_to_unref_task_teams( void );
03066 extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team );
03067 extern void __kmp_task_team_sync  ( kmp_info_t *this_thr, kmp_team_t *team );
03068 extern void __kmp_task_team_wait  ( kmp_info_t *this_thr,
03069                                     kmp_team_t *team );
03070 extern void __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid );
03071 
03072 #endif // OMP_30_ENABLED
03073 
03074 /* declarations in the assembler library for internal use */
03075 
03076 /* 32-bit */
03077 extern kmp_int32 __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d );
03078 extern kmp_int32 __kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 d );
03079 extern kmp_int32 __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 d );
03080 extern kmp_int64 __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d );
03081 extern kmp_int64 __kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 d );
03082 extern kmp_int64 __kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 d );
03083 
03084 #define KMP_COMPARE_AND_STORE_ACQ8     __kmp_compare_and_store8
03085 #define KMP_COMPARE_AND_STORE_REL8     __kmp_compare_and_store8
03086 extern kmp_int8 __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv );
03087 #define KMP_COMPARE_AND_STORE_ACQ16     __kmp_compare_and_store16
03088 #define KMP_COMPARE_AND_STORE_REL16     __kmp_compare_and_store16
03089 extern kmp_int16 __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
03090 
03091 /* Define KMP_COMPARE_AND_STORE* in kmp_os.h for MIC, here for other platforms */
03092 #ifndef KMP_COMPARE_AND_STORE_ACQ32
03093 #define KMP_COMPARE_AND_STORE_ACQ32     __kmp_compare_and_store32
03094 #define KMP_COMPARE_AND_STORE_REL32     __kmp_compare_and_store32
03095 #endif /* KMP_COMPARE_AND_STORE_ACQ32 */
03096 extern kmp_int32 __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
03097 #ifndef KMP_COMPARE_AND_STORE_ACQ64
03098 #define KMP_COMPARE_AND_STORE_ACQ64     __kmp_compare_and_store64
03099 #define KMP_COMPARE_AND_STORE_REL64     __kmp_compare_and_store64
03100 #endif /* KMP_COMPARE_AND_STORE_ACQ64 */
03101 extern kmp_int32 __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
03102 
03103 #if KMP_ARCH_X86
03104 #define KMP_COMPARE_AND_STORE_PTR(dst,cmp,src) \
03105     KMP_COMPARE_AND_STORE_REL32((volatile kmp_int32 *)dst,(kmp_int32)cmp,(kmp_int32)src)
03106 #else /* 64 bit pointers */
03107 #ifndef KMP_COMPARE_AND_STORE_PTR
03108 #define KMP_COMPARE_AND_STORE_PTR(dst,cmp,src) \
03109     KMP_COMPARE_AND_STORE_REL64((volatile kmp_int64 *)dst,(kmp_int64)cmp,(kmp_int64)src)
03110 #endif /* KMP_COMPARE_AND_STORE_PTR */
03111 #endif /* KMP_ARCH_X86 */
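
/* Example: the classic use of these wrappers is a compare-and-swap retry
   loop. A lock-free list push, assuming the wrappers return nonzero when the
   store happened (the usual CAS-success convention): */
#if 0   /* illustrative sketch only; not part of kmp.h */
typedef struct ex_node { struct ex_node *next; } ex_node_t;

static void ex_push(ex_node_t *volatile *head, ex_node_t *n) {
    ex_node_t *old;
    do {
        old = *head;             /* snapshot the current head  */
        n->next = old;           /* link the new node in front */
    } while (!KMP_COMPARE_AND_STORE_PTR(head, old, n));
}
#endif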
03112 
03113 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
03114 extern void       __kmp_query_cpuid( kmp_cpuinfo_t *p );
03115 #if KMP_MIC
03116 // no routines for floating addition on MIC
03117 #else
03118 extern kmp_real32 __kmp_test_then_add_real32 ( volatile kmp_real32 *addr, kmp_real32 data );
03119 extern kmp_real64 __kmp_test_then_add_real64 ( volatile kmp_real64 *addr, kmp_real64 data );
03120 #endif
03121 extern kmp_int8  __kmp_compare_and_store_ret8(  volatile kmp_int8  *p, kmp_int8  cv, kmp_int8  sv );
03122 extern kmp_int16 __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv );
03123 extern kmp_int32 __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv );
03124 extern kmp_int64 __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv );
03125 
03126 extern kmp_int8  __kmp_xchg_fixed8(  volatile kmp_int8  *addr, kmp_int8  data );
03127 extern kmp_int16 __kmp_xchg_fixed16( volatile kmp_int16 *addr, kmp_int16 data );
03128 extern kmp_int32 __kmp_xchg_fixed32( volatile kmp_int32 *addr, kmp_int32 data );
03129 
03130 #if KMP_MIC
03131 static kmp_real32 __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data ) {
03132     kmp_int32 tmp = __sync_lock_test_and_set( (kmp_int32*)addr, *(kmp_int32*)&data );
03133     return *(kmp_real32*)&tmp;
03134 }
03135 #else
03136 extern kmp_real32 __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data );
03137 #endif // KMP_MIC
03138 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
03139 
03140 #if KMP_ARCH_X86_64
03141 #if KMP_MIC
03142 static kmp_real64 __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data ) {
03143     kmp_int64 tmp = __sync_lock_test_and_set( (kmp_int64*)addr, *(kmp_int64*)&data );
03144     return *(kmp_real64*)&tmp;
03145 }
03146 #else
03147 extern kmp_real64 __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data );
03148 #endif // KMP_MIC
03149 extern kmp_int64 __kmp_xchg_fixed64( volatile kmp_int64 *addr, kmp_int64 data );
03150 #endif /* KMP_ARCH_X86_64 */
03151 
03152 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
03153 
03154 #if KMP_MIC
03155     #if KMP_MIC1
03156         // no SSE in LRB1
03157         static inline void __kmp_load_mxcsr( kmp_uint32 *p ) { _mm_setvxcsr( *p ); }
03158         static inline void __kmp_store_mxcsr( kmp_uint32 *p ) { *p = _mm_getvxcsr(); }
03159     #else // KMP_MIC1
03160         #if __MIC2__
03161             static inline void __kmp_load_mxcsr( kmp_uint32 *p ) { _mm_setcsr( *p ); }
03162             static inline void __kmp_store_mxcsr( kmp_uint32 *p ) { *p = _mm_getcsr(); }
03163         #else
03164             #error "Non LRB1 detected. Re-evaluate if KMP_INHERIT_FP_CONTROL works."
03165         #endif
03166     #endif // KMP_MIC1
03167 #else // KMP_MIC
03168 extern void __kmp_load_mxcsr( kmp_uint32 *p );
03169 extern void __kmp_store_mxcsr( kmp_uint32 *p );
03170 #endif // KMP_MIC
03171 
03172 //static inline void __kmp_load_mxcsr ( kmp_uint32 *p ) { _mm_setcsr( *p ); }
03173 //static inline void __kmp_store_mxcsr( kmp_uint32 *p ) { *p = _mm_getcsr(); }
03174 
03175 extern void __kmp_load_x87_fpu_control_word( kmp_int16 *p );
03176 extern void __kmp_store_x87_fpu_control_word( kmp_int16 *p );
03177 extern void __kmp_clear_x87_fpu_status_word();
03178 # define KMP_X86_MXCSR_MASK      0xffffffc0   /* ignore status flags (6 lsb) */
03179 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
03180 
03181 /* OMPT - add exit runtime frame as arg to microtask */
03182 extern int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int npr, int argc, void *argv[], void **exit_frame_ptr);
03183 
03184 extern int  __kmp_is_address_mapped( void *addr );
03185 extern kmp_uint64 __kmp_hardware_timestamp(void);
03186 
03187 /* ------------------------------------------------------------------------ */
03188 
03189 KMP_EXPORT void   __kmpc_begin                ( ident_t *, kmp_int32 flags );
03190 KMP_EXPORT void   __kmpc_end                  ( ident_t * );
03191 
03192 KMP_EXPORT void   __kmpc_threadprivate_register_vec ( ident_t *, void * data, kmpc_ctor_vec ctor,
03193                                                   kmpc_cctor_vec cctor, kmpc_dtor_vec dtor, size_t vector_length );
03194 KMP_EXPORT void   __kmpc_threadprivate_register     ( ident_t *, void * data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor );
03195 KMP_EXPORT void * __kmpc_threadprivate              ( ident_t *, kmp_int32 global_tid, void * data, size_t size );
03196 
03197 KMP_EXPORT kmp_int32  __kmpc_global_thread_num  ( ident_t * );
03198 KMP_EXPORT kmp_int32  __kmpc_global_num_threads ( ident_t * );
03199 KMP_EXPORT kmp_int32  __kmpc_bound_thread_num   ( ident_t * );
03200 KMP_EXPORT kmp_int32  __kmpc_bound_num_threads  ( ident_t * );
03201 
03202 KMP_EXPORT kmp_int32  __kmpc_ok_to_fork     ( ident_t * );
03203 KMP_EXPORT void   __kmpc_fork_call          ( ident_t *, kmp_int32 nargs, kmpc_micro microtask, ... );
03204 
03205 KMP_EXPORT void   __kmpc_serialized_parallel     ( ident_t *, kmp_int32 global_tid );
03206 KMP_EXPORT void   __kmpc_end_serialized_parallel ( ident_t *, kmp_int32 global_tid );
03207 
03208 KMP_EXPORT void   __kmpc_flush              ( ident_t *, ... );
03209 KMP_EXPORT void   __kmpc_barrier            ( ident_t *, kmp_int32 global_tid );
03210 KMP_EXPORT kmp_int32  __kmpc_master         ( ident_t *, kmp_int32 global_tid );
03211 KMP_EXPORT void   __kmpc_end_master         ( ident_t *, kmp_int32 global_tid );
03212 KMP_EXPORT void   __kmpc_ordered            ( ident_t *, kmp_int32 global_tid );
03213 KMP_EXPORT void   __kmpc_end_ordered        ( ident_t *, kmp_int32 global_tid );
03214 KMP_EXPORT void   __kmpc_critical           ( ident_t *, kmp_int32 global_tid, kmp_critical_name * );
03215 KMP_EXPORT void   __kmpc_end_critical       ( ident_t *, kmp_int32 global_tid, kmp_critical_name * );
03216 
03217 KMP_EXPORT kmp_int32  __kmpc_barrier_master ( ident_t *, kmp_int32 global_tid );
03218 KMP_EXPORT void   __kmpc_end_barrier_master ( ident_t *, kmp_int32 global_tid );
03219 
03220 KMP_EXPORT kmp_int32  __kmpc_barrier_master_nowait ( ident_t *, kmp_int32 global_tid );
03221 
03222 KMP_EXPORT kmp_int32  __kmpc_single         ( ident_t *, kmp_int32 global_tid );
03223 KMP_EXPORT void   __kmpc_end_single         ( ident_t *, kmp_int32 global_tid );
03224 
03225 KMP_EXPORT void KMPC_FOR_STATIC_INIT    ( ident_t *loc, kmp_int32 global_tid, kmp_int32 schedtype, kmp_int32 *plastiter,
03226                                           kmp_int *plower, kmp_int *pupper, kmp_int *pstride, kmp_int incr, kmp_int chunk );
03227 
03228 KMP_EXPORT void __kmpc_for_static_fini  ( ident_t *loc, kmp_int32 global_tid );
03229 
03230 KMP_EXPORT void __kmpc_copyprivate( ident_t *loc, kmp_int32 global_tid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit );
03231 
03232 extern void KMPC_SET_NUM_THREADS        ( int arg );
03233 extern void KMPC_SET_DYNAMIC            ( int flag );
03234 extern void KMPC_SET_NESTED             ( int flag );
03235 
03236 /* --------------------------------------------------------------------------- */
03237 
03238 /*
03239  * Taskq interface routines
03240  */
03241 
03242 KMP_EXPORT kmpc_thunk_t * __kmpc_taskq (ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task, size_t sizeof_thunk,
03243                                         size_t sizeof_shareds, kmp_int32 flags, kmpc_shared_vars_t **shareds);
03244 KMP_EXPORT void __kmpc_end_taskq (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk);
03245 KMP_EXPORT kmp_int32 __kmpc_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk);
03246 KMP_EXPORT void __kmpc_taskq_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status);
03247 KMP_EXPORT void __kmpc_end_taskq_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk);
03248 KMP_EXPORT kmpc_thunk_t * __kmpc_task_buffer (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task);
03249 
03250 /* ------------------------------------------------------------------------ */
03251 
03252 #if OMP_30_ENABLED
03253 /*
03254  * OMP 3.0 tasking interface routines
03255  */
03256 
03257 KMP_EXPORT kmp_int32
03258 __kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task );
03259 KMP_EXPORT kmp_task_t*
03260 __kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
03261                        size_t sizeof_kmp_task_t, size_t sizeof_shareds,
03262                        kmp_routine_entry_t task_entry );
03263 KMP_EXPORT void
03264 __kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task );
03265 KMP_EXPORT void
03266 __kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task );
03267 KMP_EXPORT kmp_int32
03268 __kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task );
03269 KMP_EXPORT kmp_int32
03270 __kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid );
03271 
03272 KMP_EXPORT kmp_int32
03273 __kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part );
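
/* Example: a plausible lowering of "#pragma omp task" onto this interface:
   allocate the task object, enqueue it, then wait for children. The flags
   value (1 taken here to mean "tied") and the sizes are schematic
   assumptions: */
#if 0   /* illustrative sketch only; not part of kmp.h */
static void ex_spawn_task(ident_t *loc, kmp_int32 gtid,
                          kmp_routine_entry_t entry) {
    kmp_task_t *t = __kmpc_omp_task_alloc(loc, gtid, /* flags */ 1,
                                          sizeof(kmp_task_t),
                                          /* sizeof_shareds */ 0, entry);
    __kmpc_omp_task(loc, gtid, t);      /* enqueue (or run) the task */
    __kmpc_omp_taskwait(loc, gtid);     /* wait for child tasks      */
}
#endif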
03274 
03275 #if TASK_UNUSED
03276 void __kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task );
03277 void __kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task );
03278 #endif // TASK_UNUSED
03279 
03280 /* ------------------------------------------------------------------------ */
03281 #endif // OMP_30_ENABLED
03282 
03283 #if OMP_40_ENABLED
03284 KMP_EXPORT void __kmpc_taskgroup( ident_t * loc, int gtid );
03285 KMP_EXPORT void __kmpc_end_taskgroup( ident_t * loc, int gtid );
03286 #endif
03287 
03288 /*
03289  * Lock interface routines (fast versions with gtid passed in)
03290  */
03291 KMP_EXPORT void __kmpc_init_lock( ident_t *loc, kmp_int32 gtid,  void **user_lock );
03292 KMP_EXPORT void __kmpc_init_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
03293 KMP_EXPORT void __kmpc_destroy_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
03294 KMP_EXPORT void __kmpc_destroy_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
03295 KMP_EXPORT void __kmpc_set_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
03296 KMP_EXPORT void __kmpc_set_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
03297 KMP_EXPORT void __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
03298 KMP_EXPORT void __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
03299 KMP_EXPORT int __kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
03300 KMP_EXPORT int __kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock );
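
/* Example: the shape of a compiler-emitted locked region using this
   interface. NULL is passed for the ident_t only for brevity; real callers
   supply a source-location descriptor, and init/destroy normally bracket the
   lock's whole lifetime rather than each use: */
#if 0   /* illustrative sketch only; not part of kmp.h */
static void ex_locked_region(void) {
    static void *ex_lock = NULL;                 /* storage the runtime manages */
    kmp_int32 gtid = __kmpc_global_thread_num(NULL);

    __kmpc_init_lock(NULL, gtid, &ex_lock);
    __kmpc_set_lock(NULL, gtid, &ex_lock);
    /* ... critical work ... */
    __kmpc_unset_lock(NULL, gtid, &ex_lock);
    __kmpc_destroy_lock(NULL, gtid, &ex_lock);
}
#endif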
03301 
03302 /* ------------------------------------------------------------------------ */
03303 
03304 /*
03305  * Interface to fast scalable reduce methods routines
03306  */
03307 
03308 KMP_EXPORT kmp_int32 __kmpc_reduce_nowait( ident_t *loc, kmp_int32 global_tid,
03309                                            kmp_int32 num_vars, size_t reduce_size,
03310                                            void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
03311                                            kmp_critical_name *lck );
03312 KMP_EXPORT void __kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck );
03313 KMP_EXPORT kmp_int32 __kmpc_reduce( ident_t *loc, kmp_int32 global_tid,
03314                                     kmp_int32 num_vars, size_t reduce_size,
03315                                     void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
03316                                     kmp_critical_name *lck );
03317 KMP_EXPORT void __kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck );
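
/* Example: the shape of a reduce_func and its call. The return-code protocol
   sketched in the comment (1 = this thread combines, 2 = use atomics,
   0 = nothing to do) is an assumption about the interface, not a contract
   stated in this header: */
#if 0   /* illustrative sketch only; not part of kmp.h */
static void ex_plus_reduce(void *lhs, void *rhs) {
    *(int *)lhs += *(int *)rhs;   /* fold the rhs partial sum into lhs */
}
/* each thread would pass its private partial result:
     ret = __kmpc_reduce_nowait(loc, gtid, 1, sizeof(int),
                                &my_sum, ex_plus_reduce, &lck);      */
#endif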
03318 
03319 /*
03320  * internal fast reduction routines
03321  */
03322 
03323 extern PACKED_REDUCTION_METHOD_T
03324 __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,
03325                                   kmp_int32 num_vars, size_t reduce_size,
03326                                   void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
03327                                   kmp_critical_name *lck );
03328 
03329 // this function is for testing set/get/determine reduce method
03330 KMP_EXPORT kmp_int32 __kmp_get_reduce_method( void );
03331 
03332 KMP_EXPORT kmp_uint64 __kmpc_get_taskid();
03333 KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid();
03334 
03335 KMP_EXPORT void __kmpc_place_threads(int,int,int);
03336 
03337 /* ------------------------------------------------------------------------ */
03338 /* ------------------------------------------------------------------------ */
03339 
03340 // C++ port
03341 // missing 'extern "C"' declarations
03342 
03343 KMP_EXPORT kmp_int32 __kmpc_in_parallel( ident_t *loc );
03344 KMP_EXPORT void __kmpc_pop_num_threads(  ident_t *loc, kmp_int32 global_tid );
03345 KMP_EXPORT void __kmpc_push_num_threads( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads );
03346 
03347 #if OMP_40_ENABLED
03348 KMP_EXPORT void __kmpc_push_proc_bind( ident_t *loc, kmp_int32 global_tid, int proc_bind );
03349 #endif
03350 
03351 KMP_EXPORT void*
03352 __kmpc_threadprivate_cached( ident_t * loc, kmp_int32 global_tid,
03353                              void * data, size_t size, void *** cache );
03354 
03355 // Symbols for mutual detection of multiple OpenMP libraries at link time.
03356 extern int _You_must_link_with_exactly_one_OpenMP_library;
03357 extern int _You_must_link_with_Intel_OpenMP_library;
03358 #if KMP_OS_WINDOWS && ( KMP_VERSION_MAJOR > 4 )
03359     extern int _You_must_link_with_Microsoft_OpenMP_library;
03360 #endif
03361 
03362 
03363 // The routines below are not exported.
03364 // Consider making them 'static' in corresponding source files.
03365 void
03366 kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
03367 struct private_common *
03368 kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size );
03369 
03370 
03371 
03372 #ifdef __cplusplus
03373 }
03374 #endif
03375 
03376 #endif /* KMP_H */
03377 
