kmp_alloc.c

Go to the documentation of this file.
00001 /*
00002  * kmp_alloc.c -- private/shared dynamic memory allocation and management
00003  * $Revision: 42195 $
00004  * $Date: 2013-03-27 16:10:35 -0500 (Wed, 27 Mar 2013) $
00005  */
00006 
00007 /* <copyright>
00008     Copyright (c) 1997-2013 Intel Corporation.  All Rights Reserved.
00009 
00010     Redistribution and use in source and binary forms, with or without
00011     modification, are permitted provided that the following conditions
00012     are met:
00013 
00014       * Redistributions of source code must retain the above copyright
00015         notice, this list of conditions and the following disclaimer.
00016       * Redistributions in binary form must reproduce the above copyright
00017         notice, this list of conditions and the following disclaimer in the
00018         documentation and/or other materials provided with the distribution.
00019       * Neither the name of Intel Corporation nor the names of its
00020         contributors may be used to endorse or promote products derived
00021         from this software without specific prior written permission.
00022 
00023     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00024     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00025     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00026     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00027     HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00028     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00029     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00030     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00031     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00032     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00033     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00034 
00035 
00036 ------------------------------------------------------------------------
00037 
00038     Portions of this software are protected under the following patents:
00039         U.S. Patent 5,812,852
00040         U.S. Patent 6,792,599
00041         U.S. Patent 7,069,556
00042         U.S. Patent 7,328,433
00043         U.S. Patent 7,500,242
00044 
00045 </copyright> */
00046 
00047 #include "kmp.h"
00048 #include "kmp_wrapper_malloc.h"
00049 #include "kmp_io.h"
00050 
00051 // Disable bget when it is not used
00052 #if KMP_USE_BGET
00053 
/* Thread private buffer management code */

/* Callback types used by bectl() to manage automatic pool expansion:
   - bget_compact_t: invoked with (size requested, compaction sequence #);
     a nonzero return asks bget() to retry the allocation (see bget()).
   - bget_acquire_t: acquire a new expansion block of the given size.
   - bget_release_t: release a directly-acquired block. */
typedef int   (*bget_compact_t)(size_t, int);
typedef void *(*bget_acquire_t)(size_t);
typedef void  (*bget_release_t)(void *);

/* NOTE: bufsize must be a signed datatype -- the allocator stores
   negative sizes to mark allocated buffers (see bhead2_t.bsize). */

#if KMP_OS_WINDOWS
# if KMP_ARCH_X86
   typedef kmp_int32 bufsize;
# else
   typedef kmp_int64 bufsize;
# endif
#else
  typedef ssize_t bufsize;
#endif

/* The three modes of operation are, fifo search, lifo search, and best-fit */

typedef enum bget_mode {
    bget_mode_fifo = 0,   /* first fit, walking free lists forwards */
    bget_mode_lifo = 1,   /* first fit, walking free lists backwards */
    bget_mode_best = 2    /* smallest free buffer that satisfies the request */
} bget_mode_t;


/* Core allocator entry points; each operates on the per-thread pool of 'th'. */
static void    bpool( kmp_info_t *th, void *buffer, bufsize len);
static void   *bget( kmp_info_t *th, bufsize size);
static void   *bgetz( kmp_info_t *th, bufsize size);
static void   *bgetr( kmp_info_t *th, void *buffer, bufsize newsize);
static void    brel( kmp_info_t *th, void *buf);
static void    bectl(  kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr );

#ifdef KMP_DEBUG
/* Debug-only statistics and pool validation helpers. */
static void    bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel);
static void    bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel);
static void    bufdump( kmp_info_t *th, void *buf);
static void    bpoold( kmp_info_t *th, void *pool, int dumpalloc, int dumpfree);
static int     bpoolv( kmp_info_t *th, void *pool);
#endif
00095 
/* BGET CONFIGURATION */
                                      /* Buffer allocation size quantum:
                                         all buffers allocated are a
                                         multiple of this size.  This
                                         MUST be a power of two. */

                                      /* On IA-32 architecture with Linux* OS,
                                         malloc() does not
                                         ensure 16 byte alignment */

#if KMP_ARCH_X86

#define SizeQuant   8
#define AlignType   double

#else

#define SizeQuant   16
#define AlignType   _Quad

#endif

#define BufStats    1                 /* Define this symbol to enable the
                                         bstats() function which calculates
                                         the total free space in the buffer
                                         pool, the largest available
                                         buffer, and the total space
                                         currently allocated. */

#ifdef KMP_DEBUG

#define BufDump     1                 /* Define this symbol to enable the
                                         bpoold() function which dumps the
                                         buffers in a buffer pool. */

#define BufValid    1                 /* Define this symbol to enable the
                                         bpoolv() function for validating
                                         a buffer pool. */

#define DumpData    1                 /* Define this symbol to enable the
                                         bufdump() function which allows
                                         dumping the contents of an allocated
                                         or free buffer. */
#ifdef NOT_USED_NOW

#define FreeWipe    1                 /* Wipe free buffers to a guaranteed
                                         pattern of garbage to trip up
                                         miscreants who attempt to use
                                         pointers into released buffers. */

#define BestFit     1                 /* Use a best fit algorithm when
                                         searching for space for an
                                         allocation request.  This uses
                                         memory more efficiently, but
                                         allocation will be much slower. */
#endif /* NOT_USED_NOW */
#endif /* KMP_DEBUG */
00154 
/* Bin boundaries for the segregated free lists.  bget_get_bin() maps a
   buffer size to an index into this table via binary chop, so the values
   MUST be sorted in ascending order. */
static bufsize bget_bin_size[ ] = {
    0,
//    1 << 6,    /* .5 Cache line */
    1 << 7,    /* 1 Cache line, new */
    1 << 8,    /* 2 Cache lines */
    1 << 9,    /* 4 Cache lines, new */
    1 << 10,   /* 8 Cache lines */
    1 << 11,   /* 16 Cache lines, new */
    1 << 12,
    1 << 13,   /* new */
    1 << 14,
    1 << 15,   /* new */
    1 << 16,
    1 << 17,
    1 << 18,
    1 << 19,
    1 << 20,    /*  1MB */
    1 << 21,    /*  2MB */
    1 << 22,    /*  4MB */
    1 << 23,    /*  8MB */
    1 << 24,    /* 16MB */
    1 << 25,    /* 32MB */
};

/* Number of free-list bins, derived from the table above. */
#define MAX_BGET_BINS   (sizeof(bget_bin_size) / sizeof(bufsize))
00180 
struct bfhead;

/*  Declare the interface, including the requested buffer size type,
    bufsize.  */

/* Queue links */

typedef struct qlinks {
    struct bfhead *flink;             /* Forward link */
    struct bfhead *blink;             /* Backward link */
} qlinks_t;

/* Header in allocated and free buffers */

typedef struct bhead2 {
    kmp_info_t *bthr;                 /* The thread which owns the buffer pool */
    bufsize     prevfree;             /* Relative link back to previous
                                         free buffer in memory or 0 if
                                         previous buffer is allocated.  */
    bufsize     bsize;                /* Buffer size: positive if free,
                                         negative if allocated. */
} bhead2_t;

/* Make sure the bhead structure is a multiple of SizeQuant in size. */

typedef union bhead {
    KMP_ALIGN( SizeQuant )
    AlignType           b_align;      /* forces worst-case alignment */
    char                b_pad[ sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant)) ];
    bhead2_t            bb;           /* the actual header fields */
} bhead_t;
#define BH(p)   ((bhead_t *) (p))

/*  Header in directly allocated buffers (by acqfcn) */

typedef struct bdhead
{
    bufsize tsize;                    /* Total size, including overhead */
    bhead_t bh;                       /* Common header */
} bdhead_t;
#define BDH(p)  ((bdhead_t *) (p))

/* Header in free buffers */

typedef struct bfhead {
    bhead_t  bh;                      /* Common allocated/free header */
    qlinks_t ql;                      /* Links on free list */
} bfhead_t;
#define BFH(p)  ((bfhead_t *) (p))

/* Per-thread allocator state: one segregated free-list ring per size bin,
   optional statistics, and the automatic-expansion callbacks set by bectl(). */
typedef struct thr_data {
    bfhead_t freelist[ MAX_BGET_BINS ];
#if BufStats
    size_t totalloc;               /* Total space currently allocated */
    long numget, numrel;           /* Number of bget() and brel() calls */
    long numpblk;                  /* Number of pool blocks */
    long numpget, numprel;         /* Number of block gets and rels */
    long numdget, numdrel;         /* Number of direct gets and rels */
#endif /* BufStats */

    /* Automatic expansion block management functions */
    bget_compact_t compfcn;
    bget_acquire_t acqfcn;
    bget_release_t relfcn;

    bget_mode_t    mode;          /* what allocation mode to use? */

    bufsize exp_incr;                 /* Expansion block size */
    bufsize pool_len;                 /* 0: no bpool calls have been made
                                         -1: not all pool blocks are
                                             the same size
                                         >0: (common) block size for all
                                             bpool calls made so far
                                      */
    bfhead_t * last_pool;         /* Last pool owned by this thread (delay deallocation) */
} thr_data_t;

/*  Minimum allocation quantum: */

#define QLSize  (sizeof(qlinks_t))
#define SizeQ   ((SizeQuant > QLSize) ? SizeQuant : QLSize)
#define MaxSize (bufsize)( ~ ( ( (bufsize)( 1 ) << ( sizeof( bufsize ) * CHAR_BIT - 1 ) ) | ( SizeQuant - 1 ) ) )
    // Maximum for the requested size.

/* End sentinel: value placed in bsize field of dummy block delimiting
   end of pool block.  The most negative number which will  fit  in  a
   bufsize, defined in a way that the compiler will accept. */

#define ESent   ((bufsize) (-(((((bufsize)1)<<((int)sizeof(bufsize)*8-2))-1)*2)-2))
00271 /* ------------------------------------------------------------------------ */
00272 
00273 /* Thread Data management routines */
00274 
00275 static int
00276 bget_get_bin( bufsize size )
00277 {
00278     // binary chop bins
00279     int lo = 0, hi = MAX_BGET_BINS - 1;
00280 
00281     KMP_DEBUG_ASSERT( size > 0 );
00282 
00283     while ( (hi - lo) > 1 ) {
00284     int mid = (lo + hi) >> 1;
00285     if (size < bget_bin_size[ mid ])
00286         hi = mid - 1;
00287     else
00288         lo = mid;
00289     }
00290 
00291     KMP_DEBUG_ASSERT( (lo >= 0) && (lo < MAX_BGET_BINS) );
00292 
00293     return lo;
00294 }
00295 
00296 static void
00297 set_thr_data( kmp_info_t *th )
00298 {
00299     int i;
00300     thr_data_t *data;
00301 
00302     data =
00303         (thr_data_t *)(
00304             ( ! th->th.th_local.bget_data ) ? __kmp_allocate( sizeof( *data ) ) : th->th.th_local.bget_data
00305         );
00306 
00307     memset( data, '\0', sizeof( *data ) );
00308 
00309     for (i = 0; i < MAX_BGET_BINS; ++i) {
00310     data->freelist[ i ].ql.flink = & data->freelist[ i ];
00311     data->freelist[ i ].ql.blink = & data->freelist[ i ];
00312     }
00313 
00314     th->th.th_local.bget_data = data;
00315     th->th.th_local.bget_list = 0;
00316 #if ! USE_CMP_XCHG_FOR_BGET
00317 #ifdef USE_QUEUING_LOCK_FOR_BGET
00318     __kmp_init_lock( & th->th.th_local.bget_lock );
00319 #else
00320     __kmp_init_bootstrap_lock( & th->th.th_local.bget_lock );
00321 #endif /* USE_LOCK_FOR_BGET */
00322 #endif /* ! USE_CMP_XCHG_FOR_BGET */
00323 }
00324 
00325 static thr_data_t *
00326 get_thr_data( kmp_info_t *th )
00327 {
00328     thr_data_t *data;
00329 
00330     data = (thr_data_t *) th->th.th_local.bget_data;
00331 
00332     KMP_DEBUG_ASSERT( data != 0 );
00333 
00334     return data;
00335 }
00336 
00337 
#ifdef KMP_DEBUG

/* Debug check: walk the thread's deferred-release queue and verify that
   every entry still has a nonzero size field (i.e. looks like a buffer).
   NOTE: assume that the global_lock is held. */
static void
__kmp_bget_validate_queue( kmp_info_t *th )
{
    void *cursor = (void *) th->th.th_local.bget_list;

    while ( cursor != 0 ) {
        /* Step back from the user pointer to the buffer header. */
        bfhead_t *hdr = BFH( ((char *) cursor) - sizeof(bhead_t) );

        KMP_DEBUG_ASSERT( hdr->bh.bb.bsize != 0 );
        cursor = (void *) hdr->ql.flink;
    }
}

#endif
00356 
/* Walk the free list and release the enqueued buffers.
   Buffers freed by other threads are pushed onto th->th.th_local.bget_list
   (see __kmp_bget_enqueue); here the owning thread atomically detaches the
   whole list and brel()s each entry. */

static void
__kmp_bget_dequeue( kmp_info_t *th )
{
    void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);

    if (p != 0) {
    #if USE_CMP_XCHG_FOR_BGET
        {
        /* Lock-free variant: swap the list head with NULL, retrying on
           contention from concurrent enqueuers. */
        volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
                while ( ! KMP_COMPARE_AND_STORE_PTR(
            & th->th.th_local.bget_list, old_value, NULL ) )
                {
                    KMP_CPU_PAUSE();
                    old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
                }
                p = (void *) old_value;
        }
    #else /* ! USE_CMP_XCHG_FOR_BGET */
        /* Lock-based variant: detach the whole list under the bget lock. */
        #ifdef USE_QUEUING_LOCK_FOR_BGET
        __kmp_acquire_lock( & th->th.th_local.bget_lock,
                    __kmp_gtid_from_thread(th) );
        #else
        __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
        #endif /* USE_QUEUING_LOCK_FOR_BGET */

         p = (void *) th->th.th_local.bget_list;
         th->th.th_local.bget_list = 0;

        #ifdef USE_QUEUING_LOCK_FOR_BGET
        __kmp_release_lock( & th->th.th_local.bget_lock,
                    __kmp_gtid_from_thread(th) );
        #else
        __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
        #endif
    #endif /* USE_CMP_XCHG_FOR_BGET */

        /* Check again to make sure the list is not empty */

        while (p != 0) {
            void *buf = p;
            /* Queue links live in the buffer's own header area. */
            bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));

        KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
            KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
                                (kmp_uintptr_t)th ); // clear possible mark
        KMP_DEBUG_ASSERT( b->ql.blink == 0 );

            /* Advance first: brel() may reuse this header. */
            p = (void *) b->ql.flink;

            brel( th, buf );
        }
    }
}
00412 
/* Chain together the free buffers by using the thread owner field.
   Called when a thread frees a buffer owned by another thread 'th':
   pushes 'buf' onto th's deferred-release list, to be drained later by
   __kmp_bget_dequeue() on the owning thread. */

static void
__kmp_bget_enqueue( kmp_info_t *th, void *buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
            , kmp_int32 rel_gtid
#endif
                  )
{
    bfhead_t *b = BFH(((char *) buf) - sizeof(bhead_t));

    KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
    KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
                        (kmp_uintptr_t)th ); // clear possible mark

    b->ql.blink = 0;

    KC_TRACE( 10, ( "__kmp_bget_enqueue: moving buffer to T#%d list\n",
                    __kmp_gtid_from_thread( th ) ) );

    #if USE_CMP_XCHG_FOR_BGET
    {
        volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
        /* the next pointer must be set before setting bget_list to buf to avoid
           exposing a broken list to other threads, even for an instant. */
        b->ql.flink = BFH( old_value );
            while ( ! KMP_COMPARE_AND_STORE_PTR(
            & th->th.th_local.bget_list, old_value, buf ) )
            {
                KMP_CPU_PAUSE();
                old_value = TCR_PTR(th->th.th_local.bget_list);
                /* the next pointer must be set before setting bget_list to buf to avoid
                   exposing a broken list to other threads, even for an instant. */
                b->ql.flink = BFH( old_value );
            }
    }
    #else /* ! USE_CMP_XCHG_FOR_BGET */
    /* Lock-based variant: a plain push under the bget lock. */
    #ifdef USE_QUEUING_LOCK_FOR_BGET
        __kmp_acquire_lock( & th->th.th_local.bget_lock, rel_gtid );
    #else
        __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
    #endif

        b->ql.flink = BFH( th->th.th_local.bget_list );
        th->th.th_local.bget_list = (void *) buf;

    #ifdef USE_QUEUING_LOCK_FOR_BGET
        __kmp_release_lock( & th->th.th_local.bget_lock, rel_gtid );
    #else
        __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
    #endif
    #endif /* USE_CMP_XCHG_FOR_BGET */
}
00466 
00467 /* insert buffer back onto a new freelist */
00468 
00469 static void
00470 __kmp_bget_insert_into_freelist( thr_data_t *thr, bfhead_t *b )
00471 {
00472     int bin;
00473 
00474     KMP_DEBUG_ASSERT( ((size_t)b ) % SizeQuant == 0 );
00475     KMP_DEBUG_ASSERT( b->bh.bb.bsize % SizeQuant == 0 );
00476 
00477     bin = bget_get_bin( b->bh.bb.bsize );
00478 
00479     KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.blink->ql.flink == &thr->freelist[ bin ]);
00480     KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.flink->ql.blink == &thr->freelist[ bin ]);
00481 
00482     b->ql.flink = &thr->freelist[ bin ];
00483     b->ql.blink = thr->freelist[ bin ].ql.blink;
00484 
00485     thr->freelist[ bin ].ql.blink = b;
00486     b->ql.blink->ql.flink = b;
00487 }
00488 
00489 /* unlink the buffer from the old freelist */
00490 
00491 static void
00492 __kmp_bget_remove_from_freelist( bfhead_t *b )
00493 {
00494     KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
00495     KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
00496 
00497     b->ql.blink->ql.flink = b->ql.flink;
00498     b->ql.flink->ql.blink = b->ql.blink;
00499 }
00500 
00501 /* ------------------------------------------------------------------------ */
00502 
00503 /*  GET STATS -- check info on free list */
00504 
00505 static void
00506 bcheck(  kmp_info_t *th, bufsize *max_free, bufsize *total_free )
00507 {
00508     thr_data_t *thr = get_thr_data( th );
00509     int bin;
00510 
00511     *total_free = *max_free = 0;
00512 
00513     for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
00514     bfhead_t *b, *best;
00515 
00516     best = &thr->freelist[ bin ];
00517     b = best->ql.flink;
00518 
00519     while (b != &thr->freelist[ bin ]) {
00520         *total_free += (b->bh.bb.bsize - sizeof( bhead_t ));
00521         if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize))
00522         best = b;
00523 
00524         /* Link to next buffer */
00525         b = b->ql.flink;
00526     }
00527 
00528     if (*max_free < best->bh.bb.bsize)
00529         *max_free = best->bh.bb.bsize;
00530     }
00531 
00532     if (*max_free > sizeof( bhead_t ))
00533     *max_free -= sizeof( bhead_t );
00534 }
00535 
00536 /* ------------------------------------------------------------------------ */
00537 
/*  BGET  --  Allocate a buffer.  Returns a buffer of at least
    'requested_size' bytes from the calling thread's pool, or NULL.
    Searches the segregated free lists (fifo/lifo/best-fit per
    thr->mode), optionally retries after invoking the compaction
    callback, and finally falls back on the acquire callback for either
    a new expansion block or a direct allocation. */

static void *
bget(  kmp_info_t *th, bufsize requested_size )
{
    thr_data_t *thr = get_thr_data( th );
    bufsize size = requested_size;
    bfhead_t *b;
    void *buf;
    int compactseq = 0;     /* sequence number passed to the compact callback */
    int use_blink = 0;      /* nonzero: walk free-list rings backwards (lifo) */
/* For BestFit */
    bfhead_t *best;

    /* Reject negative sizes and requests whose total (with header) would
       exceed the maximum representable buffer size. */
    if ( size < 0 || size + sizeof( bhead_t ) > MaxSize ) {
        return NULL;
    }; // if

    __kmp_bget_dequeue( th );         /* Release any queued buffers */

    if (size < SizeQ) {               /* Need at least room for the */
        size = SizeQ;                 /*    queue links.  */
    }
    #if defined( SizeQuant ) && ( SizeQuant > 1 )
        /* Round the request up to the allocation quantum. */
        size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
    #endif

    size += sizeof(bhead_t);     /* Add overhead in allocated buffer
                                         to size required. */
    KMP_DEBUG_ASSERT( size >= 0 );
    KMP_DEBUG_ASSERT( size % SizeQuant == 0 );

    use_blink = ( thr->mode == bget_mode_lifo );

    /* If a compact function was provided in the call to bectl(), wrap
       a loop around the allocation process  to  allow  compaction  to
       intervene in case we don't find a suitable buffer in the chain. */

    for (;;) {
    int bin;

    /* Start at the bin for this size and work up through larger bins. */
    for (bin = bget_get_bin( size ); bin < MAX_BGET_BINS; ++bin) {
        /* Link to next buffer */
        b = ( use_blink ? thr->freelist[ bin ].ql.blink : thr->freelist[ bin ].ql.flink );

        if (thr->mode == bget_mode_best) {
        best = &thr->freelist[ bin ];

        /* Scan the free list searching for the first buffer big enough
           to hold the requested size buffer. */

        while (b != &thr->freelist[ bin ]) {
            if (b->bh.bb.bsize >= (bufsize) size) {
            if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize)) {
                best = b;
            }
            }

            /* Link to next buffer */
            b = ( use_blink ? b->ql.blink : b->ql.flink );
        }
        b = best;
        }

        while (b != &thr->freelist[ bin ]) {
        if ((bufsize) b->bh.bb.bsize >= (bufsize) size) {

            /* Buffer  is big enough to satisfy  the request.  Allocate it
               to the caller.  We must decide whether the buffer is  large
               enough  to  split  into  the part given to the caller and a
               free buffer that remains on the free list, or  whether  the
               entire  buffer  should  be  removed  from the free list and
               given to the caller in its entirety.   We  only  split  the
               buffer if enough room remains for a header plus the minimum
               quantum of allocation. */

            if ((b->bh.bb.bsize - (bufsize) size) > (SizeQ + (sizeof(bhead_t)))) {
            bhead_t *ba, *bn;

            /* Carve the allocation off the high end of the free block:
               'ba' is the new allocated header, 'bn' the block after it. */
            ba = BH(((char *) b) + (b->bh.bb.bsize - (bufsize) size));
            bn = BH(((char *) ba) + size);

            KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);

            /* Subtract size from length of free block. */
            b->bh.bb.bsize -= (bufsize) size;

            /* Link allocated buffer to the previous free buffer. */
            ba->bb.prevfree = b->bh.bb.bsize;

            /* Plug negative size into user buffer. */
            ba->bb.bsize = -size;

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr, th);   // not an allocated address (do not mark it)
            /* Mark buffer after this one not preceded by free block. */
            bn->bb.prevfree = 0;

            /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
            __kmp_bget_remove_from_freelist( b );
            __kmp_bget_insert_into_freelist( thr, b );
#if BufStats
            thr->totalloc += (size_t) size;
            thr->numget++;        /* Increment number of bget() calls */
#endif
            buf = (void *) ((((char *) ba) + sizeof(bhead_t)));
                        KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
            return buf;
            } else {
            bhead_t *ba;

            ba = BH(((char *) b) + b->bh.bb.bsize);

            KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);

            /* The buffer isn't big enough to split.  Give  the  whole
               shebang to the caller and remove it from the free list. */

               __kmp_bget_remove_from_freelist( b );
#if BufStats
            thr->totalloc += (size_t) b->bh.bb.bsize;
            thr->numget++;        /* Increment number of bget() calls */
#endif
            /* Negate size to mark buffer allocated. */
            b->bh.bb.bsize = -(b->bh.bb.bsize);

            /* Mark this buffer as owned by this thread. */
            TCW_PTR(ba->bb.bthr, th);   // not an allocated address (do not mark it)
            /* Zero the back pointer in the next buffer in memory
               to indicate that this buffer is allocated. */
            ba->bb.prevfree = 0;

            /* Give user buffer starting at queue links. */
            buf =  (void *) &(b->ql);
                        KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
            return buf;
            }
        }

        /* Link to next buffer */
        b = ( use_blink ? b->ql.blink : b->ql.flink );
        }
    }

        /* We failed to find a buffer.  If there's a compact  function
           defined,  notify  it  of the size requested.  If it returns
           TRUE, try the allocation again. */

        if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
            break;
        }
    }

    /* No buffer available with requested size free. */

    /* Don't give up yet -- look in the reserve supply. */

    if (thr->acqfcn != 0) {
        if (size > (bufsize) (thr->exp_incr - sizeof(bhead_t))) {

            /* Request  is  too  large  to  fit in a single expansion
               block.  Try to satisfy it by a direct buffer acquisition. */

            bdhead_t *bdh;

            size += sizeof(bdhead_t) - sizeof(bhead_t);

        KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", (int) size ) );

        /* richryan */
        bdh = BDH((*thr->acqfcn)((bufsize) size));
            if (bdh != NULL) {

                /*  Mark the buffer special by setting the size field
                    of its header to zero.  */
                bdh->bh.bb.bsize = 0;

                /* Mark this buffer as owned by this thread. */
                TCW_PTR(bdh->bh.bb.bthr, th);  // don't mark buffer as allocated,
                                               // because direct buffer never goes to free list
                bdh->bh.bb.prevfree = 0;
                bdh->tsize = size;
#if BufStats
                thr->totalloc += (size_t) size;
                thr->numget++;        /* Increment number of bget() calls */
                thr->numdget++;       /* Direct bget() call count */
#endif
                buf =  (void *) (bdh + 1);
                KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
                return buf;
            }

        } else {

            /*  Try to obtain a new expansion block */

            void *newpool;

        KE_TRACE( 10, ("%%%%%% MALLOCB( %d )\n", (int) thr->exp_incr ) );

        /* richryan */
        newpool = (*thr->acqfcn)((bufsize) thr->exp_incr);
            KMP_DEBUG_ASSERT( ((size_t)newpool) % SizeQuant == 0 );
            if (newpool != NULL) {
                bpool( th, newpool, thr->exp_incr);
                buf =  bget( th, requested_size);  /* This can't, I say, can't get into a loop. */
                return buf;
            }
        }
    }

    /*  Still no buffer available */

    return NULL;
}
00753 
00754 /*  BGETZ  --  Allocate a buffer and clear its contents to zero.  We clear
00755                the  entire  contents  of  the buffer to zero, not just the
00756                region requested by the caller. */
00757 
00758 static void *
00759 bgetz(  kmp_info_t *th, bufsize size )
00760 {
00761     char *buf = (char *) bget( th, size);
00762 
00763     if (buf != NULL) {
00764         bhead_t *b;
00765         bufsize rsize;
00766 
00767         b = BH(buf - sizeof(bhead_t));
00768         rsize = -(b->bb.bsize);
00769         if (rsize == 0) {
00770             bdhead_t *bd;
00771 
00772             bd = BDH(buf - sizeof(bdhead_t));
00773             rsize = bd->tsize - (bufsize) sizeof(bdhead_t);
00774         } else {
00775             rsize -= sizeof(bhead_t);
00776         }
00777 
00778         KMP_DEBUG_ASSERT(rsize >= size);
00779 
00780         (void) memset(buf, 0, (bufsize) rsize);
00781     }
00782     return ((void *) buf);
00783 }
00784 
00785 /*  BGETR  --  Reallocate a buffer.  This is a minimal implementation,
00786                simply in terms of brel()  and  bget().   It  could  be
00787                enhanced to allow the buffer to grow into adjacent free
00788                blocks and to avoid moving data unnecessarily.  */
00789 
00790 static void *
00791 bgetr(  kmp_info_t *th, void *buf, bufsize size)
00792 {
00793     void *nbuf;
00794     bufsize osize;                    /* Old size of buffer */
00795     bhead_t *b;
00796 
00797     nbuf = bget( th, size );
00798     if ( nbuf == NULL ) { /* Acquire new buffer */
00799         return NULL;
00800     }
00801     if ( buf == NULL ) {
00802         return nbuf;
00803     }
00804     b = BH(((char *) buf) - sizeof(bhead_t));
00805     osize = -b->bb.bsize;
00806     if (osize == 0) {
00807         /*  Buffer acquired directly through acqfcn. */
00808         bdhead_t *bd;
00809 
00810         bd = BDH(((char *) buf) - sizeof(bdhead_t));
00811         osize = bd->tsize - (bufsize) sizeof(bdhead_t);
00812     } else {
00813         osize -= sizeof(bhead_t);
00814     };
00815 
00816     KMP_DEBUG_ASSERT(osize > 0);
00817 
00818     (void) memcpy((char *) nbuf, (char *) buf, /* Copy the data */
00819              (size_t) ((size < osize) ? size : osize));
00820     brel( th, buf );
00821 
00822     return nbuf;
00823 }
00824 
00825 /*  BREL  --  Release a buffer.  */
00826 
static void
brel(  kmp_info_t *th, void *buf )
{
    thr_data_t *thr = get_thr_data( th );
    bfhead_t *b, *bn;                 /* This buffer and the one following it */
    kmp_info_t *bth;                  /* Thread that owns the buffer */

    KMP_DEBUG_ASSERT(buf != NULL);
    KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );

    b = BFH(((char *) buf) - sizeof(bhead_t));

    if (b->bh.bb.bsize == 0) {        /* Directly-acquired buffer? */
        bdhead_t *bdh;

        bdh = BDH(((char *) buf) - sizeof(bdhead_t));
        KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
#if BufStats
        thr->totalloc -= (size_t) bdh->tsize;
        thr->numdrel++;               /* Number of direct releases */
        thr->numrel++;                /* Increment number of brel() calls */
#endif /* BufStats */
#ifdef FreeWipe
        (void) memset((char *) buf, 0x55,
                 (size_t) (bdh->tsize - sizeof(bdhead_t)));
#endif /* FreeWipe */

        KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) bdh ) );

        KMP_DEBUG_ASSERT( thr->relfcn != 0 );
        (*thr->relfcn)((void *) bdh);      /* Release it directly. */
        return;
    }

    /* Determine the owning thread, clearing the low "allocated address"
       mark bit before the pointer comparison. */
    bth = (kmp_info_t *)( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ); // clear possible mark before comparison
    if ( bth != th ) {
        /* Add this buffer to be released by the owning thread later */
        __kmp_bget_enqueue( bth, buf
#ifdef USE_QUEUING_LOCK_FOR_BGET
                    , __kmp_gtid_from_thread( th )
#endif
    );
        return;
    }

    /* Buffer size must be negative, indicating that the buffer is
       allocated. */

    if (b->bh.bb.bsize >= 0) {
        /* NOTE(review): this branch only nulls bn before the assert below
           fires in debug builds -- presumably to quiet "maybe
           uninitialized" compiler warnings; confirm. */
        bn = NULL;
    }
    KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);

    /*  Back pointer in next buffer must be zero, indicating the
        same thing: */

    KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.bsize)->bb.prevfree == 0);

#if BufStats
    thr->numrel++;                    /* Increment number of brel() calls */
    thr->totalloc += (size_t) b->bh.bb.bsize;  /* bsize is negative here: this subtracts */
#endif

    /* If the back link is nonzero, the previous buffer is free.  */

    if (b->bh.bb.prevfree != 0) {
        /* The previous buffer is free.  Consolidate this buffer  with  it
           by  adding  the  length  of  this  buffer  to the previous free
           buffer.  Note that we subtract the size  in  the  buffer  being
           released,  since  it's  negative to indicate that the buffer is
           allocated. */

        register bufsize size = b->bh.bb.bsize;

        /* Make the previous buffer the one we're working on. */
        KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.prevfree)->bb.bsize == b->bh.bb.prevfree);
        b = BFH(((char *) b) - b->bh.bb.prevfree);
        b->bh.bb.bsize -= size;

        /* unlink the buffer from the old freelist */
        __kmp_bget_remove_from_freelist( b );
    }
    else {
        /* The previous buffer is allocated.  Mark this buffer
           size as positive (i.e. free) and fall through to place
           the buffer on the free list as an isolated free block. */

        b->bh.bb.bsize = -b->bh.bb.bsize;
    }

    /* insert buffer back onto a new freelist */
    __kmp_bget_insert_into_freelist( thr, b );


    /* Now we look at the next buffer in memory, located by advancing from
       the  start  of  this  buffer  by its size, to see if that buffer is
       free.  If it is, we combine  this  buffer  with  the  next  one  in
       memory, dechaining the second buffer from the free list. */

    bn =  BFH(((char *) b) + b->bh.bb.bsize);
    if (bn->bh.bb.bsize > 0) {

        /* The buffer is free.  Remove it from the free list and add
           its size to that of our buffer. */

        KMP_DEBUG_ASSERT(BH((char *) bn + bn->bh.bb.bsize)->bb.prevfree == bn->bh.bb.bsize);

        __kmp_bget_remove_from_freelist( bn );

        b->bh.bb.bsize += bn->bh.bb.bsize;

        /* unlink the buffer from the old freelist, and reinsert it into the new freelist */

        __kmp_bget_remove_from_freelist( b );
        __kmp_bget_insert_into_freelist( thr, b );

        /* Finally,  advance  to   the  buffer  that   follows  the  newly
           consolidated free block.  We must set its  backpointer  to  the
           head  of  the  consolidated free block.  We know the next block
           must be an allocated block because the process of recombination
           guarantees  that  two  free  blocks will never be contiguous in
           memory.  */

        bn = BFH(((char *) b) + b->bh.bb.bsize);
    }
#ifdef FreeWipe
    (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
            (size_t) (b->bh.bb.bsize - sizeof(bfhead_t)));
#endif
    KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);

    /* The next buffer is allocated.  Set the backpointer in it  to  point
       to this buffer; the previous free buffer in memory. */

    bn->bh.bb.prevfree = b->bh.bb.bsize;

    /*  If  a  block-release function is defined, and this free buffer
        constitutes the entire block, release it.  Note that  pool_len
        is  defined  in  such a way that the test will fail unless all
        pool blocks are the same size.  */

    if (thr->relfcn != 0 &&
        b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
    {
#if BufStats
    if (thr->numpblk != 1) {    /* Do not release the last buffer until finalization time */
#endif

        /* The block must now be a single free buffer bounded by the
           ESent end sentinel. */
        KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
        KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
        KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);

        /*  Unlink the buffer from the free list  */
        __kmp_bget_remove_from_freelist( b );

        KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );

        (*thr->relfcn)(b);
#if BufStats
        thr->numprel++;               /* Nr of expansion block releases */
        thr->numpblk--;               /* Total number of blocks */
        KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);

        /* avoid leaving stale last_pool pointer around if it is being dealloced */
        if (thr->last_pool == b) thr->last_pool = 0;
    }
    else {
        thr->last_pool = b;
    }
#endif /* BufStats */
    }
}
00999 
01000 /*  BECTL  --  Establish automatic pool expansion control  */
01001 
01002 static void
01003 bectl(  kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr)
01004 {
01005     thr_data_t *thr = get_thr_data( th );
01006 
01007     thr->compfcn = compact;
01008     thr->acqfcn = acquire;
01009     thr->relfcn = release;
01010     thr->exp_incr = pool_incr;
01011 }
01012 
01013 /*  BPOOL  --  Add a region of memory to the buffer pool.  */
01014 
static void
bpool(  kmp_info_t *th, void *buf, bufsize len)
{
/*    int bin = 0; */
    thr_data_t *thr = get_thr_data( th );
    bfhead_t *b = BFH(buf);
    bhead_t *bn;                      /* Sentinel header at the end of the pool */

    __kmp_bget_dequeue( th );         /* Release any queued buffers */

#ifdef SizeQuant
    len &= ~(SizeQuant - 1);          /* Round the length down to the allocation quantum */
#endif
    /* Track the common pool-block length; -1 records that blocks of
       different sizes have been added, which defeats the whole-block
       release test in brel(). */
    if (thr->pool_len == 0) {
        thr->pool_len = len;
    } else if (len != thr->pool_len) {
        thr->pool_len = -1;
    }
#if BufStats
    thr->numpget++;                   /* Number of block acquisitions */
    thr->numpblk++;                   /* Number of blocks total */
    KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
#endif /* BufStats */

    /* Since the block is initially occupied by a single free  buffer,
       it  had  better  not  be  (much) larger than the largest buffer
       whose size we can store in bhead.bb.bsize. */

    KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize) ESent + 1));

    /* Clear  the  backpointer at  the start of the block to indicate that
       there  is  no  free  block  prior  to  this   one.    That   blocks
       recombination when the first block in memory is released. */

    b->bh.bb.prevfree = 0;

    /* Create a dummy allocated buffer at the end of the pool.  This dummy
       buffer is seen when a buffer at the end of the pool is released and
       blocks  recombination  of  the last buffer with the dummy buffer at
       the end.  The length in the dummy buffer  is  set  to  the  largest
       negative  number  to  denote  the  end  of  the pool for diagnostic
       routines (this specific value is  not  counted  on  by  the  actual
       allocation and release functions). */

    len -= sizeof(bhead_t);
    b->bh.bb.bsize = (bufsize) len;
    /* Set the owner of this buffer */
    TCW_PTR( b->bh.bb.bthr, (kmp_info_t*)((kmp_uintptr_t)th | 1) ); // mark the buffer as allocated address

    /* Chain the new block to the free list. */
    __kmp_bget_insert_into_freelist( thr, b );

#ifdef FreeWipe
    (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
             (size_t) (len - sizeof(bfhead_t)));
#endif
    bn = BH(((char *) b) + len);
    bn->bb.prevfree = (bufsize) len;
    /* Definition of ESent assumes two's complement! */
    KMP_DEBUG_ASSERT( (~0) == -1 && (bn != 0) );

    bn->bb.bsize = ESent;             /* Sentinel marking the end of the pool */
}
01078 
01079 /* ------------------------------------------------------------------------ */
01080 
01081 /*  BFREED  --  Dump the free lists for this thread. */
01082 
static void
bfreed(  kmp_info_t *th )
{
    int bin = 0, count = 0;
    int gtid = __kmp_gtid_from_thread( th );
    thr_data_t *thr = get_thr_data( th );

#if BufStats
    /* Print the thread's cumulative allocation counters first. */
    __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC " get=%" KMP_INT64_SPEC " rel=%" \
           KMP_INT64_SPEC " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC " prel=%" KMP_INT64_SPEC \
       " dget=%" KMP_INT64_SPEC " drel=%" KMP_INT64_SPEC "\n",
       gtid, (kmp_uint64) thr->totalloc,
       (kmp_int64) thr->numget,  (kmp_int64) thr->numrel,
       (kmp_int64) thr->numpblk,
       (kmp_int64) thr->numpget, (kmp_int64) thr->numprel,
       (kmp_int64) thr->numdget, (kmp_int64) thr->numdrel );
#endif

    /* Walk every free-list bin and print each free block found. */
    for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
        bfhead_t *b;

        for (b = thr->freelist[ bin ].ql.flink; b != &thr->freelist[ bin ]; b = b->ql.flink) {
            bufsize bs = b->bh.bb.bsize;

            /* Free-list links must be self-consistent, and a free block
               always carries a positive size. */
            KMP_DEBUG_ASSERT( b->ql.blink->ql.flink == b );
            KMP_DEBUG_ASSERT( b->ql.flink->ql.blink == b );
            KMP_DEBUG_ASSERT( bs > 0 );

            count += 1;

            __kmp_printf_no_lock("__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b, (long) bs );
#ifdef FreeWipe
            {
                /* Free space is wiped with 0x55 at release time; any other
                   byte pattern indicates a write-after-free. */
                char *lerr = ((char *) b) + sizeof(bfhead_t);
                if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || (memcmp(lerr, lerr + 1, (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
                    __kmp_printf_no_lock( "__kmp_printpool: T#%d     (Contents of above free block have been overstored.)\n", gtid );
                }
            }
#endif
        }
    }

    if (count == 0)
        __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid );
}
01128 
01129 /* ------------------------------------------------------------------------ */
01130 
01131 #ifdef KMP_DEBUG
01132 
01133 #if BufStats
01134 
01135 /*  BSTATS  --  Return buffer allocation free space statistics.  */
01136 
01137 static void
01138 bstats(  kmp_info_t *th, bufsize *curalloc,  bufsize *totfree,  bufsize *maxfree, long *nget, long *nrel)
01139 {
01140     int bin = 0;
01141     thr_data_t *thr = get_thr_data( th );
01142 
01143     *nget = thr->numget;
01144     *nrel = thr->numrel;
01145     *curalloc = (bufsize) thr->totalloc;
01146     *totfree = 0;
01147     *maxfree = -1;
01148 
01149     for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
01150     bfhead_t *b = thr->freelist[ bin ].ql.flink;
01151 
01152     while (b != &thr->freelist[ bin ]) {
01153         KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0);
01154         *totfree += b->bh.bb.bsize;
01155         if (b->bh.bb.bsize > *maxfree) {
01156         *maxfree = b->bh.bb.bsize;
01157         }
01158         b = b->ql.flink;              /* Link to next buffer */
01159     }
01160     }
01161 }
01162 
01163 /*  BSTATSE  --  Return extended statistics  */
01164 
01165 static void
01166 bstatse(  kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel)
01167 {
01168     thr_data_t *thr = get_thr_data( th );
01169 
01170     *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr;
01171     *npool = thr->numpblk;
01172     *npget = thr->numpget;
01173     *nprel = thr->numprel;
01174     *ndget = thr->numdget;
01175     *ndrel = thr->numdrel;
01176 }
01177 
01178 #endif /* BufStats */
01179 
01180 /*  BUFDUMP  --  Dump the data in a buffer.  This is called with the  user
01181                  data pointer, and backs up to the buffer header.  It will
01182                  dump either a free block or an allocated one.  */
01183 
static void
bufdump(  kmp_info_t *th, void *buf )
{
    bfhead_t *b;
    unsigned char *bdump;             /* Start of the data region to dump */
    bufsize bdlen;                    /* Number of data bytes remaining */

    /* Back up to the header; the sign of bsize tells us whether the
       buffer is allocated (negative) or free (positive). */
    b = BFH(((char *) buf) - sizeof(bhead_t));
    KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
    if (b->bh.bb.bsize < 0) {
        bdump = (unsigned char *) buf;
        bdlen = (-b->bh.bb.bsize) - (bufsize) sizeof(bhead_t);
    } else {
        bdump = (unsigned char *) (((char *) b) + sizeof(bfhead_t));
        bdlen = b->bh.bb.bsize - (bufsize) sizeof(bfhead_t);
    }

    /* Classic hex+ASCII dump, 16 bytes per line, collapsing runs of
       identical lines into a single "skipped" notice. */
    while (bdlen > 0) {
        int i, dupes = 0;
        bufsize l = bdlen;
        char bhex[50], bascii[20];    /* 16*3 hex chars + NUL; 16 ASCII chars + NUL */

        if (l > 16) {
            l = 16;
        }

        for (i = 0; i < l; i++) {
            (void) sprintf(bhex + i * 3, "%02X ", bdump[i]);
            if (bdump[i] > 0x20 && bdump[i] < 0x7F)
                bascii[ i ] = bdump[ i ];
            else
                bascii[ i ] = ' ';    /* Non-printable bytes shown as spaces */
        }
        bascii[i] = 0;
        (void) __kmp_printf_no_lock("%-48s   %s\n", bhex, bascii);
        bdump += l;
        bdlen -= l;
        /* Count following 16-byte lines identical to the one just printed. */
        while ((bdlen > 16) && (memcmp((char *) (bdump - 16),
                                       (char *) bdump, 16) == 0)) {
            dupes++;
            bdump += 16;
            bdlen -= 16;
        }
        if (dupes > 1) {
            (void) __kmp_printf_no_lock(
                "     (%d lines [%d bytes] identical to above line skipped)\n",
                dupes, dupes * 16);
        } else if (dupes == 1) {
            /* A single duplicate line is printed rather than summarized. */
            bdump -= 16;
            bdlen += 16;
        }
    }
}
01237 
01238 /*  BPOOLD  --  Dump a buffer pool.  The buffer headers are always listed.
01239                 If DUMPALLOC is nonzero, the contents of allocated buffers
01240                 are  dumped.   If  DUMPFREE  is  nonzero,  free blocks are
01241                 dumped as well.  If FreeWipe  checking  is  enabled,  free
01242                 blocks  which  have  been clobbered will always be dumped. */
01243 
static void
bpoold(  kmp_info_t *th, void *buf, int dumpalloc, int dumpfree)
{
    bfhead_t *b = BFH( (char*)buf - sizeof(bhead_t));

    /* Walk the pool buffer by buffer until the ESent end sentinel. */
    while (b->bh.bb.bsize != ESent) {
        bufsize bs = b->bh.bb.bsize;

        if (bs < 0) {
            /* Negative size: an allocated buffer. */
            bs = -bs;
            (void) __kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n", (long) bs);
            if (dumpalloc) {
                bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
            }
        } else {
            /* Positive size: a free buffer; verify its free-list links. */
            char *lerr = "";

            KMP_DEBUG_ASSERT(bs > 0);
            if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
                lerr = "  (Bad free list links)";
            }
            (void) __kmp_printf_no_lock("Free block:       size %6ld bytes.%s\n",
                (long) bs, lerr);
#ifdef FreeWipe
            /* Free space is wiped with 0x55 at release; any other pattern
               means a write-after-free, so always dump such a block. */
            lerr = ((char *) b) + sizeof(bfhead_t);
            if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
                (memcmp(lerr, lerr + 1,
                  (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
                (void) __kmp_printf_no_lock(
                    "(Contents of above free block have been overstored.)\n");
                bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
            } else
#endif
            if (dumpfree) {
                bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
            }
        }
        b = BFH(((char *) b) + bs);   /* Advance to the next buffer header */
    }
}
01284 
01285 /*  BPOOLV  --  Validate a buffer pool. */
01286 
static int
bpoolv(  kmp_info_t *th, void *buf )
{
    bfhead_t *b = BFH(buf);

    /* Walk the pool, validating every buffer until the ESent sentinel.
       Returns 1 if the pool is consistent, 0 otherwise. */
    while (b->bh.bb.bsize != ESent) {
        bufsize bs = b->bh.bb.bsize;

        if (bs < 0) {
            /* Allocated buffer: nothing further to validate. */
            bs = -bs;
        } else {
#ifdef FreeWipe
            char *lerr = "";
#endif

            KMP_DEBUG_ASSERT(bs > 0);
            if (bs <= 0) {
                return 0;
            }
            /* Free buffer: its free-list links must be self-consistent. */
            if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
                (void) __kmp_printf_no_lock("Free block: size %6ld bytes.  (Bad free list links)\n",
                     (long) bs);
                KMP_DEBUG_ASSERT(0);
                return 0;
            }
#ifdef FreeWipe
            /* Free space must still hold the 0x55 wipe pattern written
               at release time. */
            lerr = ((char *) b) + sizeof(bfhead_t);
            if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
                (memcmp(lerr, lerr + 1,
                  (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
                (void) __kmp_printf_no_lock(
                    "(Contents of above free block have been overstored.)\n");
                bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
                KMP_DEBUG_ASSERT(0);
                return 0;
            }
#endif /* FreeWipe */
        }
        b = BFH(((char *) b) + bs);   /* Advance to the next buffer header */
    }
    return 1;
}
01329 
01330 #endif /* KMP_DEBUG */
01331 
01332 /* ------------------------------------------------------------------------ */
01333 
01334 void
01335 __kmp_initialize_bget( kmp_info_t *th )
01336 {
01337     KMP_DEBUG_ASSERT( SizeQuant >= sizeof( void * ) && (th != 0) );
01338 
01339     set_thr_data( th );
01340 
01341     bectl( th, (bget_compact_t) 0, (bget_acquire_t) malloc, (bget_release_t) free,
01342            (bufsize) __kmp_malloc_pool_incr );
01343 }
01344 
void
__kmp_finalize_bget( kmp_info_t *th )
{
    thr_data_t *thr;
    bfhead_t *b;                      /* Last pool block, if one is still resident */

    KMP_DEBUG_ASSERT( th != 0 );

#if BufStats
    thr = (thr_data_t *) th->th.th_local.bget_data;
    KMP_DEBUG_ASSERT( thr != NULL );
    b = thr->last_pool;

    /*  If  a  block-release function is defined, and this free buffer
        constitutes the entire block, release it.  Note that  pool_len
        is  defined  in  such a way that the test will fail unless all
        pool blocks are the same size.  */

    /* Deallocate the last pool if one exists because we no longer do it in brel() */
    if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
        b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
    {
        /* The block must be a single free buffer bounded by the end sentinel. */
        KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
        KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
        KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);

        /*  Unlink the buffer from the free list  */
        __kmp_bget_remove_from_freelist( b );

        KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );

        (*thr->relfcn)(b);
        thr->numprel++;               /* Nr of expansion block releases */
        thr->numpblk--;               /* Total number of blocks */
        KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
    }
#endif /* BufStats */

    /* Deallocate bget_data */
    if ( th->th.th_local.bget_data != NULL ) {
        __kmp_free( th->th.th_local.bget_data );
        th->th.th_local.bget_data = NULL;
    }; // if
}
01389 
01390 void
01391 kmpc_set_poolsize( size_t size )
01392 {
01393     bectl( __kmp_get_thread(), (bget_compact_t) 0, (bget_acquire_t) malloc,
01394            (bget_release_t) free, (bufsize) size );
01395 }
01396 
01397 size_t
01398 kmpc_get_poolsize( void )
01399 {
01400     thr_data_t *p;
01401 
01402     p = get_thr_data( __kmp_get_thread() );
01403 
01404     return p->exp_incr;
01405 }
01406 
01407 void
01408 kmpc_set_poolmode( int mode )
01409 {
01410     thr_data_t *p;
01411 
01412     if (mode == bget_mode_fifo || mode == bget_mode_lifo || mode == bget_mode_best) {
01413     p = get_thr_data( __kmp_get_thread() );
01414     p->mode = (bget_mode_t) mode;
01415     }
01416 }
01417 
01418 int
01419 kmpc_get_poolmode( void )
01420 {
01421     thr_data_t *p;
01422 
01423     p = get_thr_data( __kmp_get_thread() );
01424 
01425     return p->mode;
01426 }
01427 
01428 void
01429 kmpc_get_poolstat( size_t *maxmem, size_t *allmem )
01430 {
01431     kmp_info_t *th = __kmp_get_thread();
01432     bufsize a, b;
01433 
01434     __kmp_bget_dequeue( th );         /* Release any queued buffers */
01435 
01436     bcheck( th, &a, &b );
01437 
01438     *maxmem = a;
01439     *allmem = b;
01440 }
01441 
01442 void
01443 kmpc_poolprint( void )
01444 {
01445     kmp_info_t *th = __kmp_get_thread();
01446 
01447     __kmp_bget_dequeue( th );         /* Release any queued buffers */
01448 
01449     bfreed( th );
01450 }
01451 
01452 #endif // #if KMP_USE_BGET
01453 
01454 /* ------------------------------------------------------------------------ */
01455 
01456 void *
01457 kmpc_malloc( size_t size )
01458 {
01459     void * ptr;
01460         ptr = bget( __kmp_entry_thread(), (bufsize) size );
01461 
01462     return ptr;
01463 }
01464 
01465 void *
01466 kmpc_calloc( size_t nelem, size_t elsize )
01467 {
01468     void * ptr;
01469         ptr = bgetz( __kmp_entry_thread(), (bufsize) (nelem * elsize) );
01470 
01471     return ptr;
01472 }
01473 
01474 void *
01475 kmpc_realloc( void * ptr, size_t size )
01476 {
01477     void * result = NULL;
01478 
01479         if ( ptr == NULL ) {
01480             // If pointer is NULL, realloc behaves like malloc.
01481             result = bget( __kmp_entry_thread(), (bufsize) size );
01482         } else if ( size == 0 ) {
01483             // If size is 0, realloc behaves like free.
01484             // The thread must be registered by the call to kmpc_malloc() or kmpc_calloc() before.
01485             // So it should be safe to call __kmp_get_thread(), not __kmp_entry_thread().
01486             brel( __kmp_get_thread(), ptr );
01487         } else {
01488             result = bgetr( __kmp_entry_thread(), ptr, (bufsize) size );
01489         }; // if
01490 
01491     return result;
01492 }
01493 
01494 /* NOTE: the library must have already been initialized by a previous allocate */
01495 
01496 void
01497 kmpc_free( void * ptr )
01498 {
01499     if ( ! __kmp_init_serial ) {
01500         return;
01501     }; // if
01502     if ( ptr != NULL ) {
01503             kmp_info_t *th = __kmp_get_thread();
01504             __kmp_bget_dequeue( th );         /* Release any queued buffers */
01505             brel( th, ptr );
01506     };
01507 }
01508 
01509 
01510 /* ------------------------------------------------------------------------ */
01511 
01512 void *
01513 ___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL )
01514 {
01515     void * ptr;
01516     KE_TRACE( 30, (
01517         "-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n",
01518         th,
01519         (int) size
01520         KMP_SRC_LOC_PARM
01521     ) );
01522         ptr = bget( th, (bufsize) size );
01523     KE_TRACE( 30, ( "<- __kmp_thread_malloc() returns %p\n", ptr ) );
01524     return ptr;
01525 }
01526 
01527 void *
01528 ___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL )
01529 {
01530     void * ptr;
01531     KE_TRACE( 30, (
01532         "-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n",
01533         th,
01534         (int) nelem,
01535         (int) elsize
01536         KMP_SRC_LOC_PARM
01537     ) );
01538         ptr = bgetz( th, (bufsize) (nelem * elsize) );
01539     KE_TRACE( 30, ( "<- __kmp_thread_calloc() returns %p\n", ptr ) );
01540     return ptr;
01541 }
01542 
01543 void *
01544 ___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL )
01545 {
01546     KE_TRACE( 30, (
01547         "-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n",
01548         th,
01549         ptr,
01550         (int) size
01551         KMP_SRC_LOC_PARM
01552     ) );
01553         ptr = bgetr( th, ptr, (bufsize) size );
01554     KE_TRACE( 30, ( "<- __kmp_thread_realloc() returns %p\n", ptr ) );
01555     return ptr;
01556 }
01557 
01558 void
01559 ___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL )
01560 {
01561     KE_TRACE( 30, (
01562         "-> __kmp_thread_free( %p, %p ) called from %s:%d\n",
01563         th,
01564         ptr
01565         KMP_SRC_LOC_PARM
01566     ) );
01567     if ( ptr != NULL ) {
01568             __kmp_bget_dequeue( th );         /* Release any queued buffers */
01569             brel( th, ptr );
01570     }
01571     KE_TRACE( 30, ( "<- __kmp_thread_free()\n" ) );
01572 }
01573 
01574 /* ------------------------------------------------------------------------ */
01575 /* ------------------------------------------------------------------------ */
/*
    If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes memory leaks, but it
    may be useful for debugging memory corruptions, use of freed pointers, etc.
*/
01580 /* #define LEAK_MEMORY */
01581 
struct kmp_mem_descr {      // Memory block descriptor, stored by ___kmp_allocate_align()
                            // immediately before the aligned block it describes.
    void * ptr_allocated;   // Pointer returned by malloc(), subject for free().
    size_t size_allocated;  // Size of allocated memory block.
    void * ptr_aligned;     // Pointer to aligned memory, to be used by client code.
    size_t size_aligned;    // Size of aligned memory block.
};
typedef struct kmp_mem_descr kmp_mem_descr_t;
01589 
01590 /*
01591     Allocate memory on requested boundary, fill allocated memory with 0x00.
01592     NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
01593     Must use __kmp_free when freeing memory allocated by this routine!
01594  */
static
void *
___kmp_allocate_align( size_t size, size_t alignment KMP_SRC_LOC_DECL )
{
    /*
            __kmp_allocate() allocates (by call to malloc()) bigger memory block than requested to
        return properly aligned pointer. Original pointer returned by malloc() and size of allocated
        block is saved in descriptor just before the aligned pointer. This information used by
        __kmp_free() -- it has to pass to free() original pointer, not aligned one.

            +---------+------------+-----------------------------------+---------+
            | padding | descriptor |           aligned block           | padding |
            +---------+------------+-----------------------------------+---------+
            ^                      ^
            |                      |
            |                      +- Aligned pointer returned to caller
            +- Pointer returned by malloc()

        Aligned block is filled with zeros, paddings are filled with 0xEF.
    */

    kmp_mem_descr_t  descr;
    kmp_uintptr_t    addr_allocated;        // Address returned by malloc().
    kmp_uintptr_t    addr_aligned;          // Aligned address to return to caller.
    kmp_uintptr_t    addr_descr;            // Address of memory block descriptor.

    KE_TRACE( 25, (
        "-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
        (int) size,
        (int) alignment
        KMP_SRC_LOC_PARM
    ) );

    KMP_DEBUG_ASSERT( alignment < 32 * 1024 ); // Alignment should not be too large.
        // NOTE(review): alignment is presumably a power of 2 -- the
        // rounding mask below requires it; confirm at call sites.
    KMP_DEBUG_ASSERT( sizeof( void * ) <= sizeof( kmp_uintptr_t ) );
        // Make sure kmp_uintptr_t is enough to store addresses.

    descr.size_aligned = size;
    descr.size_allocated = descr.size_aligned + sizeof( kmp_mem_descr_t ) + alignment;

    descr.ptr_allocated = malloc_src_loc( descr.size_allocated KMP_SRC_LOC_PARM );
    KE_TRACE( 10, (
        "   malloc( %d ) returned %p\n",
        (int) descr.size_allocated,
        descr.ptr_allocated
    ) );
    if ( descr.ptr_allocated == NULL ) {
        KMP_FATAL( OutOfHeapMemory );   // Never return NULL; abort on allocation failure.
    };

    // Round up past the descriptor to the next multiple of alignment.
    addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
    addr_aligned =
        ( addr_allocated + sizeof( kmp_mem_descr_t ) + alignment )
        & ~ ( alignment - 1 );
    addr_descr = addr_aligned - sizeof( kmp_mem_descr_t );

    descr.ptr_aligned = (void *) addr_aligned;

    KE_TRACE( 26, (
        "   ___kmp_allocate_align: "
            "ptr_allocated=%p, size_allocated=%d, "
            "ptr_aligned=%p, size_aligned=%d\n",
        descr.ptr_allocated,
        (int) descr.size_allocated,
        descr.ptr_aligned,
        (int) descr.size_aligned
    ) );

    // Descriptor and aligned block must both lie inside the allocation,
    // and the returned pointer must honor the requested alignment.
    KMP_DEBUG_ASSERT( addr_allocated <= addr_descr );
    KMP_DEBUG_ASSERT( addr_descr + sizeof( kmp_mem_descr_t ) == addr_aligned );
    KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
    KMP_DEBUG_ASSERT( addr_aligned % alignment == 0 );

    #ifdef KMP_DEBUG
        memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
            // Fill allocated memory block with 0xEF.
    #endif
    memset( descr.ptr_aligned, 0x00, descr.size_aligned );
        // Fill the aligned memory block (which is intended for using by caller) with 0x00. Do not
        // put this filling under KMP_DEBUG condition! Many callers expect zeroed memory. (Padding
        // bytes remain filled with 0xEF in debugging library.)
    * ( (kmp_mem_descr_t *) addr_descr ) = descr;   // Save descriptor for __kmp_free().

    KMP_MB();

    KE_TRACE( 25, ( "<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned ) );
    return descr.ptr_aligned;

} // func ___kmp_allocate_align
01684 
01685 
01686 /*
01687     Allocate memory on cache line boundary, fill allocated memory with 0x00.
01688     Do not call this func directly! Use __kmp_allocate macro instead.
01689     NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
01690     Must use __kmp_free when freeing memory allocated by this routine!
01691  */
01692 void *
01693 ___kmp_allocate( size_t size KMP_SRC_LOC_DECL )
01694 {
01695 
01696     void * ptr;
01697     KE_TRACE( 25, ( "-> __kmp_allocate( %d ) called from %s:%d\n", (int) size KMP_SRC_LOC_PARM ) );
01698         ptr = ___kmp_allocate_align( size, __kmp_align_alloc KMP_SRC_LOC_PARM );
01699     KE_TRACE( 25, ( "<- __kmp_allocate() returns %p\n", ptr ) );
01700     return ptr;
01701 
01702 } // func ___kmp_allocate
01703 
#if (BUILD_MEMORY==FIRST_TOUCH)
/*
    Allocate a page-aligned block and perform the "first touch" from the
    calling (owner) thread so the OS places the pages on the owner's NUMA node.
    The original (unaligned) pointer is stashed just before the returned
    address; NOTE(review): no matching free routine is visible in this chunk --
    presumably the caller recovers the stored pointer via KMP_PTR_SKIP. Confirm.
*/
void *
__kmp_ft_page_allocate(size_t size)
{
  void *adr, *aadr;
#if KMP_OS_LINUX
  /* TODO: Use this function to get page size everywhere */
  int page_size = getpagesize();
#else
  /* TODO: Find windows function to get page size and use it everywhere */
  int page_size = PAGE_SIZE;
#endif /* KMP_OS_LINUX */

  /* Over-allocate: enough slack to page-align the result plus KMP_PTR_SKIP
     bytes to store the original pointer in front of the returned address. */
  adr = (void *) __kmp_thread_malloc( __kmp_get_thread(),
                                    size + page_size + KMP_PTR_SKIP);
  if ( adr == 0 )
    KMP_FATAL( OutOfHeapMemory );

  /* check to see if adr is on a page boundary. */
  if ( ( (kmp_uintptr_t) adr & (page_size - 1)) == 0)
    /* nothing to do if adr is already on a page boundary. */
    aadr = adr;
  else
    /* else set aadr to the first page boundary in the allocated memory. */
    aadr = (void *) ( ( (kmp_uintptr_t) adr + page_size) & ~(page_size - 1) );

  /* the first touch by the owner thread. */
  *((void**)aadr) = adr;

  /* skip the memory space used for storing adr above. */
  return (void*)((char*)aadr + KMP_PTR_SKIP);
}
#endif
01737 
01738 /*
01739     Allocate memory on page boundary, fill allocated memory with 0x00.
01740     Does not call this func directly! Use __kmp_page_allocate macro instead.
01741     NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
01742     Must use __kmp_free when freeing memory allocated by this routine!
01743  */
01744 void *
01745 ___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL )
01746 {
01747     int    page_size = 8 * 1024;
01748     void * ptr;
01749 
01750     KE_TRACE( 25, (
01751         "-> __kmp_page_allocate( %d ) called from %s:%d\n",
01752         (int) size
01753         KMP_SRC_LOC_PARM
01754     ) );
01755         ptr = ___kmp_allocate_align( size, page_size KMP_SRC_LOC_PARM );
01756     KE_TRACE( 25, ( "<- __kmp_page_allocate( %d ) returns %p\n", (int) size, ptr ) );
01757     return ptr;
01758 } // ___kmp_page_allocate
01759 
01760 /*
01761     Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
01762     In debug mode, fill the memory block with 0xEF before call to free().
01763 */
01764 void
01765 ___kmp_free( void * ptr KMP_SRC_LOC_DECL )
01766 {
01767 
01768         kmp_mem_descr_t descr;
01769         kmp_uintptr_t   addr_allocated;        // Address returned by malloc().
01770         kmp_uintptr_t   addr_aligned;          // Aligned address passed by caller.
01771 
01772         KE_TRACE( 25, ( "-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM ) );
01773         KMP_ASSERT( ptr != NULL );
01774 
01775         descr = * ( kmp_mem_descr_t *) ( (kmp_uintptr_t) ptr - sizeof( kmp_mem_descr_t ) );
01776 
01777         KE_TRACE( 26, ( "   __kmp_free:     "
01778                         "ptr_allocated=%p, size_allocated=%d, "
01779                         "ptr_aligned=%p, size_aligned=%d\n",
01780                         descr.ptr_allocated, (int) descr.size_allocated,
01781                         descr.ptr_aligned, (int) descr.size_aligned ));
01782 
01783         addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
01784         addr_aligned   = (kmp_uintptr_t) descr.ptr_aligned;
01785 
01786         KMP_DEBUG_ASSERT( addr_aligned % CACHE_LINE == 0 );
01787         KMP_DEBUG_ASSERT( descr.ptr_aligned == ptr );
01788         KMP_DEBUG_ASSERT( addr_allocated + sizeof( kmp_mem_descr_t ) <= addr_aligned );
01789         KMP_DEBUG_ASSERT( descr.size_aligned < descr.size_allocated );
01790         KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
01791 
01792         #ifdef KMP_DEBUG
01793             memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
01794                 // Fill memory block with 0xEF, it helps catch using freed memory.
01795         #endif
01796 
01797         #ifndef LEAK_MEMORY
01798             KE_TRACE( 10, ( "   free( %p )\n", descr.ptr_allocated ) );
01799             free_src_loc( descr.ptr_allocated KMP_SRC_LOC_PARM );
01800         #endif
01801 
01802     KMP_MB();
01803 
01804     KE_TRACE( 25, ( "<- __kmp_free() returns\n" ) );
01805 
01806 } // func ___kmp_free
01807 
01808 /* ------------------------------------------------------------------------ */
01809 /* ------------------------------------------------------------------------ */
01810 
#if USE_FAST_MEMORY == 3
// Allocate fast memory by first scanning the thread's free lists
// If a chunk the right size exists, grab it off the free list.
// Otherwise allocate normally using kmp_thread_malloc.

// AC: How to choose the limit? Just get 16 for now...
// Max number of blocks a thread may accumulate on another thread's "other"
// list before the whole chain is returned to the owning thread (see
// ___kmp_fast_free below).
static int const __kmp_free_list_limit = 16;

// Always use 128 bytes for determining buckets for caching memory blocks
#define DCACHE_LINE  128
01821 
01822 void *
01823 ___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL )
01824 {
01825     void            * ptr;
01826     int               num_lines;
01827     int               idx;
01828     int               index;
01829     void            * alloc_ptr;
01830     size_t            alloc_size;
01831     kmp_mem_descr_t * descr;
01832 
01833     KE_TRACE( 25, ( "-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
01834       __kmp_gtid_from_thread(this_thr), (int) size KMP_SRC_LOC_PARM ) );
01835 
01836     num_lines = ( size + DCACHE_LINE - 1 ) / DCACHE_LINE;
01837     idx = num_lines - 1;
01838     KMP_DEBUG_ASSERT( idx >= 0 );
01839     if ( idx < 2 ) {
01840         index = 0;       // idx is [ 0, 1 ], use first free list
01841         num_lines = 2;   // 1, 2 cache lines or less than cache line
01842     } else if ( ( idx >>= 2 ) == 0 ) {
01843         index = 1;       // idx is [ 2, 3 ], use second free list
01844         num_lines = 4;   // 3, 4 cache lines
01845     } else if ( ( idx >>= 2 ) == 0 ) {
01846         index = 2;       // idx is [ 4, 15 ], use third free list
01847         num_lines = 16;  // 5, 6, ..., 16 cache lines
01848     } else if ( ( idx >>= 2 ) == 0 ) {
01849         index = 3;       // idx is [ 16, 63 ], use fourth free list
01850         num_lines = 64;  // 17, 18, ..., 64 cache lines
01851     } else {
01852         goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
01853     }
01854 
01855     ptr = this_thr->th.th_free_lists[index].th_free_list_self;
01856     if ( ptr != NULL ) {
01857         // pop the head of no-sync free list
01858         this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
01859         KMP_DEBUG_ASSERT( this_thr ==
01860             ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
01861         goto end;
01862     };
01863     ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
01864     if ( ptr != NULL ) {
01865         // no-sync free list is empty, use sync free list (filled in by other threads only)
01866         // pop the head of the sync free list, push NULL instead
01867         while ( ! KMP_COMPARE_AND_STORE_PTR(
01868             &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, NULL ) )
01869         {
01870             KMP_CPU_PAUSE();
01871             ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
01872         }
01873         // push the rest of chain into no-sync free list (can be NULL if there was the only block)
01874         this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
01875         KMP_DEBUG_ASSERT( this_thr ==
01876             ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
01877         goto end;
01878     }
01879 
01880     alloc_call:
01881     // haven't found block in the free lists, thus allocate it
01882     size = num_lines * DCACHE_LINE;
01883 
01884     alloc_size = size + sizeof( kmp_mem_descr_t ) + DCACHE_LINE;
01885     KE_TRACE( 25, ( "__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with alloc_size %d\n",
01886                    __kmp_gtid_from_thread( this_thr ), alloc_size ) );
01887     alloc_ptr = bget( this_thr, (bufsize) alloc_size );
01888 
01889     // align ptr to DCACHE_LINE
01890     ptr = (void *)(( ((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) + DCACHE_LINE ) & ~( DCACHE_LINE - 1 ));
01891     descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
01892 
01893     descr->ptr_allocated = alloc_ptr;        // remember allocated pointer
01894     // we don't need size_allocated
01895     descr->ptr_aligned   = (void *)this_thr; // remember allocating thread
01896                                              // (it is already saved in bget buffer,
01897                                              // but we may want to use another allocator in future)
01898     descr->size_aligned  = size;
01899 
01900     end:
01901     KE_TRACE( 25, ( "<- __kmp_fast_allocate( T#%d ) returns %p\n",
01902                     __kmp_gtid_from_thread( this_thr ), ptr ) );
01903     return ptr;
01904 } // func __kmp_fast_allocate
01905 
01906 // Free fast memory and place it on the thread's free list if it is of
01907 // the correct size.
01908 void
01909 ___kmp_fast_free( kmp_info_t *this_thr, void * ptr KMP_SRC_LOC_DECL )
01910 {
01911     kmp_mem_descr_t * descr;
01912     kmp_info_t      * alloc_thr;
01913     size_t            size;
01914     size_t            idx;
01915     int               index;
01916 
01917     KE_TRACE( 25, ( "-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
01918       __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM ) );
01919     KMP_ASSERT( ptr != NULL );
01920 
01921     descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
01922 
01923     KE_TRACE(26, ("   __kmp_fast_free:     size_aligned=%d\n",
01924                   (int) descr->size_aligned ) );
01925 
01926     size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
01927 
01928     idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block
01929     if ( idx == size ) {
01930         index = 0;       // 2 cache lines
01931     } else if ( ( idx <<= 1 ) == size ) {
01932         index = 1;       // 4 cache lines
01933     } else if ( ( idx <<= 2 ) == size ) {
01934         index = 2;       // 16 cache lines
01935     } else if ( ( idx <<= 2 ) == size ) {
01936         index = 3;       // 64 cache lines
01937     } else {
01938         KMP_DEBUG_ASSERT( size > DCACHE_LINE * 64 );
01939         goto free_call;  // 65 or more cache lines ( > 8KB )
01940     }
01941 
01942     alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
01943     if ( alloc_thr == this_thr ) {
01944         // push block to self no-sync free list, linking previous head (LIFO)
01945         *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
01946         this_thr->th.th_free_lists[index].th_free_list_self = ptr;
01947     } else {
01948         void * head = this_thr->th.th_free_lists[index].th_free_list_other;
01949         if ( head == NULL ) {
01950             // Create new free list
01951             this_thr->th.th_free_lists[index].th_free_list_other = ptr;
01952             *((void **)ptr) = NULL;             // mark the tail of the list
01953             descr->size_allocated = (size_t)1;  // head of the list keeps its length
01954         } else {
01955             // need to check existed "other" list's owner thread and size of queue
01956             kmp_mem_descr_t * dsc  = (kmp_mem_descr_t *)( (char*)head - sizeof(kmp_mem_descr_t) );
01957             kmp_info_t      * q_th = (kmp_info_t *)(dsc->ptr_aligned); // allocating thread, same for all queue nodes
01958             size_t            q_sz = dsc->size_allocated + 1;          // new size in case we add current task
01959             if ( q_th == alloc_thr && q_sz <= __kmp_free_list_limit ) {
01960                 // we can add current task to "other" list, no sync needed
01961                 *((void **)ptr) = head;
01962                 descr->size_allocated = q_sz;
01963                 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
01964             } else {
01965                 // either queue blocks owner is changing or size limit exceeded
01966                 // return old queue to allocating thread (q_th) synchroneously,
01967                 // and start new list for alloc_thr's tasks
01968                 void * old_ptr;
01969                 void * tail = head;
01970                 void * next = *((void **)head);
01971                 while ( next != NULL ) {
01972                     KMP_DEBUG_ASSERT(
01973                         // queue size should decrease by 1 each step through the list
01974                         ((kmp_mem_descr_t*)((char*)next - sizeof(kmp_mem_descr_t)))->size_allocated + 1 ==
01975                         ((kmp_mem_descr_t*)((char*)tail - sizeof(kmp_mem_descr_t)))->size_allocated );
01976                     tail = next;   // remember tail node
01977                     next = *((void **)next);
01978                 }
01979 
01980                 KMP_DEBUG_ASSERT( q_th != NULL );
01981                 // push block to owner's sync free list
01982                 old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
01983                 /* the next pointer must be set before setting free_list to ptr to avoid
01984                    exposing a broken list to other threads, even for an instant. */
01985                 *((void **)tail) = old_ptr;
01986 
01987                 while ( ! KMP_COMPARE_AND_STORE_PTR(
01988                     &q_th->th.th_free_lists[index].th_free_list_sync,
01989                     old_ptr,
01990                     head ) )
01991                 {
01992                     KMP_CPU_PAUSE();
01993                     old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
01994                     *((void **)tail) = old_ptr;
01995                 }
01996 
01997                 // start new list of not-selt tasks
01998                 this_thr->th.th_free_lists[index].th_free_list_other = ptr;
01999                 *((void **)ptr) = NULL;
02000                 descr->size_allocated = (size_t)1;  // head of queue keeps its length
02001             }
02002         }
02003     }
02004     goto end;
02005 
02006     free_call:
02007     KE_TRACE(25, ( "__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
02008                    __kmp_gtid_from_thread( this_thr), size ) );
02009     __kmp_bget_dequeue( this_thr );         /* Release any queued buffers */
02010     brel( this_thr, descr->ptr_allocated );
02011 
02012     end:
02013     KE_TRACE( 25, ( "<- __kmp_fast_free() returns\n" ) );
02014 
02015 } // func __kmp_fast_free
02016 
02017 
02018 // Initialize the thread free lists related to fast memory
02019 // Only do this when a thread is initially created.
02020 void
02021 __kmp_initialize_fast_memory( kmp_info_t *this_thr )
02022 {
02023     KE_TRACE(10, ( "__kmp_initialize_fast_memory: Called from th %p\n", this_thr ) );
02024 
02025     memset ( this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof( kmp_free_list_t ) );
02026 }
02027 
02028 // Free the memory in the thread free lists related to fast memory
02029 // Only do this when a thread is being reaped (destroyed).
// Release every expansion block still referenced by the reaped thread's BGET
// free lists.  Walks the allocator's internal structures directly, so the
// chained-pointer trick below is tightly coupled to bget's buffer layout.
void
__kmp_free_fast_memory( kmp_info_t *th )
{
    // Suppose we use BGET underlying allocator, walk through its structures...
    int          bin;
    thr_data_t * thr = get_thr_data( th );
    void      ** lst = NULL;   // chain of blocks to release, linked through their first word

    KE_TRACE(5, ( "__kmp_free_fast_memory: Called T#%d\n",
                   __kmp_gtid_from_thread( th ) ) );

    __kmp_bget_dequeue( th );         // Release any queued buffers

    // Dig through free lists and extract all allocated blocks
    for ( bin = 0; bin < MAX_BGET_BINS; ++bin ) {
        bfhead_t * b = thr->freelist[ bin ].ql.flink;
        while ( b != &thr->freelist[ bin ] ) {
            // NOTE(review): low bit of bthr appears to tag directly-allocated
            // (expansion) buffers -- confirm against the bget code above.
            if ( (kmp_uintptr_t)b->bh.bb.bthr & 1 ) {   // if the buffer is an allocated address?
                *((void**)b) = lst;   // link the list (override bthr, but keep flink yet)
                lst = (void**)b;      // push b into lst
            }
            b = b->ql.flink;          // get next buffer
        }
    }
    // Second pass: actually release the collected blocks.  Done separately so
    // freeing does not invalidate the freelist links still being traversed.
    while ( lst != NULL ) {
        void * next = *lst;
        KE_TRACE(10, ( "__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
                      lst, next, th, __kmp_gtid_from_thread( th ) ) );
        (*thr->relfcn)(lst);          // release via the allocator's release callback
        #if BufStats
            // count blocks to prevent problems in __kmp_finalize_bget()
            thr->numprel++;       /* Nr of expansion block releases */
            thr->numpblk--;       /* Total number of blocks */
        #endif
        lst = (void**)next;
    }

    KE_TRACE(5, ( "__kmp_free_fast_memory: Freed T#%d\n",
                  __kmp_gtid_from_thread( th ) ) );
}
02070 
02071 #endif // USE_FAST_MEMORY

Generated on 25 Aug 2013 for libomp_oss by  doxygen 1.6.1