Intel® OpenMP* Runtime Library
 All Classes Functions Variables Typedefs Enumerations Enumerator Groups Pages
kmp_alloc.c
1 /*
2  * kmp_alloc.c -- private/shared dyanmic memory allocation and management
3  * $Revision: 42489 $
4  * $Date: 2013-07-08 11:00:09 -0500 (Mon, 08 Jul 2013) $
5  */
6 
7 /* <copyright>
8  Copyright (c) 1997-2013 Intel Corporation. All Rights Reserved.
9 
10  Redistribution and use in source and binary forms, with or without
11  modification, are permitted provided that the following conditions
12  are met:
13 
14  * Redistributions of source code must retain the above copyright
15  notice, this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright
17  notice, this list of conditions and the following disclaimer in the
18  documentation and/or other materials provided with the distribution.
19  * Neither the name of Intel Corporation nor the names of its
20  contributors may be used to endorse or promote products derived
21  from this software without specific prior written permission.
22 
23  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 </copyright> */
36 
37 #include "kmp.h"
38 #include "kmp_wrapper_malloc.h"
39 #include "kmp_io.h"
40 
41 // Disable bget when it is not used
42 #if KMP_USE_BGET
43 
44 /* Thread private buffer management code */
45 
46 typedef int (*bget_compact_t)(size_t, int);
47 typedef void *(*bget_acquire_t)(size_t);
48 typedef void (*bget_release_t)(void *);
49 
50 /* NOTE: bufsize must be a signed datatype */
51 
52 #if KMP_OS_WINDOWS
53 # if KMP_ARCH_X86
54  typedef kmp_int32 bufsize;
55 # else
56  typedef kmp_int64 bufsize;
57 # endif
58 #else
59  typedef ssize_t bufsize;
60 #endif
61 
62 /* The three modes of operation are, fifo search, lifo search, and best-fit */
63 
64 typedef enum bget_mode {
65  bget_mode_fifo = 0,
66  bget_mode_lifo = 1,
67  bget_mode_best = 2
68 } bget_mode_t;
69 
70 
71 static void bpool( kmp_info_t *th, void *buffer, bufsize len);
72 static void *bget( kmp_info_t *th, bufsize size);
73 static void *bgetz( kmp_info_t *th, bufsize size);
74 static void *bgetr( kmp_info_t *th, void *buffer, bufsize newsize);
75 static void brel( kmp_info_t *th, void *buf);
76 static void bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr );
77 
78 #ifdef KMP_DEBUG
79 static void bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel);
80 static void bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel);
81 static void bufdump( kmp_info_t *th, void *buf);
82 static void bpoold( kmp_info_t *th, void *pool, int dumpalloc, int dumpfree);
83 static int bpoolv( kmp_info_t *th, void *pool);
84 #endif
85 
86 /* BGET CONFIGURATION */
87  /* Buffer allocation size quantum:
88  all buffers allocated are a
89  multiple of this size. This
90  MUST be a power of two. */
91 
92  /* On IA-32 architecture with Linux* OS,
93  malloc() does not
94  ensure 16 byte alignmnent */
95 
96 #if KMP_ARCH_X86
97 
98 #define SizeQuant 8
99 #define AlignType double
100 
101 #else
102 
103 #define SizeQuant 16
104 #define AlignType _Quad
105 
106 #endif
107 
108 #define BufStats 1 /* Define this symbol to enable the
109  bstats() function which calculates
110  the total free space in the buffer
111  pool, the largest available
112  buffer, and the total space
113  currently allocated. */
114 
115 #ifdef KMP_DEBUG
116 
117 #define BufDump 1 /* Define this symbol to enable the
118  bpoold() function which dumps the
119  buffers in a buffer pool. */
120 
121 #define BufValid 1 /* Define this symbol to enable the
122  bpoolv() function for validating
123  a buffer pool. */
124 
125 #define DumpData 1 /* Define this symbol to enable the
126  bufdump() function which allows
127  dumping the contents of an allocated
128  or free buffer. */
129 #ifdef NOT_USED_NOW
130 
131 #define FreeWipe 1 /* Wipe free buffers to a guaranteed
132  pattern of garbage to trip up
133  miscreants who attempt to use
134  pointers into released buffers. */
135 
136 #define BestFit 1 /* Use a best fit algorithm when
137  searching for space for an
138  allocation request. This uses
139  memory more efficiently, but
140  allocation will be much slower. */
141 #endif /* NOT_USED_NOW */
142 #endif /* KMP_DEBUG */
143 
144 
145 static bufsize bget_bin_size[ ] = {
146  0,
147 // 1 << 6, /* .5 Cache line */
148  1 << 7, /* 1 Cache line, new */
149  1 << 8, /* 2 Cache lines */
150  1 << 9, /* 4 Cache lines, new */
151  1 << 10, /* 8 Cache lines */
152  1 << 11, /* 16 Cache lines, new */
153  1 << 12,
154  1 << 13, /* new */
155  1 << 14,
156  1 << 15, /* new */
157  1 << 16,
158  1 << 17,
159  1 << 18,
160  1 << 19,
161  1 << 20, /* 1MB */
162  1 << 21, /* 2MB */
163  1 << 22, /* 4MB */
164  1 << 23, /* 8MB */
165  1 << 24, /* 16MB */
166  1 << 25, /* 32MB */
167 };
168 
169 #define MAX_BGET_BINS (sizeof(bget_bin_size) / sizeof(bufsize))
170 
171 struct bfhead;
172 
173 /* Declare the interface, including the requested buffer size type,
174  bufsize. */
175 
176 /* Queue links */
177 
178 typedef struct qlinks {
179  struct bfhead *flink; /* Forward link */
180  struct bfhead *blink; /* Backward link */
181 } qlinks_t;
182 
183 /* Header in allocated and free buffers */
184 
185 typedef struct bhead2 {
186  kmp_info_t *bthr; /* The thread which owns the buffer pool */
187  bufsize prevfree; /* Relative link back to previous
188  free buffer in memory or 0 if
189  previous buffer is allocated. */
190  bufsize bsize; /* Buffer size: positive if free,
191  negative if allocated. */
192 } bhead2_t;
193 
194 /* Make sure the bhead structure is a multiple of SizeQuant in size. */
195 
196 typedef union bhead {
197  KMP_ALIGN( SizeQuant )
198  AlignType b_align;
199  char b_pad[ sizeof(bhead2_t) + (SizeQuant - (sizeof(bhead2_t) % SizeQuant)) ];
200  bhead2_t bb;
201 } bhead_t;
202 #define BH(p) ((bhead_t *) (p))
203 
204 /* Header in directly allocated buffers (by acqfcn) */
205 
206 typedef struct bdhead
207 {
208  bufsize tsize; /* Total size, including overhead */
209  bhead_t bh; /* Common header */
210 } bdhead_t;
211 #define BDH(p) ((bdhead_t *) (p))
212 
213 /* Header in free buffers */
214 
215 typedef struct bfhead {
216  bhead_t bh; /* Common allocated/free header */
217  qlinks_t ql; /* Links on free list */
218 } bfhead_t;
219 #define BFH(p) ((bfhead_t *) (p))
220 
221 typedef struct thr_data {
222  bfhead_t freelist[ MAX_BGET_BINS ];
223 #if BufStats
224  size_t totalloc; /* Total space currently allocated */
225  long numget, numrel; /* Number of bget() and brel() calls */
226  long numpblk; /* Number of pool blocks */
227  long numpget, numprel; /* Number of block gets and rels */
228  long numdget, numdrel; /* Number of direct gets and rels */
229 #endif /* BufStats */
230 
231  /* Automatic expansion block management functions */
232  bget_compact_t compfcn;
233  bget_acquire_t acqfcn;
234  bget_release_t relfcn;
235 
236  bget_mode_t mode; /* what allocation mode to use? */
237 
238  bufsize exp_incr; /* Expansion block size */
239  bufsize pool_len; /* 0: no bpool calls have been made
240  -1: not all pool blocks are
241  the same size
242  >0: (common) block size for all
243  bpool calls made so far
244  */
245  bfhead_t * last_pool; /* Last pool owned by this thread (delay dealocation) */
246 } thr_data_t;
247 
248 /* Minimum allocation quantum: */
249 
250 #define QLSize (sizeof(qlinks_t))
251 #define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize)
252 #define MaxSize (bufsize)( ~ ( ( (bufsize)( 1 ) << ( sizeof( bufsize ) * CHAR_BIT - 1 ) ) | ( SizeQuant - 1 ) ) )
253  // Maximun for the requested size.
254 
255 /* End sentinel: value placed in bsize field of dummy block delimiting
256  end of pool block. The most negative number which will fit in a
257  bufsize, defined in a way that the compiler will accept. */
258 
259 #define ESent ((bufsize) (-(((((bufsize)1)<<((int)sizeof(bufsize)*8-2))-1)*2)-2))
260 
261 /* ------------------------------------------------------------------------ */
262 
263 /* Thread Data management routines */
264 
265 static int
266 bget_get_bin( bufsize size )
267 {
268  // binary chop bins
269  int lo = 0, hi = MAX_BGET_BINS - 1;
270 
271  KMP_DEBUG_ASSERT( size > 0 );
272 
273  while ( (hi - lo) > 1 ) {
274  int mid = (lo + hi) >> 1;
275  if (size < bget_bin_size[ mid ])
276  hi = mid - 1;
277  else
278  lo = mid;
279  }
280 
281  KMP_DEBUG_ASSERT( (lo >= 0) && (lo < MAX_BGET_BINS) );
282 
283  return lo;
284 }
285 
286 static void
287 set_thr_data( kmp_info_t *th )
288 {
289  int i;
290  thr_data_t *data;
291 
292  data =
293  (thr_data_t *)(
294  ( ! th->th.th_local.bget_data ) ? __kmp_allocate( sizeof( *data ) ) : th->th.th_local.bget_data
295  );
296 
297  memset( data, '\0', sizeof( *data ) );
298 
299  for (i = 0; i < MAX_BGET_BINS; ++i) {
300  data->freelist[ i ].ql.flink = & data->freelist[ i ];
301  data->freelist[ i ].ql.blink = & data->freelist[ i ];
302  }
303 
304  th->th.th_local.bget_data = data;
305  th->th.th_local.bget_list = 0;
306 #if ! USE_CMP_XCHG_FOR_BGET
307 #ifdef USE_QUEUING_LOCK_FOR_BGET
308  __kmp_init_lock( & th->th.th_local.bget_lock );
309 #else
310  __kmp_init_bootstrap_lock( & th->th.th_local.bget_lock );
311 #endif /* USE_LOCK_FOR_BGET */
312 #endif /* ! USE_CMP_XCHG_FOR_BGET */
313 }
314 
315 static thr_data_t *
316 get_thr_data( kmp_info_t *th )
317 {
318  thr_data_t *data;
319 
320  data = (thr_data_t *) th->th.th_local.bget_data;
321 
322  KMP_DEBUG_ASSERT( data != 0 );
323 
324  return data;
325 }
326 
327 
328 #ifdef KMP_DEBUG
329 
330 static void
331 __kmp_bget_validate_queue( kmp_info_t *th )
332 {
333  /* NOTE: assume that the global_lock is held */
334 
335  void *p = (void *) th->th.th_local.bget_list;
336 
337  while (p != 0) {
338  bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));
339 
340  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
341  p = (void *) b->ql.flink;
342  }
343 }
344 
345 #endif
346 
347 /* Walk the free list and release the enqueued buffers */
348 
349 static void
350 __kmp_bget_dequeue( kmp_info_t *th )
351 {
352  void *p = TCR_SYNC_PTR(th->th.th_local.bget_list);
353 
354  if (p != 0) {
355  #if USE_CMP_XCHG_FOR_BGET
356  {
357  volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
358  while ( ! KMP_COMPARE_AND_STORE_PTR(
359  & th->th.th_local.bget_list, old_value, NULL ) )
360  {
361  KMP_CPU_PAUSE();
362  old_value = TCR_SYNC_PTR(th->th.th_local.bget_list);
363  }
364  p = (void *) old_value;
365  }
366  #else /* ! USE_CMP_XCHG_FOR_BGET */
367  #ifdef USE_QUEUING_LOCK_FOR_BGET
368  __kmp_acquire_lock( & th->th.th_local.bget_lock,
369  __kmp_gtid_from_thread(th) );
370  #else
371  __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
372  #endif /* USE_QUEUING_LOCK_FOR_BGET */
373 
374  p = (void *) th->th.th_local.bget_list;
375  th->th.th_local.bget_list = 0;
376 
377  #ifdef USE_QUEUING_LOCK_FOR_BGET
378  __kmp_release_lock( & th->th.th_local.bget_lock,
379  __kmp_gtid_from_thread(th) );
380  #else
381  __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
382  #endif
383  #endif /* USE_CMP_XCHG_FOR_BGET */
384 
385  /* Check again to make sure the list is not empty */
386 
387  while (p != 0) {
388  void *buf = p;
389  bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t));
390 
391  KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
392  KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
393  (kmp_uintptr_t)th ); // clear possible mark
394  KMP_DEBUG_ASSERT( b->ql.blink == 0 );
395 
396  p = (void *) b->ql.flink;
397 
398  brel( th, buf );
399  }
400  }
401 }
402 
403 /* Chain together the free buffers by using the thread owner field */
404 
405 static void
406 __kmp_bget_enqueue( kmp_info_t *th, void *buf
407 #ifdef USE_QUEUING_LOCK_FOR_BGET
408  , kmp_int32 rel_gtid
409 #endif
410  )
411 {
412  bfhead_t *b = BFH(((char *) buf) - sizeof(bhead_t));
413 
414  KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 );
415  KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) ==
416  (kmp_uintptr_t)th ); // clear possible mark
417 
418  b->ql.blink = 0;
419 
420  KC_TRACE( 10, ( "__kmp_bget_enqueue: moving buffer to T#%d list\n",
421  __kmp_gtid_from_thread( th ) ) );
422 
423 #if USE_CMP_XCHG_FOR_BGET
424  {
425  volatile void *old_value = TCR_PTR(th->th.th_local.bget_list);
426  /* the next pointer must be set before setting bget_list to buf to avoid
427  exposing a broken list to other threads, even for an instant. */
428  b->ql.flink = BFH( old_value );
429 
430 # ifdef USE_ITT
431  KMP_FSYNC_RELEASING(&th->th.th_local.bget_list);
432 # endif /* defined(USE_ITT) */
433 
434  while ( ! KMP_COMPARE_AND_STORE_PTR(
435  & th->th.th_local.bget_list, old_value, buf ) )
436  {
437  KMP_CPU_PAUSE();
438  old_value = TCR_PTR(th->th.th_local.bget_list);
439  /* the next pointer must be set before setting bget_list to buf to avoid
440  exposing a broken list to other threads, even for an instant. */
441  b->ql.flink = BFH( old_value );
442 # ifdef USE_ITT
443  KMP_FSYNC_RELEASING(&th->th.th_local.bget_list);
444 # endif /* defined(USE_ITT) */
445  }
446  }
447 #else /* ! USE_CMP_XCHG_FOR_BGET */
448 # ifdef USE_QUEUING_LOCK_FOR_BGET
449  __kmp_acquire_lock( & th->th.th_local.bget_lock, rel_gtid );
450 # else
451  __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock );
452  # endif
453 
454  b->ql.flink = BFH( th->th.th_local.bget_list );
455  th->th.th_local.bget_list = (void *) buf;
456 
457 # ifdef USE_QUEUING_LOCK_FOR_BGET
458  __kmp_release_lock( & th->th.th_local.bget_lock, rel_gtid );
459 # else
460  __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock );
461 # endif
462 #endif /* USE_CMP_XCHG_FOR_BGET */
463 }
464 
465 /* insert buffer back onto a new freelist */
466 
467 static void
468 __kmp_bget_insert_into_freelist( thr_data_t *thr, bfhead_t *b )
469 {
470  int bin;
471 
472  KMP_DEBUG_ASSERT( ((size_t)b ) % SizeQuant == 0 );
473  KMP_DEBUG_ASSERT( b->bh.bb.bsize % SizeQuant == 0 );
474 
475  bin = bget_get_bin( b->bh.bb.bsize );
476 
477  KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.blink->ql.flink == &thr->freelist[ bin ]);
478  KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.flink->ql.blink == &thr->freelist[ bin ]);
479 
480  b->ql.flink = &thr->freelist[ bin ];
481  b->ql.blink = thr->freelist[ bin ].ql.blink;
482 
483  thr->freelist[ bin ].ql.blink = b;
484  b->ql.blink->ql.flink = b;
485 }
486 
487 /* unlink the buffer from the old freelist */
488 
489 static void
490 __kmp_bget_remove_from_freelist( bfhead_t *b )
491 {
492  KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b);
493  KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b);
494 
495  b->ql.blink->ql.flink = b->ql.flink;
496  b->ql.flink->ql.blink = b->ql.blink;
497 }
498 
499 /* ------------------------------------------------------------------------ */
500 
501 /* GET STATS -- check info on free list */
502 
503 static void
504 bcheck( kmp_info_t *th, bufsize *max_free, bufsize *total_free )
505 {
506  thr_data_t *thr = get_thr_data( th );
507  int bin;
508 
509  *total_free = *max_free = 0;
510 
511  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
512  bfhead_t *b, *best;
513 
514  best = &thr->freelist[ bin ];
515  b = best->ql.flink;
516 
517  while (b != &thr->freelist[ bin ]) {
518  *total_free += (b->bh.bb.bsize - sizeof( bhead_t ));
519  if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize))
520  best = b;
521 
522  /* Link to next buffer */
523  b = b->ql.flink;
524  }
525 
526  if (*max_free < best->bh.bb.bsize)
527  *max_free = best->bh.bb.bsize;
528  }
529 
530  if (*max_free > sizeof( bhead_t ))
531  *max_free -= sizeof( bhead_t );
532 }
533 
534 /* ------------------------------------------------------------------------ */
535 
536 /* BGET -- Allocate a buffer. */
537 
538 static void *
539 bget( kmp_info_t *th, bufsize requested_size )
540 {
541  thr_data_t *thr = get_thr_data( th );
542  bufsize size = requested_size;
543  bfhead_t *b;
544  void *buf;
545  int compactseq = 0;
546  int use_blink = 0;
547 /* For BestFit */
548  bfhead_t *best;
549 
550  if ( size < 0 || size + sizeof( bhead_t ) > MaxSize ) {
551  return NULL;
552  }; // if
553 
554  __kmp_bget_dequeue( th ); /* Release any queued buffers */
555 
556  if (size < SizeQ) { /* Need at least room for the */
557  size = SizeQ; /* queue links. */
558  }
559  #if defined( SizeQuant ) && ( SizeQuant > 1 )
560  size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1));
561  #endif
562 
563  size += sizeof(bhead_t); /* Add overhead in allocated buffer
564  to size required. */
565  KMP_DEBUG_ASSERT( size >= 0 );
566  KMP_DEBUG_ASSERT( size % SizeQuant == 0 );
567 
568  use_blink = ( thr->mode == bget_mode_lifo );
569 
570  /* If a compact function was provided in the call to bectl(), wrap
571  a loop around the allocation process to allow compaction to
572  intervene in case we don't find a suitable buffer in the chain. */
573 
574  for (;;) {
575  int bin;
576 
577  for (bin = bget_get_bin( size ); bin < MAX_BGET_BINS; ++bin) {
578  /* Link to next buffer */
579  b = ( use_blink ? thr->freelist[ bin ].ql.blink : thr->freelist[ bin ].ql.flink );
580 
581  if (thr->mode == bget_mode_best) {
582  best = &thr->freelist[ bin ];
583 
584  /* Scan the free list searching for the first buffer big enough
585  to hold the requested size buffer. */
586 
587  while (b != &thr->freelist[ bin ]) {
588  if (b->bh.bb.bsize >= (bufsize) size) {
589  if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize)) {
590  best = b;
591  }
592  }
593 
594  /* Link to next buffer */
595  b = ( use_blink ? b->ql.blink : b->ql.flink );
596  }
597  b = best;
598  }
599 
600  while (b != &thr->freelist[ bin ]) {
601  if ((bufsize) b->bh.bb.bsize >= (bufsize) size) {
602 
603  /* Buffer is big enough to satisfy the request. Allocate it
604  to the caller. We must decide whether the buffer is large
605  enough to split into the part given to the caller and a
606  free buffer that remains on the free list, or whether the
607  entire buffer should be removed from the free list and
608  given to the caller in its entirety. We only split the
609  buffer if enough room remains for a header plus the minimum
610  quantum of allocation. */
611 
612  if ((b->bh.bb.bsize - (bufsize) size) > (SizeQ + (sizeof(bhead_t)))) {
613  bhead_t *ba, *bn;
614 
615  ba = BH(((char *) b) + (b->bh.bb.bsize - (bufsize) size));
616  bn = BH(((char *) ba) + size);
617 
618  KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize);
619 
620  /* Subtract size from length of free block. */
621  b->bh.bb.bsize -= (bufsize) size;
622 
623  /* Link allocated buffer to the previous free buffer. */
624  ba->bb.prevfree = b->bh.bb.bsize;
625 
626  /* Plug negative size into user buffer. */
627  ba->bb.bsize = -size;
628 
629  /* Mark this buffer as owned by this thread. */
630  TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it)
631  /* Mark buffer after this one not preceded by free block. */
632  bn->bb.prevfree = 0;
633 
634  /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
635  __kmp_bget_remove_from_freelist( b );
636  __kmp_bget_insert_into_freelist( thr, b );
637 #if BufStats
638  thr->totalloc += (size_t) size;
639  thr->numget++; /* Increment number of bget() calls */
640 #endif
641  buf = (void *) ((((char *) ba) + sizeof(bhead_t)));
642  KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
643  return buf;
644  } else {
645  bhead_t *ba;
646 
647  ba = BH(((char *) b) + b->bh.bb.bsize);
648 
649  KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize);
650 
651  /* The buffer isn't big enough to split. Give the whole
652  shebang to the caller and remove it from the free list. */
653 
654  __kmp_bget_remove_from_freelist( b );
655 #if BufStats
656  thr->totalloc += (size_t) b->bh.bb.bsize;
657  thr->numget++; /* Increment number of bget() calls */
658 #endif
659  /* Negate size to mark buffer allocated. */
660  b->bh.bb.bsize = -(b->bh.bb.bsize);
661 
662  /* Mark this buffer as owned by this thread. */
663  TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it)
664  /* Zero the back pointer in the next buffer in memory
665  to indicate that this buffer is allocated. */
666  ba->bb.prevfree = 0;
667 
668  /* Give user buffer starting at queue links. */
669  buf = (void *) &(b->ql);
670  KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
671  return buf;
672  }
673  }
674 
675  /* Link to next buffer */
676  b = ( use_blink ? b->ql.blink : b->ql.flink );
677  }
678  }
679 
680  /* We failed to find a buffer. If there's a compact function
681  defined, notify it of the size requested. If it returns
682  TRUE, try the allocation again. */
683 
684  if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) {
685  break;
686  }
687  }
688 
689  /* No buffer available with requested size free. */
690 
691  /* Don't give up yet -- look in the reserve supply. */
692 
693  if (thr->acqfcn != 0) {
694  if (size > (bufsize) (thr->exp_incr - sizeof(bhead_t))) {
695 
696  /* Request is too large to fit in a single expansion
697  block. Try to satisy it by a direct buffer acquisition. */
698 
699  bdhead_t *bdh;
700 
701  size += sizeof(bdhead_t) - sizeof(bhead_t);
702 
703  KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", (int) size ) );
704 
705  /* richryan */
706  bdh = BDH((*thr->acqfcn)((bufsize) size));
707  if (bdh != NULL) {
708 
709  /* Mark the buffer special by setting the size field
710  of its header to zero. */
711  bdh->bh.bb.bsize = 0;
712 
713  /* Mark this buffer as owned by this thread. */
714  TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated,
715  // because direct buffer never goes to free list
716  bdh->bh.bb.prevfree = 0;
717  bdh->tsize = size;
718 #if BufStats
719  thr->totalloc += (size_t) size;
720  thr->numget++; /* Increment number of bget() calls */
721  thr->numdget++; /* Direct bget() call count */
722 #endif
723  buf = (void *) (bdh + 1);
724  KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
725  return buf;
726  }
727 
728  } else {
729 
730  /* Try to obtain a new expansion block */
731 
732  void *newpool;
733 
734  KE_TRACE( 10, ("%%%%%% MALLOCB( %d )\n", (int) thr->exp_incr ) );
735 
736  /* richryan */
737  newpool = (*thr->acqfcn)((bufsize) thr->exp_incr);
738  KMP_DEBUG_ASSERT( ((size_t)newpool) % SizeQuant == 0 );
739  if (newpool != NULL) {
740  bpool( th, newpool, thr->exp_incr);
741  buf = bget( th, requested_size); /* This can't, I say, can't get into a loop. */
742  return buf;
743  }
744  }
745  }
746 
747  /* Still no buffer available */
748 
749  return NULL;
750 }
751 
752 /* BGETZ -- Allocate a buffer and clear its contents to zero. We clear
753  the entire contents of the buffer to zero, not just the
754  region requested by the caller. */
755 
756 static void *
757 bgetz( kmp_info_t *th, bufsize size )
758 {
759  char *buf = (char *) bget( th, size);
760 
761  if (buf != NULL) {
762  bhead_t *b;
763  bufsize rsize;
764 
765  b = BH(buf - sizeof(bhead_t));
766  rsize = -(b->bb.bsize);
767  if (rsize == 0) {
768  bdhead_t *bd;
769 
770  bd = BDH(buf - sizeof(bdhead_t));
771  rsize = bd->tsize - (bufsize) sizeof(bdhead_t);
772  } else {
773  rsize -= sizeof(bhead_t);
774  }
775 
776  KMP_DEBUG_ASSERT(rsize >= size);
777 
778  (void) memset(buf, 0, (bufsize) rsize);
779  }
780  return ((void *) buf);
781 }
782 
783 /* BGETR -- Reallocate a buffer. This is a minimal implementation,
784  simply in terms of brel() and bget(). It could be
785  enhanced to allow the buffer to grow into adjacent free
786  blocks and to avoid moving data unnecessarily. */
787 
788 static void *
789 bgetr( kmp_info_t *th, void *buf, bufsize size)
790 {
791  void *nbuf;
792  bufsize osize; /* Old size of buffer */
793  bhead_t *b;
794 
795  nbuf = bget( th, size );
796  if ( nbuf == NULL ) { /* Acquire new buffer */
797  return NULL;
798  }
799  if ( buf == NULL ) {
800  return nbuf;
801  }
802  b = BH(((char *) buf) - sizeof(bhead_t));
803  osize = -b->bb.bsize;
804  if (osize == 0) {
805  /* Buffer acquired directly through acqfcn. */
806  bdhead_t *bd;
807 
808  bd = BDH(((char *) buf) - sizeof(bdhead_t));
809  osize = bd->tsize - (bufsize) sizeof(bdhead_t);
810  } else {
811  osize -= sizeof(bhead_t);
812  };
813 
814  KMP_DEBUG_ASSERT(osize > 0);
815 
816  (void) memcpy((char *) nbuf, (char *) buf, /* Copy the data */
817  (size_t) ((size < osize) ? size : osize));
818  brel( th, buf );
819 
820  return nbuf;
821 }
822 
823 /* BREL -- Release a buffer. */
824 
825 static void
826 brel( kmp_info_t *th, void *buf )
827 {
828  thr_data_t *thr = get_thr_data( th );
829  bfhead_t *b, *bn;
830  kmp_info_t *bth;
831 
832  KMP_DEBUG_ASSERT(buf != NULL);
833  KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 );
834 
835  b = BFH(((char *) buf) - sizeof(bhead_t));
836 
837  if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */
838  bdhead_t *bdh;
839 
840  bdh = BDH(((char *) buf) - sizeof(bdhead_t));
841  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
842 #if BufStats
843  thr->totalloc -= (size_t) bdh->tsize;
844  thr->numdrel++; /* Number of direct releases */
845  thr->numrel++; /* Increment number of brel() calls */
846 #endif /* BufStats */
847 #ifdef FreeWipe
848  (void) memset((char *) buf, 0x55,
849  (size_t) (bdh->tsize - sizeof(bdhead_t)));
850 #endif /* FreeWipe */
851 
852  KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) bdh ) );
853 
854  KMP_DEBUG_ASSERT( thr->relfcn != 0 );
855  (*thr->relfcn)((void *) bdh); /* Release it directly. */
856  return;
857  }
858 
859  bth = (kmp_info_t *)( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ); // clear possible mark before comparison
860  if ( bth != th ) {
861  /* Add this buffer to be released by the owning thread later */
862  __kmp_bget_enqueue( bth, buf
863 #ifdef USE_QUEUING_LOCK_FOR_BGET
864  , __kmp_gtid_from_thread( th )
865 #endif
866  );
867  return;
868  }
869 
870  /* Buffer size must be negative, indicating that the buffer is
871  allocated. */
872 
873  if (b->bh.bb.bsize >= 0) {
874  bn = NULL;
875  }
876  KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0);
877 
878  /* Back pointer in next buffer must be zero, indicating the
879  same thing: */
880 
881  KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.bsize)->bb.prevfree == 0);
882 
883 #if BufStats
884  thr->numrel++; /* Increment number of brel() calls */
885  thr->totalloc += (size_t) b->bh.bb.bsize;
886 #endif
887 
888  /* If the back link is nonzero, the previous buffer is free. */
889 
890  if (b->bh.bb.prevfree != 0) {
891  /* The previous buffer is free. Consolidate this buffer with it
892  by adding the length of this buffer to the previous free
893  buffer. Note that we subtract the size in the buffer being
894  released, since it's negative to indicate that the buffer is
895  allocated. */
896 
897  register bufsize size = b->bh.bb.bsize;
898 
899  /* Make the previous buffer the one we're working on. */
900  KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.prevfree)->bb.bsize == b->bh.bb.prevfree);
901  b = BFH(((char *) b) - b->bh.bb.prevfree);
902  b->bh.bb.bsize -= size;
903 
904  /* unlink the buffer from the old freelist */
905  __kmp_bget_remove_from_freelist( b );
906  }
907  else {
908  /* The previous buffer isn't allocated. Mark this buffer
909  size as positive (i.e. free) and fall throught to place
910  the buffer on the free list as an isolated free block. */
911 
912  b->bh.bb.bsize = -b->bh.bb.bsize;
913  }
914 
915  /* insert buffer back onto a new freelist */
916  __kmp_bget_insert_into_freelist( thr, b );
917 
918 
919  /* Now we look at the next buffer in memory, located by advancing from
920  the start of this buffer by its size, to see if that buffer is
921  free. If it is, we combine this buffer with the next one in
922  memory, dechaining the second buffer from the free list. */
923 
924  bn = BFH(((char *) b) + b->bh.bb.bsize);
925  if (bn->bh.bb.bsize > 0) {
926 
927  /* The buffer is free. Remove it from the free list and add
928  its size to that of our buffer. */
929 
930  KMP_DEBUG_ASSERT(BH((char *) bn + bn->bh.bb.bsize)->bb.prevfree == bn->bh.bb.bsize);
931 
932  __kmp_bget_remove_from_freelist( bn );
933 
934  b->bh.bb.bsize += bn->bh.bb.bsize;
935 
936  /* unlink the buffer from the old freelist, and reinsert it into the new freelist */
937 
938  __kmp_bget_remove_from_freelist( b );
939  __kmp_bget_insert_into_freelist( thr, b );
940 
941  /* Finally, advance to the buffer that follows the newly
942  consolidated free block. We must set its backpointer to the
943  head of the consolidated free block. We know the next block
944  must be an allocated block because the process of recombination
945  guarantees that two free blocks will never be contiguous in
946  memory. */
947 
948  bn = BFH(((char *) b) + b->bh.bb.bsize);
949  }
950 #ifdef FreeWipe
951  (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
952  (size_t) (b->bh.bb.bsize - sizeof(bfhead_t)));
953 #endif
954  KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0);
955 
956  /* The next buffer is allocated. Set the backpointer in it to point
957  to this buffer; the previous free buffer in memory. */
958 
959  bn->bh.bb.prevfree = b->bh.bb.bsize;
960 
961  /* If a block-release function is defined, and this free buffer
962  constitutes the entire block, release it. Note that pool_len
963  is defined in such a way that the test will fail unless all
964  pool blocks are the same size. */
965 
966  if (thr->relfcn != 0 &&
967  b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
968  {
969 #if BufStats
970  if (thr->numpblk != 1) { /* Do not release the last buffer until finalization time */
971 #endif
972 
973  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
974  KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
975  KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);
976 
977  /* Unlink the buffer from the free list */
978  __kmp_bget_remove_from_freelist( b );
979 
980  KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );
981 
982  (*thr->relfcn)(b);
983 #if BufStats
984  thr->numprel++; /* Nr of expansion block releases */
985  thr->numpblk--; /* Total number of blocks */
986  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
987 
988  /* avoid leaving stale last_pool pointer around if it is being dealloced */
989  if (thr->last_pool == b) thr->last_pool = 0;
990  }
991  else {
992  thr->last_pool = b;
993  }
994 #endif /* BufStats */
995  }
996 }
997 
998 /* BECTL -- Establish automatic pool expansion control */
999 
1000 static void
1001 bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr)
1002 {
1003  thr_data_t *thr = get_thr_data( th );
1004 
1005  thr->compfcn = compact;
1006  thr->acqfcn = acquire;
1007  thr->relfcn = release;
1008  thr->exp_incr = pool_incr;
1009 }
1010 
1011 /* BPOOL -- Add a region of memory to the buffer pool. */
1012 
1013 static void
1014 bpool( kmp_info_t *th, void *buf, bufsize len)
1015 {
1016 /* int bin = 0; */
1017  thr_data_t *thr = get_thr_data( th );
1018  bfhead_t *b = BFH(buf);
1019  bhead_t *bn;
1020 
1021  __kmp_bget_dequeue( th ); /* Release any queued buffers */
1022 
1023 #ifdef SizeQuant
1024  len &= ~(SizeQuant - 1);
1025 #endif
1026  if (thr->pool_len == 0) {
1027  thr->pool_len = len;
1028  } else if (len != thr->pool_len) {
1029  thr->pool_len = -1;
1030  }
1031 #if BufStats
1032  thr->numpget++; /* Number of block acquisitions */
1033  thr->numpblk++; /* Number of blocks total */
1034  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1035 #endif /* BufStats */
1036 
1037  /* Since the block is initially occupied by a single free buffer,
1038  it had better not be (much) larger than the largest buffer
1039  whose size we can store in bhead.bb.bsize. */
1040 
1041  KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize) ESent + 1));
1042 
1043  /* Clear the backpointer at the start of the block to indicate that
1044  there is no free block prior to this one. That blocks
1045  recombination when the first block in memory is released. */
1046 
1047  b->bh.bb.prevfree = 0;
1048 
1049  /* Create a dummy allocated buffer at the end of the pool. This dummy
1050  buffer is seen when a buffer at the end of the pool is released and
1051  blocks recombination of the last buffer with the dummy buffer at
1052  the end. The length in the dummy buffer is set to the largest
1053  negative number to denote the end of the pool for diagnostic
1054  routines (this specific value is not counted on by the actual
1055  allocation and release functions). */
1056 
1057  len -= sizeof(bhead_t);
1058  b->bh.bb.bsize = (bufsize) len;
1059  /* Set the owner of this buffer */
1060  TCW_PTR( b->bh.bb.bthr, (kmp_info_t*)((kmp_uintptr_t)th | 1) ); // mark the buffer as allocated address
1061 
1062  /* Chain the new block to the free list. */
1063  __kmp_bget_insert_into_freelist( thr, b );
1064 
1065 #ifdef FreeWipe
1066  (void) memset(((char *) b) + sizeof(bfhead_t), 0x55,
1067  (size_t) (len - sizeof(bfhead_t)));
1068 #endif
1069  bn = BH(((char *) b) + len);
1070  bn->bb.prevfree = (bufsize) len;
1071  /* Definition of ESent assumes two's complement! */
1072  KMP_DEBUG_ASSERT( (~0) == -1 && (bn != 0) );
1073 
1074  bn->bb.bsize = ESent;
1075 }
1076 
1077 /* ------------------------------------------------------------------------ */
1078 
1079 /* BFREED -- Dump the free lists for this thread. */
1080 
1081 static void
1082 bfreed( kmp_info_t *th )
1083 {
1084  int bin = 0, count = 0;
1085  int gtid = __kmp_gtid_from_thread( th );
1086  thr_data_t *thr = get_thr_data( th );
1087 
1088 #if BufStats
1089  __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC " get=%" KMP_INT64_SPEC " rel=%" \
1090  KMP_INT64_SPEC " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC " prel=%" KMP_INT64_SPEC \
1091  " dget=%" KMP_INT64_SPEC " drel=%" KMP_INT64_SPEC "\n",
1092  gtid, (kmp_uint64) thr->totalloc,
1093  (kmp_int64) thr->numget, (kmp_int64) thr->numrel,
1094  (kmp_int64) thr->numpblk,
1095  (kmp_int64) thr->numpget, (kmp_int64) thr->numprel,
1096  (kmp_int64) thr->numdget, (kmp_int64) thr->numdrel );
1097 #endif
1098 
1099  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1100  bfhead_t *b;
1101 
1102  for (b = thr->freelist[ bin ].ql.flink; b != &thr->freelist[ bin ]; b = b->ql.flink) {
1103  bufsize bs = b->bh.bb.bsize;
1104 
1105  KMP_DEBUG_ASSERT( b->ql.blink->ql.flink == b );
1106  KMP_DEBUG_ASSERT( b->ql.flink->ql.blink == b );
1107  KMP_DEBUG_ASSERT( bs > 0 );
1108 
1109  count += 1;
1110 
1111  __kmp_printf_no_lock("__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b, (long) bs );
1112 #ifdef FreeWipe
1113  {
1114  char *lerr = ((char *) b) + sizeof(bfhead_t);
1115  if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || (memcmp(lerr, lerr + 1, (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
1116  __kmp_printf_no_lock( "__kmp_printpool: T#%d (Contents of above free block have been overstored.)\n", gtid );
1117  }
1118  }
1119 #endif
1120  }
1121  }
1122 
1123  if (count == 0)
1124  __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid );
1125 }
1126 
1127 /* ------------------------------------------------------------------------ */
1128 
1129 #ifdef KMP_DEBUG
1130 
1131 #if BufStats
1132 
1133 /* BSTATS -- Return buffer allocation free space statistics. */
1134 
1135 static void
1136 bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel)
1137 {
1138  int bin = 0;
1139  thr_data_t *thr = get_thr_data( th );
1140 
1141  *nget = thr->numget;
1142  *nrel = thr->numrel;
1143  *curalloc = (bufsize) thr->totalloc;
1144  *totfree = 0;
1145  *maxfree = -1;
1146 
1147  for (bin = 0; bin < MAX_BGET_BINS; ++bin) {
1148  bfhead_t *b = thr->freelist[ bin ].ql.flink;
1149 
1150  while (b != &thr->freelist[ bin ]) {
1151  KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0);
1152  *totfree += b->bh.bb.bsize;
1153  if (b->bh.bb.bsize > *maxfree) {
1154  *maxfree = b->bh.bb.bsize;
1155  }
1156  b = b->ql.flink; /* Link to next buffer */
1157  }
1158  }
1159 }
1160 
1161 /* BSTATSE -- Return extended statistics */
1162 
1163 static void
1164 bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel)
1165 {
1166  thr_data_t *thr = get_thr_data( th );
1167 
1168  *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr;
1169  *npool = thr->numpblk;
1170  *npget = thr->numpget;
1171  *nprel = thr->numprel;
1172  *ndget = thr->numdget;
1173  *ndrel = thr->numdrel;
1174 }
1175 
1176 #endif /* BufStats */
1177 
1178 /* BUFDUMP -- Dump the data in a buffer. This is called with the user
1179  data pointer, and backs up to the buffer header. It will
1180  dump either a free block or an allocated one. */
1181 
1182 static void
1183 bufdump( kmp_info_t *th, void *buf )
1184 {
1185  bfhead_t *b;
1186  unsigned char *bdump;
1187  bufsize bdlen;
1188 
1189  b = BFH(((char *) buf) - sizeof(bhead_t));
1190  KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0);
1191  if (b->bh.bb.bsize < 0) {
1192  bdump = (unsigned char *) buf;
1193  bdlen = (-b->bh.bb.bsize) - (bufsize) sizeof(bhead_t);
1194  } else {
1195  bdump = (unsigned char *) (((char *) b) + sizeof(bfhead_t));
1196  bdlen = b->bh.bb.bsize - (bufsize) sizeof(bfhead_t);
1197  }
1198 
1199  while (bdlen > 0) {
1200  int i, dupes = 0;
1201  bufsize l = bdlen;
1202  char bhex[50], bascii[20];
1203 
1204  if (l > 16) {
1205  l = 16;
1206  }
1207 
1208  for (i = 0; i < l; i++) {
1209  (void) sprintf(bhex + i * 3, "%02X ", bdump[i]);
1210  if (bdump[i] > 0x20 && bdump[i] < 0x7F)
1211  bascii[ i ] = bdump[ i ];
1212  else
1213  bascii[ i ] = ' ';
1214  }
1215  bascii[i] = 0;
1216  (void) __kmp_printf_no_lock("%-48s %s\n", bhex, bascii);
1217  bdump += l;
1218  bdlen -= l;
1219  while ((bdlen > 16) && (memcmp((char *) (bdump - 16),
1220  (char *) bdump, 16) == 0)) {
1221  dupes++;
1222  bdump += 16;
1223  bdlen -= 16;
1224  }
1225  if (dupes > 1) {
1226  (void) __kmp_printf_no_lock(
1227  " (%d lines [%d bytes] identical to above line skipped)\n",
1228  dupes, dupes * 16);
1229  } else if (dupes == 1) {
1230  bdump -= 16;
1231  bdlen += 16;
1232  }
1233  }
1234 }
1235 
1236 /* BPOOLD -- Dump a buffer pool. The buffer headers are always listed.
1237  If DUMPALLOC is nonzero, the contents of allocated buffers
1238  are dumped. If DUMPFREE is nonzero, free blocks are
1239  dumped as well. If FreeWipe checking is enabled, free
1240  blocks which have been clobbered will always be dumped. */
1241 
1242 static void
1243 bpoold( kmp_info_t *th, void *buf, int dumpalloc, int dumpfree)
1244 {
1245  bfhead_t *b = BFH( (char*)buf - sizeof(bhead_t));
1246 
1247  while (b->bh.bb.bsize != ESent) {
1248  bufsize bs = b->bh.bb.bsize;
1249 
1250  if (bs < 0) {
1251  bs = -bs;
1252  (void) __kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n", (long) bs);
1253  if (dumpalloc) {
1254  bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1255  }
1256  } else {
1257  char *lerr = "";
1258 
1259  KMP_DEBUG_ASSERT(bs > 0);
1260  if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
1261  lerr = " (Bad free list links)";
1262  }
1263  (void) __kmp_printf_no_lock("Free block: size %6ld bytes.%s\n",
1264  (long) bs, lerr);
1265 #ifdef FreeWipe
1266  lerr = ((char *) b) + sizeof(bfhead_t);
1267  if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
1268  (memcmp(lerr, lerr + 1,
1269  (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
1270  (void) __kmp_printf_no_lock(
1271  "(Contents of above free block have been overstored.)\n");
1272  bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1273  } else
1274 #endif
1275  if (dumpfree) {
1276  bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1277  }
1278  }
1279  b = BFH(((char *) b) + bs);
1280  }
1281 }
1282 
1283 /* BPOOLV -- Validate a buffer pool. */
1284 
1285 static int
1286 bpoolv( kmp_info_t *th, void *buf )
1287 {
1288  bfhead_t *b = BFH(buf);
1289 
1290  while (b->bh.bb.bsize != ESent) {
1291  bufsize bs = b->bh.bb.bsize;
1292 
1293  if (bs < 0) {
1294  bs = -bs;
1295  } else {
1296 #ifdef FreeWipe
1297  char *lerr = "";
1298 #endif
1299 
1300  KMP_DEBUG_ASSERT(bs > 0);
1301  if (bs <= 0) {
1302  return 0;
1303  }
1304  if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) {
1305  (void) __kmp_printf_no_lock("Free block: size %6ld bytes. (Bad free list links)\n",
1306  (long) bs);
1307  KMP_DEBUG_ASSERT(0);
1308  return 0;
1309  }
1310 #ifdef FreeWipe
1311  lerr = ((char *) b) + sizeof(bfhead_t);
1312  if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) ||
1313  (memcmp(lerr, lerr + 1,
1314  (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) {
1315  (void) __kmp_printf_no_lock(
1316  "(Contents of above free block have been overstored.)\n");
1317  bufdump( th, (void *) (((char *) b) + sizeof(bhead_t)));
1318  KMP_DEBUG_ASSERT(0);
1319  return 0;
1320  }
1321 #endif /* FreeWipe */
1322  }
1323  b = BFH(((char *) b) + bs);
1324  }
1325  return 1;
1326 }
1327 
1328 #endif /* KMP_DEBUG */
1329 
1330 /* ------------------------------------------------------------------------ */
1331 
1332 void
1333 __kmp_initialize_bget( kmp_info_t *th )
1334 {
1335  KMP_DEBUG_ASSERT( SizeQuant >= sizeof( void * ) && (th != 0) );
1336 
1337  set_thr_data( th );
1338 
1339  bectl( th, (bget_compact_t) 0, (bget_acquire_t) malloc, (bget_release_t) free,
1340  (bufsize) __kmp_malloc_pool_incr );
1341 }
1342 
1343 void
1344 __kmp_finalize_bget( kmp_info_t *th )
1345 {
1346  thr_data_t *thr;
1347  bfhead_t *b;
1348 
1349  KMP_DEBUG_ASSERT( th != 0 );
1350 
1351 #if BufStats
1352  thr = (thr_data_t *) th->th.th_local.bget_data;
1353  KMP_DEBUG_ASSERT( thr != NULL );
1354  b = thr->last_pool;
1355 
1356  /* If a block-release function is defined, and this free buffer
1357  constitutes the entire block, release it. Note that pool_len
1358  is defined in such a way that the test will fail unless all
1359  pool blocks are the same size. */
1360 
1361  /* Deallocate the last pool if one exists because we no longer do it in brel() */
1362  if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 &&
1363  b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t)))
1364  {
1365  KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0);
1366  KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent);
1367  KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize);
1368 
1369  /* Unlink the buffer from the free list */
1370  __kmp_bget_remove_from_freelist( b );
1371 
1372  KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) );
1373 
1374  (*thr->relfcn)(b);
1375  thr->numprel++; /* Nr of expansion block releases */
1376  thr->numpblk--; /* Total number of blocks */
1377  KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel);
1378  }
1379 #endif /* BufStats */
1380 
1381  /* Deallocate bget_data */
1382  if ( th->th.th_local.bget_data != NULL ) {
1383  __kmp_free( th->th.th_local.bget_data );
1384  th->th.th_local.bget_data = NULL;
1385  }; // if
1386 }
1387 
1388 void
1389 kmpc_set_poolsize( size_t size )
1390 {
1391  bectl( __kmp_get_thread(), (bget_compact_t) 0, (bget_acquire_t) malloc,
1392  (bget_release_t) free, (bufsize) size );
1393 }
1394 
1395 size_t
1396 kmpc_get_poolsize( void )
1397 {
1398  thr_data_t *p;
1399 
1400  p = get_thr_data( __kmp_get_thread() );
1401 
1402  return p->exp_incr;
1403 }
1404 
1405 void
1406 kmpc_set_poolmode( int mode )
1407 {
1408  thr_data_t *p;
1409 
1410  if (mode == bget_mode_fifo || mode == bget_mode_lifo || mode == bget_mode_best) {
1411  p = get_thr_data( __kmp_get_thread() );
1412  p->mode = (bget_mode_t) mode;
1413  }
1414 }
1415 
1416 int
1417 kmpc_get_poolmode( void )
1418 {
1419  thr_data_t *p;
1420 
1421  p = get_thr_data( __kmp_get_thread() );
1422 
1423  return p->mode;
1424 }
1425 
1426 void
1427 kmpc_get_poolstat( size_t *maxmem, size_t *allmem )
1428 {
1429  kmp_info_t *th = __kmp_get_thread();
1430  bufsize a, b;
1431 
1432  __kmp_bget_dequeue( th ); /* Release any queued buffers */
1433 
1434  bcheck( th, &a, &b );
1435 
1436  *maxmem = a;
1437  *allmem = b;
1438 }
1439 
1440 void
1441 kmpc_poolprint( void )
1442 {
1443  kmp_info_t *th = __kmp_get_thread();
1444 
1445  __kmp_bget_dequeue( th ); /* Release any queued buffers */
1446 
1447  bfreed( th );
1448 }
1449 
1450 #endif // #if KMP_USE_BGET
1451 
1452 /* ------------------------------------------------------------------------ */
1453 
1454 void *
1455 kmpc_malloc( size_t size )
1456 {
1457  void * ptr;
1458  ptr = bget( __kmp_entry_thread(), (bufsize) size );
1459 
1460  return ptr;
1461 }
1462 
1463 void *
1464 kmpc_calloc( size_t nelem, size_t elsize )
1465 {
1466  void * ptr;
1467  ptr = bgetz( __kmp_entry_thread(), (bufsize) (nelem * elsize) );
1468 
1469  return ptr;
1470 }
1471 
1472 void *
1473 kmpc_realloc( void * ptr, size_t size )
1474 {
1475  void * result = NULL;
1476 
1477  if ( ptr == NULL ) {
1478  // If pointer is NULL, realloc behaves like malloc.
1479  result = bget( __kmp_entry_thread(), (bufsize) size );
1480  } else if ( size == 0 ) {
1481  // If size is 0, realloc behaves like free.
1482  // The thread must be registered by the call to kmpc_malloc() or kmpc_calloc() before.
1483  // So it should be safe to call __kmp_get_thread(), not __kmp_entry_thread().
1484  brel( __kmp_get_thread(), ptr );
1485  } else {
1486  result = bgetr( __kmp_entry_thread(), ptr, (bufsize) size );
1487  }; // if
1488 
1489  return result;
1490 }
1491 
1492 /* NOTE: the library must have already been initialized by a previous allocate */
1493 
1494 void
1495 kmpc_free( void * ptr )
1496 {
1497  if ( ! __kmp_init_serial ) {
1498  return;
1499  }; // if
1500  if ( ptr != NULL ) {
1501  kmp_info_t *th = __kmp_get_thread();
1502  __kmp_bget_dequeue( th ); /* Release any queued buffers */
1503  brel( th, ptr );
1504  };
1505 }
1506 
1507 
1508 /* ------------------------------------------------------------------------ */
1509 
1510 void *
1511 ___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL )
1512 {
1513  void * ptr;
1514  KE_TRACE( 30, (
1515  "-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n",
1516  th,
1517  (int) size
1518  KMP_SRC_LOC_PARM
1519  ) );
1520  ptr = bget( th, (bufsize) size );
1521  KE_TRACE( 30, ( "<- __kmp_thread_malloc() returns %p\n", ptr ) );
1522  return ptr;
1523 }
1524 
1525 void *
1526 ___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL )
1527 {
1528  void * ptr;
1529  KE_TRACE( 30, (
1530  "-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n",
1531  th,
1532  (int) nelem,
1533  (int) elsize
1534  KMP_SRC_LOC_PARM
1535  ) );
1536  ptr = bgetz( th, (bufsize) (nelem * elsize) );
1537  KE_TRACE( 30, ( "<- __kmp_thread_calloc() returns %p\n", ptr ) );
1538  return ptr;
1539 }
1540 
1541 void *
1542 ___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL )
1543 {
1544  KE_TRACE( 30, (
1545  "-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n",
1546  th,
1547  ptr,
1548  (int) size
1549  KMP_SRC_LOC_PARM
1550  ) );
1551  ptr = bgetr( th, ptr, (bufsize) size );
1552  KE_TRACE( 30, ( "<- __kmp_thread_realloc() returns %p\n", ptr ) );
1553  return ptr;
1554 }
1555 
1556 void
1557 ___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL )
1558 {
1559  KE_TRACE( 30, (
1560  "-> __kmp_thread_free( %p, %p ) called from %s:%d\n",
1561  th,
1562  ptr
1563  KMP_SRC_LOC_PARM
1564  ) );
1565  if ( ptr != NULL ) {
1566  __kmp_bget_dequeue( th ); /* Release any queued buffers */
1567  brel( th, ptr );
1568  }
1569  KE_TRACE( 30, ( "<- __kmp_thread_free()\n" ) );
1570 }
1571 
1572 /* ------------------------------------------------------------------------ */
1573 /* ------------------------------------------------------------------------ */
1574 /*
1575  If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes memory leaks, but it
1576  may be useful for debugging memory corruptions, used freed pointers, etc.
1577 */
1578 /* #define LEAK_MEMORY */
1579 
1580 struct kmp_mem_descr { // Memory block descriptor.
1581  void * ptr_allocated; // Pointer returned by malloc(), subject for free().
1582  size_t size_allocated; // Size of allocated memory block.
1583  void * ptr_aligned; // Pointer to aligned memory, to be used by client code.
1584  size_t size_aligned; // Size of aligned memory block.
1585 };
1586 typedef struct kmp_mem_descr kmp_mem_descr_t;
1587 
1588 /*
1589  Allocate memory on requested boundary, fill allocated memory with 0x00.
1590  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1591  Must use __kmp_free when freeing memory allocated by this routine!
1592  */
1593 static
1594 void *
1595 ___kmp_allocate_align( size_t size, size_t alignment KMP_SRC_LOC_DECL )
1596 {
1597  /*
1598  __kmp_allocate() allocates (by call to malloc()) bigger memory block than requested to
1599  return properly aligned pointer. Original pointer returned by malloc() and size of allocated
1600  block is saved in descriptor just before the aligned pointer. This information used by
1601  __kmp_free() -- it has to pass to free() original pointer, not aligned one.
1602 
1603  +---------+------------+-----------------------------------+---------+
1604  | padding | descriptor | aligned block | padding |
1605  +---------+------------+-----------------------------------+---------+
1606  ^ ^
1607  | |
1608  | +- Aligned pointer returned to caller
1609  +- Pointer returned by malloc()
1610 
1611  Aligned block is filled with zeros, paddings are filled with 0xEF.
1612  */
1613 
1614  kmp_mem_descr_t descr;
1615  kmp_uintptr_t addr_allocated; // Address returned by malloc().
1616  kmp_uintptr_t addr_aligned; // Aligned address to return to caller.
1617  kmp_uintptr_t addr_descr; // Address of memory block descriptor.
1618 
1619  KE_TRACE( 25, (
1620  "-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n",
1621  (int) size,
1622  (int) alignment
1623  KMP_SRC_LOC_PARM
1624  ) );
1625 
1626  KMP_DEBUG_ASSERT( alignment < 32 * 1024 ); // Alignment should not be too
1627  KMP_DEBUG_ASSERT( sizeof( void * ) <= sizeof( kmp_uintptr_t ) );
1628  // Make sure kmp_uintptr_t is enough to store addresses.
1629 
1630  descr.size_aligned = size;
1631  descr.size_allocated = descr.size_aligned + sizeof( kmp_mem_descr_t ) + alignment;
1632 
1633  #if KMP_DEBUG
1634  descr.ptr_allocated = _malloc_src_loc( descr.size_allocated, _file_, _line_ );
1635  #else
1636  descr.ptr_allocated = malloc_src_loc( descr.size_allocated KMP_SRC_LOC_PARM );
1637  #endif
1638  KE_TRACE( 10, (
1639  " malloc( %d ) returned %p\n",
1640  (int) descr.size_allocated,
1641  descr.ptr_allocated
1642  ) );
1643  if ( descr.ptr_allocated == NULL ) {
1644  KMP_FATAL( OutOfHeapMemory );
1645  };
1646 
1647  addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
1648  addr_aligned =
1649  ( addr_allocated + sizeof( kmp_mem_descr_t ) + alignment )
1650  & ~ ( alignment - 1 );
1651  addr_descr = addr_aligned - sizeof( kmp_mem_descr_t );
1652 
1653  descr.ptr_aligned = (void *) addr_aligned;
1654 
1655  KE_TRACE( 26, (
1656  " ___kmp_allocate_align: "
1657  "ptr_allocated=%p, size_allocated=%d, "
1658  "ptr_aligned=%p, size_aligned=%d\n",
1659  descr.ptr_allocated,
1660  (int) descr.size_allocated,
1661  descr.ptr_aligned,
1662  (int) descr.size_aligned
1663  ) );
1664 
1665  KMP_DEBUG_ASSERT( addr_allocated <= addr_descr );
1666  KMP_DEBUG_ASSERT( addr_descr + sizeof( kmp_mem_descr_t ) == addr_aligned );
1667  KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
1668  KMP_DEBUG_ASSERT( addr_aligned % alignment == 0 );
1669 
1670  #ifdef KMP_DEBUG
1671  memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
1672  // Fill allocated memory block with 0xEF.
1673  #endif
1674  memset( descr.ptr_aligned, 0x00, descr.size_aligned );
1675  // Fill the aligned memory block (which is intended for using by caller) with 0x00. Do not
1676  // put this filling under KMP_DEBUG condition! Many callers expect zeroed memory. (Padding
1677  // bytes remain filled with 0xEF in debugging library.)
1678  * ( (kmp_mem_descr_t *) addr_descr ) = descr;
1679 
1680  KMP_MB();
1681 
1682  KE_TRACE( 25, ( "<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned ) );
1683  return descr.ptr_aligned;
1684 
1685 } // func ___kmp_allocate_align
1686 
1687 
1688 /*
1689  Allocate memory on cache line boundary, fill allocated memory with 0x00.
1690  Do not call this func directly! Use __kmp_allocate macro instead.
1691  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1692  Must use __kmp_free when freeing memory allocated by this routine!
1693  */
1694 void *
1695 ___kmp_allocate( size_t size KMP_SRC_LOC_DECL )
1696 {
1697 
1698  void * ptr;
1699  KE_TRACE( 25, ( "-> __kmp_allocate( %d ) called from %s:%d\n", (int) size KMP_SRC_LOC_PARM ) );
1700  ptr = ___kmp_allocate_align( size, __kmp_align_alloc KMP_SRC_LOC_PARM );
1701  KE_TRACE( 25, ( "<- __kmp_allocate() returns %p\n", ptr ) );
1702  return ptr;
1703 
1704 } // func ___kmp_allocate
1705 
1706 #if (BUILD_MEMORY==FIRST_TOUCH)
1707 void *
1708 __kmp_ft_page_allocate(size_t size)
1709 {
1710  void *adr, *aadr;
1711 #if KMP_OS_LINUX
1712  /* TODO: Use this function to get page size everywhere */
1713  int page_size = getpagesize();
1714 #else
1715  /* TODO: Find windows function to get page size and use it everywhere */
1716  int page_size = PAGE_SIZE;
1717 #endif /* KMP_OS_LINUX */
1718 
1719  adr = (void *) __kmp_thread_malloc( __kmp_get_thread(),
1720  size + page_size + KMP_PTR_SKIP);
1721  if ( adr == 0 )
1722  KMP_FATAL( OutOfHeapMemory );
1723 
1724  /* check to see if adr is on a page boundary. */
1725  if ( ( (kmp_uintptr_t) adr & (page_size - 1)) == 0)
1726  /* nothing to do if adr is already on a page boundary. */
1727  aadr = adr;
1728  else
1729  /* else set aadr to the first page boundary in the allocated memory. */
1730  aadr = (void *) ( ( (kmp_uintptr_t) adr + page_size) & ~(page_size - 1) );
1731 
1732  /* the first touch by the owner thread. */
1733  *((void**)aadr) = adr;
1734 
1735  /* skip the memory space used for storing adr above. */
1736  return (void*)((char*)aadr + KMP_PTR_SKIP);
1737 }
1738 #endif
1739 
1740 /*
1741  Allocate memory on page boundary, fill allocated memory with 0x00.
1742  Does not call this func directly! Use __kmp_page_allocate macro instead.
1743  NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error.
1744  Must use __kmp_free when freeing memory allocated by this routine!
1745  */
1746 void *
1747 ___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL )
1748 {
1749  int page_size = 8 * 1024;
1750  void * ptr;
1751 
1752  KE_TRACE( 25, (
1753  "-> __kmp_page_allocate( %d ) called from %s:%d\n",
1754  (int) size
1755  KMP_SRC_LOC_PARM
1756  ) );
1757  ptr = ___kmp_allocate_align( size, page_size KMP_SRC_LOC_PARM );
1758  KE_TRACE( 25, ( "<- __kmp_page_allocate( %d ) returns %p\n", (int) size, ptr ) );
1759  return ptr;
1760 } // ___kmp_page_allocate
1761 
1762 /*
1763  Free memory allocated by __kmp_allocate() and __kmp_page_allocate().
1764  In debug mode, fill the memory block with 0xEF before call to free().
1765 */
1766 void
1767 ___kmp_free( void * ptr KMP_SRC_LOC_DECL )
1768 {
1769 
1770  kmp_mem_descr_t descr;
1771  kmp_uintptr_t addr_allocated; // Address returned by malloc().
1772  kmp_uintptr_t addr_aligned; // Aligned address passed by caller.
1773 
1774  KE_TRACE( 25, ( "-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM ) );
1775  KMP_ASSERT( ptr != NULL );
1776 
1777  descr = * ( kmp_mem_descr_t *) ( (kmp_uintptr_t) ptr - sizeof( kmp_mem_descr_t ) );
1778 
1779  KE_TRACE( 26, ( " __kmp_free: "
1780  "ptr_allocated=%p, size_allocated=%d, "
1781  "ptr_aligned=%p, size_aligned=%d\n",
1782  descr.ptr_allocated, (int) descr.size_allocated,
1783  descr.ptr_aligned, (int) descr.size_aligned ));
1784 
1785  addr_allocated = (kmp_uintptr_t) descr.ptr_allocated;
1786  addr_aligned = (kmp_uintptr_t) descr.ptr_aligned;
1787 
1788  KMP_DEBUG_ASSERT( addr_aligned % CACHE_LINE == 0 );
1789  KMP_DEBUG_ASSERT( descr.ptr_aligned == ptr );
1790  KMP_DEBUG_ASSERT( addr_allocated + sizeof( kmp_mem_descr_t ) <= addr_aligned );
1791  KMP_DEBUG_ASSERT( descr.size_aligned < descr.size_allocated );
1792  KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated );
1793 
1794  #ifdef KMP_DEBUG
1795  memset( descr.ptr_allocated, 0xEF, descr.size_allocated );
1796  // Fill memory block with 0xEF, it helps catch using freed memory.
1797  #endif
1798 
1799  #ifndef LEAK_MEMORY
1800  KE_TRACE( 10, ( " free( %p )\n", descr.ptr_allocated ) );
1801  free_src_loc( descr.ptr_allocated KMP_SRC_LOC_PARM );
1802  #endif
1803 
1804  KMP_MB();
1805 
1806  KE_TRACE( 25, ( "<- __kmp_free() returns\n" ) );
1807 
1808 } // func ___kmp_free
1809 
1810 /* ------------------------------------------------------------------------ */
1811 /* ------------------------------------------------------------------------ */
1812 
1813 #if USE_FAST_MEMORY == 3
1814 // Allocate fast memory by first scanning the thread's free lists
1815 // If a chunk the right size exists, grab it off the free list.
1816 // Otherwise allocate normally using kmp_thread_malloc.
1817 
1818 // AC: How to choose the limit? Just get 16 for now...
1819 static int const __kmp_free_list_limit = 16;
1820 
1821 // Always use 128 bytes for determining buckets for caching memory blocks
1822 #define DCACHE_LINE 128
1823 
1824 void *
1825 ___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL )
1826 {
1827  void * ptr;
1828  int num_lines;
1829  int idx;
1830  int index;
1831  void * alloc_ptr;
1832  size_t alloc_size;
1833  kmp_mem_descr_t * descr;
1834 
1835  KE_TRACE( 25, ( "-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n",
1836  __kmp_gtid_from_thread(this_thr), (int) size KMP_SRC_LOC_PARM ) );
1837 
1838  num_lines = ( size + DCACHE_LINE - 1 ) / DCACHE_LINE;
1839  idx = num_lines - 1;
1840  KMP_DEBUG_ASSERT( idx >= 0 );
1841  if ( idx < 2 ) {
1842  index = 0; // idx is [ 0, 1 ], use first free list
1843  num_lines = 2; // 1, 2 cache lines or less than cache line
1844  } else if ( ( idx >>= 2 ) == 0 ) {
1845  index = 1; // idx is [ 2, 3 ], use second free list
1846  num_lines = 4; // 3, 4 cache lines
1847  } else if ( ( idx >>= 2 ) == 0 ) {
1848  index = 2; // idx is [ 4, 15 ], use third free list
1849  num_lines = 16; // 5, 6, ..., 16 cache lines
1850  } else if ( ( idx >>= 2 ) == 0 ) {
1851  index = 3; // idx is [ 16, 63 ], use fourth free list
1852  num_lines = 64; // 17, 18, ..., 64 cache lines
1853  } else {
1854  goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists
1855  }
1856 
1857  ptr = this_thr->th.th_free_lists[index].th_free_list_self;
1858  if ( ptr != NULL ) {
1859  // pop the head of no-sync free list
1860  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1861  KMP_DEBUG_ASSERT( this_thr ==
1862  ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
1863  goto end;
1864  };
1865  ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
1866  if ( ptr != NULL ) {
1867  // no-sync free list is empty, use sync free list (filled in by other threads only)
1868  // pop the head of the sync free list, push NULL instead
1869  while ( ! KMP_COMPARE_AND_STORE_PTR(
1870  &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, NULL ) )
1871  {
1872  KMP_CPU_PAUSE();
1873  ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync );
1874  }
1875  // push the rest of chain into no-sync free list (can be NULL if there was the only block)
1876  this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr);
1877  KMP_DEBUG_ASSERT( this_thr ==
1878  ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned );
1879  goto end;
1880  }
1881 
1882  alloc_call:
1883  // haven't found block in the free lists, thus allocate it
1884  size = num_lines * DCACHE_LINE;
1885 
1886  alloc_size = size + sizeof( kmp_mem_descr_t ) + DCACHE_LINE;
1887  KE_TRACE( 25, ( "__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with alloc_size %d\n",
1888  __kmp_gtid_from_thread( this_thr ), alloc_size ) );
1889  alloc_ptr = bget( this_thr, (bufsize) alloc_size );
1890 
1891  // align ptr to DCACHE_LINE
1892  ptr = (void *)(( ((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) + DCACHE_LINE ) & ~( DCACHE_LINE - 1 ));
1893  descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
1894 
1895  descr->ptr_allocated = alloc_ptr; // remember allocated pointer
1896  // we don't need size_allocated
1897  descr->ptr_aligned = (void *)this_thr; // remember allocating thread
1898  // (it is already saved in bget buffer,
1899  // but we may want to use another allocator in future)
1900  descr->size_aligned = size;
1901 
1902  end:
1903  KE_TRACE( 25, ( "<- __kmp_fast_allocate( T#%d ) returns %p\n",
1904  __kmp_gtid_from_thread( this_thr ), ptr ) );
1905  return ptr;
1906 } // func __kmp_fast_allocate
1907 
1908 // Free fast memory and place it on the thread's free list if it is of
1909 // the correct size.
1910 void
1911 ___kmp_fast_free( kmp_info_t *this_thr, void * ptr KMP_SRC_LOC_DECL )
1912 {
1913  kmp_mem_descr_t * descr;
1914  kmp_info_t * alloc_thr;
1915  size_t size;
1916  size_t idx;
1917  int index;
1918 
1919  KE_TRACE( 25, ( "-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n",
1920  __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM ) );
1921  KMP_ASSERT( ptr != NULL );
1922 
1923  descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) );
1924 
1925  KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n",
1926  (int) descr->size_aligned ) );
1927 
1928  size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines
1929 
1930  idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block
1931  if ( idx == size ) {
1932  index = 0; // 2 cache lines
1933  } else if ( ( idx <<= 1 ) == size ) {
1934  index = 1; // 4 cache lines
1935  } else if ( ( idx <<= 2 ) == size ) {
1936  index = 2; // 16 cache lines
1937  } else if ( ( idx <<= 2 ) == size ) {
1938  index = 3; // 64 cache lines
1939  } else {
1940  KMP_DEBUG_ASSERT( size > DCACHE_LINE * 64 );
1941  goto free_call; // 65 or more cache lines ( > 8KB )
1942  }
1943 
1944  alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block
1945  if ( alloc_thr == this_thr ) {
1946  // push block to self no-sync free list, linking previous head (LIFO)
1947  *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self;
1948  this_thr->th.th_free_lists[index].th_free_list_self = ptr;
1949  } else {
1950  void * head = this_thr->th.th_free_lists[index].th_free_list_other;
1951  if ( head == NULL ) {
1952  // Create new free list
1953  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1954  *((void **)ptr) = NULL; // mark the tail of the list
1955  descr->size_allocated = (size_t)1; // head of the list keeps its length
1956  } else {
1957  // need to check existed "other" list's owner thread and size of queue
1958  kmp_mem_descr_t * dsc = (kmp_mem_descr_t *)( (char*)head - sizeof(kmp_mem_descr_t) );
1959  kmp_info_t * q_th = (kmp_info_t *)(dsc->ptr_aligned); // allocating thread, same for all queue nodes
1960  size_t q_sz = dsc->size_allocated + 1; // new size in case we add current task
1961  if ( q_th == alloc_thr && q_sz <= __kmp_free_list_limit ) {
1962  // we can add current task to "other" list, no sync needed
1963  *((void **)ptr) = head;
1964  descr->size_allocated = q_sz;
1965  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
1966  } else {
1967  // either queue blocks owner is changing or size limit exceeded
1968  // return old queue to allocating thread (q_th) synchroneously,
1969  // and start new list for alloc_thr's tasks
1970  void * old_ptr;
1971  void * tail = head;
1972  void * next = *((void **)head);
1973  while ( next != NULL ) {
1974  KMP_DEBUG_ASSERT(
1975  // queue size should decrease by 1 each step through the list
1976  ((kmp_mem_descr_t*)((char*)next - sizeof(kmp_mem_descr_t)))->size_allocated + 1 ==
1977  ((kmp_mem_descr_t*)((char*)tail - sizeof(kmp_mem_descr_t)))->size_allocated );
1978  tail = next; // remember tail node
1979  next = *((void **)next);
1980  }
1981  KMP_DEBUG_ASSERT( q_th != NULL );
1982  // push block to owner's sync free list
1983  old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
1984  /* the next pointer must be set before setting free_list to ptr to avoid
1985  exposing a broken list to other threads, even for an instant. */
1986  *((void **)tail) = old_ptr;
1987 
1988  // AC: use the same ITT macros as __kmp_bget_enqueue uses for similar operation
1989 #if defined(USE_ITT)
1990  KMP_FSYNC_RELEASING( & q_th->th.th_free_lists[index].th_free_list_sync );
1991 #endif /* defined(USE_ITT) */
1992 
1993  while ( ! KMP_COMPARE_AND_STORE_PTR(
1994  &q_th->th.th_free_lists[index].th_free_list_sync,
1995  old_ptr,
1996  head ) )
1997  {
1998  KMP_CPU_PAUSE();
1999  old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync );
2000  *((void **)tail) = old_ptr;
2001 #if defined(USE_ITT)
2002  KMP_FSYNC_RELEASING( & q_th->th.th_free_lists[index].th_free_list_sync );
2003 #endif /* defined(USE_ITT) */
2004  }
2005 
2006  // start new list of not-selt tasks
2007  this_thr->th.th_free_lists[index].th_free_list_other = ptr;
2008  *((void **)ptr) = NULL;
2009  descr->size_allocated = (size_t)1; // head of queue keeps its length
2010  }
2011  }
2012  }
2013  goto end;
2014 
2015  free_call:
2016  KE_TRACE(25, ( "__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n",
2017  __kmp_gtid_from_thread( this_thr), size ) );
2018  __kmp_bget_dequeue( this_thr ); /* Release any queued buffers */
2019  brel( this_thr, descr->ptr_allocated );
2020 
2021  end:
2022  KE_TRACE( 25, ( "<- __kmp_fast_free() returns\n" ) );
2023 
2024 } // func __kmp_fast_free
2025 
2026 
2027 // Initialize the thread free lists related to fast memory
2028 // Only do this when a thread is initially created.
2029 void
2030 __kmp_initialize_fast_memory( kmp_info_t *this_thr )
2031 {
2032  KE_TRACE(10, ( "__kmp_initialize_fast_memory: Called from th %p\n", this_thr ) );
2033 
2034  memset ( this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof( kmp_free_list_t ) );
2035 }
2036 
2037 // Free the memory in the thread free lists related to fast memory
2038 // Only do this when a thread is being reaped (destroyed).
2039 void
2040 __kmp_free_fast_memory( kmp_info_t *th )
2041 {
2042  // Suppose we use BGET underlying allocator, walk through its structures...
2043  int bin;
2044  thr_data_t * thr = get_thr_data( th );
2045  void ** lst = NULL;
2046 
2047  KE_TRACE(5, ( "__kmp_free_fast_memory: Called T#%d\n",
2048  __kmp_gtid_from_thread( th ) ) );
2049 
2050  __kmp_bget_dequeue( th ); // Release any queued buffers
2051 
2052  // Dig through free lists and extract all allocated blocks
2053  for ( bin = 0; bin < MAX_BGET_BINS; ++bin ) {
2054  bfhead_t * b = thr->freelist[ bin ].ql.flink;
2055  while ( b != &thr->freelist[ bin ] ) {
2056  if ( (kmp_uintptr_t)b->bh.bb.bthr & 1 ) { // if the buffer is an allocated address?
2057  *((void**)b) = lst; // link the list (override bthr, but keep flink yet)
2058  lst = (void**)b; // push b into lst
2059  }
2060  b = b->ql.flink; // get next buffer
2061  }
2062  }
2063  while ( lst != NULL ) {
2064  void * next = *lst;
2065  KE_TRACE(10, ( "__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n",
2066  lst, next, th, __kmp_gtid_from_thread( th ) ) );
2067  (*thr->relfcn)(lst);
2068  #if BufStats
2069  // count blocks to prevent problems in __kmp_finalize_bget()
2070  thr->numprel++; /* Nr of expansion block releases */
2071  thr->numpblk--; /* Total number of blocks */
2072  #endif
2073  lst = (void**)next;
2074  }
2075 
2076  KE_TRACE(5, ( "__kmp_free_fast_memory: Freed T#%d\n",
2077  __kmp_gtid_from_thread( th ) ) );
2078 }
2079 
2080 #endif // USE_FAST_MEMORY