Intel® OpenMP* Runtime Library
 All Classes Functions Variables Typedefs Enumerations Enumerator Groups Pages
kmp_lock.cpp
1 /*
2  * kmp_lock.cpp -- lock-related functions
3  * $Revision: 42505 $
4  * $Date: 2013-07-09 17:47:30 -0500 (Tue, 09 Jul 2013) $
5  */
6 
7 /* <copyright>
8  Copyright (c) 1997-2013 Intel Corporation. All Rights Reserved.
9 
10  Redistribution and use in source and binary forms, with or without
11  modification, are permitted provided that the following conditions
12  are met:
13 
14  * Redistributions of source code must retain the above copyright
15  notice, this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright
17  notice, this list of conditions and the following disclaimer in the
18  documentation and/or other materials provided with the distribution.
19  * Neither the name of Intel Corporation nor the names of its
20  contributors may be used to endorse or promote products derived
21  from this software without specific prior written permission.
22 
23  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 </copyright> */
36 
37 #include <stddef.h>
38 
39 #include "kmp.h"
40 #include "kmp_itt.h"
41 #include "kmp_i18n.h"
42 #include "kmp_lock.h"
43 #include "kmp_io.h"
44 
45 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
46 # include <unistd.h>
47 # include <sys/syscall.h>
48 // We should really include <futex.h>, but that causes compatibility problems on different
49 // Linux* OS distributions that either require that you include (or break when you try to include)
50 // <pci/types.h>.
51 // Since all we need is the two macros below (which are part of the kernel ABI, so can't change)
52 // we just define the constants here and don't include <futex.h>
53 # ifndef FUTEX_WAIT
54 # define FUTEX_WAIT 0
55 # endif
56 # ifndef FUTEX_WAKE
57 # define FUTEX_WAKE 1
58 # endif
59 #endif
60 
61 
#ifdef KMP_DEBUG

//
// Debug builds only: the "delay" performs no waiting, it merely validates
// its argument through KMP_ASSERT.
//
static void __kmp_static_delay( int arg )
{
    /* Work around weird code-gen bug that causes assert to trip */
# if KMP_OS_LINUX && KMP_ARCH_X86_64
    KMP_ASSERT( arg != 0 );
# else
    KMP_ASSERT( arg >= 0 );
# endif
}

#else /* ! KMP_DEBUG */

// Without KMP_DEBUG every call expands to nothing.
# define __kmp_static_delay( arg ) /* nothing to do */

#endif /* KMP_DEBUG */
77 
78 static void
79 __kmp_static_yield( int arg )
80 {
81  __kmp_yield( arg );
82 }
83 
84 /* Implement spin locks for internal library use. */
85 /* The algorithm implemented is Lamport's bakery lock [1974]. */
86 
87 void
88 __kmp_validate_locks( void )
89 {
90  int i;
91  kmp_uint32 x, y;
92 
93  /* Check to make sure unsigned arithmetic does wraps properly */
94  x = ~((kmp_uint32) 0) - 2;
95  y = x - 2;
96 
97  for (i = 0; i < 8; ++i, ++x, ++y) {
98  kmp_uint32 z = (x - y);
99  KMP_ASSERT( z == 2 );
100  }
101 
102  KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 );
103 }
104 
105 
106 /* ------------------------------------------------------------------------ */
107 /* test and set locks */
108 
109 //
110 // For the non-nested locks, we can only assume that the first 4 bytes were
111 // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
112 // compiler only allocates a 4 byte pointer on IA-32 architecture. On
113 // Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
114 //
115 // gcc reserves >= 8 bytes for nested locks, so we can assume that the
116 // entire 8 bytes were allocated for nested locks on all 64-bit platforms.
117 //
118 
119 static kmp_int32
120 __kmp_get_tas_lock_owner( kmp_tas_lock_t *lck )
121 {
122  return TCR_4( lck->lk.poll ) - 1;
123 }
124 
125 static inline bool
126 __kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck )
127 {
128  return lck->lk.depth_locked != -1;
129 }
130 
131 __forceinline static void
132 __kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid )
133 {
134  KMP_MB();
135 
136 #ifdef USE_LOCK_PROFILE
137  kmp_uint32 curr = TCR_4( lck->lk.poll );
138  if ( ( curr != 0 ) && ( curr != gtid + 1 ) )
139  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
140  /* else __kmp_printf( "." );*/
141 #endif /* USE_LOCK_PROFILE */
142 
143  if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) {
144  KMP_FSYNC_ACQUIRED(lck);
145  return;
146  }
147 
148  kmp_uint32 spins;
149  KMP_FSYNC_PREPARE( lck );
150  KMP_INIT_YIELD( spins );
151  if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
152  __kmp_xproc ) ) {
153  KMP_YIELD( TRUE );
154  }
155  else {
156  KMP_YIELD_SPIN( spins );
157  }
158 
159  while ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 )
160  == 0 ) {
161  //
162  // FIXME - use exponential backoff here
163  //
164  if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
165  __kmp_xproc ) ) {
166  KMP_YIELD( TRUE );
167  }
168  else {
169  KMP_YIELD_SPIN( spins );
170  }
171  }
172  KMP_FSYNC_ACQUIRED( lck );
173 }
174 
175 void
176 __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
177 {
178  __kmp_acquire_tas_lock_timed_template( lck, gtid );
179 }
180 
181 static void
182 __kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
183 {
184  if ( __kmp_env_consistency_check ) {
185  char const * const func = "omp_set_lock";
186  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
187  && __kmp_is_tas_lock_nestable( lck ) ) {
188  KMP_FATAL( LockNestableUsedAsSimple, func );
189  }
190  if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) {
191  KMP_FATAL( LockIsAlreadyOwned, func );
192  }
193  }
194  __kmp_acquire_tas_lock( lck, gtid );
195 }
196 
197 int
198 __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
199 {
200  if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, gtid + 1 ) ) {
201  KMP_FSYNC_ACQUIRED( lck );
202  return TRUE;
203  }
204  return FALSE;
205 }
206 
207 static int
208 __kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
209 {
210  if ( __kmp_env_consistency_check ) {
211  char const * const func = "omp_test_lock";
212  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
213  && __kmp_is_tas_lock_nestable( lck ) ) {
214  KMP_FATAL( LockNestableUsedAsSimple, func );
215  }
216  }
217  return __kmp_test_tas_lock( lck, gtid );
218 }
219 
220 void
221 __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
222 {
223  KMP_MB(); /* Flush all pending memory write invalidates. */
224 
225  KMP_FSYNC_RELEASING(lck);
226  TCW_4( lck->lk.poll, 0 );
227 
228  KMP_MB(); /* Flush all pending memory write invalidates. */
229 
230  KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
231  __kmp_xproc ) );
232 }
233 
234 static void
235 __kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
236 {
237  if ( __kmp_env_consistency_check ) {
238  char const * const func = "omp_unset_lock";
239  KMP_MB(); /* in case another processor initialized lock */
240  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
241  && __kmp_is_tas_lock_nestable( lck ) ) {
242  KMP_FATAL( LockNestableUsedAsSimple, func );
243  }
244  if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
245  KMP_FATAL( LockUnsettingFree, func );
246  }
247  if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 )
248  && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) {
249  KMP_FATAL( LockUnsettingSetByAnother, func );
250  }
251  }
252  __kmp_release_tas_lock( lck, gtid );
253 }
254 
255 void
256 __kmp_init_tas_lock( kmp_tas_lock_t * lck )
257 {
258  TCW_4( lck->lk.poll, 0 );
259 }
260 
261 static void
262 __kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck )
263 {
264  __kmp_init_tas_lock( lck );
265 }
266 
267 void
268 __kmp_destroy_tas_lock( kmp_tas_lock_t *lck )
269 {
270  lck->lk.poll = 0;
271 }
272 
273 static void
274 __kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck )
275 {
276  if ( __kmp_env_consistency_check ) {
277  char const * const func = "omp_destroy_lock";
278  if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE )
279  && __kmp_is_tas_lock_nestable( lck ) ) {
280  KMP_FATAL( LockNestableUsedAsSimple, func );
281  }
282  if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
283  KMP_FATAL( LockStillOwned, func );
284  }
285  }
286  __kmp_destroy_tas_lock( lck );
287 }
288 
289 
290 //
291 // nested test and set locks
292 //
293 
294 void
295 __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
296 {
297  KMP_DEBUG_ASSERT( gtid >= 0 );
298 
299  if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
300  lck->lk.depth_locked += 1;
301  }
302  else {
303  __kmp_acquire_tas_lock_timed_template( lck, gtid );
304  lck->lk.depth_locked = 1;
305  }
306 }
307 
308 static void
309 __kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
310 {
311  if ( __kmp_env_consistency_check ) {
312  char const * const func = "omp_set_nest_lock";
313  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
314  KMP_FATAL( LockSimpleUsedAsNestable, func );
315  }
316  }
317  __kmp_acquire_nested_tas_lock( lck, gtid );
318 }
319 
320 int
321 __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
322 {
323  int retval;
324 
325  KMP_DEBUG_ASSERT( gtid >= 0 );
326 
327  if ( __kmp_get_tas_lock_owner( lck ) == gtid ) {
328  retval = ++lck->lk.depth_locked;
329  }
330  else if ( !__kmp_test_tas_lock( lck, gtid ) ) {
331  retval = 0;
332  }
333  else {
334  KMP_MB();
335  retval = lck->lk.depth_locked = 1;
336  }
337  return retval;
338 }
339 
340 static int
341 __kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
342 {
343  if ( __kmp_env_consistency_check ) {
344  char const * const func = "omp_test_nest_lock";
345  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
346  KMP_FATAL( LockSimpleUsedAsNestable, func );
347  }
348  }
349  return __kmp_test_nested_tas_lock( lck, gtid );
350 }
351 
352 void
353 __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid )
354 {
355  KMP_DEBUG_ASSERT( gtid >= 0 );
356 
357  KMP_MB();
358  if ( --(lck->lk.depth_locked) == 0 ) {
359  __kmp_release_tas_lock( lck, gtid );
360  }
361 }
362 
363 static void
364 __kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid )
365 {
366  if ( __kmp_env_consistency_check ) {
367  char const * const func = "omp_unset_nest_lock";
368  KMP_MB(); /* in case another processor initialized lock */
369  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
370  KMP_FATAL( LockSimpleUsedAsNestable, func );
371  }
372  if ( __kmp_get_tas_lock_owner( lck ) == -1 ) {
373  KMP_FATAL( LockUnsettingFree, func );
374  }
375  if ( __kmp_get_tas_lock_owner( lck ) != gtid ) {
376  KMP_FATAL( LockUnsettingSetByAnother, func );
377  }
378  }
379  __kmp_release_nested_tas_lock( lck, gtid );
380 }
381 
382 void
383 __kmp_init_nested_tas_lock( kmp_tas_lock_t * lck )
384 {
385  __kmp_init_tas_lock( lck );
386  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
387 }
388 
389 static void
390 __kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck )
391 {
392  __kmp_init_nested_tas_lock( lck );
393 }
394 
395 void
396 __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck )
397 {
398  __kmp_destroy_tas_lock( lck );
399  lck->lk.depth_locked = 0;
400 }
401 
402 static void
403 __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck )
404 {
405  if ( __kmp_env_consistency_check ) {
406  char const * const func = "omp_destroy_nest_lock";
407  if ( ! __kmp_is_tas_lock_nestable( lck ) ) {
408  KMP_FATAL( LockSimpleUsedAsNestable, func );
409  }
410  if ( __kmp_get_tas_lock_owner( lck ) != -1 ) {
411  KMP_FATAL( LockStillOwned, func );
412  }
413  }
414  __kmp_destroy_nested_tas_lock( lck );
415 }
416 
417 
418 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
419 
420 /* ------------------------------------------------------------------------ */
421 /* futex locks */
422 
423 // futex locks are really just test and set locks, with a different method
424 // of handling contention. They take the same amount of space as test and
425 // set locks, and are allocated the same way (i.e. use the area allocated by
426 // the compiler for non-nested locks / allocate nested locks on the heap).
427 
428 static kmp_int32
429 __kmp_get_futex_lock_owner( kmp_futex_lock_t *lck )
430 {
431  return ( TCR_4( lck->lk.poll ) >> 1 ) - 1;
432 }
433 
434 static inline bool
435 __kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck )
436 {
437  return lck->lk.depth_locked != -1;
438 }
439 
440 __forceinline static void
441 __kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid )
442 {
443  kmp_int32 gtid_code = ( gtid + 1 ) << 1;
444 
445  KMP_MB();
446 
447 #ifdef USE_LOCK_PROFILE
448  kmp_uint32 curr = TCR_4( lck->lk.poll );
449  if ( ( curr != 0 ) && ( curr != gtid_code ) )
450  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
451  /* else __kmp_printf( "." );*/
452 #endif /* USE_LOCK_PROFILE */
453 
454  KMP_FSYNC_PREPARE( lck );
455  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
456  lck, lck->lk.poll, gtid ) );
457 
458  kmp_int32 poll_val;
459  while ( ( poll_val = __kmp_compare_and_store_ret32( & ( lck->lk.poll ), 0,
460  gtid_code ) ) != 0 ) {
461  kmp_int32 cond = poll_val & 1;
462  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
463  lck, gtid, poll_val, cond ) );
464 
465  //
466  // NOTE: if you try to use the following condition for this branch
467  //
468  // if ( poll_val & 1 == 0 )
469  //
470  // Then the 12.0 compiler has a bug where the following block will
471  // always be skipped, regardless of the value of the LSB of poll_val.
472  //
473  if ( ! cond ) {
474  //
475  // Try to set the lsb in the poll to indicate to the owner
476  // thread that they need to wake this thread up.
477  //
478  if ( ! __kmp_compare_and_store32( & ( lck->lk.poll ),
479  poll_val, poll_val | 1 ) ) {
480  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
481  lck, lck->lk.poll, gtid ) );
482  continue;
483  }
484  poll_val |= 1;
485 
486  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n",
487  lck, lck->lk.poll, gtid ) );
488  }
489 
490  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
491  lck, gtid, poll_val ) );
492 
493  kmp_int32 rc;
494  if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT,
495  poll_val, NULL, NULL, 0 ) ) != 0 ) {
496  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n",
497  lck, gtid, poll_val, rc, errno ) );
498  continue;
499  }
500 
501  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
502  lck, gtid, poll_val ) );
503  //
504  // This thread has now done a succesful futex wait call and was
505  // entered on the OS futex queue. We must now perform a futex
506  // wake call when releasing the lock, as we have no idea how many
507  // other threads are in the queue.
508  //
509  gtid_code |= 1;
510  }
511 
512  KMP_FSYNC_ACQUIRED( lck );
513  KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n",
514  lck, lck->lk.poll, gtid ) );
515 }
516 
517 void
518 __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
519 {
520  __kmp_acquire_futex_lock_timed_template( lck, gtid );
521 }
522 
523 static void
524 __kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
525 {
526  if ( __kmp_env_consistency_check ) {
527  char const * const func = "omp_set_lock";
528  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
529  && __kmp_is_futex_lock_nestable( lck ) ) {
530  KMP_FATAL( LockNestableUsedAsSimple, func );
531  }
532  if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) {
533  KMP_FATAL( LockIsAlreadyOwned, func );
534  }
535  }
536  __kmp_acquire_futex_lock( lck, gtid );
537 }
538 
539 int
540 __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
541 {
542  if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), 0, ( gtid + 1 ) << 1 ) ) {
543  KMP_FSYNC_ACQUIRED( lck );
544  return TRUE;
545  }
546  return FALSE;
547 }
548 
549 static int
550 __kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
551 {
552  if ( __kmp_env_consistency_check ) {
553  char const * const func = "omp_test_lock";
554  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
555  && __kmp_is_futex_lock_nestable( lck ) ) {
556  KMP_FATAL( LockNestableUsedAsSimple, func );
557  }
558  }
559  return __kmp_test_futex_lock( lck, gtid );
560 }
561 
562 void
563 __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
564 {
565  KMP_MB(); /* Flush all pending memory write invalidates. */
566 
567  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
568  lck, lck->lk.poll, gtid ) );
569 
570  KMP_FSYNC_RELEASING(lck);
571 
572  kmp_int32 poll_val = __kmp_xchg_fixed32( & ( lck->lk.poll ), 0 );
573 
574  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
575  lck, gtid, poll_val ) );
576 
577  if ( poll_val & 1 ) {
578  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
579  lck, gtid ) );
580  syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, 1, NULL, NULL, 0 );
581  }
582 
583  KMP_MB(); /* Flush all pending memory write invalidates. */
584 
585  KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n",
586  lck, lck->lk.poll, gtid ) );
587 
588  KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc :
589  __kmp_xproc ) );
590 }
591 
592 static void
593 __kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
594 {
595  if ( __kmp_env_consistency_check ) {
596  char const * const func = "omp_unset_lock";
597  KMP_MB(); /* in case another processor initialized lock */
598  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
599  && __kmp_is_futex_lock_nestable( lck ) ) {
600  KMP_FATAL( LockNestableUsedAsSimple, func );
601  }
602  if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
603  KMP_FATAL( LockUnsettingFree, func );
604  }
605  if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 )
606  && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) {
607  KMP_FATAL( LockUnsettingSetByAnother, func );
608  }
609  }
610  __kmp_release_futex_lock( lck, gtid );
611 }
612 
613 void
614 __kmp_init_futex_lock( kmp_futex_lock_t * lck )
615 {
616  TCW_4( lck->lk.poll, 0 );
617 }
618 
619 static void
620 __kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck )
621 {
622  __kmp_init_futex_lock( lck );
623 }
624 
625 void
626 __kmp_destroy_futex_lock( kmp_futex_lock_t *lck )
627 {
628  lck->lk.poll = 0;
629 }
630 
631 static void
632 __kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck )
633 {
634  if ( __kmp_env_consistency_check ) {
635  char const * const func = "omp_destroy_lock";
636  if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE )
637  && __kmp_is_futex_lock_nestable( lck ) ) {
638  KMP_FATAL( LockNestableUsedAsSimple, func );
639  }
640  if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
641  KMP_FATAL( LockStillOwned, func );
642  }
643  }
644  __kmp_destroy_futex_lock( lck );
645 }
646 
647 
648 //
649 // nested futex locks
650 //
651 
652 void
653 __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
654 {
655  KMP_DEBUG_ASSERT( gtid >= 0 );
656 
657  if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
658  lck->lk.depth_locked += 1;
659  }
660  else {
661  __kmp_acquire_futex_lock_timed_template( lck, gtid );
662  lck->lk.depth_locked = 1;
663  }
664 }
665 
666 static void
667 __kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
668 {
669  if ( __kmp_env_consistency_check ) {
670  char const * const func = "omp_set_nest_lock";
671  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
672  KMP_FATAL( LockSimpleUsedAsNestable, func );
673  }
674  }
675  __kmp_acquire_nested_futex_lock( lck, gtid );
676 }
677 
678 int
679 __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
680 {
681  int retval;
682 
683  KMP_DEBUG_ASSERT( gtid >= 0 );
684 
685  if ( __kmp_get_futex_lock_owner( lck ) == gtid ) {
686  retval = ++lck->lk.depth_locked;
687  }
688  else if ( !__kmp_test_futex_lock( lck, gtid ) ) {
689  retval = 0;
690  }
691  else {
692  KMP_MB();
693  retval = lck->lk.depth_locked = 1;
694  }
695  return retval;
696 }
697 
698 static int
699 __kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
700 {
701  if ( __kmp_env_consistency_check ) {
702  char const * const func = "omp_test_nest_lock";
703  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
704  KMP_FATAL( LockSimpleUsedAsNestable, func );
705  }
706  }
707  return __kmp_test_nested_futex_lock( lck, gtid );
708 }
709 
710 void
711 __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid )
712 {
713  KMP_DEBUG_ASSERT( gtid >= 0 );
714 
715  KMP_MB();
716  if ( --(lck->lk.depth_locked) == 0 ) {
717  __kmp_release_futex_lock( lck, gtid );
718  }
719 }
720 
721 static void
722 __kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid )
723 {
724  if ( __kmp_env_consistency_check ) {
725  char const * const func = "omp_unset_nest_lock";
726  KMP_MB(); /* in case another processor initialized lock */
727  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
728  KMP_FATAL( LockSimpleUsedAsNestable, func );
729  }
730  if ( __kmp_get_futex_lock_owner( lck ) == -1 ) {
731  KMP_FATAL( LockUnsettingFree, func );
732  }
733  if ( __kmp_get_futex_lock_owner( lck ) != gtid ) {
734  KMP_FATAL( LockUnsettingSetByAnother, func );
735  }
736  }
737  __kmp_release_nested_futex_lock( lck, gtid );
738 }
739 
740 void
741 __kmp_init_nested_futex_lock( kmp_futex_lock_t * lck )
742 {
743  __kmp_init_futex_lock( lck );
744  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
745 }
746 
747 static void
748 __kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck )
749 {
750  __kmp_init_nested_futex_lock( lck );
751 }
752 
753 void
754 __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck )
755 {
756  __kmp_destroy_futex_lock( lck );
757  lck->lk.depth_locked = 0;
758 }
759 
760 static void
761 __kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck )
762 {
763  if ( __kmp_env_consistency_check ) {
764  char const * const func = "omp_destroy_nest_lock";
765  if ( ! __kmp_is_futex_lock_nestable( lck ) ) {
766  KMP_FATAL( LockSimpleUsedAsNestable, func );
767  }
768  if ( __kmp_get_futex_lock_owner( lck ) != -1 ) {
769  KMP_FATAL( LockStillOwned, func );
770  }
771  }
772  __kmp_destroy_nested_futex_lock( lck );
773 }
774 
775 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
776 
777 
778 /* ------------------------------------------------------------------------ */
779 /* ticket (bakery) locks */
780 
781 static kmp_int32
782 __kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck )
783 {
784  return TCR_4( lck->lk.owner_id ) - 1;
785 }
786 
787 static inline bool
788 __kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck )
789 {
790  return lck->lk.depth_locked != -1;
791 }
792 
793 static kmp_uint32
794 __kmp_bakery_check(kmp_uint value, kmp_uint checker)
795 {
796  register kmp_uint32 pause;
797 
798  if (value == checker) {
799  return TRUE;
800  }
801  for (pause = checker - value; pause != 0; --pause) {
802  __kmp_static_delay(TRUE);
803  }
804  return FALSE;
805 }
806 
807 __forceinline static void
808 __kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid )
809 {
810  kmp_uint32 my_ticket;
811  KMP_MB();
812 
813  my_ticket = KMP_TEST_THEN_INC32( (kmp_int32 *) &lck->lk.next_ticket );
814 
815 #ifdef USE_LOCK_PROFILE
816  if ( TCR_4( lck->lk.now_serving ) != my_ticket )
817  __kmp_printf( "LOCK CONTENTION: %p\n", lck );
818  /* else __kmp_printf( "." );*/
819 #endif /* USE_LOCK_PROFILE */
820 
821  if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
822  KMP_FSYNC_ACQUIRED(lck);
823  return;
824  }
825  KMP_WAIT_YIELD( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck );
826  KMP_FSYNC_ACQUIRED(lck);
827 }
828 
829 void
830 __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
831 {
832  __kmp_acquire_ticket_lock_timed_template( lck, gtid );
833 }
834 
835 static void
836 __kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
837 {
838  if ( __kmp_env_consistency_check ) {
839  char const * const func = "omp_set_lock";
840  if ( lck->lk.initialized != lck ) {
841  KMP_FATAL( LockIsUninitialized, func );
842  }
843  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
844  KMP_FATAL( LockNestableUsedAsSimple, func );
845  }
846  if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) {
847  KMP_FATAL( LockIsAlreadyOwned, func );
848  }
849  }
850 
851  __kmp_acquire_ticket_lock( lck, gtid );
852 
853  if ( __kmp_env_consistency_check ) {
854  lck->lk.owner_id = gtid + 1;
855  }
856 }
857 
858 int
859 __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
860 {
861  kmp_uint32 my_ticket = TCR_4( lck->lk.next_ticket );
862  if ( TCR_4( lck->lk.now_serving ) == my_ticket ) {
863  kmp_uint32 next_ticket = my_ticket + 1;
864  if ( KMP_COMPARE_AND_STORE_ACQ32( (kmp_int32 *) &lck->lk.next_ticket,
865  my_ticket, next_ticket ) ) {
866  KMP_FSYNC_ACQUIRED( lck );
867  return TRUE;
868  }
869  }
870  return FALSE;
871 }
872 
873 static int
874 __kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
875 {
876  if ( __kmp_env_consistency_check ) {
877  char const * const func = "omp_test_lock";
878  if ( lck->lk.initialized != lck ) {
879  KMP_FATAL( LockIsUninitialized, func );
880  }
881  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
882  KMP_FATAL( LockNestableUsedAsSimple, func );
883  }
884  }
885 
886  int retval = __kmp_test_ticket_lock( lck, gtid );
887 
888  if ( __kmp_env_consistency_check && retval ) {
889  lck->lk.owner_id = gtid + 1;
890  }
891  return retval;
892 }
893 
894 void
895 __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
896 {
897  kmp_uint32 distance;
898 
899  KMP_MB(); /* Flush all pending memory write invalidates. */
900 
901  KMP_FSYNC_RELEASING(lck);
902  distance = ( TCR_4( lck->lk.next_ticket ) - TCR_4( lck->lk.now_serving ) );
903 
904 #if USE_ITT_BUILD && defined(USE_ITT)
905  TCW_4( lck->lk.now_serving, TCR_4( lck->lk.now_serving ) + 1 );
906 #else
907  // Since we own the lock this cannot be a race.
908  lck->lk.now_serving += 1;
909 #endif
910 
911 
912  KMP_MB(); /* Flush all pending memory write invalidates. */
913 
914  KMP_YIELD( distance
915  > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) );
916 }
917 
918 static void
919 __kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
920 {
921  if ( __kmp_env_consistency_check ) {
922  char const * const func = "omp_unset_lock";
923  KMP_MB(); /* in case another processor initialized lock */
924  if ( lck->lk.initialized != lck ) {
925  KMP_FATAL( LockIsUninitialized, func );
926  }
927  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
928  KMP_FATAL( LockNestableUsedAsSimple, func );
929  }
930  if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
931  KMP_FATAL( LockUnsettingFree, func );
932  }
933  if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 )
934  && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) {
935  KMP_FATAL( LockUnsettingSetByAnother, func );
936  }
937  lck->lk.owner_id = 0;
938  }
939  __kmp_release_ticket_lock( lck, gtid );
940 }
941 
942 void
943 __kmp_init_ticket_lock( kmp_ticket_lock_t * lck )
944 {
945  lck->lk.location = NULL;
946  TCW_4( lck->lk.next_ticket, 0 );
947  TCW_4( lck->lk.now_serving, 0 );
948  lck->lk.owner_id = 0; // no thread owns the lock.
949  lck->lk.depth_locked = -1; // -1 => not a nested lock.
950  lck->lk.initialized = (kmp_ticket_lock *)lck;
951 }
952 
953 static void
954 __kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
955 {
956  __kmp_init_ticket_lock( lck );
957 }
958 
959 void
960 __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck )
961 {
962  lck->lk.initialized = NULL;
963  lck->lk.location = NULL;
964  lck->lk.next_ticket = 0;
965  lck->lk.now_serving = 0;
966  lck->lk.owner_id = 0;
967  lck->lk.depth_locked = -1;
968 }
969 
970 static void
971 __kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
972 {
973  if ( __kmp_env_consistency_check ) {
974  char const * const func = "omp_destroy_lock";
975  if ( lck->lk.initialized != lck ) {
976  KMP_FATAL( LockIsUninitialized, func );
977  }
978  if ( __kmp_is_ticket_lock_nestable( lck ) ) {
979  KMP_FATAL( LockNestableUsedAsSimple, func );
980  }
981  if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
982  KMP_FATAL( LockStillOwned, func );
983  }
984  }
985  __kmp_destroy_ticket_lock( lck );
986 }
987 
988 
989 //
990 // nested ticket locks
991 //
992 
993 void
994 __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
995 {
996  KMP_DEBUG_ASSERT( gtid >= 0 );
997 
998  if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
999  lck->lk.depth_locked += 1;
1000  }
1001  else {
1002  __kmp_acquire_ticket_lock_timed_template( lck, gtid );
1003  KMP_MB();
1004  lck->lk.depth_locked = 1;
1005  KMP_MB();
1006  lck->lk.owner_id = gtid + 1;
1007  }
1008 }
1009 
1010 static void
1011 __kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1012 {
1013  if ( __kmp_env_consistency_check ) {
1014  char const * const func = "omp_set_nest_lock";
1015  if ( lck->lk.initialized != lck ) {
1016  KMP_FATAL( LockIsUninitialized, func );
1017  }
1018  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1019  KMP_FATAL( LockSimpleUsedAsNestable, func );
1020  }
1021  }
1022  __kmp_acquire_nested_ticket_lock( lck, gtid );
1023 }
1024 
1025 int
1026 __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1027 {
1028  int retval;
1029 
1030  KMP_DEBUG_ASSERT( gtid >= 0 );
1031 
1032  if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) {
1033  retval = ++lck->lk.depth_locked;
1034  }
1035  else if ( !__kmp_test_ticket_lock( lck, gtid ) ) {
1036  retval = 0;
1037  }
1038  else {
1039  KMP_MB();
1040  retval = lck->lk.depth_locked = 1;
1041  KMP_MB();
1042  lck->lk.owner_id = gtid + 1;
1043  }
1044  return retval;
1045 }
1046 
1047 static int
1048 __kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck,
1049  kmp_int32 gtid )
1050 {
1051  if ( __kmp_env_consistency_check ) {
1052  char const * const func = "omp_test_nest_lock";
1053  if ( lck->lk.initialized != lck ) {
1054  KMP_FATAL( LockIsUninitialized, func );
1055  }
1056  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1057  KMP_FATAL( LockSimpleUsedAsNestable, func );
1058  }
1059  }
1060  return __kmp_test_nested_ticket_lock( lck, gtid );
1061 }
1062 
1063 void
1064 __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1065 {
1066  KMP_DEBUG_ASSERT( gtid >= 0 );
1067 
1068  KMP_MB();
1069  if ( --(lck->lk.depth_locked) == 0 ) {
1070  KMP_MB();
1071  lck->lk.owner_id = 0;
1072  __kmp_release_ticket_lock( lck, gtid );
1073  }
1074 }
1075 
1076 static void
1077 __kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid )
1078 {
1079  if ( __kmp_env_consistency_check ) {
1080  char const * const func = "omp_unset_nest_lock";
1081  KMP_MB(); /* in case another processor initialized lock */
1082  if ( lck->lk.initialized != lck ) {
1083  KMP_FATAL( LockIsUninitialized, func );
1084  }
1085  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1086  KMP_FATAL( LockSimpleUsedAsNestable, func );
1087  }
1088  if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) {
1089  KMP_FATAL( LockUnsettingFree, func );
1090  }
1091  if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) {
1092  KMP_FATAL( LockUnsettingSetByAnother, func );
1093  }
1094  }
1095  __kmp_release_nested_ticket_lock( lck, gtid );
1096 }
1097 
1098 void
1099 __kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck )
1100 {
1101  __kmp_init_ticket_lock( lck );
1102  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1103 }
1104 
1105 static void
1106 __kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck )
1107 {
1108  __kmp_init_nested_ticket_lock( lck );
1109 }
1110 
1111 void
1112 __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck )
1113 {
1114  __kmp_destroy_ticket_lock( lck );
1115  lck->lk.depth_locked = 0;
1116 }
1117 
1118 static void
1119 __kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck )
1120 {
1121  if ( __kmp_env_consistency_check ) {
1122  char const * const func = "omp_destroy_nest_lock";
1123  if ( lck->lk.initialized != lck ) {
1124  KMP_FATAL( LockIsUninitialized, func );
1125  }
1126  if ( ! __kmp_is_ticket_lock_nestable( lck ) ) {
1127  KMP_FATAL( LockSimpleUsedAsNestable, func );
1128  }
1129  if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) {
1130  KMP_FATAL( LockStillOwned, func );
1131  }
1132  }
1133  __kmp_destroy_nested_ticket_lock( lck );
1134 }
1135 
1136 
1137 //
1138 // access functions to fields which don't exist for all lock kinds.
1139 //
1140 
1141 static int
1142 __kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck )
1143 {
1144  return lck == lck->lk.initialized;
1145 }
1146 
1147 static const ident_t *
1148 __kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck )
1149 {
1150  return lck->lk.location;
1151 }
1152 
1153 static void
1154 __kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc )
1155 {
1156  lck->lk.location = loc;
1157 }
1158 
1159 static kmp_lock_flags_t
1160 __kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck )
1161 {
1162  return lck->lk.flags;
1163 }
1164 
1165 static void
1166 __kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags )
1167 {
1168  lck->lk.flags = flags;
1169 }
1170 
1171 /* ------------------------------------------------------------------------ */
1172 /* queuing locks */
1173 
/*
 * First the states
 *   (head,tail) =  0, 0  means lock is unheld, nobody on queue
 *     UINT_MAX or -1, 0  means lock is held, nobody on queue
 *                  h, h  means lock is held or about to transition, 1 element on queue
 *                  h, t  h <> t, means lock is held or about to transition, >1 elements on queue
 *
 * Now the transitions
 *   Acquire(0,0)  = -1 ,0
 *   Release(0,0)  = Error
 *   Acquire(-1,0) =  h ,h    h > 0
 *   Release(-1,0) =  0 ,0
 *   Acquire(h,h)  =  h ,t    h > 0, t > 0, h <> t
 *   Release(h,h)  = -1 ,0    h > 0
 *   Acquire(h,t)  =  h ,t'   h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
 *   Release(h,t)  =  h',t    h > 0, t > 0, h <> t, h <> h', h' maybe = t
 *
 * And pictorially
 *
 *
 *          +-----+
 *          | 0, 0|------- release -------> Error
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          |-1, 0|
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          | h, h|
 *          +-----+
 *            |  ^
 *     acquire|  |release
 *            |  |
 *            |  |
 *            v  |
 *          +-----+
 *          | h, t|----- acquire, release loopback ---+
 *          +-----+                                   |
 *               ^                                    |
 *               |                                    |
 *               +------------------------------------+
 *
 */
1226 
1227 #ifdef DEBUG_QUEUING_LOCKS
1228 
/* Stuff for circular trace buffer */

/* Number of message slots in the ring. */
#define TRACE_BUF_ELE 1024

/* The ring itself: TRACE_BUF_ELE slots of 128 bytes each, zero-filled. */
static char traces[TRACE_BUF_ELE][128] = { 0 };

/*
 * Running count of messages written; (tc % TRACE_BUF_ELE) is the slot the
 * next message goes into. It is a plain int bumped with tc++ inside the
 * macros below, i.e. without any synchronization.
 */
static int tc = 0;

/*
 * Each macro formats one message into the next slot with sprintf (no bounds
 * check against the 128-byte slot) and advances tc.
 *   TRACE_LOCK(X,Y)         -> "t<X> at <Y>\n"
 *   TRACE_LOCK_T(X,Y,Z)     -> "t<X> at <Y><Z>\n"
 *   TRACE_LOCK_HT(X,Y,Z,Q)  -> "t<X> at <Y> <Z>,<Q>\n"
 * The expansions deliberately keep their trailing ';' so existing call sites
 * continue to compile unchanged.
 */
#define TRACE_LOCK(X,Y) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s\n", X, Y );
#define TRACE_LOCK_T(X,Y,Z) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s%d\n", X,Y,Z );
#define TRACE_LOCK_HT(X,Y,Z,Q) sprintf( traces[tc++ % TRACE_BUF_ELE], "t%d at %s %d,%d\n", X, Y, Z, Q );
1236 
1237 static void
1238 __kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid,
1239  kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id )
1240 {
1241  kmp_int32 t, i;
1242 
1243  __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" );
1244 
1245  i = tc % TRACE_BUF_ELE;
1246  __kmp_printf_no_lock( "%s\n", traces[i] );
1247  i = (i+1) % TRACE_BUF_ELE;
1248  while ( i != (tc % TRACE_BUF_ELE) ) {
1249  __kmp_printf_no_lock( "%s", traces[i] );
1250  i = (i+1) % TRACE_BUF_ELE;
1251  }
1252  __kmp_printf_no_lock( "\n" );
1253 
1254  __kmp_printf_no_lock(
1255  "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n",
1256  gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting,
1257  head_id, tail_id );
1258 
1259  __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id );
1260 
1261  if ( lck->lk.head_id >= 1 ) {
1262  t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting;
1263  while (t > 0) {
1264  __kmp_printf_no_lock( "-> %d ", t );
1265  t = __kmp_threads[t-1]->th.th_next_waiting;
1266  }
1267  }
1268  __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id );
1269  __kmp_printf_no_lock( "\n\n" );
1270 }
1271 
1272 #endif /* DEBUG_QUEUING_LOCKS */
1273 
1274 static kmp_int32
1275 __kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck )
1276 {
1277  return TCR_4( lck->lk.owner_id ) - 1;
1278 }
1279 
1280 static inline bool
1281 __kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck )
1282 {
1283  return lck->lk.depth_locked != -1;
1284 }
1285 
//
// Blocking acquire of a queuing lock for thread gtid.
//
// The thread first sets its own th_spin_here flag, then loops on the lock's
// head_id:
//   head ==  0 : CAS head 0 -> -1; on success the flag is cleared again and
//                the function returns holding the lock without queuing.
//   head == -1 : 64-bit CAS of the pair (-1,0) -> (gtid+1,gtid+1), making
//                this thread the only waiter.
//   otherwise  : if tail is non-zero, CAS tail -> gtid+1 to append this
//                thread; a tail of 0 just retries.
// Once enqueued, the previous tail's th_next_waiting (if any) is pointed at
// this thread and the thread waits in KMP_WAIT_YIELD until its th_spin_here
// is FALSE, which the release code sets when it hands the lock over.
// If nothing was achieved in an iteration, the thread optionally yields
// (oversubscription test) and retries.
//
// lck  : the lock to acquire.
// gtid : global thread id of the caller; used to look up its kmp_info_t.
//
1286 /* Acquire a lock using the queuing lock implementation */
1287 template <bool takeTime>
1288 /* [TLW] The unused template above is left behind because of what BEB believes is a
1289  potential compiler problem with __forceinline. */
1290 __forceinline static void
1291 __kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck,
1292  kmp_int32 gtid )
1293 {
1294  register kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid );
1295  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1296  volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1297  volatile kmp_uint32 *spin_here_p;
// NOTE(review): need_mf is only ever assigned in this function; no read of it is visible here.
1298  kmp_int32 need_mf = 1;
1299 
1300  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1301 
1302  KMP_FSYNC_PREPARE( lck );
1303  KMP_DEBUG_ASSERT( this_thr != NULL );
1304  spin_here_p = & this_thr->th.th_spin_here;
1305 
1306 #ifdef DEBUG_QUEUING_LOCKS
1307  TRACE_LOCK( gtid+1, "acq ent" );
1308  if ( *spin_here_p )
1309  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1310  if ( this_thr->th.th_next_waiting != 0 )
1311  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1312 #endif
// On entry the thread must be neither spinning on nor linked into any queue.
1313  KMP_DEBUG_ASSERT( !*spin_here_p );
1314  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1315 
1316 
1317  /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to head_id_p
1318  that may follow, not just in execution order, but also in visibility order. This way,
1319  when a releasing thread observes the changes to the queue by this thread, it can
1320  rightly assume that spin_here_p has already been set to TRUE, so that when it sets
1321  spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p
1322  to FALSE before this thread sets it to TRUE, this thread will hang.
1323  */
1324  *spin_here_p = TRUE; /* before enqueuing to prevent race */
1325 
1326  while( 1 ) {
1327  kmp_int32 enqueued;
1328  kmp_int32 head;
1329  kmp_int32 tail;
1330 
1331  head = *head_id_p;
1332 
1333  switch ( head ) {
1334 
// head == -1: lock is held and nobody is queued; try to become the first waiter.
1335  case -1:
1336  {
1337 #ifdef DEBUG_QUEUING_LOCKS
1338  tail = *tail_id_p;
1339  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1340 #endif
1341  tail = 0; /* to make sure next link asynchronously read is not set accidentally;
1342  this assignment prevents us from entering the if ( t > 0 )
1343  condition in the enqueued case below, which is not necessary for
1344  this state transition */
1345 
1346  need_mf = 0;
1347  /* try (-1,0)->(tid,tid) */
// NOTE(review): the 64-bit CAS through tail_id_p presumably covers head_id and tail_id
// as one adjacent pair; confirm the field layout in kmp_lock.h.
1348  enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p,
1349  KMP_PACK_64( -1, 0 ),
1350  KMP_PACK_64( gtid+1, gtid+1 ) );
1351 #ifdef DEBUG_QUEUING_LOCKS
1352  if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" );
1353 #endif
1354  }
1355  break;
1356 
// Any other head value: a queue already exists; try to append behind the current tail.
1357  default:
1358  {
1359  tail = *tail_id_p;
1360  KMP_DEBUG_ASSERT( tail != gtid + 1 );
1361 
1362 #ifdef DEBUG_QUEUING_LOCKS
1363  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1364 #endif
1365 
// tail == 0 with a non-zero, non -1 head: do not attempt a CAS, just retry the loop.
1366  if ( tail == 0 ) {
1367  enqueued = FALSE;
1368  }
1369  else {
1370  need_mf = 0;
1371  /* try (h,t) or (h,h)->(h,tid) */
1372  enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 );
1373 
1374 #ifdef DEBUG_QUEUING_LOCKS
1375  if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" );
1376 #endif
1377  }
1378  }
1379  break;
1380 
1381  case 0: /* empty queue */
1382  {
1383  kmp_int32 grabbed_lock;
1384 
1385 #ifdef DEBUG_QUEUING_LOCKS
1386  tail = *tail_id_p;
1387  TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail );
1388 #endif
1389  /* try (0,0)->(-1,0) */
1390 
1391  /* only legal transition out of head = 0 is head = -1 with no change to tail */
1392  grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 );
1393 
1394  if ( grabbed_lock ) {
1395 
// Lock taken directly: undo the speculative spin flag set before the loop.
1396  *spin_here_p = FALSE;
1397 
1398  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1399  lck, gtid ));
1400 #ifdef DEBUG_QUEUING_LOCKS
1401  TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 );
1402 #endif
1403  KMP_FSYNC_ACQUIRED( lck );
1404  return; /* lock holder cannot be on queue */
1405  }
1406  enqueued = FALSE;
1407  }
1408  break;
1409  }
1410 
1411  if ( enqueued ) {
// tail holds the previous tail id (0 in the "first waiter" case); link it to this thread.
1412  if ( tail > 0 ) {
1413  kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 );
1414  KMP_ASSERT( tail_thr != NULL );
1415  tail_thr->th.th_next_waiting = gtid+1;
1416  /* corresponding wait for this write in release code */
1417  }
1418  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid ));
1419 
1420 
1421  /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for
1422  * throughput only here.
1423  */
1424  KMP_MB();
// Wait until this thread's spin flag equals FALSE.
1425  KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck);
1426 
1427 #ifdef DEBUG_QUEUING_LOCKS
1428  TRACE_LOCK( gtid+1, "acq spin" );
1429 
1430  if ( this_thr->th.th_next_waiting != 0 )
1431  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1432 #endif
1433  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1434  KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n",
1435  lck, gtid ));
1436 
1437 #ifdef DEBUG_QUEUING_LOCKS
1438  TRACE_LOCK( gtid+1, "acq exit 2" );
1439 #endif
1440  /* got lock, we were dequeued by the thread that released lock */
1441  return;
1442  }
1443 
1444  /* Yield if number of threads > number of logical processors */
1445  /* ToDo: Not sure why this should only be in oversubscription case,
1446  maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1447  KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc :
1448  __kmp_xproc ) );
1449 #ifdef DEBUG_QUEUING_LOCKS
1450  TRACE_LOCK( gtid+1, "acq retry" );
1451 #endif
1452 
1453  }
1454  KMP_ASSERT2( 0, "should not get here" );
1455 }
1456 
1457 void
1458 __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1459 {
1460  KMP_DEBUG_ASSERT( gtid >= 0 );
1461 
1462  __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1463 }
1464 
1465 static void
1466 __kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1467  kmp_int32 gtid )
1468 {
1469  if ( __kmp_env_consistency_check ) {
1470  char const * const func = "omp_set_lock";
1471  if ( lck->lk.initialized != lck ) {
1472  KMP_FATAL( LockIsUninitialized, func );
1473  }
1474  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1475  KMP_FATAL( LockNestableUsedAsSimple, func );
1476  }
1477  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1478  KMP_FATAL( LockIsAlreadyOwned, func );
1479  }
1480  }
1481 
1482  __kmp_acquire_queuing_lock( lck, gtid );
1483 
1484  if ( __kmp_env_consistency_check ) {
1485  lck->lk.owner_id = gtid + 1;
1486  }
1487 }
1488 
1489 int
1490 __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1491 {
1492  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1493  kmp_int32 head;
1494 #ifdef KMP_DEBUG
1495  kmp_info_t *this_thr;
1496 #endif
1497 
1498  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid ));
1499  KMP_DEBUG_ASSERT( gtid >= 0 );
1500 #ifdef KMP_DEBUG
1501  this_thr = __kmp_thread_from_gtid( gtid );
1502  KMP_DEBUG_ASSERT( this_thr != NULL );
1503  KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1504 #endif
1505 
1506  head = *head_id_p;
1507 
1508  if ( head == 0 ) { /* nobody on queue, nobody holding */
1509 
1510  /* try (0,0)->(-1,0) */
1511 
1512  if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) {
1513  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid ));
1514  KMP_FSYNC_ACQUIRED(lck);
1515  return TRUE;
1516  }
1517  }
1518 
1519  KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid ));
1520  return FALSE;
1521 }
1522 
1523 static int
1524 __kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1525 {
1526  if ( __kmp_env_consistency_check ) {
1527  char const * const func = "omp_test_lock";
1528  if ( lck->lk.initialized != lck ) {
1529  KMP_FATAL( LockIsUninitialized, func );
1530  }
1531  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1532  KMP_FATAL( LockNestableUsedAsSimple, func );
1533  }
1534  }
1535 
1536  int retval = __kmp_test_queuing_lock( lck, gtid );
1537 
1538  if ( __kmp_env_consistency_check && retval ) {
1539  lck->lk.owner_id = gtid + 1;
1540  }
1541  return retval;
1542 }
1543 
//
// Release a queuing lock held by thread gtid.
//
// Loops on the lock's head_id until one of these succeeds:
//   head == -1   : CAS head -1 -> 0; the lock becomes free and the function
//                  returns.
//   head == tail : exactly one waiter; 64-bit CAS (h,h) -> (-1,0) removes it
//                  from the queue.
//   otherwise    : more than one waiter; wait until the head thread's
//                  th_next_waiting is non-zero and store that value as the
//                  new head_id.
// After a successful dequeue, the old head thread's th_next_waiting is
// cleared and then its th_spin_here is set to FALSE, which is the condition
// the acquire code waits on.
//
// lck  : the lock being released.
// gtid : global thread id of the releasing thread.
//
1544 void
1545 __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1546 {
1547  register kmp_info_t *this_thr;
1548  volatile kmp_int32 *head_id_p = & lck->lk.head_id;
1549  volatile kmp_int32 *tail_id_p = & lck->lk.tail_id;
1550 
1551  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid ));
1552  KMP_DEBUG_ASSERT( gtid >= 0 );
1553  this_thr = __kmp_thread_from_gtid( gtid );
1554  KMP_DEBUG_ASSERT( this_thr != NULL );
1555 #ifdef DEBUG_QUEUING_LOCKS
1556  TRACE_LOCK( gtid+1, "rel ent" );
1557 
1558  if ( this_thr->th.th_spin_here )
1559  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1560  if ( this_thr->th.th_next_waiting != 0 )
1561  __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p );
1562 #endif
// The releasing thread must not itself be spinning on or linked into a queue.
1563  KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
1564  KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
1565 
1566  KMP_FSYNC_RELEASING(lck);
1567 
1568  while( 1 ) {
1569  kmp_int32 dequeued;
1570  kmp_int32 head;
1571  kmp_int32 tail;
1572 
1573  head = *head_id_p;
1574 
1575 #ifdef DEBUG_QUEUING_LOCKS
1576  tail = *tail_id_p;
1577  TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail );
1578  if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1579 #endif
1580  KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */
1581 
1582  if ( head == -1 ) { /* nobody on queue */
1583 
1584  /* try (-1,0)->(0,0) */
1585  if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) {
1586  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1587  lck, gtid ));
1588 #ifdef DEBUG_QUEUING_LOCKS
1589  TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 );
1590 #endif
1591  return;
1592  }
// CAS lost: head_id no longer held -1 at the time of the CAS; re-read and retry.
1593  dequeued = FALSE;
1594 
1595  }
1596  else {
1597 
1598  tail = *tail_id_p;
1599  if ( head == tail ) { /* only one thread on the queue */
1600 
1601 #ifdef DEBUG_QUEUING_LOCKS
1602  if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1603 #endif
1604  KMP_DEBUG_ASSERT( head > 0 );
1605 
1606  /* try (h,h)->(-1,0) */
// NOTE(review): this cast drops the volatile qualifier that the matching 64-bit CAS in the acquire path keeps.
1607  dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p,
1608  KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) );
1609 #ifdef DEBUG_QUEUING_LOCKS
// The trace entry below is recorded whether or not the CAS above succeeded.
1610  TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" );
1611 #endif
1612 
1613  }
1614  else {
1615  volatile kmp_int32 *waiting_id_p;
1616  kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1617  KMP_DEBUG_ASSERT( head_thr != NULL );
1618  waiting_id_p = & head_thr->th.th_next_waiting;
1619 
1620  /* Does this require synchronous reads? */
1621 #ifdef DEBUG_QUEUING_LOCKS
1622  if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1623 #endif
1624  KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1625 
1626  /* try (h,t)->(h',t) or (t,t) */
1627 
1628  KMP_MB();
1629  /* make sure enqueuing thread has time to update next waiting thread field */
// The value KMP_WAIT_YIELD returns (the head thread's non-zero next-waiting id) becomes the new head_id.
1630  *head_id_p = (kmp_int32) KMP_WAIT_YIELD((volatile kmp_uint*) waiting_id_p, 0, KMP_NEQ, NULL);
1631 #ifdef DEBUG_QUEUING_LOCKS
1632  TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" );
1633 #endif
1634  dequeued = TRUE;
1635  }
1636  }
1637 
1638  if ( dequeued ) {
// "head" still holds the id of the thread that was just removed from the queue.
1639  kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 );
1640  KMP_DEBUG_ASSERT( head_thr != NULL );
1641 
1642  /* Does this require synchronous reads? */
1643 #ifdef DEBUG_QUEUING_LOCKS
1644  if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail );
1645 #endif
1646  KMP_DEBUG_ASSERT( head > 0 && tail > 0 );
1647 
1648  /* For clean code only.
1649  * Thread not released until next statement prevents race with acquire code.
1650  */
1651  head_thr->th.th_next_waiting = 0;
1652 #ifdef DEBUG_QUEUING_LOCKS
1653  TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head );
1654 #endif
1655 
1656  KMP_MB();
1657  /* reset spin value */
1658  head_thr->th.th_spin_here = FALSE;
1659 
1660  KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n",
1661  lck, gtid ));
1662 #ifdef DEBUG_QUEUING_LOCKS
1663  TRACE_LOCK( gtid+1, "rel exit 2" );
1664 #endif
1665  return;
1666  }
1667  /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */
1668 
1669 #ifdef DEBUG_QUEUING_LOCKS
1670  TRACE_LOCK( gtid+1, "rel retry" );
1671 #endif
1672 
1673  } /* while */
1674  KMP_ASSERT2( 0, "should not get here" );
1675 }
1676 
1677 static void
1678 __kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1679  kmp_int32 gtid )
1680 {
1681  if ( __kmp_env_consistency_check ) {
1682  char const * const func = "omp_unset_lock";
1683  KMP_MB(); /* in case another processor initialized lock */
1684  if ( lck->lk.initialized != lck ) {
1685  KMP_FATAL( LockIsUninitialized, func );
1686  }
1687  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1688  KMP_FATAL( LockNestableUsedAsSimple, func );
1689  }
1690  if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1691  KMP_FATAL( LockUnsettingFree, func );
1692  }
1693  if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1694  KMP_FATAL( LockUnsettingSetByAnother, func );
1695  }
1696  lck->lk.owner_id = 0;
1697  }
1698  __kmp_release_queuing_lock( lck, gtid );
1699 }
1700 
1701 void
1702 __kmp_init_queuing_lock( kmp_queuing_lock_t *lck )
1703 {
1704  lck->lk.location = NULL;
1705  lck->lk.head_id = 0;
1706  lck->lk.tail_id = 0;
1707  lck->lk.next_ticket = 0;
1708  lck->lk.now_serving = 0;
1709  lck->lk.owner_id = 0; // no thread owns the lock.
1710  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1711  lck->lk.initialized = lck;
1712 
1713  KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1714 }
1715 
1716 static void
1717 __kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1718 {
1719  __kmp_init_queuing_lock( lck );
1720 }
1721 
1722 void
1723 __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck )
1724 {
1725  lck->lk.initialized = NULL;
1726  lck->lk.location = NULL;
1727  lck->lk.head_id = 0;
1728  lck->lk.tail_id = 0;
1729  lck->lk.next_ticket = 0;
1730  lck->lk.now_serving = 0;
1731  lck->lk.owner_id = 0;
1732  lck->lk.depth_locked = -1;
1733 }
1734 
1735 static void
1736 __kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1737 {
1738  if ( __kmp_env_consistency_check ) {
1739  char const * const func = "omp_destroy_lock";
1740  if ( lck->lk.initialized != lck ) {
1741  KMP_FATAL( LockIsUninitialized, func );
1742  }
1743  if ( __kmp_is_queuing_lock_nestable( lck ) ) {
1744  KMP_FATAL( LockNestableUsedAsSimple, func );
1745  }
1746  if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1747  KMP_FATAL( LockStillOwned, func );
1748  }
1749  }
1750  __kmp_destroy_queuing_lock( lck );
1751 }
1752 
1753 
1754 //
1755 // nested queuing locks
1756 //
1757 
1758 void
1759 __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1760 {
1761  KMP_DEBUG_ASSERT( gtid >= 0 );
1762 
1763  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1764  lck->lk.depth_locked += 1;
1765  }
1766  else {
1767  __kmp_acquire_queuing_lock_timed_template<false>( lck, gtid );
1768  KMP_MB();
1769  lck->lk.depth_locked = 1;
1770  KMP_MB();
1771  lck->lk.owner_id = gtid + 1;
1772  }
1773 }
1774 
1775 static void
1776 __kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1777 {
1778  if ( __kmp_env_consistency_check ) {
1779  char const * const func = "omp_set_nest_lock";
1780  if ( lck->lk.initialized != lck ) {
1781  KMP_FATAL( LockIsUninitialized, func );
1782  }
1783  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1784  KMP_FATAL( LockSimpleUsedAsNestable, func );
1785  }
1786  }
1787  __kmp_acquire_nested_queuing_lock( lck, gtid );
1788 }
1789 
1790 int
1791 __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1792 {
1793  int retval;
1794 
1795  KMP_DEBUG_ASSERT( gtid >= 0 );
1796 
1797  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
1798  retval = ++lck->lk.depth_locked;
1799  }
1800  else if ( !__kmp_test_queuing_lock( lck, gtid ) ) {
1801  retval = 0;
1802  }
1803  else {
1804  KMP_MB();
1805  retval = lck->lk.depth_locked = 1;
1806  KMP_MB();
1807  lck->lk.owner_id = gtid + 1;
1808  }
1809  return retval;
1810 }
1811 
1812 static int
1813 __kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck,
1814  kmp_int32 gtid )
1815 {
1816  if ( __kmp_env_consistency_check ) {
1817  char const * const func = "omp_test_nest_lock";
1818  if ( lck->lk.initialized != lck ) {
1819  KMP_FATAL( LockIsUninitialized, func );
1820  }
1821  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1822  KMP_FATAL( LockSimpleUsedAsNestable, func );
1823  }
1824  }
1825  return __kmp_test_nested_queuing_lock( lck, gtid );
1826 }
1827 
1828 void
1829 __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1830 {
1831  KMP_DEBUG_ASSERT( gtid >= 0 );
1832 
1833  KMP_MB();
1834  if ( --(lck->lk.depth_locked) == 0 ) {
1835  KMP_MB();
1836  lck->lk.owner_id = 0;
1837  __kmp_release_queuing_lock( lck, gtid );
1838  }
1839 }
1840 
1841 static void
1842 __kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
1843 {
1844  if ( __kmp_env_consistency_check ) {
1845  char const * const func = "omp_unset_nest_lock";
1846  KMP_MB(); /* in case another processor initialized lock */
1847  if ( lck->lk.initialized != lck ) {
1848  KMP_FATAL( LockIsUninitialized, func );
1849  }
1850  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1851  KMP_FATAL( LockSimpleUsedAsNestable, func );
1852  }
1853  if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
1854  KMP_FATAL( LockUnsettingFree, func );
1855  }
1856  if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
1857  KMP_FATAL( LockUnsettingSetByAnother, func );
1858  }
1859  }
1860  __kmp_release_nested_queuing_lock( lck, gtid );
1861 }
1862 
1863 void
1864 __kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck )
1865 {
1866  __kmp_init_queuing_lock( lck );
1867  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1868 }
1869 
1870 static void
1871 __kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck )
1872 {
1873  __kmp_init_nested_queuing_lock( lck );
1874 }
1875 
1876 void
1877 __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck )
1878 {
1879  __kmp_destroy_queuing_lock( lck );
1880  lck->lk.depth_locked = 0;
1881 }
1882 
1883 static void
1884 __kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck )
1885 {
1886  if ( __kmp_env_consistency_check ) {
1887  char const * const func = "omp_destroy_nest_lock";
1888  if ( lck->lk.initialized != lck ) {
1889  KMP_FATAL( LockIsUninitialized, func );
1890  }
1891  if ( ! __kmp_is_queuing_lock_nestable( lck ) ) {
1892  KMP_FATAL( LockSimpleUsedAsNestable, func );
1893  }
1894  if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
1895  KMP_FATAL( LockStillOwned, func );
1896  }
1897  }
1898  __kmp_destroy_nested_queuing_lock( lck );
1899 }
1900 
1901 
1902 //
1903 // access functions to fields which don't exist for all lock kinds.
1904 //
1905 
1906 static int
1907 __kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck )
1908 {
1909  return lck == lck->lk.initialized;
1910 }
1911 
1912 static const ident_t *
1913 __kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck )
1914 {
1915  return lck->lk.location;
1916 }
1917 
1918 static void
1919 __kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc )
1920 {
1921  lck->lk.location = loc;
1922 }
1923 
1924 static kmp_lock_flags_t
1925 __kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck )
1926 {
1927  return lck->lk.flags;
1928 }
1929 
1930 static void
1931 __kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags )
1932 {
1933  lck->lk.flags = flags;
1934 }
1935 
1936 #if KMP_USE_ADAPTIVE_LOCKS
1937 
1938 /*
1939  RTM Adaptive locks
1940 */
1941 
1942 // TODO: Use the header for intrinsics below with the compiler 13.0
1943 //#include <immintrin.h>
1944 
1945 // Values from the status register after failed speculation.
// _XBEGIN_STARTED is all ones, i.e. the -1 that _xbegin() returns when speculation started.
// The _XABORT_* values are single-bit masks (bits 0..5) to be tested against an abort status.
1946 #define _XBEGIN_STARTED (~0u)
1947 #define _XABORT_EXPLICIT (1 << 0)
1948 #define _XABORT_RETRY (1 << 1)
1949 #define _XABORT_CONFLICT (1 << 2)
1950 #define _XABORT_CAPACITY (1 << 3)
1951 #define _XABORT_DEBUG (1 << 4)
1952 #define _XABORT_NESTED (1 << 5)
// Extracts bits 31..24 of the status as an unsigned char.
1953 #define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1954 
1955 // Aborts for which it's worth trying again immediately
1956 #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1957 
// Two-level stringification: STRINGIZE(arg) macro-expands arg before it is turned into a string literal.
1958 #define STRINGIZE_INTERNAL(arg) #arg
1959 #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1960 
1961 // Access to RTM instructions
1962 
1963 /*
1964  A version of XBegin which returns -1 on speculation, and the value of EAX on an abort.
1965  This is the same definition as the compiler intrinsic that will be supported at some point.
1966 */
// The instruction is emitted as raw bytes (0xC7 0xF8 followed by a 32-bit
// displacement) rather than by mnemonic. "res" starts at -1 and is only
// overwritten (from EAX) on the path reached after an abort.
1967 static __inline int _xbegin()
1968 {
1969  int res = -1;
1970 
1971 #if KMP_OS_WINDOWS
// NOTE(review): the x86_64 and IA32 branches below are textually identical.
1972 #if KMP_ARCH_X86_64
1973  _asm {
1974  _emit 0xC7
1975  _emit 0xF8
1976  _emit 2
1977  _emit 0
1978  _emit 0
1979  _emit 0
// NOTE(review): the displacement of 2 emitted above presumably skips exactly this jmp so that an abort lands on the mov; confirm the jmp encodes in 2 bytes.
1980  jmp L2
1981  mov res, eax
1982  L2:
1983  }
1984 #else /* IA32 */
1985  _asm {
1986  _emit 0xC7
1987  _emit 0xF8
1988  _emit 2
1989  _emit 0
1990  _emit 0
1991  _emit 0
1992  jmp L2
1993  mov res, eax
1994  L2:
1995  }
1996 #endif // KMP_ARCH_X86_64
1997 #else
1998  /* Note that %eax must be noted as killed (clobbered), because
1999  * the XSR is returned in %eax(%rax) on abort. Other register
2000  * values are restored, so don't need to be killed.
2001  *
2002  * We must also mark 'res' as an input and an output, since otherwise
2003  * 'res=-1' may be dropped as being dead, whereas we do need the
2004  * assignment on the successful (i.e., non-abort) path.
2005  */
// The displacement "1f-1b-6" is the distance from the end of the 6-byte
// instruction to the second "1:" label, where EAX is copied into res.
2006  __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n"
2007  " .long 1f-1b-6\n"
2008  " jmp 2f\n"
2009  "1: movl %%eax,%0\n"
2010  "2:"
2011  :"+r"(res)::"memory","%eax");
2012 #endif // KMP_OS_WINDOWS
2013  return res;
2014 }
2015 
2016 /*
2017  Transaction end
2018 */
// Emits the three raw bytes 0x0f 0x01 0xd5. The GNU-style variant also
// declares a "memory" clobber; the Windows variant is a plain __asm block.
2019 static __inline void _xend()
2020 {
2021 #if KMP_OS_WINDOWS
2022  __asm {
2023  _emit 0x0f
2024  _emit 0x01
2025  _emit 0xd5
2026  }
2027 #else
2028  __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory");
2029 #endif
2030 }
2031 
2032 /*
2033  This is a macro, the argument must be a single byte constant which
2034  can be evaluated by the inline assembler, since it is emitted as a
2035  byte into the assembly code.
2036 */
// Emits the raw bytes 0xC6 0xF8 followed by ARG.
// NOTE(review): the two variants differ in statement termination: the
// non-Windows expansion already ends in ';', the Windows one does not.
// Call sites may rely on this, so check them before changing either form.
2037 #if KMP_OS_WINDOWS
2038 #define _xabort(ARG) \
2039  _asm _emit 0xc6 \
2040  _asm _emit 0xf8 \
2041  _asm _emit ARG
2042 #else
2043 #define _xabort(ARG) \
2044  __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory");
2045 #endif
2046 
2047 //
2048 // Statistics is collected for testing purpose
2049 //
2050 #if KMP_DEBUG_ADAPTIVE_LOCKS
2051 
2052 // We accumulate speculative lock statistics when the lock is destroyed.
2053 // We keep locks that haven't been destroyed in the liveLocks list
2054 // so that we can grab their statistics too.
2055 static kmp_adaptive_lock_statistics_t destroyedStats;
2056 
2057 // To hold the list of live locks.
// liveLocks itself is the list's anchor node: the list is circular and doubly
// linked through stats.next / stats.prev, and an empty list has both links
// pointing back at liveLocks.
2058 static kmp_adaptive_lock_t liveLocks;
2059 
2060 // A lock so we can safely update the list of locks.
// It is also held while a destroyed lock's counters are folded into destroyedStats.
2061 static kmp_bootstrap_lock_t chain_lock;
2062 
2063 // Initialize the list of stats.
2064 void
2065 __kmp_init_speculative_stats()
2066 {
2067  kmp_adaptive_lock *lck = &liveLocks;
2068 
2069  memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) );
2070  lck->stats.next = lck;
2071  lck->stats.prev = lck;
2072 
2073  KMP_ASSERT( lck->stats.next->stats.prev == lck );
2074  KMP_ASSERT( lck->stats.prev->stats.next == lck );
2075 
2076  __kmp_init_bootstrap_lock( &chain_lock );
2077 
2078 }
2079 
2080 // Insert the lock into the circular list
2081 static void
2082 __kmp_remember_lock( kmp_adaptive_lock * lck )
2083 {
2084  __kmp_acquire_bootstrap_lock( &chain_lock );
2085 
2086  lck->stats.next = liveLocks.stats.next;
2087  lck->stats.prev = &liveLocks;
2088 
2089  liveLocks.stats.next = lck;
2090  lck->stats.next->stats.prev = lck;
2091 
2092  KMP_ASSERT( lck->stats.next->stats.prev == lck );
2093  KMP_ASSERT( lck->stats.prev->stats.next == lck );
2094 
2095  __kmp_release_bootstrap_lock( &chain_lock );
2096 }
2097 
2098 static void
2099 __kmp_forget_lock( kmp_adaptive_lock * lck )
2100 {
2101  KMP_ASSERT( lck->stats.next->stats.prev == lck );
2102  KMP_ASSERT( lck->stats.prev->stats.next == lck );
2103 
2104  kmp_adaptive_lock * n = lck->stats.next;
2105  kmp_adaptive_lock * p = lck->stats.prev;
2106 
2107  n->stats.prev = p;
2108  p->stats.next = n;
2109 }
2110 
2111 static void
2112 __kmp_zero_speculative_stats( kmp_adaptive_lock * lck )
2113 {
2114  memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) );
2115  __kmp_remember_lock( lck );
2116 }
2117 
2118 static void
2119 __kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_t * lck )
2120 {
2121  kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
2122 
2123  t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
2124  t->successfulSpeculations += s->successfulSpeculations;
2125  t->hardFailedSpeculations += s->hardFailedSpeculations;
2126  t->softFailedSpeculations += s->softFailedSpeculations;
2127  t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
2128  t->lemmingYields += s->lemmingYields;
2129 }
2130 
2131 static void
2132 __kmp_accumulate_speculative_stats( kmp_adaptive_lock * lck)
2133 {
2134  kmp_adaptive_lock_statistics_t *t = &destroyedStats;
2135 
2136  __kmp_acquire_bootstrap_lock( &chain_lock );
2137 
2138  __kmp_add_stats( &destroyedStats, lck );
2139  __kmp_forget_lock( lck );
2140 
2141  __kmp_release_bootstrap_lock( &chain_lock );
2142 }
2143 
2144 static float
2145 percent (kmp_uint32 count, kmp_uint32 total)
2146 {
2147  return (total == 0) ? 0.0: (100.0 * count)/total;
2148 }
2149 
2150 static
2151 FILE * __kmp_open_stats_file()
2152 {
2153  if (strcmp (__kmp_speculative_statsfile, "-") == 0)
2154  return stdout;
2155 
2156  size_t buffLen = strlen( __kmp_speculative_statsfile ) + 20;
2157  char buffer[buffLen];
2158  snprintf (&buffer[0], buffLen, __kmp_speculative_statsfile, getpid());
2159  FILE * result = fopen(&buffer[0], "w");
2160 
2161  // Maybe we should issue a warning here...
2162  return result ? result : stdout;
2163 }
2164 
2165 void
2166 __kmp_print_speculative_stats()
2167 {
2168  if (__kmp_user_lock_kind != lk_adaptive)
2169  return;
2170 
2171  FILE * statsFile = __kmp_open_stats_file();
2172 
2173  kmp_adaptive_lock_statistics_t total = destroyedStats;
2174  kmp_adaptive_lock *lck;
2175 
2176  for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
2177  __kmp_add_stats( &total, lck );
2178  }
2179  kmp_adaptive_lock_statistics_t *t = &total;
2180  kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations;
2181  kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations +
2182  t->softFailedSpeculations;
2183 
2184  fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n");
2185  fprintf ( statsFile, " Lock parameters: \n"
2186  " max_soft_retries : %10d\n"
2187  " max_badness : %10d\n",
2188  __kmp_adaptive_backoff_params.max_soft_retries,
2189  __kmp_adaptive_backoff_params.max_badness);
2190  fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts );
2191  fprintf( statsFile, " Total critical sections : %10d\n", totalSections );
2192  fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n",
2193  t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) );
2194  fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
2195  t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) );
2196  fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields );
2197 
2198  fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations );
2199  fprintf( statsFile, " Successes : %10d (%5.1f%%)\n",
2200  t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) );
2201  fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n",
2202  t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) );
2203  fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n",
2204  t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) );
2205 
2206  if (statsFile != stdout)
2207  fclose( statsFile );
2208 }
2209 
// Increment one statistics counter of an adaptive lock. The lock argument is
// parenthesized so that any pointer expression may be passed safely.
# define KMP_INC_STAT(lck,stat) ( (lck)->lk.adaptive.stats.stat++ )
2211 #else
2212 # define KMP_INC_STAT(lck,stat)
2213 
2214 #endif // KMP_DEBUG_ADAPTIVE_LOCKS
2215 
2216 static inline bool
2217 __kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck )
2218 {
2219  // It is enough to check that the head_id is zero.
2220  // We don't also need to check the tail.
2221  bool res = lck->lk.head_id == 0;
2222 
2223  // We need a fence here, since we must ensure that no memory operations
2224  // from later in this thread float above that read.
2225 #if defined( __GNUC__ ) && !defined( __INTEL_COMPILER )
2226  __sync_synchronize();
2227 #else
2228  _mm_mfence();
2229 #endif
2230 
2231  return res;
2232 }
2233 
2234 // Functions for manipulating the badness
2235 static __inline void
2236 __kmp_update_badness_after_success( kmp_queuing_lock_t *lck )
2237 {
2238  // Reset the badness to zero so we eagerly try to speculate again
2239  lck->lk.adaptive.badness = 0;
2240  KMP_INC_STAT(lck,successfulSpeculations);
2241 }
2242 
2243 // Create a bit mask with one more set bit.
2244 static __inline void
2245 __kmp_step_badness( kmp_queuing_lock_t *lck )
2246 {
2247  kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1;
2248  if ( newBadness > lck->lk.adaptive.max_badness) {
2249  return;
2250  } else {
2251  lck->lk.adaptive.badness = newBadness;
2252  }
2253 }
2254 
2255 // Check whether speculation should be attempted.
2256 static __inline int
2257 __kmp_should_speculate( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2258 {
2259  kmp_uint32 badness = lck->lk.adaptive.badness;
2260  kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts;
2261  int res = (attempts & badness) == 0;
2262  return res;
2263 }
2264 
2265 // Attempt to acquire only the speculative lock.
2266 // Does not back off to the non-speculative lock.
2267 //
2268 static int
2269 __kmp_test_adaptive_lock_only( kmp_queuing_lock_t * lck, kmp_int32 gtid )
2270 {
2271  int retries = lck->lk.adaptive.max_soft_retries;
2272 
2273  // We don't explicitly count the start of speculation, rather we record
2274  // the results (success, hard fail, soft fail). The sum of all of those
2275  // is the total number of times we started speculation since all
2276  // speculations must end one of those ways.
2277  do
2278  {
2279  kmp_uint32 status = _xbegin();
2280  // Switch this in to disable actual speculation but exercise
2281  // at least some of the rest of the code. Useful for debugging...
2282  // kmp_uint32 status = _XABORT_NESTED;
2283 
2284  if (status == _XBEGIN_STARTED )
2285  { /* We have successfully started speculation
2286  * Check that no-one acquired the lock for real between when we last looked
2287  * and now. This also gets the lock cache line into our read-set,
2288  * which we need so that we'll abort if anyone later claims it for real.
2289  */
2290  if (! __kmp_is_unlocked_queuing_lock( lck ) )
2291  {
2292  // Lock is now visibly acquired, so someone beat us to it.
2293  // Abort the transaction so we'll restart from _xbegin with the
2294  // failure status.
2295  _xabort(0x01)
2296  KMP_ASSERT2( 0, "should not get here" );
2297  }
2298  return 1; // Lock has been acquired (speculatively)
2299  } else {
2300  // We have aborted, update the statistics
2301  if ( status & SOFT_ABORT_MASK)
2302  {
2303  KMP_INC_STAT(lck,softFailedSpeculations);
2304  // and loop round to retry.
2305  }
2306  else
2307  {
2308  KMP_INC_STAT(lck,hardFailedSpeculations);
2309  // Give up if we had a hard failure.
2310  break;
2311  }
2312  }
2313  } while( retries-- ); // Loop while we have retries, and didn't fail hard.
2314 
2315  // Either we had a hard failure or we didn't succeed softly after
2316  // the full set of attempts, so back off the badness.
2317  __kmp_step_badness( lck );
2318  return 0;
2319 }
2320 
2321 // Attempt to acquire the speculative lock, or back off to the non-speculative one
2322 // if the speculative lock cannot be acquired.
2323 // We can succeed speculatively, non-speculatively, or fail.
2324 static int
2325 __kmp_test_adaptive_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2326 {
2327  // First try to acquire the lock speculatively
2328  if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) )
2329  return 1;
2330 
2331  // Speculative acquisition failed, so try to acquire it non-speculatively.
2332  // Count the non-speculative acquire attempt
2333  lck->lk.adaptive.acquire_attempts++;
2334 
2335  // Use base, non-speculative lock.
2336  if ( __kmp_test_queuing_lock( lck, gtid ) )
2337  {
2338  KMP_INC_STAT(lck,nonSpeculativeAcquires);
2339  return 1; // Lock is acquired (non-speculatively)
2340  }
2341  else
2342  {
2343  return 0; // Failed to acquire the lock, it's already visibly locked.
2344  }
2345 }
2346 
2347 static int
2348 __kmp_test_adaptive_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2349 {
2350  if ( __kmp_env_consistency_check ) {
2351  char const * const func = "omp_test_lock";
2352  if ( lck->lk.initialized != lck ) {
2353  KMP_FATAL( LockIsUninitialized, func );
2354  }
2355  }
2356 
2357  int retval = __kmp_test_adaptive_lock( lck, gtid );
2358 
2359  if ( __kmp_env_consistency_check && retval ) {
2360  lck->lk.owner_id = gtid + 1;
2361  }
2362  return retval;
2363 }
2364 
2365 // Block until we can acquire a speculative, adaptive lock.
2366 // We check whether we should be trying to speculate.
2367 // If we should be, we check the real lock to see if it is free,
2368 // and, if not, pause without attempting to acquire it until it is.
2369 // Then we try the speculative acquire.
2370 // This means that although we suffer from lemmings a little (
2371 // because all we can't acquire the lock speculatively until
2372 // the queue of threads waiting has cleared), we don't get into a
2373 // state where we can never acquire the lock speculatively (because we
2374 // force the queue to clear by preventing new arrivals from entering the
2375 // queue).
2376 // This does mean that when we're trying to break lemmings, the lock
2377 // is no longer fair. However OpenMP makes no guarantee that its
2378 // locks are fair, so this isn't a real problem.
2379 static void
2380 __kmp_acquire_adaptive_lock( kmp_queuing_lock_t * lck, kmp_int32 gtid )
2381 {
2382  if ( __kmp_should_speculate( lck, gtid ) )
2383  {
2384  if ( __kmp_is_unlocked_queuing_lock( lck ) )
2385  {
2386  if ( __kmp_test_adaptive_lock_only( lck , gtid ) )
2387  return;
2388  // We tried speculation and failed, so give up.
2389  }
2390  else
2391  {
2392  // We can't try speculation until the lock is free, so we
2393  // pause here (without suspending on the queueing lock,
2394  // to allow it to drain, then try again.
2395  // All other threads will also see the same result for
2396  // shouldSpeculate, so will be doing the same if they
2397  // try to claim the lock from now on.
2398  while ( ! __kmp_is_unlocked_queuing_lock( lck ) )
2399  {
2400  KMP_INC_STAT(lck,lemmingYields);
2401  __kmp_yield (TRUE);
2402  }
2403 
2404  if ( __kmp_test_adaptive_lock_only( lck, gtid ) )
2405  return;
2406  }
2407  }
2408 
2409  // Speculative acquisition failed, so acquire it non-speculatively.
2410  // Count the non-speculative acquire attempt
2411  lck->lk.adaptive.acquire_attempts++;
2412 
2413  __kmp_acquire_queuing_lock_timed_template<FALSE>( lck, gtid );
2414  // We have acquired the base lock, so count that.
2415  KMP_INC_STAT(lck,nonSpeculativeAcquires );
2416 }
2417 
2418 static void
2419 __kmp_acquire_adaptive_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2420 {
2421  if ( __kmp_env_consistency_check ) {
2422  char const * const func = "omp_set_lock";
2423  if ( lck->lk.initialized != lck ) {
2424  KMP_FATAL( LockIsUninitialized, func );
2425  }
2426  if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) {
2427  KMP_FATAL( LockIsAlreadyOwned, func );
2428  }
2429  }
2430 
2431  __kmp_acquire_adaptive_lock( lck, gtid );
2432 
2433  if ( __kmp_env_consistency_check ) {
2434  lck->lk.owner_id = gtid + 1;
2435  }
2436 }
2437 
2438 static void
2439 __kmp_release_adaptive_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2440 {
2441  if ( __kmp_is_unlocked_queuing_lock( lck ) )
2442  { // If the lock doesn't look claimed we must be speculating.
2443  // (Or the user's code is buggy and they're releasing without locking;
2444  // if we had XTEST we'd be able to check that case...)
2445  _xend(); // Exit speculation
2446  __kmp_update_badness_after_success( lck );
2447  }
2448  else
2449  { // Since the lock *is* visibly locked we're not speculating,
2450  // so should use the underlying lock's release scheme.
2451  __kmp_release_queuing_lock( lck, gtid );
2452  }
2453 }
2454 
2455 static void
2456 __kmp_release_adaptive_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid )
2457 {
2458  if ( __kmp_env_consistency_check ) {
2459  char const * const func = "omp_unset_lock";
2460  KMP_MB(); /* in case another processor initialized lock */
2461  if ( lck->lk.initialized != lck ) {
2462  KMP_FATAL( LockIsUninitialized, func );
2463  }
2464  if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) {
2465  KMP_FATAL( LockUnsettingFree, func );
2466  }
2467  if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) {
2468  KMP_FATAL( LockUnsettingSetByAnother, func );
2469  }
2470  lck->lk.owner_id = 0;
2471  }
2472  __kmp_release_adaptive_lock( lck, gtid );
2473 }
2474 
2475 static void
2476 __kmp_init_adaptive_lock( kmp_queuing_lock_t *lck )
2477 {
2478  __kmp_init_queuing_lock( lck );
2479  lck->lk.adaptive.badness = 0;
2480  lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0;
2481  lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries;
2482  lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2483 #if KMP_DEBUG_ADAPTIVE_LOCKS
2484  __kmp_zero_speculative_stats( &lck->lk.adaptive );
2485 #endif
2486  KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2487 }
2488 
2489 static void
2490 __kmp_init_adaptive_lock_with_checks( kmp_queuing_lock_t * lck )
2491 {
2492  __kmp_init_adaptive_lock( lck );
2493 }
2494 
2495 static void
2496 __kmp_destroy_adaptive_lock( kmp_queuing_lock_t *lck )
2497 {
2498 #if KMP_DEBUG_ADAPTIVE_LOCKS
2499  __kmp_accumulate_speculative_stats( &lck->lk.adaptive );
2500 #endif
2501  __kmp_destroy_queuing_lock (lck);
2502  // Nothing needed for the speculative part.
2503 }
2504 
2505 static void
2506 __kmp_destroy_adaptive_lock_with_checks( kmp_queuing_lock_t *lck )
2507 {
2508  if ( __kmp_env_consistency_check ) {
2509  char const * const func = "omp_destroy_lock";
2510  if ( lck->lk.initialized != lck ) {
2511  KMP_FATAL( LockIsUninitialized, func );
2512  }
2513  if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) {
2514  KMP_FATAL( LockStillOwned, func );
2515  }
2516  }
2517  __kmp_destroy_adaptive_lock( lck );
2518 }
2519 
2520 
2521 #endif // KMP_USE_ADAPTIVE_LOCKS
2522 
2523 
2524 /* ------------------------------------------------------------------------ */
2525 /* DRDPA ticket locks */
2526 /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2527 
2528 static kmp_int32
2529 __kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck )
2530 {
2531  return TCR_4( lck->lk.owner_id ) - 1;
2532 }
2533 
2534 static inline bool
2535 __kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck )
2536 {
2537  return lck->lk.depth_locked != -1;
2538 }
2539 
2540 __forceinline static void
2541 __kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2542 {
2543  kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket);
2544  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2545  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2546  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2547  TCR_PTR(lck->lk.polls); // volatile load
2548 
2549 #ifdef USE_LOCK_PROFILE
2550  if (TCR_8(polls[ticket & mask].poll) != ticket)
2551  __kmp_printf("LOCK CONTENTION: %p\n", lck);
2552  /* else __kmp_printf( "." );*/
2553 #endif /* USE_LOCK_PROFILE */
2554 
2555  //
2556  // Now spin-wait, but reload the polls pointer and mask, in case the
2557  // polling area has been reconfigured. Unless it is reconfigured, the
2558  // reloads stay in L1 cache and are cheap.
2559  //
2560  // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.c !!!
2561  //
2562  // The current implementation of KMP_WAIT_YIELD doesn't allow for mask
2563  // and poll to be re-read every spin iteration.
2564  //
2565  kmp_uint32 spins;
2566 
2567  KMP_FSYNC_PREPARE(lck);
2568  KMP_INIT_YIELD(spins);
2569  while (TCR_8(polls[ticket & mask]).poll < ticket) { // volatile load
2570  __kmp_static_delay(TRUE);
2571 
2572  //
2573  // If we are oversubscribed,
2574  // or ahve waited a bit (and KMP_LIBRARY=turnaround), then yield.
2575  // CPU Pause is in the macros for yield.
2576  //
2577  KMP_YIELD(TCR_4(__kmp_nth)
2578  > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
2579  KMP_YIELD_SPIN(spins);
2580 
2581  // Re-read the mask and the poll pointer from the lock structure.
2582  //
2583  // Make certain that "mask" is read before "polls" !!!
2584  //
2585  // If another thread picks reconfigures the polling area and updates
2586  // their values, and we get the new value of mask and the old polls
2587  // pointer, we could access memory beyond the end of the old polling
2588  // area.
2589  //
2590  mask = TCR_8(lck->lk.mask); // volatile load
2591  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2592  TCR_PTR(lck->lk.polls); // volatile load
2593  }
2594 
2595  //
2596  // Critical section starts here
2597  //
2598  KMP_FSYNC_ACQUIRED(lck);
2599  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2600  ticket, lck));
2601  lck->lk.now_serving = ticket; // non-volatile store
2602 
2603  //
2604  // Deallocate a garbage polling area if we know that we are the last
2605  // thread that could possibly access it.
2606  //
2607  // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2608  // ticket.
2609  //
2610  if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2611  __kmp_free((void *)lck->lk.old_polls);
2612  lck->lk.old_polls = NULL;
2613  lck->lk.cleanup_ticket = 0;
2614  }
2615 
2616  //
2617  // Check to see if we should reconfigure the polling area.
2618  // If there is still a garbage polling area to be deallocated from a
2619  // previous reconfiguration, let a later thread reconfigure it.
2620  //
2621  if (lck->lk.old_polls == NULL) {
2622  bool reconfigure = false;
2623  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls;
2624  kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2625 
2626  if (TCR_4(__kmp_nth)
2627  > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2628  //
2629  // We are in oversubscription mode. Contract the polling area
2630  // down to a single location, if that hasn't been done already.
2631  //
2632  if (num_polls > 1) {
2633  reconfigure = true;
2634  num_polls = TCR_4(lck->lk.num_polls);
2635  mask = 0;
2636  num_polls = 1;
2637  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2638  __kmp_allocate(num_polls * sizeof(*polls));
2639  polls[0].poll = ticket;
2640  }
2641  }
2642  else {
2643  //
2644  // We are in under/fully subscribed mode. Check the number of
2645  // threads waiting on the lock. The size of the polling area
2646  // should be at least the number of threads waiting.
2647  //
2648  kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2649  if (num_waiting > num_polls) {
2650  kmp_uint32 old_num_polls = num_polls;
2651  reconfigure = true;
2652  do {
2653  mask = (mask << 1) | 1;
2654  num_polls *= 2;
2655  } while (num_polls <= num_waiting);
2656 
2657  //
2658  // Allocate the new polling area, and copy the relevant portion
2659  // of the old polling area to the new area. __kmp_allocate()
2660  // zeroes the memory it allocates, and most of the old area is
2661  // just zero padding, so we only copy the release counters.
2662  //
2663  polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2664  __kmp_allocate(num_polls * sizeof(*polls));
2665  int i;
2666  for (i = 0; i < old_num_polls; i++) {
2667  polls[i].poll = old_polls[i].poll;
2668  }
2669  }
2670  }
2671 
2672  if (reconfigure) {
2673  //
2674  // Now write the updated fields back to the lock structure.
2675  //
2676  // Make certain that "polls" is written before "mask" !!!
2677  //
2678  // If another thread picks up the new value of mask and the old
2679  // polls pointer , it could access memory beyond the end of the
2680  // old polling area.
2681  //
2682  // On x86, we need memory fences.
2683  //
2684  KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n",
2685  ticket, lck, num_polls));
2686 
2687  lck->lk.old_polls = old_polls; // non-volatile store
2688  lck->lk.polls = polls; // volatile store
2689 
2690  KMP_MB();
2691 
2692  lck->lk.num_polls = num_polls; // non-volatile store
2693  lck->lk.mask = mask; // volatile store
2694 
2695  KMP_MB();
2696 
2697  //
2698  // Only after the new polling area and mask have been flushed
2699  // to main memory can we update the cleanup ticket field.
2700  //
2701  // volatile load / non-volatile store
2702  //
2703  lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket);
2704  }
2705  }
2706 }
2707 
2708 void
2709 __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2710 {
2711  __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2712 }
2713 
2714 static void
2715 __kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2716 {
2717  if ( __kmp_env_consistency_check ) {
2718  char const * const func = "omp_set_lock";
2719  if ( lck->lk.initialized != lck ) {
2720  KMP_FATAL( LockIsUninitialized, func );
2721  }
2722  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2723  KMP_FATAL( LockNestableUsedAsSimple, func );
2724  }
2725  if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) {
2726  KMP_FATAL( LockIsAlreadyOwned, func );
2727  }
2728  }
2729 
2730  __kmp_acquire_drdpa_lock( lck, gtid );
2731 
2732  if ( __kmp_env_consistency_check ) {
2733  lck->lk.owner_id = gtid + 1;
2734  }
2735 }
2736 
2737 int
2738 __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2739 {
2740  //
2741  // First get a ticket, then read the polls pointer and the mask.
2742  // The polls pointer must be read before the mask!!! (See above)
2743  //
2744  kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load
2745  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2746  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2747  TCR_PTR(lck->lk.polls); // volatile load
2748  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2749  if (TCR_8(polls[ticket & mask].poll) == ticket) {
2750  kmp_uint64 next_ticket = ticket + 1;
2751  if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket,
2752  ticket, next_ticket)) {
2753  KMP_FSYNC_ACQUIRED(lck);
2754  KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2755  ticket, lck));
2756  lck->lk.now_serving = ticket; // non-volatile store
2757 
2758  //
2759  // Since no threads are waiting, there is no possiblity that
2760  // we would want to reconfigure the polling area. We might
2761  // have the cleanup ticket value (which says that it is now
2762  // safe to deallocate old_polls), but we'll let a later thread
2763  // which calls __kmp_acquire_lock do that - this routine
2764  // isn't supposed to block, and we would risk blocks if we
2765  // called __kmp_free() to do the deallocation.
2766  //
2767  return TRUE;
2768  }
2769  }
2770  return FALSE;
2771 }
2772 
2773 static int
2774 __kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2775 {
2776  if ( __kmp_env_consistency_check ) {
2777  char const * const func = "omp_test_lock";
2778  if ( lck->lk.initialized != lck ) {
2779  KMP_FATAL( LockIsUninitialized, func );
2780  }
2781  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2782  KMP_FATAL( LockNestableUsedAsSimple, func );
2783  }
2784  }
2785 
2786  int retval = __kmp_test_drdpa_lock( lck, gtid );
2787 
2788  if ( __kmp_env_consistency_check && retval ) {
2789  lck->lk.owner_id = gtid + 1;
2790  }
2791  return retval;
2792 }
2793 
2794 void
2795 __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2796 {
2797  //
2798  // Read the ticket value from the lock data struct, then the polls
2799  // pointer and the mask. The polls pointer must be read before the
2800  // mask!!! (See above)
2801  //
2802  kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load
2803  volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls
2804  = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2805  TCR_PTR(lck->lk.polls); // volatile load
2806  kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load
2807  KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2808  ticket - 1, lck));
2809  KMP_FSYNC_RELEASING(lck);
2810  TCW_8(polls[ticket & mask].poll, ticket); // volatile store
2811 }
2812 
2813 static void
2814 __kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2815 {
2816  if ( __kmp_env_consistency_check ) {
2817  char const * const func = "omp_unset_lock";
2818  KMP_MB(); /* in case another processor initialized lock */
2819  if ( lck->lk.initialized != lck ) {
2820  KMP_FATAL( LockIsUninitialized, func );
2821  }
2822  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2823  KMP_FATAL( LockNestableUsedAsSimple, func );
2824  }
2825  if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
2826  KMP_FATAL( LockUnsettingFree, func );
2827  }
2828  if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 )
2829  && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) {
2830  KMP_FATAL( LockUnsettingSetByAnother, func );
2831  }
2832  lck->lk.owner_id = 0;
2833  }
2834  __kmp_release_drdpa_lock( lck, gtid );
2835 }
2836 
2837 void
2838 __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck )
2839 {
2840  lck->lk.location = NULL;
2841  lck->lk.mask = 0;
2842  lck->lk.num_polls = 1;
2843  lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)
2844  __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2845  lck->lk.cleanup_ticket = 0;
2846  lck->lk.old_polls = NULL;
2847  lck->lk.next_ticket = 0;
2848  lck->lk.now_serving = 0;
2849  lck->lk.owner_id = 0; // no thread owns the lock.
2850  lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2851  lck->lk.initialized = lck;
2852 
2853  KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2854 }
2855 
2856 static void
2857 __kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
2858 {
2859  __kmp_init_drdpa_lock( lck );
2860 }
2861 
2862 void
2863 __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck )
2864 {
2865  lck->lk.initialized = NULL;
2866  lck->lk.location = NULL;
2867  if (lck->lk.polls != NULL) {
2868  __kmp_free((void *)lck->lk.polls);
2869  lck->lk.polls = NULL;
2870  }
2871  if (lck->lk.old_polls != NULL) {
2872  __kmp_free((void *)lck->lk.old_polls);
2873  lck->lk.old_polls = NULL;
2874  }
2875  lck->lk.mask = 0;
2876  lck->lk.num_polls = 0;
2877  lck->lk.cleanup_ticket = 0;
2878  lck->lk.next_ticket = 0;
2879  lck->lk.now_serving = 0;
2880  lck->lk.owner_id = 0;
2881  lck->lk.depth_locked = -1;
2882 }
2883 
2884 static void
2885 __kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
2886 {
2887  if ( __kmp_env_consistency_check ) {
2888  char const * const func = "omp_destroy_lock";
2889  if ( lck->lk.initialized != lck ) {
2890  KMP_FATAL( LockIsUninitialized, func );
2891  }
2892  if ( __kmp_is_drdpa_lock_nestable( lck ) ) {
2893  KMP_FATAL( LockNestableUsedAsSimple, func );
2894  }
2895  if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
2896  KMP_FATAL( LockStillOwned, func );
2897  }
2898  }
2899  __kmp_destroy_drdpa_lock( lck );
2900 }
2901 
2902 
2903 //
2904 // nested drdpa ticket locks
2905 //
2906 
2907 void
2908 __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2909 {
2910  KMP_DEBUG_ASSERT( gtid >= 0 );
2911 
2912  if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2913  lck->lk.depth_locked += 1;
2914  }
2915  else {
2916  __kmp_acquire_drdpa_lock_timed_template( lck, gtid );
2917  KMP_MB();
2918  lck->lk.depth_locked = 1;
2919  KMP_MB();
2920  lck->lk.owner_id = gtid + 1;
2921  }
2922 }
2923 
2924 static void
2925 __kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2926 {
2927  if ( __kmp_env_consistency_check ) {
2928  char const * const func = "omp_set_nest_lock";
2929  if ( lck->lk.initialized != lck ) {
2930  KMP_FATAL( LockIsUninitialized, func );
2931  }
2932  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2933  KMP_FATAL( LockSimpleUsedAsNestable, func );
2934  }
2935  }
2936  __kmp_acquire_nested_drdpa_lock( lck, gtid );
2937 }
2938 
2939 int
2940 __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2941 {
2942  int retval;
2943 
2944  KMP_DEBUG_ASSERT( gtid >= 0 );
2945 
2946  if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) {
2947  retval = ++lck->lk.depth_locked;
2948  }
2949  else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) {
2950  retval = 0;
2951  }
2952  else {
2953  KMP_MB();
2954  retval = lck->lk.depth_locked = 1;
2955  KMP_MB();
2956  lck->lk.owner_id = gtid + 1;
2957  }
2958  return retval;
2959 }
2960 
2961 static int
2962 __kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2963 {
2964  if ( __kmp_env_consistency_check ) {
2965  char const * const func = "omp_test_nest_lock";
2966  if ( lck->lk.initialized != lck ) {
2967  KMP_FATAL( LockIsUninitialized, func );
2968  }
2969  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2970  KMP_FATAL( LockSimpleUsedAsNestable, func );
2971  }
2972  }
2973  return __kmp_test_nested_drdpa_lock( lck, gtid );
2974 }
2975 
2976 void
2977 __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2978 {
2979  KMP_DEBUG_ASSERT( gtid >= 0 );
2980 
2981  KMP_MB();
2982  if ( --(lck->lk.depth_locked) == 0 ) {
2983  KMP_MB();
2984  lck->lk.owner_id = 0;
2985  __kmp_release_drdpa_lock( lck, gtid );
2986  }
2987 }
2988 
2989 static void
2990 __kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid )
2991 {
2992  if ( __kmp_env_consistency_check ) {
2993  char const * const func = "omp_unset_nest_lock";
2994  KMP_MB(); /* in case another processor initialized lock */
2995  if ( lck->lk.initialized != lck ) {
2996  KMP_FATAL( LockIsUninitialized, func );
2997  }
2998  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
2999  KMP_FATAL( LockSimpleUsedAsNestable, func );
3000  }
3001  if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) {
3002  KMP_FATAL( LockUnsettingFree, func );
3003  }
3004  if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) {
3005  KMP_FATAL( LockUnsettingSetByAnother, func );
3006  }
3007  }
3008  __kmp_release_nested_drdpa_lock( lck, gtid );
3009 }
3010 
3011 void
3012 __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck )
3013 {
3014  __kmp_init_drdpa_lock( lck );
3015  lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
3016 }
3017 
3018 static void
3019 __kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck )
3020 {
3021  __kmp_init_nested_drdpa_lock( lck );
3022 }
3023 
3024 void
3025 __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck )
3026 {
3027  __kmp_destroy_drdpa_lock( lck );
3028  lck->lk.depth_locked = 0;
3029 }
3030 
3031 static void
3032 __kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck )
3033 {
3034  if ( __kmp_env_consistency_check ) {
3035  char const * const func = "omp_destroy_nest_lock";
3036  if ( lck->lk.initialized != lck ) {
3037  KMP_FATAL( LockIsUninitialized, func );
3038  }
3039  if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) {
3040  KMP_FATAL( LockSimpleUsedAsNestable, func );
3041  }
3042  if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) {
3043  KMP_FATAL( LockStillOwned, func );
3044  }
3045  }
3046  __kmp_destroy_nested_drdpa_lock( lck );
3047 }
3048 
3049 
3050 //
3051 // access functions to fields which don't exist for all lock kinds.
3052 //
3053 
3054 static int
3055 __kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck )
3056 {
3057  return lck == lck->lk.initialized;
3058 }
3059 
3060 static const ident_t *
3061 __kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck )
3062 {
3063  return lck->lk.location;
3064 }
3065 
3066 static void
3067 __kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc )
3068 {
3069  lck->lk.location = loc;
3070 }
3071 
3072 static kmp_lock_flags_t
3073 __kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck )
3074 {
3075  return lck->lk.flags;
3076 }
3077 
3078 static void
3079 __kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags )
3080 {
3081  lck->lk.flags = flags;
3082 }
3083 
3084 /* ------------------------------------------------------------------------ */
3085 /* user locks
3086  *
3087  * They are implemented as a table of function pointers which are set to the
3088  * lock functions of the appropriate kind, once that has been determined.
3089  */
3090 
3091 enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
3092 
3093 size_t __kmp_base_user_lock_size = 0;
3094 size_t __kmp_user_lock_size = 0;
3095 
3096 kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL;
3097 void ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3098 
3099 int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3100 void ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3101 void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3102 void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL;
3103 void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3104 void ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3105 
3106 int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3107 void ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL;
3108 void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3109 void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL;
3110 
3111 int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL;
3112 const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL;
3113 void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL;
3114 kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL;
3115 void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL;
3116 
3117 void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind )
3118 {
3119  switch ( user_lock_kind ) {
3120  case lk_default:
3121  default:
3122  KMP_ASSERT( 0 );
3123 
3124  case lk_tas: {
3125  __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t );
3126  __kmp_user_lock_size = sizeof( kmp_tas_lock_t );
3127 
3128  __kmp_get_user_lock_owner_ =
3129  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3130  ( &__kmp_get_tas_lock_owner );
3131 
3132  __kmp_acquire_user_lock_with_checks_ =
3133  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3134  ( &__kmp_acquire_tas_lock_with_checks );
3135 
3136  __kmp_test_user_lock_with_checks_ =
3137  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3138  ( &__kmp_test_tas_lock_with_checks );
3139 
3140  __kmp_release_user_lock_with_checks_ =
3141  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3142  ( &__kmp_release_tas_lock_with_checks );
3143 
3144  __kmp_init_user_lock_with_checks_ =
3145  ( void ( * )( kmp_user_lock_p ) )
3146  ( &__kmp_init_tas_lock_with_checks );
3147 
3148  __kmp_destroy_user_lock_ =
3149  ( void ( * )( kmp_user_lock_p ) )
3150  ( &__kmp_destroy_tas_lock );
3151 
3152  __kmp_destroy_user_lock_with_checks_ =
3153  ( void ( * )( kmp_user_lock_p ) )
3154  ( &__kmp_destroy_tas_lock_with_checks );
3155 
3156  __kmp_acquire_nested_user_lock_with_checks_ =
3157  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3158  ( &__kmp_acquire_nested_tas_lock_with_checks );
3159 
3160  __kmp_test_nested_user_lock_with_checks_ =
3161  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3162  ( &__kmp_test_nested_tas_lock_with_checks );
3163 
3164  __kmp_release_nested_user_lock_with_checks_ =
3165  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3166  ( &__kmp_release_nested_tas_lock_with_checks );
3167 
3168  __kmp_init_nested_user_lock_with_checks_ =
3169  ( void ( * )( kmp_user_lock_p ) )
3170  ( &__kmp_init_nested_tas_lock_with_checks );
3171 
3172  __kmp_destroy_nested_user_lock_with_checks_ =
3173  ( void ( * )( kmp_user_lock_p ) )
3174  ( &__kmp_destroy_nested_tas_lock_with_checks );
3175 
3176  __kmp_is_user_lock_initialized_ =
3177  ( int ( * )( kmp_user_lock_p ) ) NULL;
3178 
3179  __kmp_get_user_lock_location_ =
3180  ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3181 
3182  __kmp_set_user_lock_location_ =
3183  ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3184 
3185  __kmp_get_user_lock_flags_ =
3186  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3187 
3188  __kmp_set_user_lock_flags_ =
3189  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3190  }
3191  break;
3192 
3193 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
3194 
3195  case lk_futex: {
3196  __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t );
3197  __kmp_user_lock_size = sizeof( kmp_futex_lock_t );
3198 
3199  __kmp_get_user_lock_owner_ =
3200  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3201  ( &__kmp_get_futex_lock_owner );
3202 
3203  __kmp_acquire_user_lock_with_checks_ =
3204  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3205  ( &__kmp_acquire_futex_lock_with_checks );
3206 
3207  __kmp_test_user_lock_with_checks_ =
3208  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3209  ( &__kmp_test_futex_lock_with_checks );
3210 
3211  __kmp_release_user_lock_with_checks_ =
3212  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3213  ( &__kmp_release_futex_lock_with_checks );
3214 
3215  __kmp_init_user_lock_with_checks_ =
3216  ( void ( * )( kmp_user_lock_p ) )
3217  ( &__kmp_init_futex_lock_with_checks );
3218 
3219  __kmp_destroy_user_lock_ =
3220  ( void ( * )( kmp_user_lock_p ) )
3221  ( &__kmp_destroy_futex_lock );
3222 
3223  __kmp_destroy_user_lock_with_checks_ =
3224  ( void ( * )( kmp_user_lock_p ) )
3225  ( &__kmp_destroy_futex_lock_with_checks );
3226 
3227  __kmp_acquire_nested_user_lock_with_checks_ =
3228  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3229  ( &__kmp_acquire_nested_futex_lock_with_checks );
3230 
3231  __kmp_test_nested_user_lock_with_checks_ =
3232  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3233  ( &__kmp_test_nested_futex_lock_with_checks );
3234 
3235  __kmp_release_nested_user_lock_with_checks_ =
3236  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3237  ( &__kmp_release_nested_futex_lock_with_checks );
3238 
3239  __kmp_init_nested_user_lock_with_checks_ =
3240  ( void ( * )( kmp_user_lock_p ) )
3241  ( &__kmp_init_nested_futex_lock_with_checks );
3242 
3243  __kmp_destroy_nested_user_lock_with_checks_ =
3244  ( void ( * )( kmp_user_lock_p ) )
3245  ( &__kmp_destroy_nested_futex_lock_with_checks );
3246 
3247  __kmp_is_user_lock_initialized_ =
3248  ( int ( * )( kmp_user_lock_p ) ) NULL;
3249 
3250  __kmp_get_user_lock_location_ =
3251  ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL;
3252 
3253  __kmp_set_user_lock_location_ =
3254  ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL;
3255 
3256  __kmp_get_user_lock_flags_ =
3257  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL;
3258 
3259  __kmp_set_user_lock_flags_ =
3260  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL;
3261  }
3262  break;
3263 
3264 #endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
3265 
3266  case lk_ticket: {
3267  __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t );
3268  __kmp_user_lock_size = sizeof( kmp_ticket_lock_t );
3269 
3270  __kmp_get_user_lock_owner_ =
3271  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3272  ( &__kmp_get_ticket_lock_owner );
3273 
3274  __kmp_acquire_user_lock_with_checks_ =
3275  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3276  ( &__kmp_acquire_ticket_lock_with_checks );
3277 
3278  __kmp_test_user_lock_with_checks_ =
3279  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3280  ( &__kmp_test_ticket_lock_with_checks );
3281 
3282  __kmp_release_user_lock_with_checks_ =
3283  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3284  ( &__kmp_release_ticket_lock_with_checks );
3285 
3286  __kmp_init_user_lock_with_checks_ =
3287  ( void ( * )( kmp_user_lock_p ) )
3288  ( &__kmp_init_ticket_lock_with_checks );
3289 
3290  __kmp_destroy_user_lock_ =
3291  ( void ( * )( kmp_user_lock_p ) )
3292  ( &__kmp_destroy_ticket_lock );
3293 
3294  __kmp_destroy_user_lock_with_checks_ =
3295  ( void ( * )( kmp_user_lock_p ) )
3296  ( &__kmp_destroy_ticket_lock_with_checks );
3297 
3298  __kmp_acquire_nested_user_lock_with_checks_ =
3299  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3300  ( &__kmp_acquire_nested_ticket_lock_with_checks );
3301 
3302  __kmp_test_nested_user_lock_with_checks_ =
3303  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3304  ( &__kmp_test_nested_ticket_lock_with_checks );
3305 
3306  __kmp_release_nested_user_lock_with_checks_ =
3307  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3308  ( &__kmp_release_nested_ticket_lock_with_checks );
3309 
3310  __kmp_init_nested_user_lock_with_checks_ =
3311  ( void ( * )( kmp_user_lock_p ) )
3312  ( &__kmp_init_nested_ticket_lock_with_checks );
3313 
3314  __kmp_destroy_nested_user_lock_with_checks_ =
3315  ( void ( * )( kmp_user_lock_p ) )
3316  ( &__kmp_destroy_nested_ticket_lock_with_checks );
3317 
3318  __kmp_is_user_lock_initialized_ =
3319  ( int ( * )( kmp_user_lock_p ) )
3320  ( &__kmp_is_ticket_lock_initialized );
3321 
3322  __kmp_get_user_lock_location_ =
3323  ( const ident_t * ( * )( kmp_user_lock_p ) )
3324  ( &__kmp_get_ticket_lock_location );
3325 
3326  __kmp_set_user_lock_location_ =
3327  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3328  ( &__kmp_set_ticket_lock_location );
3329 
3330  __kmp_get_user_lock_flags_ =
3331  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3332  ( &__kmp_get_ticket_lock_flags );
3333 
3334  __kmp_set_user_lock_flags_ =
3335  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3336  ( &__kmp_set_ticket_lock_flags );
3337  }
3338  break;
3339 
3340  case lk_queuing: {
3341  __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3342  __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3343 
3344  __kmp_get_user_lock_owner_ =
3345  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3346  ( &__kmp_get_queuing_lock_owner );
3347 
3348  __kmp_acquire_user_lock_with_checks_ =
3349  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3350  ( &__kmp_acquire_queuing_lock_with_checks );
3351 
3352  __kmp_test_user_lock_with_checks_ =
3353  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3354  ( &__kmp_test_queuing_lock_with_checks );
3355 
3356  __kmp_release_user_lock_with_checks_ =
3357  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3358  ( &__kmp_release_queuing_lock_with_checks );
3359 
3360  __kmp_init_user_lock_with_checks_ =
3361  ( void ( * )( kmp_user_lock_p ) )
3362  ( &__kmp_init_queuing_lock_with_checks );
3363 
3364  __kmp_destroy_user_lock_ =
3365  ( void ( * )( kmp_user_lock_p ) )
3366  ( &__kmp_destroy_queuing_lock );
3367 
3368  __kmp_destroy_user_lock_with_checks_ =
3369  ( void ( * )( kmp_user_lock_p ) )
3370  ( &__kmp_destroy_queuing_lock_with_checks );
3371 
3372  __kmp_acquire_nested_user_lock_with_checks_ =
3373  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3374  ( &__kmp_acquire_nested_queuing_lock_with_checks );
3375 
3376  __kmp_test_nested_user_lock_with_checks_ =
3377  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3378  ( &__kmp_test_nested_queuing_lock_with_checks );
3379 
3380  __kmp_release_nested_user_lock_with_checks_ =
3381  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3382  ( &__kmp_release_nested_queuing_lock_with_checks );
3383 
3384  __kmp_init_nested_user_lock_with_checks_ =
3385  ( void ( * )( kmp_user_lock_p ) )
3386  ( &__kmp_init_nested_queuing_lock_with_checks );
3387 
3388  __kmp_destroy_nested_user_lock_with_checks_ =
3389  ( void ( * )( kmp_user_lock_p ) )
3390  ( &__kmp_destroy_nested_queuing_lock_with_checks );
3391 
3392  __kmp_is_user_lock_initialized_ =
3393  ( int ( * )( kmp_user_lock_p ) )
3394  ( &__kmp_is_queuing_lock_initialized );
3395 
3396  __kmp_get_user_lock_location_ =
3397  ( const ident_t * ( * )( kmp_user_lock_p ) )
3398  ( &__kmp_get_queuing_lock_location );
3399 
3400  __kmp_set_user_lock_location_ =
3401  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3402  ( &__kmp_set_queuing_lock_location );
3403 
3404  __kmp_get_user_lock_flags_ =
3405  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3406  ( &__kmp_get_queuing_lock_flags );
3407 
3408  __kmp_set_user_lock_flags_ =
3409  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3410  ( &__kmp_set_queuing_lock_flags );
3411  }
3412  break;
3413 
3414 #if KMP_USE_ADAPTIVE_LOCKS
3415  case lk_adaptive: {
3416  __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t );
3417  __kmp_user_lock_size = sizeof( kmp_queuing_lock_t );
3418 
3419  __kmp_get_user_lock_owner_ =
3420  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3421  ( &__kmp_get_queuing_lock_owner );
3422 
3423  __kmp_acquire_user_lock_with_checks_ =
3424  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3425  ( &__kmp_acquire_adaptive_lock_with_checks );
3426 
3427  __kmp_test_user_lock_with_checks_ =
3428  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3429  ( &__kmp_test_adaptive_lock_with_checks );
3430 
3431  __kmp_release_user_lock_with_checks_ =
3432  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3433  ( &__kmp_release_adaptive_lock_with_checks );
3434 
3435  __kmp_init_user_lock_with_checks_ =
3436  ( void ( * )( kmp_user_lock_p ) )
3437  ( &__kmp_init_adaptive_lock_with_checks );
3438 
3439  __kmp_destroy_user_lock_with_checks_ =
3440  ( void ( * )( kmp_user_lock_p ) )
3441  ( &__kmp_destroy_adaptive_lock_with_checks );
3442 
3443  __kmp_destroy_user_lock_ =
3444  ( void ( * )( kmp_user_lock_p ) )
3445  ( &__kmp_destroy_adaptive_lock );
3446 
3447  __kmp_is_user_lock_initialized_ =
3448  ( int ( * )( kmp_user_lock_p ) )
3449  ( &__kmp_is_queuing_lock_initialized );
3450 
3451  __kmp_get_user_lock_location_ =
3452  ( const ident_t * ( * )( kmp_user_lock_p ) )
3453  ( &__kmp_get_queuing_lock_location );
3454 
3455  __kmp_set_user_lock_location_ =
3456  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3457  ( &__kmp_set_queuing_lock_location );
3458 
3459  __kmp_get_user_lock_flags_ =
3460  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3461  ( &__kmp_get_queuing_lock_flags );
3462 
3463  __kmp_set_user_lock_flags_ =
3464  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3465  ( &__kmp_set_queuing_lock_flags );
3466 
3467  }
3468  break;
3469 #endif // KMP_USE_ADAPTIVE_LOCKS
3470 
3471  case lk_drdpa: {
3472  __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t );
3473  __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t );
3474 
3475  __kmp_get_user_lock_owner_ =
3476  ( kmp_int32 ( * )( kmp_user_lock_p ) )
3477  ( &__kmp_get_drdpa_lock_owner );
3478 
3479  __kmp_acquire_user_lock_with_checks_ =
3480  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3481  ( &__kmp_acquire_drdpa_lock_with_checks );
3482 
3483  __kmp_test_user_lock_with_checks_ =
3484  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3485  ( &__kmp_test_drdpa_lock_with_checks );
3486 
3487  __kmp_release_user_lock_with_checks_ =
3488  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3489  ( &__kmp_release_drdpa_lock_with_checks );
3490 
3491  __kmp_init_user_lock_with_checks_ =
3492  ( void ( * )( kmp_user_lock_p ) )
3493  ( &__kmp_init_drdpa_lock_with_checks );
3494 
3495  __kmp_destroy_user_lock_ =
3496  ( void ( * )( kmp_user_lock_p ) )
3497  ( &__kmp_destroy_drdpa_lock );
3498 
3499  __kmp_destroy_user_lock_with_checks_ =
3500  ( void ( * )( kmp_user_lock_p ) )
3501  ( &__kmp_destroy_drdpa_lock_with_checks );
3502 
3503  __kmp_acquire_nested_user_lock_with_checks_ =
3504  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3505  ( &__kmp_acquire_nested_drdpa_lock_with_checks );
3506 
3507  __kmp_test_nested_user_lock_with_checks_ =
3508  ( int ( * )( kmp_user_lock_p, kmp_int32 ) )
3509  ( &__kmp_test_nested_drdpa_lock_with_checks );
3510 
3511  __kmp_release_nested_user_lock_with_checks_ =
3512  ( void ( * )( kmp_user_lock_p, kmp_int32 ) )
3513  ( &__kmp_release_nested_drdpa_lock_with_checks );
3514 
3515  __kmp_init_nested_user_lock_with_checks_ =
3516  ( void ( * )( kmp_user_lock_p ) )
3517  ( &__kmp_init_nested_drdpa_lock_with_checks );
3518 
3519  __kmp_destroy_nested_user_lock_with_checks_ =
3520  ( void ( * )( kmp_user_lock_p ) )
3521  ( &__kmp_destroy_nested_drdpa_lock_with_checks );
3522 
3523  __kmp_is_user_lock_initialized_ =
3524  ( int ( * )( kmp_user_lock_p ) )
3525  ( &__kmp_is_drdpa_lock_initialized );
3526 
3527  __kmp_get_user_lock_location_ =
3528  ( const ident_t * ( * )( kmp_user_lock_p ) )
3529  ( &__kmp_get_drdpa_lock_location );
3530 
3531  __kmp_set_user_lock_location_ =
3532  ( void ( * )( kmp_user_lock_p, const ident_t * ) )
3533  ( &__kmp_set_drdpa_lock_location );
3534 
3535  __kmp_get_user_lock_flags_ =
3536  ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) )
3537  ( &__kmp_get_drdpa_lock_flags );
3538 
3539  __kmp_set_user_lock_flags_ =
3540  ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) )
3541  ( &__kmp_set_drdpa_lock_flags );
3542  }
3543  break;
3544  }
3545 }
3546 
3547 
3548 // ----------------------------------------------------------------------------
3549 // User lock table & lock allocation
3550 
3551 kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL };
3552 kmp_user_lock_p __kmp_lock_pool = NULL;
3553 
3554 // Lock block-allocation support.
3555 kmp_block_of_locks* __kmp_lock_blocks = NULL;
3556 int __kmp_num_locks_in_block = 1; // FIXME - tune this value
3557 
3558 static kmp_lock_index_t
3559 __kmp_lock_table_insert( kmp_user_lock_p lck )
3560 {
3561  // Assume that kmp_global_lock is held upon entry/exit.
3562  kmp_lock_index_t index;
3563  if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) {
3564  kmp_lock_index_t size;
3565  kmp_user_lock_p *table;
3566  kmp_lock_index_t i;
3567  // Reallocate lock table.
3568  if ( __kmp_user_lock_table.allocated == 0 ) {
3569  size = 1024;
3570  }
3571  else {
3572  size = __kmp_user_lock_table.allocated * 2;
3573  }
3574  table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size );
3575  memcpy( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) );
3576  table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table;
3577  // We cannot free the previos table now, sinse it may be in use by other
3578  // threads. So save the pointer to the previous table in in the first element of the
3579  // new table. All the tables will be organized into a list, and could be freed when
3580  // library shutting down.
3581  __kmp_user_lock_table.table = table;
3582  __kmp_user_lock_table.allocated = size;
3583  }
3584  KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated );
3585  index = __kmp_user_lock_table.used;
3586  __kmp_user_lock_table.table[ index ] = lck;
3587  ++ __kmp_user_lock_table.used;
3588  return index;
3589 }
3590 
3591 static kmp_user_lock_p
3592 __kmp_lock_block_allocate()
3593 {
3594  // Assume that kmp_global_lock is held upon entry/exit.
3595  static int last_index = 0;
3596  if ( ( last_index >= __kmp_num_locks_in_block )
3597  || ( __kmp_lock_blocks == NULL ) ) {
3598  // Restart the index.
3599  last_index = 0;
3600  // Need to allocate a new block.
3601  KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3602  size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
3603  char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) );
3604  // Set up the new block.
3605  kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]);
3606  new_block->next_block = __kmp_lock_blocks;
3607  new_block->locks = (void *)buffer;
3608  // Publish the new block.
3609  KMP_MB();
3610  __kmp_lock_blocks = new_block;
3611  }
3612  kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) )
3613  [ last_index * __kmp_user_lock_size ] ) );
3614  last_index++;
3615  return ret;
3616 }
3617 
3618 //
3619 // Get memory for a lock. It may be freshly allocated memory or reused memory
3620 // from lock pool.
3621 //
3622 kmp_user_lock_p
3623 __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid,
3624  kmp_lock_flags_t flags )
3625 {
3626  kmp_user_lock_p lck;
3627  kmp_lock_index_t index;
3628  KMP_DEBUG_ASSERT( user_lock );
3629 
3630  __kmp_acquire_lock( &__kmp_global_lock, gtid );
3631 
3632  if ( __kmp_lock_pool == NULL ) {
3633  // Lock pool is empty. Allocate new memory.
3634  if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point.
3635  lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size );
3636  }
3637  else {
3638  lck = __kmp_lock_block_allocate();
3639  }
3640 
3641  // Insert lock in the table so that it can be freed in __kmp_cleanup,
3642  // and debugger has info on all allocated locks.
3643  index = __kmp_lock_table_insert( lck );
3644  }
3645  else {
3646  // Pick up lock from pool.
3647  lck = __kmp_lock_pool;
3648  index = __kmp_lock_pool->pool.index;
3649  __kmp_lock_pool = __kmp_lock_pool->pool.next;
3650  }
3651 
3652  //
3653  // We could potentially differentiate between nested and regular locks
3654  // here, and do the lock table lookup for regular locks only.
3655  //
3656  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3657  * ( (kmp_lock_index_t *) user_lock ) = index;
3658  }
3659  else {
3660  * ( (kmp_user_lock_p *) user_lock ) = lck;
3661  }
3662 
3663  // mark the lock if it is critical section lock.
3664  __kmp_set_user_lock_flags( lck, flags );
3665 
3666  __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line upper
3667 
3668  return lck;
3669 }
3670 
3671 // Put lock's memory to pool for reusing.
3672 void
3673 __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck )
3674 {
3675  kmp_lock_pool_t * lock_pool;
3676 
3677  KMP_DEBUG_ASSERT( user_lock != NULL );
3678  KMP_DEBUG_ASSERT( lck != NULL );
3679 
3680  __kmp_acquire_lock( & __kmp_global_lock, gtid );
3681 
3682  lck->pool.next = __kmp_lock_pool;
3683  __kmp_lock_pool = lck;
3684  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3685  kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock );
3686  KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used );
3687  lck->pool.index = index;
3688  }
3689 
3690  __kmp_release_lock( & __kmp_global_lock, gtid );
3691 }
3692 
3693 kmp_user_lock_p
3694 __kmp_lookup_user_lock( void **user_lock, char const *func )
3695 {
3696  kmp_user_lock_p lck = NULL;
3697 
3698  if ( __kmp_env_consistency_check ) {
3699  if ( user_lock == NULL ) {
3700  KMP_FATAL( LockIsUninitialized, func );
3701  }
3702  }
3703 
3704  if ( OMP_LOCK_T_SIZE < sizeof(void *) ) {
3705  kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock );
3706  if ( __kmp_env_consistency_check ) {
3707  if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) {
3708  KMP_FATAL( LockIsUninitialized, func );
3709  }
3710  }
3711  KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used );
3712  KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 );
3713  lck = __kmp_user_lock_table.table[index];
3714  }
3715  else {
3716  lck = *( (kmp_user_lock_p *)user_lock );
3717  }
3718 
3719  if ( __kmp_env_consistency_check ) {
3720  if ( lck == NULL ) {
3721  KMP_FATAL( LockIsUninitialized, func );
3722  }
3723  }
3724 
3725  return lck;
3726 }
3727 
3728 void
3729 __kmp_cleanup_user_locks( void )
3730 {
3731  //
3732  // Reset lock pool. Do not worry about lock in the pool -- we will free
3733  // them when iterating through lock table (it includes all the locks,
3734  // dead or alive).
3735  //
3736  __kmp_lock_pool = NULL;
3737 
3738 #define IS_CRITICAL(lck) \
3739  ( ( __kmp_get_user_lock_flags_ != NULL ) && \
3740  ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) )
3741 
3742  //
3743  // Loop through lock table, free all locks.
3744  //
3745  // Do not free item [0], it is reserved for lock tables list.
3746  //
3747  // FIXME - we are iterating through a list of (pointers to) objects of
3748  // type union kmp_user_lock, but we have no way of knowing whether the
3749  // base type is currently "pool" or whatever the global user lock type
3750  // is.
3751  //
3752  // We are relying on the fact that for all of the user lock types
3753  // (except "tas"), the first field in the lock struct is the "initialized"
3754  // field, which is set to the address of the lock object itself when
3755  // the lock is initialized. When the union is of type "pool", the
3756  // first field is a pointer to the next object in the free list, which
3757  // will not be the same address as the object itself.
3758  //
3759  // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck )
3760  // will fail for "pool" objects on the free list. This must happen as
3761  // the "location" field of real user locks overlaps the "index" field
3762  // of "pool" objects.
3763  //
3764  // It would be better to run through the free list, and remove all "pool"
3765  // objects from the lock table before executing this loop. However,
3766  // "pool" objects do not always have their index field set (only on
3767  // lin_32e), and I don't want to search the lock table for the address
3768  // of every "pool" object on the free list.
3769  //
3770  while ( __kmp_user_lock_table.used > 1 ) {
3771  const ident *loc;
3772 
3773  //
3774  // reduce __kmp_user_lock_table.used before freeing the lock,
3775  // so that state of locks is consistent
3776  //
3777  kmp_user_lock_p lck = __kmp_user_lock_table.table[
3778  --__kmp_user_lock_table.used ];
3779 
3780  if ( ( __kmp_is_user_lock_initialized_ != NULL ) &&
3781  ( *__kmp_is_user_lock_initialized_ )( lck ) ) {
3782  //
3783  // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is
3784  // initialized AND it is NOT a critical section (user is not
3785  // responsible for destroying criticals) AND we know source
3786  // location to report.
3787  //
3788  if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) &&
3789  ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) &&
3790  ( loc->psource != NULL ) ) {
3791  kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 );
3792  KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.func,
3793  str_loc.line, str_loc.col );
3794  __kmp_str_loc_free( &str_loc);
3795  }
3796 
3797 #ifdef KMP_DEBUG
3798  if ( IS_CRITICAL( lck ) ) {
3799  KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) );
3800  }
3801  else {
3802  KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) );
3803  }
3804 #endif // KMP_DEBUG
3805 
3806  //
3807  // Cleanup internal lock dynamic resources
3808  // (for drdpa locks particularly).
3809  //
3810  __kmp_destroy_user_lock( lck );
3811  }
3812 
3813  //
3814  // Free the lock if block allocation of locks is not used.
3815  //
3816  if ( __kmp_lock_blocks == NULL ) {
3817  __kmp_free( lck );
3818  }
3819  }
3820 
3821 #undef IS_CRITICAL
3822 
3823  //
3824  // delete lock table(s).
3825  //
3826  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
3827  __kmp_user_lock_table.table = NULL;
3828  __kmp_user_lock_table.allocated = 0;
3829 
3830  while ( table_ptr != NULL ) {
3831  //
3832  // In the first element we saved the pointer to the previous
3833  // (smaller) lock table.
3834  //
3835  kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] );
3836  __kmp_free( table_ptr );
3837  table_ptr = next;
3838  }
3839 
3840  //
3841  // Free buffers allocated for blocks of locks.
3842  //
3843  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
3844  __kmp_lock_blocks = NULL;
3845 
3846  while ( block_ptr != NULL ) {
3847  kmp_block_of_locks_t *next = block_ptr->next_block;
3848  __kmp_free( block_ptr->locks );
3849  //
3850  // *block_ptr itself was allocated at the end of the locks vector.
3851  //
3852  block_ptr = next;
3853  }
3854 
3855  TCW_4(__kmp_init_user_locks, FALSE);
3856 }
3857