Intel® OpenMP* Runtime Library
 All Classes Functions Variables Typedefs Enumerations Enumerator Groups Pages
kmp_csupport.c
1 /*
2  * kmp_csupport.c -- kfront linkage support for OpenMP.
3  * $Revision: 42489 $
4  * $Date: 2013-07-08 11:00:09 -0500 (Mon, 08 Jul 2013) $
5  */
6 
7 /* <copyright>
8  Copyright (c) 1997-2013 Intel Corporation. All Rights Reserved.
9 
10  Redistribution and use in source and binary forms, with or without
11  modification, are permitted provided that the following conditions
12  are met:
13 
14  * Redistributions of source code must retain the above copyright
15  notice, this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright
17  notice, this list of conditions and the following disclaimer in the
18  documentation and/or other materials provided with the distribution.
19  * Neither the name of Intel Corporation nor the names of its
20  contributors may be used to endorse or promote products derived
21  from this software without specific prior written permission.
22 
23  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 </copyright> */
36 
37 #include "omp.h" /* extern "C" declarations of user-visible routines */
38 #include "kmp.h"
39 #include "kmp_i18n.h"
40 #include "kmp_itt.h"
41 #include "kmp_error.h"
42 
43 #define MAX_MESSAGE 512
44 
45 /* ------------------------------------------------------------------------ */
46 /* ------------------------------------------------------------------------ */
47 
48 /* flags will be used in future, e.g., to implement */
49 /* openmp_strict library restrictions */
50 
60 void
61 __kmpc_begin(ident_t *loc, kmp_int32 flags)
62 {
63  // By default __kmp_ignore_mppbeg() returns TRUE.
64  if (__kmp_ignore_mppbeg() == FALSE) {
65  __kmp_internal_begin();
66 
67  KC_TRACE( 10, ("__kmpc_begin: called\n" ) );
68  }
69 }
70 
78 void
80 {
81  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end() call no-op.
82  // However, this can be overridden with KMP_IGNORE_MPPEND environment variable.
83  // If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() returns FALSE and __kmpc_end()
84  // will unregister this root (it can cause library shut down).
85  if (__kmp_ignore_mppend() == FALSE) {
86  KC_TRACE( 10, ("__kmpc_end: called\n" ) );
87  KA_TRACE( 30, ("__kmpc_end\n" ));
88 
89  __kmp_internal_end_thread( -1 );
90  }
91 }
92 
112 kmp_int32
114 {
115  kmp_int32 gtid = __kmp_entry_gtid();
116 
117  KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) );
118 
119  return gtid;
120 }
121 
135 kmp_int32
137 {
138  KC_TRACE( 10, ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_nth ) );
139 
140  return TCR_4(__kmp_nth);
141 }
142 
149 kmp_int32
151 {
152  KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) );
153  return __kmp_tid_from_gtid( __kmp_entry_gtid() );
154 }
155 
161 kmp_int32
163 {
164  KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) );
165 
166  return __kmp_entry_thread() -> th.th_team -> t.t_nproc;
167 }
168 
175 kmp_int32
177 {
178 #ifndef KMP_DEBUG
179 
180  return TRUE;
181 
182 #else
183 
184  const char *semi2;
185  const char *semi3;
186  int line_no;
187 
188  if (__kmp_par_range == 0) {
189  return TRUE;
190  }
191  semi2 = loc->psource;
192  if (semi2 == NULL) {
193  return TRUE;
194  }
195  semi2 = strchr(semi2, ';');
196  if (semi2 == NULL) {
197  return TRUE;
198  }
199  semi2 = strchr(semi2 + 1, ';');
200  if (semi2 == NULL) {
201  return TRUE;
202  }
203  if (__kmp_par_range_filename[0]) {
204  const char *name = semi2 - 1;
205  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
206  name--;
207  }
208  if ((*name == '/') || (*name == ';')) {
209  name++;
210  }
211  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
212  return __kmp_par_range < 0;
213  }
214  }
215  semi3 = strchr(semi2 + 1, ';');
216  if (__kmp_par_range_routine[0]) {
217  if ((semi3 != NULL) && (semi3 > semi2)
218  && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
219  return __kmp_par_range < 0;
220  }
221  }
222  if (sscanf(semi3 + 1, "%d", &line_no) == 1) {
223  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
224  return __kmp_par_range > 0;
225  }
226  return __kmp_par_range < 0;
227  }
228  return TRUE;
229 
230 #endif /* KMP_DEBUG */
231 
232 }
233 
239 kmp_int32
241 {
242  return __kmp_entry_thread() -> th.th_root -> r.r_active;
243 }
244 
254 void
255 __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads )
256 {
257  KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
258  global_tid, num_threads ) );
259 
260  __kmp_push_num_threads( loc, global_tid, num_threads );
261 }
262 
263 void
264 __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid )
265 {
266  KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) );
267 
268  /* the num_threads are automatically popped */
269 }
270 
271 
272 #if OMP_40_ENABLED
273 
274 void
275 __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind )
276 {
277  KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n",
278  global_tid, proc_bind ) );
279 
280  __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind );
281 }
282 
283 #endif /* OMP_40_ENABLED */
284 
285 
295 void
296 __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
297 {
298  int gtid = __kmp_entry_gtid();
299  // maybe to save thr_state is enough here
300  {
301  va_list ap;
302  va_start( ap, microtask );
303 
304  __kmp_fork_call( loc, gtid, TRUE,
305  argc,
306  VOLATILE_CAST(microtask_t) microtask,
307  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
308 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
309 #if KMP_ARCH_X86_64 && KMP_OS_LINUX
310  &ap
311 #else
312  ap
313 #endif
314  );
315  __kmp_join_call( loc, gtid );
316 
317  va_end( ap );
318  }
319 }
320 
321 #if OMP_40_ENABLED
322 
332 void
333 __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads )
334 {
335  KA_TRACE( 20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
336  global_tid, num_teams, num_threads ) );
337 
338  __kmp_push_num_teams( loc, global_tid, num_teams, num_threads );
339 }
340 
350 void
351 __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
352 {
353  int gtid = __kmp_entry_gtid();
354  kmp_info_t *this_thr = __kmp_threads[ gtid ];
355  va_list ap;
356  va_start( ap, microtask );
357 
358  // remember teams entry point and nesting level
359  this_thr->th.th_team_microtask = microtask;
360  this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host
361 
362  // check if __kmpc_push_num_teams called, set default number of teams otherwise
363  if ( this_thr->th.th_set_nth_teams == 0 ) {
364  __kmp_push_num_teams( loc, gtid, 0, 0 );
365  }
366  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
367  KMP_DEBUG_ASSERT(this_thr->th.th_set_nth_teams >= 1);
368 
369  __kmp_fork_call( loc, gtid, TRUE,
370  argc,
371  VOLATILE_CAST(microtask_t) __kmp_teams_master,
372  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
373 #if KMP_ARCH_X86_64 && KMP_OS_LINUX
374  &ap
375 #else
376  ap
377 #endif
378  );
379  __kmp_join_call( loc, gtid );
380  this_thr->th.th_team_microtask = NULL;
381  this_thr->th.th_teams_level = 0;
382 
383  va_end( ap );
384 }
385 #endif /* OMP_40_ENABLED */
386 
387 
388 //
389 // I don't think this function should ever have been exported.
390 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
391 // openmp code ever called it, but it's been exported from the RTL for so
392 // long that I'm afraid to remove the definition.
393 //
/*!
 * Exported thin wrapper around __kmp_invoke_task_func().
 *
 * @param gtid global thread number passed straight through
 * @return whatever __kmp_invoke_task_func() returns
 */
int
__kmpc_invoke_task_func( int gtid )
{
    int result = __kmp_invoke_task_func( gtid );

    return result;
}
399 
412 void
413 __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
414 {
415  kmp_info_t *this_thr;
416  kmp_team_t *serial_team;
417 
418  KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) );
419 
420  /* Skip all this code for autopar serialized loops since it results in
421  unacceptable overhead */
422  if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
423  return;
424 
425  if( ! TCR_4( __kmp_init_parallel ) )
426  __kmp_parallel_initialize();
427 
428  this_thr = __kmp_threads[ global_tid ];
429  serial_team = this_thr -> th.th_serial_team;
430 
431  /* utilize the serialized team held by this thread */
432  KMP_DEBUG_ASSERT( serial_team );
433  KMP_MB();
434 
435 #if OMP_30_ENABLED
436  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
437  KMP_DEBUG_ASSERT( this_thr -> th.th_task_team == this_thr -> th.th_team -> t.t_task_team );
438  KMP_DEBUG_ASSERT( serial_team -> t.t_task_team == NULL );
439  KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n",
440  global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
441  this_thr -> th.th_task_team = NULL;
442  }
443 #endif // OMP_30_ENABLED
444 
445 #if OMP_40_ENABLED
446  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
447  if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
448  proc_bind = proc_bind_false;
449  }
450  else if ( proc_bind == proc_bind_default ) {
451  //
452  // No proc_bind clause was specified, so use the current value
453  // of proc-bind-var for this parallel region.
454  //
455  proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
456  }
457  //
458  // Reset for next parallel region
459  //
460  this_thr->th.th_set_proc_bind = proc_bind_default;
461 #endif /* OMP_3_ENABLED */
462 
463  if( this_thr -> th.th_team != serial_team ) {
464 #if OMP_30_ENABLED
465  // Nested level will be an index in the nested nthreads array
466  int level = this_thr->th.th_team->t.t_level;
467 #endif
468  if( serial_team -> t.t_serialized ) {
469  /* this serial team was already used
470  * TODO increase performance by making this locks more specific */
471  kmp_team_t *new_team;
472  int tid = this_thr->th.th_info.ds.ds_tid;
473 
474  __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
475 
476  new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
477 #if OMP_40_ENABLED
478  proc_bind,
479 #endif
480 #if OMP_30_ENABLED
481  & this_thr->th.th_current_task->td_icvs,
482 #else
483  this_thr->th.th_team->t.t_set_nproc[tid],
484  this_thr->th.th_team->t.t_set_dynamic[tid],
485  this_thr->th.th_team->t.t_set_nested[tid],
486  this_thr->th.th_team->t.t_set_blocktime[tid],
487  this_thr->th.th_team->t.t_set_bt_intervals[tid],
488  this_thr->th.th_team->t.t_set_bt_set[tid],
489 #endif // OMP_30_ENABLED
490  0);
491  __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
492  KMP_ASSERT( new_team );
493 
494  /* setup new serialized team and install it */
495  new_team -> t.t_threads[0] = this_thr;
496  new_team -> t.t_parent = this_thr -> th.th_team;
497  serial_team = new_team;
498  this_thr -> th.th_serial_team = serial_team;
499 
500  KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
501  global_tid, serial_team ) );
502 
503 
504  /* TODO the above breaks the requirement that if we run out of
505  * resources, then we can still guarantee that serialized teams
506  * are ok, since we may need to allocate a new one */
507  } else {
508  KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
509  global_tid, serial_team ) );
510  }
511 
512  /* we have to initialize this serial team */
513  KMP_DEBUG_ASSERT( serial_team->t.t_threads );
514  KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr );
515  KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team );
516  serial_team -> t.t_ident = loc;
517  serial_team -> t.t_serialized = 1;
518  serial_team -> t.t_nproc = 1;
519  serial_team -> t.t_parent = this_thr->th.th_team;
520 #if OMP_30_ENABLED
521  serial_team -> t.t_sched = this_thr->th.th_team->t.t_sched;
522 #endif // OMP_30_ENABLED
523  this_thr -> th.th_team = serial_team;
524  serial_team -> t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
525 
526 #if OMP_30_ENABLED
527  KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#d curtask=%p\n",
528  global_tid, this_thr->th.th_current_task ) );
529  KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 );
530  this_thr->th.th_current_task->td_flags.executing = 0;
531 
532  __kmp_push_current_task_to_thread( this_thr, serial_team, 0 );
533 
534  /* TODO: GEH: do the ICVs work for nested serialized teams? Don't we need an implicit task for
535  each serialized task represented by team->t.t_serialized? */
536  copy_icvs(
537  & this_thr->th.th_current_task->td_icvs,
538  & this_thr->th.th_current_task->td_parent->td_icvs );
539 
540  // Thread value exists in the nested nthreads array for the next nested level
541  if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
542  this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
543  }
544 
545 #if OMP_40_ENABLED
546  if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) {
547  this_thr->th.th_current_task->td_icvs.proc_bind
548  = __kmp_nested_proc_bind.bind_types[ level + 1 ];
549  }
550 #endif /* OMP_40_ENABLED */
551 
552 #else /* pre-3.0 icv's */
553  serial_team -> t.t_set_nproc[0] = serial_team->t.t_parent->
554  t.t_set_nproc[serial_team->
555  t.t_master_tid];
556  serial_team -> t.t_set_dynamic[0] = serial_team->t.t_parent->
557  t.t_set_dynamic[serial_team->
558  t.t_master_tid];
559  serial_team -> t.t_set_nested[0] = serial_team->t.t_parent->
560  t.t_set_nested[serial_team->
561  t.t_master_tid];
562  serial_team -> t.t_set_blocktime[0] = serial_team->t.t_parent->
563  t.t_set_blocktime[serial_team->
564  t.t_master_tid];
565  serial_team -> t.t_set_bt_intervals[0] = serial_team->t.t_parent->
566  t.t_set_bt_intervals[serial_team->
567  t.t_master_tid];
568  serial_team -> t.t_set_bt_set[0] = serial_team->t.t_parent->
569  t.t_set_bt_set[serial_team->
570  t.t_master_tid];
571 #endif // OMP_30_ENABLED
572  this_thr -> th.th_info.ds.ds_tid = 0;
573 
574  /* set thread cache values */
575  this_thr -> th.th_team_nproc = 1;
576  this_thr -> th.th_team_master = this_thr;
577  this_thr -> th.th_team_serialized = 1;
578 
579 #if OMP_30_ENABLED
580  serial_team -> t.t_level = serial_team -> t.t_parent -> t.t_level + 1;
581  serial_team -> t.t_active_level = serial_team -> t.t_parent -> t.t_active_level;
582 #endif // OMP_30_ENABLED
583 
584 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
585  if ( __kmp_inherit_fp_control ) {
586  __kmp_store_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
587  __kmp_store_mxcsr( &serial_team->t.t_mxcsr );
588  serial_team->t.t_mxcsr &= KMP_X86_MXCSR_MASK;
589  serial_team->t.t_fp_control_saved = TRUE;
590  } else {
591  serial_team->t.t_fp_control_saved = FALSE;
592  }
593 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
594  /* check if we need to allocate dispatch buffers stack */
595  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
596  if ( !serial_team->t.t_dispatch->th_disp_buffer ) {
597  serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *)
598  __kmp_allocate( sizeof( dispatch_private_info_t ) );
599  }
600  this_thr -> th.th_dispatch = serial_team->t.t_dispatch;
601 
602  KMP_MB();
603 
604  } else {
605  /* this serialized team is already being used,
606  * that's fine, just add another nested level */
607  KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team );
608  KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
609  KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );
610  ++ serial_team -> t.t_serialized;
611  this_thr -> th.th_team_serialized = serial_team -> t.t_serialized;
612 
613 #if OMP_30_ENABLED
614  // Nested level will be an index in the nested nthreads array
615  int level = this_thr->th.th_team->t.t_level;
616  // Thread value exists in the nested nthreads array for the next nested level
617  if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) {
618  this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ];
619  }
620  serial_team -> t.t_level++;
621  KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n",
622  global_tid, serial_team, serial_team -> t.t_level ) );
623 #else
624  KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing team %p for nested serialized parallel region\n",
625  global_tid, serial_team ) );
626 #endif // OMP_30_ENABLED
627 
628  /* allocate/push dispatch buffers stack */
629  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
630  {
631  dispatch_private_info_t * disp_buffer = (dispatch_private_info_t *)
632  __kmp_allocate( sizeof( dispatch_private_info_t ) );
633  disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
634  serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
635  }
636  this_thr -> th.th_dispatch = serial_team->t.t_dispatch;
637 
638  KMP_MB();
639  }
640 
641  if ( __kmp_env_consistency_check )
642  __kmp_push_parallel( global_tid, NULL );
643 
644 #if USE_ITT_BUILD
645  // Mark start of "parallel" region for VTune.
646  if ( ( __itt_frame_begin_v3_ptr && __kmp_forkjoin_frames ) || KMP_ITT_DEBUG )
647  {
648  __kmp_itt_region_forking( global_tid, 1 );
649  }
650 #endif /* USE_ITT_BUILD */
651 
652 }
653 
661 void
662 __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid)
663 {
664  kmp_internal_control_t *top;
665  kmp_info_t *this_thr;
666  kmp_team_t *serial_team;
667 
668  KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) );
669 
670  /* skip all this code for autopar serialized loops since it results in
671  unacceptable overhead */
672  if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) )
673  return;
674 
675  // Not autopar code
676  if( ! TCR_4( __kmp_init_parallel ) )
677  __kmp_parallel_initialize();
678 
679  this_thr = __kmp_threads[ global_tid ];
680  serial_team = this_thr->th.th_serial_team;
681 
682  KMP_MB();
683  KMP_DEBUG_ASSERT( serial_team );
684  KMP_ASSERT( serial_team -> t.t_serialized );
685  KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team );
686  KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team );
687  KMP_DEBUG_ASSERT( serial_team -> t.t_threads );
688  KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr );
689 
690  /* If necessary, pop the internal control stack values and replace the team values */
691  top = serial_team -> t.t_control_stack_top;
692  if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) {
693 #if OMP_30_ENABLED
694  copy_icvs(
695  &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs,
696  top );
697 #else
698  serial_team -> t.t_set_nproc[0] = top -> nproc;
699  serial_team -> t.t_set_dynamic[0] = top -> dynamic;
700  serial_team -> t.t_set_nested[0] = top -> nested;
701  serial_team -> t.t_set_blocktime[0] = top -> blocktime;
702  serial_team -> t.t_set_bt_intervals[0] = top -> bt_intervals;
703  serial_team -> t.t_set_bt_set[0] = top -> bt_set;
704 #endif // OMP_30_ENABLED
705  serial_team -> t.t_control_stack_top = top -> next;
706  __kmp_free(top);
707  }
708 
709 #if OMP_30_ENABLED
710  //if( serial_team -> t.t_serialized > 1 )
711  serial_team -> t.t_level--;
712 #endif // OMP_30_ENABLED
713 
714  /* pop dispatch buffers stack */
715  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
716  {
717  dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer;
718  serial_team->t.t_dispatch->th_disp_buffer =
719  serial_team->t.t_dispatch->th_disp_buffer->next;
720  __kmp_free( disp_buffer );
721  }
722 
723  -- serial_team -> t.t_serialized;
724  if ( serial_team -> t.t_serialized == 0 ) {
725 
726  /* return to the parallel section */
727 
728 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
729  if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) {
730  __kmp_clear_x87_fpu_status_word();
731  __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word );
732  __kmp_load_mxcsr( &serial_team->t.t_mxcsr );
733  }
734 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
735 
736  this_thr -> th.th_team = serial_team -> t.t_parent;
737  this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid;
738 
739  /* restore values cached in the thread */
740  this_thr -> th.th_team_nproc = serial_team -> t.t_parent -> t.t_nproc; /* JPH */
741  this_thr -> th.th_team_master = serial_team -> t.t_parent -> t.t_threads[0]; /* JPH */
742  this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized;
743 
744  /* TODO the below shouldn't need to be adjusted for serialized teams */
745  this_thr -> th.th_dispatch = & this_thr -> th.th_team ->
746  t.t_dispatch[ serial_team -> t.t_master_tid ];
747 
748 #if OMP_30_ENABLED
749  __kmp_pop_current_task_from_thread( this_thr );
750 
751  KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 );
752  this_thr -> th.th_current_task -> td_flags.executing = 1;
753 
754  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
755  //
756  // Copy the task team from the new child / old parent team
757  // to the thread. If non-NULL, copy the state flag also.
758  //
759  if ( ( this_thr -> th.th_task_team = this_thr -> th.th_team -> t.t_task_team ) != NULL ) {
760  this_thr -> th.th_task_state = this_thr -> th.th_task_team -> tt.tt_state;
761  }
762  KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n",
763  global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) );
764  }
765 #endif // OMP_30_ENABLED
766 
767  }
768  else {
769 
770 #if OMP_30_ENABLED
771  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
772  KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n",
773  global_tid, serial_team, serial_team -> t.t_serialized ) );
774  }
775 #endif // OMP_30_ENABLED
776 
777  }
778 
779  // Mark the end of the "parallel" region for Vtune
780 #if USE_ITT_BUILD
781  if ( ( __itt_frame_end_v3_ptr && __kmp_forkjoin_frames ) || KMP_ITT_DEBUG )
782  {
783  __kmp_itt_region_joined( global_tid, 1 );
784  }
785 #endif /* USE_ITT_BUILD */
786 
787  if ( __kmp_env_consistency_check )
788  __kmp_pop_parallel( global_tid, NULL );
789 }
790 
803 void
805 {
806  KC_TRACE( 10, ("__kmpc_flush: called\n" ) );
807 
808  /* need explicit __mf() here since use volatile instead in library */
809  KMP_MB(); /* Flush all pending memory write invalidates. */
810 
811  // This is not an OMP 3.0 feature.
812  // This macro is used here just not to let the change go to 10.1.
813  // This change will go to the mainline first.
814  #if OMP_30_ENABLED
815  #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
816  #if KMP_MIC
817  // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
818  // We shouldn't need it, though, since the ABI rules require that
819  // * If the compiler generates NGO stores it also generates the fence
820  // * If users hand-code NGO stores they should insert the fence
821  // therefore no incomplete unordered stores should be visible.
822  #else
823  // C74404
824  // This is to address non-temporal store instructions (sfence needed).
825  // The clflush instruction is addressed either (mfence needed).
826  // Probably the non-temporal load monvtdqa instruction should also be addressed.
827  // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2.
828  if ( ! __kmp_cpuinfo.initialized ) {
829  __kmp_query_cpuid( & __kmp_cpuinfo );
830  }; // if
831  if ( ! __kmp_cpuinfo.sse2 ) {
832  // CPU cannot execute SSE2 instructions.
833  } else {
834  #if defined( __GNUC__ ) && !defined( __INTEL_COMPILER )
835  __sync_synchronize();
836  #else
837  _mm_mfence();
838  #endif // __GNUC__
839  }; // if
840  #endif // KMP_MIC
841  #else
842  #error Unknown or unsupported architecture
843  #endif
844  #endif // OMP_30_ENABLED
845 
846 }
847 
848 /* -------------------------------------------------------------------------- */
849 
850 /* -------------------------------------------------------------------------- */
851 
859 void
860 __kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
861 {
862  int explicit_barrier_flag;
863  KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );
864 
865  if (! TCR_4(__kmp_init_parallel))
866  __kmp_parallel_initialize();
867 
868  if ( __kmp_env_consistency_check ) {
869  if ( loc == 0 ) {
870  KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
871  }; // if
872 
873  __kmp_check_barrier( global_tid, ct_barrier, loc );
874  }
875 
876  __kmp_threads[ global_tid ]->th.th_ident = loc;
877  // TODO: explicit barrier_wait_id:
878  // this function is called when 'barrier' directive is present or
879  // implicit barrier at the end of a worksharing construct.
880  // 1) better to add a per-thread barrier counter to a thread data structure
881  // 2) set to 0 when a new team is created
882  // 4) no sync is required
883 
884  __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
885 }
886 
887 /* The BARRIER for a MASTER section is always explicit */
894 kmp_int32
895 __kmpc_master(ident_t *loc, kmp_int32 global_tid)
896 {
897  int status = 0;
898 
899  KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) );
900 
901  if( ! TCR_4( __kmp_init_parallel ) )
902  __kmp_parallel_initialize();
903 
904  if( KMP_MASTER_GTID( global_tid ))
905  status = 1;
906 
907  if ( __kmp_env_consistency_check ) {
908  if (status)
909  __kmp_push_sync( global_tid, ct_master, loc, NULL );
910  else
911  __kmp_check_sync( global_tid, ct_master, loc, NULL );
912  }
913 
914  return status;
915 }
916 
925 void
926 __kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
927 {
928  KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );
929 
930  KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
931 
932  if ( __kmp_env_consistency_check ) {
933  if( global_tid < 0 )
934  KMP_WARNING( ThreadIdentInvalid );
935 
936  if( KMP_MASTER_GTID( global_tid ))
937  __kmp_pop_sync( global_tid, ct_master, loc );
938  }
939 }
940 
948 void
949 __kmpc_ordered( ident_t * loc, kmp_int32 gtid )
950 {
951  int cid = 0;
952  kmp_info_t *th;
953  KMP_DEBUG_ASSERT( __kmp_init_serial );
954 
955  KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid ));
956 
957  if (! TCR_4(__kmp_init_parallel))
958  __kmp_parallel_initialize();
959 
960 #if USE_ITT_BUILD
961  __kmp_itt_ordered_prep( gtid );
962  // TODO: ordered_wait_id
963 #endif /* USE_ITT_BUILD */
964 
965  th = __kmp_threads[ gtid ];
966 
967  if ( th -> th.th_dispatch -> th_deo_fcn != 0 )
968  (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc );
969  else
970  __kmp_parallel_deo( & gtid, & cid, loc );
971 
972 #if USE_ITT_BUILD
973  __kmp_itt_ordered_start( gtid );
974 #endif /* USE_ITT_BUILD */
975 }
976 
984 void
985 __kmpc_end_ordered( ident_t * loc, kmp_int32 gtid )
986 {
987  int cid = 0;
988  kmp_info_t *th;
989 
990  KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) );
991 
992 #if USE_ITT_BUILD
993  __kmp_itt_ordered_end( gtid );
994  // TODO: ordered_wait_id
995 #endif /* USE_ITT_BUILD */
996 
997  th = __kmp_threads[ gtid ];
998 
999  if ( th -> th.th_dispatch -> th_dxo_fcn != 0 )
1000  (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc );
1001  else
1002  __kmp_parallel_dxo( & gtid, & cid, loc );
1003 }
1004 
/* Thin forwarder to __kmp_yield().
 * AC: needed in macro __kmp_acquire_user_lock_with_checks */
inline void
__kmp_static_yield( int arg )
{
    __kmp_yield( arg );
}
1009 
1010 static kmp_user_lock_p
1011 __kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid )
1012 {
1013  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1014 
1015  //
1016  // Because of the double-check, the following load
1017  // doesn't need to be volatile.
1018  //
1019  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
1020 
1021  if ( lck == NULL ) {
1022  void * idx;
1023 
1024  // Allocate & initialize the lock.
1025  // Remember allocated locks in table in order to free them in __kmp_cleanup()
1026  lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section );
1027  __kmp_init_user_lock_with_checks( lck );
1028  __kmp_set_user_lock_location( lck, loc );
1029 #if USE_ITT_BUILD
1030  __kmp_itt_critical_creating( lck );
1031  // __kmp_itt_critical_creating() should be called *before* the first usage of underlying
1032  // lock. It is the only place where we can guarantee it. There are chances the lock will
1033  // destroyed with no usage, but it is not a problem, because this is not real event seen
1034  // by user but rather setting name for object (lock). See more details in kmp_itt.h.
1035 #endif /* USE_ITT_BUILD */
1036 
1037  //
1038  // Use a cmpxchg instruction to slam the start of the critical
1039  // section with the lock pointer. If another thread beat us
1040  // to it, deallocate the lock, and use the lock that the other
1041  // thread allocated.
1042  //
1043  int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck );
1044 
1045  if ( status == 0 ) {
1046  // Deallocate the lock and reload the value.
1047 #if USE_ITT_BUILD
1048  __kmp_itt_critical_destroyed( lck );
1049  // Let ITT know the lock is destroyed and the same memory location may be reused for
1050  // another purpose.
1051 #endif /* USE_ITT_BUILD */
1052  __kmp_destroy_user_lock_with_checks( lck );
1053  __kmp_user_lock_free( &idx, gtid, lck );
1054  lck = (kmp_user_lock_p)TCR_PTR( *lck_pp );
1055  KMP_DEBUG_ASSERT( lck != NULL );
1056  }
1057  }
1058  return lck;
1059 }
1060 
1071 void
1072 __kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
1073 
1074  kmp_user_lock_p lck;
1075 
1076  KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
1077 
1078  //TODO: add THR_OVHD_STATE
1079 
1080  KMP_CHECK_USER_LOCK_INIT();
1081 
1082  if ( ( __kmp_user_lock_kind == lk_tas )
1083  && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1084  lck = (kmp_user_lock_p)crit;
1085  }
1086 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1087  else if ( ( __kmp_user_lock_kind == lk_futex )
1088  && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1089  lck = (kmp_user_lock_p)crit;
1090  }
1091 #endif
1092  else { // ticket, queuing or drdpa
1093  lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
1094  }
1095 
1096  if ( __kmp_env_consistency_check )
1097  __kmp_push_sync( global_tid, ct_critical, loc, lck );
1098 
1099  /* since the critical directive binds to all threads, not just
1100  * the current team we have to check this even if we are in a
1101  * serialized team */
1102  /* also, even if we are the uber thread, we still have to conduct the lock,
1103  * as we have to contend with sibling threads */
1104 
1105 #if USE_ITT_BUILD
1106  __kmp_itt_critical_acquiring( lck );
1107 #endif /* USE_ITT_BUILD */
1108  // Value of 'crit' should be good for using as a critical_id of the critical section directive.
1109 
1110  __kmp_acquire_user_lock_with_checks( lck, global_tid );
1111 
1112 #if USE_ITT_BUILD
1113  __kmp_itt_critical_acquired( lck );
1114 #endif /* USE_ITT_BUILD */
1115 
1116  KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
1117 } // __kmpc_critical
1118 
1128 void
1129 __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
1130 {
1131  kmp_user_lock_p lck;
1132 
1133  KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid ));
1134 
1135  if ( ( __kmp_user_lock_kind == lk_tas )
1136  && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1137  lck = (kmp_user_lock_p)crit;
1138  }
1139 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1140  else if ( ( __kmp_user_lock_kind == lk_futex )
1141  && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) {
1142  lck = (kmp_user_lock_p)crit;
1143  }
1144 #endif
1145  else { // ticket, queuing or drdpa
1146  lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit));
1147  }
1148 
1149  KMP_ASSERT(lck != NULL);
1150 
1151  if ( __kmp_env_consistency_check )
1152  __kmp_pop_sync( global_tid, ct_critical, loc );
1153 
1154 #if USE_ITT_BUILD
1155  __kmp_itt_critical_releasing( lck );
1156 #endif /* USE_ITT_BUILD */
1157  // Value of 'crit' should be good for using as a critical_id of the critical section directive.
1158 
1159  __kmp_release_user_lock_with_checks( lck, global_tid );
1160 
1161  KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
1162 }
1163 
1172 kmp_int32
1173 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid)
1174 {
1175  int status;
1176 
1177  KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) );
1178 
1179  if (! TCR_4(__kmp_init_parallel))
1180  __kmp_parallel_initialize();
1181 
1182  if ( __kmp_env_consistency_check )
1183  __kmp_check_barrier( global_tid, ct_barrier, loc );
1184 
1185  status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL );
1186 
1187  return (status != 0) ? 0 : 1;
1188 }
1189 
1199 void
1200 __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid)
1201 {
1202  KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid ));
1203 
1204  __kmp_end_split_barrier ( bs_plain_barrier, global_tid );
1205 }
1206 
1217 kmp_int32
1218 __kmpc_barrier_master_nowait( ident_t * loc, kmp_int32 global_tid )
1219 {
1220  kmp_int32 ret;
1221 
1222  KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid ));
1223 
1224  if (! TCR_4(__kmp_init_parallel))
1225  __kmp_parallel_initialize();
1226 
1227  if ( __kmp_env_consistency_check ) {
1228  if ( loc == 0 ) {
1229  KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user?
1230  }
1231  __kmp_check_barrier( global_tid, ct_barrier, loc );
1232  }
1233 
1234  __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
1235 
1236  ret = __kmpc_master (loc, global_tid);
1237 
1238  if ( __kmp_env_consistency_check ) {
1239  /* there's no __kmpc_end_master called; so the (stats) */
1240  /* actions of __kmpc_end_master are done here */
1241 
1242  if ( global_tid < 0 ) {
1243  KMP_WARNING( ThreadIdentInvalid );
1244  }
1245  if (ret) {
1246  /* only one thread should do the pop since only */
1247  /* one did the push (see __kmpc_master()) */
1248 
1249  __kmp_pop_sync( global_tid, ct_master, loc );
1250  }
1251  }
1252 
1253  return (ret);
1254 }
1255 
1256 /* The BARRIER for a SINGLE process section is always explicit */
1268 kmp_int32
1269 __kmpc_single(ident_t *loc, kmp_int32 global_tid)
1270 {
1271  kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE );
1272  return rc;
1273 }
1274 
1284 void
1285 __kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
1286 {
1287  __kmp_exit_single( global_tid );
1288 }
1289 
1297 void
1298 __kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid )
1299 {
1300  KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1301 
1302  if ( __kmp_env_consistency_check )
1303  __kmp_pop_workshare( global_tid, ct_pdo, loc );
1304 }
1305 
1306 /*
1307  * User routines which take C-style arguments (call by value)
1308  * different from the Fortran equivalent routines
1309  */
1310 
/*!
 * C (call-by-value) entry point: forward @p arg to __kmp_set_num_threads()
 * for the calling thread.
 *
 * @param arg  requested thread count, passed through unchanged
 */
void
ompc_set_num_threads( int arg )
{
    // !!!!! TODO: check the per-task binding
    int gtid = __kmp_entry_gtid();

    __kmp_set_num_threads( arg, gtid );
}
1317 
1318 void
1319 ompc_set_dynamic( int flag )
1320 {
1321  kmp_info_t *thread;
1322 
1323  /* For the thread-private implementation of the internal controls */
1324  thread = __kmp_entry_thread();
1325 
1326  __kmp_save_internal_controls( thread );
1327 
1328  set__dynamic( thread, flag ? TRUE : FALSE );
1329 }
1330 
1331 void
1332 ompc_set_nested( int flag )
1333 {
1334  kmp_info_t *thread;
1335 
1336  /* For the thread-private internal controls implementation */
1337  thread = __kmp_entry_thread();
1338 
1339  __kmp_save_internal_controls( thread );
1340 
1341  set__nested( thread, flag ? TRUE : FALSE );
1342 }
1343 
1344 #if OMP_30_ENABLED
1345 
/*!
 * C (call-by-value) entry point: forward @p max_active_levels to
 * __kmp_set_max_active_levels() for the calling thread.
 *
 * @param max_active_levels  requested value, passed through unchanged
 */
void
ompc_set_max_active_levels( int max_active_levels )
{
    /* TO DO */
    /* we want per-task implementation of this internal control */

    /* For the per-thread internal controls implementation */
    int gtid = __kmp_entry_gtid();

    __kmp_set_max_active_levels( gtid, max_active_levels );
}
1355 
1356 void
1357 ompc_set_schedule( omp_sched_t kind, int modifier )
1358 {
1359 // !!!!! TODO: check the per-task binding
1360  __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier );
1361 }
1362 
/*!
 * C (call-by-value) entry point for the ancestor-thread-number query.
 *
 * @param level  nesting level, passed through unchanged
 * @return the value returned by __kmp_get_ancestor_thread_num() for the
 *         calling thread
 */
int
ompc_get_ancestor_thread_num( int level )
{
    int gtid = __kmp_entry_gtid();

    return __kmp_get_ancestor_thread_num( gtid, level );
}
1368 
/*!
 * C (call-by-value) entry point for the team-size query.
 *
 * @param level  nesting level, passed through unchanged
 * @return the value returned by __kmp_get_team_size() for the calling thread
 */
int
ompc_get_team_size( int level )
{
    int gtid = __kmp_entry_gtid();

    return __kmp_get_team_size( gtid, level );
}
1374 
1375 #endif // OMP_30_ENABLED
1376 
/*!
 * Forward an int-typed stack size request to __kmp_aux_set_stacksize().
 *
 * @param arg  requested stack size, passed through unchanged
 */
void
kmpc_set_stacksize( int arg )
{
    /* No explicit initialization here: __kmp_aux_set_stacksize
     * initializes the library if needed. */
    __kmp_aux_set_stacksize( arg );
}
1383 
/*!
 * Forward a size_t-typed stack size request to __kmp_aux_set_stacksize().
 *
 * @param arg  requested stack size, passed through unchanged
 */
void
kmpc_set_stacksize_s( size_t arg )
{
    /* No explicit initialization here: __kmp_aux_set_stacksize
     * initializes the library if needed. */
    __kmp_aux_set_stacksize( arg );
}
1390 
1391 void
1392 kmpc_set_blocktime( int arg )
1393 {
1394  int gtid, tid;
1395  kmp_info_t *thread;
1396 
1397  gtid = __kmp_entry_gtid();
1398  tid = __kmp_tid_from_gtid(gtid);
1399  thread = __kmp_thread_from_gtid(gtid);
1400 
1401  __kmp_aux_set_blocktime( arg, thread, tid );
1402 }
1403 
1404 void
1405 kmpc_set_library( int arg )
1406 {
1407  // __kmp_user_set_library initializes the library if needed
1408  __kmp_user_set_library( (enum library_type)arg );
1409 }
1410 
/*!
 * Forward a settings string and its length to __kmp_aux_set_defaults().
 *
 * @param str  NUL-terminated string; must not be NULL because strlen() is
 *             applied to it
 */
void
kmpc_set_defaults( char const * str )
{
    /* No explicit initialization here: __kmp_aux_set_defaults
     * initializes the library if needed. */
    __kmp_aux_set_defaults( str, strlen( str ) );
}
1417 
/*
 * Affinity-mask entry points.
 *
 * The guard is written as "#if OMP_30_ENABLED", matching the other
 * OpenMP 3.0 section of this file.  With "#ifdef" these functions would
 * still be compiled when OMP_30_ENABLED is defined as 0.
 */
#if OMP_30_ENABLED

/*!
 * Forward a "set proc in mask" request to __kmp_aux_set_affinity_mask_proc().
 *
 * @param proc  processor number, passed through unchanged
 * @param mask  affinity mask handle, passed through unchanged
 * @return -1 in stub builds and on operating systems other than Windows and
 *         Linux; otherwise the value returned by
 *         __kmp_aux_set_affinity_mask_proc()
 *
 * Middle initialization is performed first if it has not happened yet.
 */
int
kmpc_set_affinity_mask_proc( int proc, void **mask )
{
#if defined(KMP_STUB) || !(KMP_OS_WINDOWS || KMP_OS_LINUX)
    return -1;
#else
    if ( ! TCR_4(__kmp_init_middle) ) {
        __kmp_middle_initialize();
    }
    return __kmp_aux_set_affinity_mask_proc( proc, mask );
#endif
}

/*!
 * Forward an "unset proc in mask" request to
 * __kmp_aux_unset_affinity_mask_proc().
 *
 * @param proc  processor number, passed through unchanged
 * @param mask  affinity mask handle, passed through unchanged
 * @return -1 in stub builds and on operating systems other than Windows and
 *         Linux; otherwise the value returned by
 *         __kmp_aux_unset_affinity_mask_proc()
 *
 * Middle initialization is performed first if it has not happened yet.
 */
int
kmpc_unset_affinity_mask_proc( int proc, void **mask )
{
#if defined(KMP_STUB) || !(KMP_OS_WINDOWS || KMP_OS_LINUX)
    return -1;
#else
    if ( ! TCR_4(__kmp_init_middle) ) {
        __kmp_middle_initialize();
    }
    return __kmp_aux_unset_affinity_mask_proc( proc, mask );
#endif
}

/*!
 * Forward a "query proc in mask" request to
 * __kmp_aux_get_affinity_mask_proc().
 *
 * @param proc  processor number, passed through unchanged
 * @param mask  affinity mask handle, passed through unchanged
 * @return -1 in stub builds and on operating systems other than Windows and
 *         Linux; otherwise the value returned by
 *         __kmp_aux_get_affinity_mask_proc()
 *
 * Middle initialization is performed first if it has not happened yet.
 */
int
kmpc_get_affinity_mask_proc( int proc, void **mask )
{
#if defined(KMP_STUB) || !(KMP_OS_WINDOWS || KMP_OS_LINUX)
    return -1;
#else
    if ( ! TCR_4(__kmp_init_middle) ) {
        __kmp_middle_initialize();
    }
    return __kmp_aux_get_affinity_mask_proc( proc, mask );
#endif
}

#endif /* OMP_30_ENABLED */
1460 
1461 /* -------------------------------------------------------------------------- */
1502 void
1503 __kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit )
1504 {
1505  void **data_ptr;
1506 
1507  KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid ));
1508 
1509  KMP_MB();
1510 
1511  data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data;
1512 
1513  if ( __kmp_env_consistency_check ) {
1514  if ( loc == 0 ) {
1515  KMP_WARNING( ConstructIdentInvalid );
1516  }
1517  }
1518 
1519  /* ToDo: Optimize the following two barriers into some kind of split barrier */
1520 
1521  if (didit) *data_ptr = cpy_data;
1522 
1523  /* This barrier is not a barrier region boundary */
1524  __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1525 
1526  if (! didit) (*cpy_func)( cpy_data, *data_ptr );
1527 
1528  /* Consider next barrier the user-visible barrier for barrier region boundaries */
1529  /* Nesting checks are already handled by the single construct checks */
1530 
1531  __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL );
1532 }
1533 
1534 /* -------------------------------------------------------------------------- */
1535 
/*
 * Short aliases for the "with_checks" user-lock routines used by the
 * __kmpc_*_lock entry points.
 */

/* simple locks */
#define INIT_LOCK                   __kmp_init_user_lock_with_checks
#define ACQUIRE_LOCK                __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED          __kmp_acquire_user_lock_with_checks_timed
#define RELEASE_LOCK                __kmp_release_user_lock_with_checks
#define TEST_LOCK                   __kmp_test_user_lock_with_checks
#define DESTROY_LOCK                __kmp_destroy_user_lock_with_checks

/* nestable locks */
#define INIT_NESTED_LOCK            __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK         __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED   __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_NESTED_LOCK         __kmp_release_nested_user_lock_with_checks
#define TEST_NESTED_LOCK            __kmp_test_nested_user_lock_with_checks
#define DESTROY_NESTED_LOCK         __kmp_destroy_nested_user_lock_with_checks
1548 
1549 
1550 /*
1551  * TODO: Make check abort messages use location info & pass it
1552  * into with_checks routines
1553  */
1554 
1555 /* initialize the lock */
1556 void
1557 __kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
1558  static char const * const func = "omp_init_lock";
1559  kmp_user_lock_p lck;
1560  KMP_DEBUG_ASSERT( __kmp_init_serial );
1561 
1562  if ( __kmp_env_consistency_check ) {
1563  if ( user_lock == NULL ) {
1564  KMP_FATAL( LockIsUninitialized, func );
1565  }
1566  }
1567 
1568  KMP_CHECK_USER_LOCK_INIT();
1569 
1570  if ( ( __kmp_user_lock_kind == lk_tas )
1571  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1572  lck = (kmp_user_lock_p)user_lock;
1573  }
1574 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1575  else if ( ( __kmp_user_lock_kind == lk_futex )
1576  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1577  lck = (kmp_user_lock_p)user_lock;
1578  }
1579 #endif
1580  else {
1581  lck = __kmp_user_lock_allocate( user_lock, gtid );
1582  }
1583  INIT_LOCK( lck );
1584  __kmp_set_user_lock_location( lck, loc );
1585 
1586 #if USE_ITT_BUILD
1587  __kmp_itt_lock_creating( lck );
1588 #endif /* USE_ITT_BUILD */
1589 } // __kmpc_init_lock
1590 
1591 /* initialize the lock */
1592 void
1593 __kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
1594  static char const * const func = "omp_init_nest_lock";
1595  kmp_user_lock_p lck;
1596  KMP_DEBUG_ASSERT( __kmp_init_serial );
1597 
1598  if ( __kmp_env_consistency_check ) {
1599  if ( user_lock == NULL ) {
1600  KMP_FATAL( LockIsUninitialized, func );
1601  }
1602  }
1603 
1604  KMP_CHECK_USER_LOCK_INIT();
1605 
1606  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1607  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1608  lck = (kmp_user_lock_p)user_lock;
1609  }
1610 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1611  else if ( ( __kmp_user_lock_kind == lk_futex )
1612  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1613  <= OMP_NEST_LOCK_T_SIZE ) ) {
1614  lck = (kmp_user_lock_p)user_lock;
1615  }
1616 #endif
1617  else {
1618  lck = __kmp_user_lock_allocate( user_lock, gtid );
1619  }
1620 
1621  INIT_NESTED_LOCK( lck );
1622  __kmp_set_user_lock_location( lck, loc );
1623 
1624 #if USE_ITT_BUILD
1625  __kmp_itt_lock_creating( lck );
1626 #endif /* USE_ITT_BUILD */
1627 } // __kmpc_init_nest_lock
1628 
1629 void
1630 __kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
1631 
1632  kmp_user_lock_p lck;
1633 
1634  if ( ( __kmp_user_lock_kind == lk_tas )
1635  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1636  lck = (kmp_user_lock_p)user_lock;
1637  }
1638 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1639  else if ( ( __kmp_user_lock_kind == lk_futex )
1640  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1641  lck = (kmp_user_lock_p)user_lock;
1642  }
1643 #endif
1644  else {
1645  lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" );
1646  }
1647 
1648 #if USE_ITT_BUILD
1649  __kmp_itt_lock_destroyed( lck );
1650 #endif /* USE_ITT_BUILD */
1651  DESTROY_LOCK( lck );
1652 
1653  if ( ( __kmp_user_lock_kind == lk_tas )
1654  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1655  ;
1656  }
1657 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1658  else if ( ( __kmp_user_lock_kind == lk_futex )
1659  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1660  ;
1661  }
1662 #endif
1663  else {
1664  __kmp_user_lock_free( user_lock, gtid, lck );
1665  }
1666 } // __kmpc_destroy_lock
1667 
1668 /* destroy the lock */
1669 void
1670 __kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
1671 
1672  kmp_user_lock_p lck;
1673 
1674  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1675  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1676  lck = (kmp_user_lock_p)user_lock;
1677  }
1678 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1679  else if ( ( __kmp_user_lock_kind == lk_futex )
1680  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1681  <= OMP_NEST_LOCK_T_SIZE ) ) {
1682  lck = (kmp_user_lock_p)user_lock;
1683  }
1684 #endif
1685  else {
1686  lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" );
1687  }
1688 
1689 #if USE_ITT_BUILD
1690  __kmp_itt_lock_destroyed( lck );
1691 #endif /* USE_ITT_BUILD */
1692 
1693  DESTROY_NESTED_LOCK( lck );
1694 
1695  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1696  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1697  ;
1698  }
1699 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1700  else if ( ( __kmp_user_lock_kind == lk_futex )
1701  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1702  <= OMP_NEST_LOCK_T_SIZE ) ) {
1703  ;
1704  }
1705 #endif
1706  else {
1707  __kmp_user_lock_free( user_lock, gtid, lck );
1708  }
1709 } // __kmpc_destroy_nest_lock
1710 
1711 void
1712 __kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
1713  kmp_user_lock_p lck;
1714 
1715  if ( ( __kmp_user_lock_kind == lk_tas )
1716  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1717  lck = (kmp_user_lock_p)user_lock;
1718  }
1719 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1720  else if ( ( __kmp_user_lock_kind == lk_futex )
1721  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1722  lck = (kmp_user_lock_p)user_lock;
1723  }
1724 #endif
1725  else {
1726  lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" );
1727  }
1728 
1729 #if USE_ITT_BUILD
1730  __kmp_itt_lock_acquiring( lck );
1731 #endif /* USE_ITT_BUILD */
1732 
1733  ACQUIRE_LOCK( lck, gtid );
1734 
1735 #if USE_ITT_BUILD
1736  __kmp_itt_lock_acquired( lck );
1737 #endif /* USE_ITT_BUILD */
1738 }
1739 
1740 
1741 void
1742 __kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) {
1743  kmp_user_lock_p lck;
1744 
1745  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1746  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1747  lck = (kmp_user_lock_p)user_lock;
1748  }
1749 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1750  else if ( ( __kmp_user_lock_kind == lk_futex )
1751  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1752  <= OMP_NEST_LOCK_T_SIZE ) ) {
1753  lck = (kmp_user_lock_p)user_lock;
1754  }
1755 #endif
1756  else {
1757  lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" );
1758  }
1759 
1760 #if USE_ITT_BUILD
1761  __kmp_itt_lock_acquiring( lck );
1762 #endif /* USE_ITT_BUILD */
1763 
1764  ACQUIRE_NESTED_LOCK( lck, gtid );
1765 
1766 #if USE_ITT_BUILD
1767  __kmp_itt_lock_acquired( lck );
1768 #endif /* USE_ITT_BUILD */
1769 }
1770 
1771 void
1772 __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
1773 {
1774  kmp_user_lock_p lck;
1775 
1776  /* Can't use serial interval since not block structured */
1777  /* release the lock */
1778 
1779  if ( ( __kmp_user_lock_kind == lk_tas )
1780  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1781 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1782  // "fast" path implemented to fix customer performance issue
1783 #if USE_ITT_BUILD
1784  __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
1785 #endif /* USE_ITT_BUILD */
1786  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
1787  KMP_MB();
1788  return;
1789 #else
1790  lck = (kmp_user_lock_p)user_lock;
1791 #endif
1792  }
1793 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1794  else if ( ( __kmp_user_lock_kind == lk_futex )
1795  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1796  lck = (kmp_user_lock_p)user_lock;
1797  }
1798 #endif
1799  else {
1800  lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" );
1801  }
1802 
1803 #if USE_ITT_BUILD
1804  __kmp_itt_lock_releasing( lck );
1805 #endif /* USE_ITT_BUILD */
1806 
1807  RELEASE_LOCK( lck, gtid );
1808 }
1809 
1810 /* release the lock */
1811 void
1812 __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
1813 {
1814  kmp_user_lock_p lck;
1815 
1816  /* Can't use serial interval since not block structured */
1817 
1818  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1819  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1820 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1821  // "fast" path implemented to fix customer performance issue
1822  kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock;
1823 #if USE_ITT_BUILD
1824  __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock );
1825 #endif /* USE_ITT_BUILD */
1826  if ( --(tl->lk.depth_locked) == 0 ) {
1827  TCW_4(tl->lk.poll, 0);
1828  }
1829  KMP_MB();
1830  return;
1831 #else
1832  lck = (kmp_user_lock_p)user_lock;
1833 #endif
1834  }
1835 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1836  else if ( ( __kmp_user_lock_kind == lk_futex )
1837  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1838  <= OMP_NEST_LOCK_T_SIZE ) ) {
1839  lck = (kmp_user_lock_p)user_lock;
1840  }
1841 #endif
1842  else {
1843  lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" );
1844  }
1845 
1846 #if USE_ITT_BUILD
1847  __kmp_itt_lock_releasing( lck );
1848 #endif /* USE_ITT_BUILD */
1849 
1850  RELEASE_NESTED_LOCK( lck, gtid );
1851 }
1852 
1853 /* try to acquire the lock */
1854 int
1855 __kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
1856 {
1857  kmp_user_lock_p lck;
1858  int rc;
1859 
1860  if ( ( __kmp_user_lock_kind == lk_tas )
1861  && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1862  lck = (kmp_user_lock_p)user_lock;
1863  }
1864 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1865  else if ( ( __kmp_user_lock_kind == lk_futex )
1866  && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) {
1867  lck = (kmp_user_lock_p)user_lock;
1868  }
1869 #endif
1870  else {
1871  lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" );
1872  }
1873 
1874 #if USE_ITT_BUILD
1875  __kmp_itt_lock_acquiring( lck );
1876 #endif /* USE_ITT_BUILD */
1877 
1878  rc = TEST_LOCK( lck, gtid );
1879 #if USE_ITT_BUILD
1880  if ( rc ) {
1881  __kmp_itt_lock_acquired( lck );
1882  } else {
1883  __kmp_itt_lock_cancelled( lck );
1884  }
1885 #endif /* USE_ITT_BUILD */
1886  return ( rc ? FTN_TRUE : FTN_FALSE );
1887 
1888  /* Can't use serial interval since not block structured */
1889 }
1890 
1891 /* try to acquire the lock */
1892 int
1893 __kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock )
1894 {
1895  kmp_user_lock_p lck;
1896  int rc;
1897 
1898  if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll )
1899  + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) {
1900  lck = (kmp_user_lock_p)user_lock;
1901  }
1902 #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1903  else if ( ( __kmp_user_lock_kind == lk_futex )
1904  && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked )
1905  <= OMP_NEST_LOCK_T_SIZE ) ) {
1906  lck = (kmp_user_lock_p)user_lock;
1907  }
1908 #endif
1909  else {
1910  lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" );
1911  }
1912 
1913 #if USE_ITT_BUILD
1914  __kmp_itt_lock_acquiring( lck );
1915 #endif /* USE_ITT_BUILD */
1916 
1917  rc = TEST_NESTED_LOCK( lck, gtid );
1918 #if USE_ITT_BUILD
1919  if ( rc ) {
1920  __kmp_itt_lock_acquired( lck );
1921  } else {
1922  __kmp_itt_lock_cancelled( lck );
1923  }
1924 #endif /* USE_ITT_BUILD */
1925  return rc;
1926 
1927  /* Can't use serial interval since not block structured */
1928 }
1929 
1930 
1931 /*--------------------------------------------------------------------------------------------------------------------*/
1932 
1933 /*
1934  * Interface to fast scalable reduce methods routines
1935  */
1936 
// The selected reduction method is kept in a thread-local structure so that
// the __kmpc_end_reduce* functions can read it back later.
// Another solution: re-determine the method in the __kmpc_end_reduce*
// functions (a new prototype would be required then).
// AT: which solution is better?

/* Store 'rmethod' as the packed reduction method of thread 'gtid'. */
#define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \
    ( __kmp_threads[ (gtid) ]->th.th_local.packed_reduction_method = (rmethod) )

/* Read the packed reduction method stored for thread 'gtid'. */
#define __KMP_GET_REDUCTION_METHOD(gtid) \
    ( __kmp_threads[ (gtid) ]->th.th_local.packed_reduction_method )
1945 
1946 // description of the packed_reduction_method variable: look at the macros in kmp.h
1947 
1948 
1949 // used in a critical section reduce block
1950 static __forceinline void
1951 __kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
1952 
1953  // this lock was visible to a customer and to the thread profiler as a serial overhead span
1954  // (although it's used for an internal purpose only)
1955  // why was it visible in previous implementation?
1956  // should we keep it visible in new reduce block?
1957  kmp_user_lock_p lck;
1958 
1959  // We know that the fast reduction code is only emitted by Intel compilers
1960  // with 32 byte critical sections. If there isn't enough space, then we
1961  // have to use a pointer.
1962  if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) {
1963  lck = (kmp_user_lock_p)crit;
1964  }
1965  else {
1966  lck = __kmp_get_critical_section_ptr( crit, loc, global_tid );
1967  }
1968  KMP_DEBUG_ASSERT( lck != NULL );
1969 
1970  if ( __kmp_env_consistency_check )
1971  __kmp_push_sync( global_tid, ct_critical, loc, lck );
1972 
1973  __kmp_acquire_user_lock_with_checks( lck, global_tid );
1974 }
1975 
1976 // used in a critical section reduce block
1977 static __forceinline void
1978 __kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) {
1979 
1980  kmp_user_lock_p lck;
1981 
1982  // We know that the fast reduction code is only emitted by Intel compilers with 32 byte critical
1983  // sections. If there isn't enough space, then we have to use a pointer.
1984  if ( __kmp_base_user_lock_size > 32 ) {
1985  lck = *( (kmp_user_lock_p *) crit );
1986  KMP_ASSERT( lck != NULL );
1987  } else {
1988  lck = (kmp_user_lock_p) crit;
1989  }
1990 
1991  if ( __kmp_env_consistency_check )
1992  __kmp_pop_sync( global_tid, ct_critical, loc );
1993 
1994  __kmp_release_user_lock_with_checks( lck, global_tid );
1995 
1996 } // __kmp_end_critical_section_reduce_block
1997 
1998 
1999 /* 2.a.i. Reduce Block without a terminating barrier */
2013 kmp_int32
2015  ident_t *loc, kmp_int32 global_tid,
2016  kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
2017  kmp_critical_name *lck ) {
2018 
2019  int retval;
2020  PACKED_REDUCTION_METHOD_T packed_reduction_method;
2021 
2022  KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) );
2023 
2024  // why do we need this initialization here at all?
2025  // Reduction clause can not be used as a stand-alone directive.
2026 
2027  // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
2028  // possible detection of false-positive race by the threadchecker ???
2029  if( ! TCR_4( __kmp_init_parallel ) )
2030  __kmp_parallel_initialize();
2031 
2032  // check correctness of reduce block nesting
2033  if ( __kmp_env_consistency_check )
2034  __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
2035 
2036  // it's better to check an assertion ASSERT( thr_state == THR_WORK_STATE )
2037 
2038  // packed_reduction_method value will be reused by __kmp_end_reduce* function, the value should be kept in a variable
2039  // the variable should be either a construct-specific or thread-specific property, not a team specific property
2040  // (a thread can reach the next reduce block on the next construct, reduce method may differ on the next construct)
2041  // an ident_t "loc" parameter could be used as a construct-specific property (what if loc == 0?)
2042  // (if both construct-specific and team-specific variables were shared, then unness extra syncs should be needed)
2043  // a thread-specific variable is better regarding two issues above (next construct and extra syncs)
2044  // a thread-specific "th_local.reduction_method" variable is used currently
2045  // each thread executes 'determine' and 'set' lines (no need to execute by one thread, to avoid unness extra syncs)
2046 
2047  packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2048  __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2049 
2050  if( packed_reduction_method == critical_reduce_block ) {
2051 
2052  __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2053  retval = 1;
2054 
2055  } else if( packed_reduction_method == empty_reduce_block ) {
2056 
2057  // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2058  retval = 1;
2059 
2060  } else if( packed_reduction_method == atomic_reduce_block ) {
2061 
2062  retval = 2;
2063 
2064  // all threads should do this pop here (because __kmpc_end_reduce_nowait() won't be called by the code gen)
2065  // (it's not quite good, because the checking block has been closed by this 'pop',
2066  // but atomic operation has not been executed yet, will be executed slightly later, literally on next instruction)
2067  if ( __kmp_env_consistency_check )
2068  __kmp_pop_sync( global_tid, ct_reduce, loc );
2069 
2070  } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2071 
2072  //AT: performance issue: a real barrier here
2073  //AT: (if master goes slow, other threads are blocked here waiting for the master to come and release them)
2074  //AT: (it's not what a customer might expect specifying NOWAIT clause)
2075  //AT: (specifying NOWAIT won't result in improvement of performance, it'll be confusing to a customer)
2076  //AT: another implementation of *barrier_gather*nowait() (or some other design) might go faster
2077  // and be more in line with sense of NOWAIT
2078  //AT: TO DO: do epcc test and compare times
2079 
2080  // this barrier should be invisible to a customer and to the thread profiler
2081  // (it's neither a terminating barrier nor customer's code, it's used for an internal purpose)
2082  retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func );
2083  retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2084 
2085  // all other workers except master should do this pop here
2086  // ( none of other workers will get to __kmpc_end_reduce_nowait() )
2087  if ( __kmp_env_consistency_check ) {
2088  if( retval == 0 ) {
2089  __kmp_pop_sync( global_tid, ct_reduce, loc );
2090  }
2091  }
2092 
2093  } else {
2094 
2095  // should never reach this block
2096  KMP_ASSERT( 0 ); // "unexpected method"
2097 
2098  }
2099 
2100  KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2101 
2102  return retval;
2103 }
2104 
2113 void
2114 __kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2115 
2116  PACKED_REDUCTION_METHOD_T packed_reduction_method;
2117 
2118  KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) );
2119 
2120  packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2121 
2122  if( packed_reduction_method == critical_reduce_block ) {
2123 
2124  __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2125 
2126  } else if( packed_reduction_method == empty_reduce_block ) {
2127 
2128  // usage: if team size == 1, no synchronization is required ( on Intel platforms only )
2129 
2130  } else if( packed_reduction_method == atomic_reduce_block ) {
2131 
2132  // neither master nor other workers should get here
2133  // (code gen does not generate this call in case 2: atomic reduce block)
2134  // actually it's better to remove this elseif at all;
2135  // after removal this value will checked by the 'else' and will assert
2136 
2137  } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2138 
2139  // only master gets here
2140 
2141  } else {
2142 
2143  // should never reach this block
2144  KMP_ASSERT( 0 ); // "unexpected method"
2145 
2146  }
2147 
2148  if ( __kmp_env_consistency_check )
2149  __kmp_pop_sync( global_tid, ct_reduce, loc );
2150 
2151  KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2152 
2153  return;
2154 }
2155 
2156 /* 2.a.ii. Reduce Block with a terminating barrier */
2157 
2171 kmp_int32
2173  ident_t *loc, kmp_int32 global_tid,
2174  kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
2175  void (*reduce_func)(void *lhs_data, void *rhs_data),
2176  kmp_critical_name *lck )
2177 {
2178  int retval;
2179  PACKED_REDUCTION_METHOD_T packed_reduction_method;
2180 
2181  KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) );
2182 
2183  // why do we need this initialization here at all?
2184  // Reduction clause can not be a stand-alone directive.
2185 
2186  // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed
2187  // possible detection of false-positive race by the threadchecker ???
2188  if( ! TCR_4( __kmp_init_parallel ) )
2189  __kmp_parallel_initialize();
2190 
2191  // check correctness of reduce block nesting
2192  if ( __kmp_env_consistency_check )
2193  __kmp_push_sync( global_tid, ct_reduce, loc, NULL );
2194 
2195  // it's better to check an assertion ASSERT( thr_state == THR_WORK_STATE )
2196 
2197  packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck );
2198  __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method );
2199 
2200  if( packed_reduction_method == critical_reduce_block ) {
2201 
2202  __kmp_enter_critical_section_reduce_block( loc, global_tid, lck );
2203  retval = 1;
2204 
2205  } else if( packed_reduction_method == empty_reduce_block ) {
2206 
2207  // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2208  retval = 1;
2209 
2210  } else if( packed_reduction_method == atomic_reduce_block ) {
2211 
2212  retval = 2;
2213 
2214  } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2215 
2216  //case tree_reduce_block:
2217  // this barrier should be visible to a customer and to the thread profiler
2218  // (it's a terminating barrier on constructs if NOWAIT not specified)
2219  retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func );
2220  retval = ( retval != 0 ) ? ( 0 ) : ( 1 );
2221 
2222  // all other workers except master should do this pop here
2223  // ( none of other workers except master will enter __kmpc_end_reduce() )
2224  if ( __kmp_env_consistency_check ) {
2225  if( retval == 0 ) { // 0: all other workers; 1: master
2226  __kmp_pop_sync( global_tid, ct_reduce, loc );
2227  }
2228  }
2229 
2230  } else {
2231 
2232  // should never reach this block
2233  KMP_ASSERT( 0 ); // "unexpected method"
2234 
2235  }
2236 
2237  KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) );
2238 
2239  return retval;
2240 }
2241 
2251 void
2252 __kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) {
2253 
2254  PACKED_REDUCTION_METHOD_T packed_reduction_method;
2255 
2256  KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) );
2257 
2258  packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid );
2259 
2260  // this barrier should be visible to a customer and to the thread profiler
2261  // (it's a terminating barrier on constructs if NOWAIT not specified)
2262 
2263  if( packed_reduction_method == critical_reduce_block ) {
2264 
2265  __kmp_end_critical_section_reduce_block( loc, global_tid, lck );
2266 
2267  // TODO: implicit barrier: should be exposed
2268  __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2269 
2270  } else if( packed_reduction_method == empty_reduce_block ) {
2271 
2272  // usage: if team size == 1, no synchronization is required ( Intel platforms only )
2273 
2274  // TODO: implicit barrier: should be exposed
2275  __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2276 
2277  } else if( packed_reduction_method == atomic_reduce_block ) {
2278 
2279  // TODO: implicit barrier: should be exposed
2280  __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL );
2281 
2282  } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) {
2283 
2284  // only master executes here (master releases all other workers)
2285  __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid );
2286 
2287  } else {
2288 
2289  // should never reach this block
2290  KMP_ASSERT( 0 ); // "unexpected method"
2291 
2292  }
2293 
2294  if ( __kmp_env_consistency_check )
2295  __kmp_pop_sync( global_tid, ct_reduce, loc );
2296 
2297  KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) );
2298 
2299  return;
2300 }
2301 
2302 #undef __KMP_GET_REDUCTION_METHOD
2303 #undef __KMP_SET_REDUCTION_METHOD
2304 
2305 /*-- end of interface to fast scalable reduce routines ---------------------------------------------------------------*/
2306 
2307 kmp_uint64
2308 __kmpc_get_taskid() {
2309 
2310  #if OMP_30_ENABLED
2311 
2312  kmp_int32 gtid;
2313  kmp_info_t * thread;
2314 
2315  gtid = __kmp_get_gtid();
2316  if ( gtid < 0 ) {
2317  return 0;
2318  }; // if
2319  thread = __kmp_thread_from_gtid( gtid );
2320  return thread->th.th_current_task->td_task_id;
2321 
2322  #else
2323 
2324  return 0;
2325 
2326  #endif
2327 
2328 } // __kmpc_get_taskid
2329 
2330 
2331 kmp_uint64
2332 __kmpc_get_parent_taskid() {
2333 
2334  #if OMP_30_ENABLED
2335 
2336  kmp_int32 gtid;
2337  kmp_info_t * thread;
2338  kmp_taskdata_t * parent_task;
2339 
2340  gtid = __kmp_get_gtid();
2341  if ( gtid < 0 ) {
2342  return 0;
2343  }; // if
2344  thread = __kmp_thread_from_gtid( gtid );
2345  parent_task = thread->th.th_current_task->td_parent;
2346  return ( parent_task == NULL ? 0 : parent_task->td_task_id );
2347 
2348  #else
2349 
2350  return 0;
2351 
2352  #endif
2353 
2354 } // __kmpc_get_parent_taskid
2355 
/*
 * Store the requested thread placement in the runtime's __kmp_place_* globals.
 * Only has an effect in KMP_MIC builds; otherwise the call does nothing.
 *
 * nC is stored in __kmp_place_num_cores,
 * nT in __kmp_place_num_threads_per_core,
 * nO in __kmp_place_core_offset.
 */
void __kmpc_place_threads(int nC, int nT, int nO)
{
#if KMP_MIC
    // Serial initialization is run first if it has not happened yet,
    // then the placement values are stored.
    if ( ! __kmp_init_serial ) {
        __kmp_serial_initialize();
    }

    __kmp_place_num_cores            = nC;
    __kmp_place_num_threads_per_core = nT;
    __kmp_place_core_offset          = nO;
#endif /* KMP_MIC */
}
2367 
2368 // end of file //
2369