38 #include "kmp_atomic.h"
39 #include "kmp_wrapper_getpid.h"
40 #include "kmp_environment.h"
43 #include "kmp_settings.h"
46 #include "kmp_error.h"
49 #define KMP_USE_PRCTL 0
50 #define KMP_USE_POOLED_ALLOC 0
/* Version-identification strings. Each array is initialised with the literal
   KMP_VERSION_PREFIX followed by a short human-readable description. */
/* The "alternative compiler support" string follows a KMP_GOMP_COMPAT test. */
57 #if defined(KMP_GOMP_COMPAT)
58 char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX
"alternative compiler support: yes";
61 char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX
"API version: "
70 char const __kmp_version_lock[] = KMP_VERSION_PREFIX
"lock type: run time selectable";
72 char const __kmp_version_perf_v19[] = KMP_VERSION_PREFIX
"perf v19: "
/* KMP_PERF_V19 is tested against KMP_ON and KMP_OFF; the #error below reports a
   configuration that selects neither. */
73 #if KMP_PERF_V19 == KMP_ON
75 #elif KMP_PERF_V19 == KMP_OFF
78 #error "Must specify KMP_PERF_V19 option"
81 char const __kmp_version_perf_v106[] = KMP_VERSION_PREFIX
"perf v106: "
/* Same ON/OFF selection scheme for KMP_PERF_V106. */
82 #if KMP_PERF_V106 == KMP_ON
84 #elif KMP_PERF_V106 == KMP_OFF
87 #error "Must specify KMP_PERF_V106 option"
/* File-scope state and forward declarations. */

/* Global thread descriptor object named __kmp_monitor (presumably the monitor
   thread's descriptor -- confirm against its users). */
97 kmp_info_t __kmp_monitor;
/* Externally visible (non-static) declaration; the remaining prototypes below are file-local. */
104 void __kmp_cleanup(
void );
106 static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *,
int tid,
int gtid );
107 static void __kmp_initialize_team(
111 kmp_internal_control_t * new_icvs,
114 int new_set_nproc,
int new_set_dynamic,
int new_set_nested,
115 int new_set_blocktime,
int new_bt_intervals,
int new_bt_set
118 static void __kmp_partition_places( kmp_team_t *team );
119 static void __kmp_do_serial_initialize(
void );
/* The load-balance helper is declared after a USE_LOAD_BALANCE test. */
122 #ifdef USE_LOAD_BALANCE
123 static int __kmp_load_balance_nproc( kmp_root_t * root,
int set_nproc );
126 static int __kmp_expand_threads(
int nWish,
int nNeed);
127 static int __kmp_unregister_root_other_thread(
int gtid );
128 static void __kmp_unregister_library(
void );
129 static void __kmp_reap_thread( kmp_info_t * thread,
int is_root );
/* Starts out NULL. NOTE(review): presumably an insertion cursor into the thread
   pool -- confirm against the code that updates it. */
130 static kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
/*
 * Determine the global thread id (gtid) of the calling thread.
 *
 * Returns KMP_GTID_DNE while __kmp_init_gtid is not set. Otherwise the lookup
 * strategy depends on __kmp_gtid_mode: a value >= 3 selects the "TDATA" path
 * (after the KMP_TDATA_GTID test), a value >= 2 returns __kmp_gtid_get_specific(), and
 * anything lower falls back to the "internal algorithm", which compares the
 * address of a local variable against each registered thread's recorded stack
 * range.
 */
141 __kmp_get_global_thread_id( )
144 kmp_info_t **other_threads;
150 KA_TRACE( 1000, (
"*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
151 __kmp_nth, __kmp_all_nth ));
/* No gtid can exist before gtid support has been initialised. */
158 if ( !TCR_4(__kmp_init_gtid) )
return KMP_GTID_DNE;
160 #ifdef KMP_TDATA_GTID
161 if ( TCR_4(__kmp_gtid_mode) >= 3) {
162 KA_TRACE( 1000, (
"*** __kmp_get_global_thread_id: using TDATA\n" ));
166 if ( TCR_4(__kmp_gtid_mode) >= 2) {
167 KA_TRACE( 1000, (
"*** __kmp_get_global_thread_id: using keyed TLS\n" ));
168 return __kmp_gtid_get_specific();
170 KA_TRACE( 1000, (
"*** __kmp_get_global_thread_id: using internal alg.\n" ));
/* The address of a local variable serves as a probe for the current stack. */
172 stack_addr = (
char*) & stack_data;
173 other_threads = __kmp_threads;
/* Scan every slot of the thread table for a stack range containing the probe. */
188 for( i = 0 ; i < __kmp_threads_capacity ; i++ ) {
190 kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
193 stack_size = (size_t)TCR_PTR(thr -> th.th_info.ds.ds_stacksize);
194 stack_base = (
char *)TCR_PTR(thr -> th.th_info.ds.ds_stackbase);
/* The probe must lie at or below the recorded base and within ds_stacksize bytes
   of it, i.e. the range is measured downward from ds_stackbase. */
198 if( stack_addr <= stack_base ) {
199 size_t stack_diff = stack_base - stack_addr;
201 if( stack_diff <= stack_size ) {
/* Debug builds cross-check the stack-derived id against the TLS value. */
204 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i );
211 KA_TRACE( 1000, (
"*** __kmp_get_global_thread_id: internal alg. failed to find "
212 "thread, using TLS\n" ));
/* NOTE(review): i indexes other_threads[] below; if __kmp_gtid_get_specific()
   can return a negative id, a guard is required before the accesses that
   follow -- none is visible here, confirm. */
213 i = __kmp_gtid_get_specific();
/* A thread whose stack is not marked growable and whose probe fell outside the
   recorded range is reported as a fatal StackOverflow. */
221 if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) {
222 KMP_FATAL( StackOverflow, i );
/* Refine the recorded stack range so that it includes the probe address. */
225 stack_base = (
char *) other_threads[i] -> th.th_info.ds.ds_stackbase;
226 if( stack_addr > stack_base ) {
227 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
228 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
229 other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base);
231 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr);
/* Report the refined range when storage-map printing is enabled. */
235 if ( __kmp_storage_map ) {
236 char *stack_end = (
char *) other_threads[i] -> th.th_info.ds.ds_stackbase;
237 char *stack_beg = stack_end - other_threads[i] -> th.th_info.ds.ds_stacksize;
238 __kmp_print_storage_map_gtid( i, stack_beg, stack_end,
239 other_threads[i] -> th.th_info.ds.ds_stacksize,
240 "th_%d stack (refinement)", i );
/*
 * Obtain the caller's gtid, registering the caller as a new root thread when
 * it does not have one yet.
 *
 * The lookup mirrors __kmp_get_global_thread_id (TDATA / keyed TLS / internal
 * algorithm, selected by __kmp_gtid_mode). If the result is KMP_GTID_DNE the
 * function takes __kmp_initz_lock, runs serial initialisation if it has not
 * happened yet, or otherwise calls __kmp_register_root(FALSE).
 */
246 __kmp_get_global_thread_id_reg( )
250 if ( !__kmp_init_serial ) {
253 #ifdef KMP_TDATA_GTID
254 if ( TCR_4(__kmp_gtid_mode) >= 3 ) {
255 KA_TRACE( 1000, (
"*** __kmp_get_global_thread_id_reg: using TDATA\n" ));
259 if ( TCR_4(__kmp_gtid_mode) >= 2 ) {
260 KA_TRACE( 1000, (
"*** __kmp_get_global_thread_id_reg: using keyed TLS\n" ));
261 gtid = __kmp_gtid_get_specific();
263 KA_TRACE( 1000, (
"*** __kmp_get_global_thread_id_reg: using internal alg.\n" ));
264 gtid = __kmp_get_global_thread_id();
/* No gtid yet: this thread has not been seen by the runtime before. */
268 if( gtid == KMP_GTID_DNE ) {
269 KA_TRACE( 10, (
"__kmp_get_global_thread_id_reg: Encountered new root thread. "
270 "Registering a new gtid.\n" ));
271 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
/* __kmp_init_serial is re-tested under the lock before initialising. */
272 if( !__kmp_init_serial ) {
273 __kmp_do_serial_initialize();
/* After serial initialisation the id is read back from TLS. */
274 gtid = __kmp_gtid_get_specific();
276 gtid = __kmp_register_root(FALSE);
278 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
/* Debug builds assert that a valid (non-negative) id was obtained. */
282 KMP_DEBUG_ASSERT( gtid >=0 );
/*
 * Report and sanity-check the stack range of thread `th`.
 *
 * When __kmp_storage_map is set, the thread's stack range is printed (the
 * monitor thread is labelled "mon", other threads by gtid). When
 * __kmp_env_checks is TRUE and KMP_UBER_GTID(gtid) is false, the range is
 * compared with the recorded stack range of every other registered thread and
 * an intersection raises the fatal StackOverlap message with the
 * ChangeStackLimit hint.
 */
289 __kmp_check_stack_overlap( kmp_info_t *th )
292 char *stack_beg = NULL;
293 char *stack_end = NULL;
296 KA_TRACE(10,(
"__kmp_check_stack_overlap: called\n"));
297 if ( __kmp_storage_map ) {
/* The range is [ds_stackbase - ds_stacksize, ds_stackbase). */
298 stack_end = (
char *) th -> th.th_info.ds.ds_stackbase;
299 stack_beg = stack_end - th -> th.th_info.ds.ds_stacksize;
301 gtid = __kmp_gtid_from_thread( th );
303 if (gtid == KMP_GTID_MONITOR) {
304 __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
305 "th_%s stack (%s)",
"mon",
306 ( th->th.th_info.ds.ds_stackgrow ) ?
"initial" :
"actual" );
308 __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
309 "th_%d stack (%s)", gtid,
310 ( th->th.th_info.ds.ds_stackgrow ) ?
"initial" :
"actual" );
/* NOTE(review): gtid is assigned inside the macro argument; if KMP_UBER_GTID
   evaluates its argument more than once the lookup is repeated -- confirm the
   macro definition. */
315 if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid = __kmp_gtid_from_thread( th )))
317 KA_TRACE(10,(
"__kmp_check_stack_overlap: performing extensive checking\n"));
/* The bounds were only computed above when storage-map printing was enabled. */
318 if ( stack_beg == NULL ) {
319 stack_end = (
char *) th -> th.th_info.ds.ds_stackbase;
320 stack_beg = stack_end - th -> th.th_info.ds.ds_stacksize;
323 for( f=0 ; f < __kmp_threads_capacity ; f++ ) {
324 kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);
/* Skip empty slots and the thread being checked. */
326 if( f_th && f_th != th ) {
327 char *other_stack_end = (
char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
328 char *other_stack_beg = other_stack_end -
329 (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
/* NOTE(review): only the two endpoints of this thread's range are tested, with
   strict inequalities. A range that fully encloses the other thread's stack, or
   that coincides with it exactly, is therefore not reported. */
330 if((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
331 (stack_end > other_stack_beg && stack_end < other_stack_end)) {
/* Print the conflicting range before the fatal message is issued. */
334 if ( __kmp_storage_map )
335 __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end,
336 (
size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
337 "th_%d stack (overlapped)",
338 __kmp_gtid_from_thread( f_th ) );
340 __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null );
345 KA_TRACE(10,(
"__kmp_check_stack_overlap: returning\n"));
/* Macro form of __kmp_static_delay: expands to nothing, so call sites compile
   away. NOTE(review): presumably selected by a configuration test that is not
   shown next to it -- confirm which builds use the macro and which the
   function below. */
352 # define __kmp_static_delay( arg )
/* Function form of __kmp_static_delay; the visible body only validates `arg`. */
356 __kmp_static_delay(
int arg )
/* The assertion used depends on the KMP_ARCH_X86_64 && KMP_OS_LINUX test:
   arg != 0 and arg >= 0 are the two alternatives visible here. */
359 # if KMP_ARCH_X86_64 && KMP_OS_LINUX
360 KMP_ASSERT( arg != 0 );
362 KMP_ASSERT( arg >= 0 );
/* Companion routine taking a single int argument. */
368 __kmp_static_yield(
int arg )
/*
 * Wait until *spinner holds the expected value.
 *
 * The routine first tests the location once and, if it already matches,
 * reports the acquisition immediately. Otherwise it loops: while waiting it
 * may execute tasks from the thread's task team, yields the processor, keeps
 * the th_active_in_pool bookkeeping in step with th_in_pool, and calls
 * __kmp_suspend() once the global time counter has passed the computed
 * `hibernate` deadline. The abort state (__kmp_global.g.g_done / g_abort) is
 * checked inside the loop and again after a suspend.
 */
382 __kmp_wait_sleep( kmp_info_t *this_thr,
383 volatile kmp_uint *spinner,
386 USE_ITT_BUILD_ARG (
void * itt_sync_obj)
390 register volatile kmp_uint *spin = spinner;
391 register kmp_uint check = checker;
392 register kmp_uint32 spins;
393 register int hibernate;
399 KMP_FSYNC_SPIN_INIT( spin, NULL );
/* Fast path: the awaited value is already present. */
400 if( TCR_4(*spin) == check ) {
401 KMP_FSYNC_SPIN_ACQUIRED( spin );
405 th_gtid = this_thr->th.th_info.ds.ds_gtid;
407 KA_TRACE( 20, (
"__kmp_wait_sleep: T#%d waiting for spin(%p) == %d\n",
412 KMP_INIT_YIELD( spins );
/* A deadline is only computed when the block time is not KMP_MAX_BLOCKTIME. */
414 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
419 #ifdef KMP_ADJUST_BLOCKTIME
420 if ( __kmp_zero_bt && ! this_thr->th.th_team_bt_set ) {
424 hibernate = this_thr->th.th_team_bt_intervals;
427 hibernate = this_thr->th.th_team_bt_intervals;
429 if ( hibernate == 0 ) {
/* Convert the interval count into an absolute value of the global time counter. */
432 hibernate += TCR_4( __kmp_global.g.g_time.dt.t_value );
434 KF_TRACE( 20, (
"__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
435 th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
436 hibernate - __kmp_global.g.g_time.dt.t_value ));
/* Main wait loop. */
442 while( TCR_4(*spin) != check ) {
458 kmp_task_team_t * task_team = NULL;
/* Tasking: drop the reference to an inactive task team, otherwise run tasks
   while waiting when tasking is enabled for this thread. */
459 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
460 task_team = this_thr->th.th_task_team;
461 if ( task_team != NULL ) {
462 if ( ! TCR_SYNC_4( task_team->tt.tt_active ) ) {
463 KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( this_thr->th.th_info.ds.ds_tid ) );
464 __kmp_unref_task_team( task_team, this_thr );
465 }
else if ( KMP_TASKING_ENABLED( task_team, this_thr->th.th_task_state ) ) {
466 __kmp_execute_tasks( this_thr, th_gtid, spin, check, final_spin, &flag
467 USE_ITT_BUILD_ARG( itt_sync_obj )
474 KMP_FSYNC_SPIN_PREPARE( spin );
/* Library shutdown in progress: abort this thread if an abort was requested. */
475 if( TCR_4(__kmp_global.g.g_done) ) {
476 if( __kmp_global.g.g_abort )
477 __kmp_abort_thread( );
481 __kmp_static_delay( 1 );
/* Yield when more threads exist than available processors, then apply the
   spin-count based yield. */
485 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
489 KMP_YIELD_SPIN( spins );
/* Keep th_active_in_pool consistent with th_in_pool; both are normalised to
   0/1 with !! before comparison. */
495 in_pool = !!TCR_4(this_thr->th.th_in_pool);
496 if ( in_pool != !!this_thr->th.th_active_in_pool ) {
502 (kmp_int32 *) &__kmp_thread_pool_active_nth );
503 this_thr->th.th_active_in_pool = TRUE;
523 (kmp_int32 *) &__kmp_thread_pool_active_nth );
524 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
525 this_thr->th.th_active_in_pool = FALSE;
/* The tests below decide whether the thread may go to sleep in this iteration:
   found tasks, an infinite block time, and an unexpired deadline are each
   checked (presumably each keeps the thread spinning -- confirm). */
531 if ( ( task_team != NULL ) && TCR_4(task_team->tt.tt_found_tasks) ) {
537 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
542 if( TCR_4( __kmp_global.g.g_time.dt.t_value ) <= hibernate ) {
546 KF_TRACE( 50, (
"__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid ) );
548 __kmp_suspend( th_gtid, spin, check );
/* Re-check the abort state after waking up. */
550 if( TCR_4( __kmp_global.g.g_done ) && __kmp_global.g.g_abort ) {
551 __kmp_abort_thread( );
558 KMP_FSYNC_SPIN_ACQUIRED( spin );
/*
 * Release a thread that is waiting on `spin`.
 *
 * The location is advanced with an atomic fetch-and-add:
 * KMP_TEST_THEN_ADD4_ACQ32 when fetchadd_fence is kmp_acquire_fence,
 * KMP_TEST_THEN_ADD4_32 otherwise. If the block time is not KMP_MAX_BLOCKTIME
 * and the previous value carried KMP_BARRIER_SLEEP_STATE, the target thread is
 * woken with __kmp_resume().
 *
 *   target_thr     - thread being released
 *   spin           - location the target is waiting on (must be non-NULL)
 *   fetchadd_fence - kmp_acquire_fence or kmp_release_fence
 */
571 __kmp_release( kmp_info_t *target_thr,
volatile kmp_uint *spin,
572 enum kmp_mem_fence_type fetchadd_fence )
576 int target_gtid = target_thr->th.th_info.ds.ds_gtid;
/* The caller's own gtid is used for tracing only; -1 before gtid initialisation. */
577 int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
580 KF_TRACE( 20, (
"__kmp_release: T#%d releasing T#%d spin(%p) fence_type(%d)\n",
581 gtid, target_gtid, spin, fetchadd_fence ));
583 KMP_DEBUG_ASSERT( spin );
585 KMP_DEBUG_ASSERT( fetchadd_fence == kmp_acquire_fence ||
586 fetchadd_fence == kmp_release_fence );
588 KMP_FSYNC_RELEASING( spin );
/* old_spin receives the value held before the add. */
590 old_spin = ( fetchadd_fence == kmp_acquire_fence )
591 ? KMP_TEST_THEN_ADD4_ACQ32( (
volatile kmp_int32 *) spin )
592 : KMP_TEST_THEN_ADD4_32( (
volatile kmp_int32 *) spin );
594 KF_TRACE( 100, (
"__kmp_release: T#%d old spin(%p)=%d, set new spin=%d\n",
595 gtid, spin, old_spin, *spin ) );
/* Threads can only be asleep when the block time is finite. */
597 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
599 if ( old_spin & KMP_BARRIER_SLEEP_STATE ) {
/* NOTE(review): target_gtid is also declared at the top of the function; where
   both declarations are compiled this inner one shadows the outer one. */
601 int target_gtid = target_thr->th.th_info.ds.ds_gtid;
604 KF_TRACE( 50, (
"__kmp_release: T#%d waking up thread T#%d since sleep spin(%p) set\n",
605 gtid, target_gtid, spin ));
606 __kmp_resume( target_gtid, spin );
608 KF_TRACE( 50, (
"__kmp_release: T#%d don't wake up thread T#%d since sleep spin(%p) not set\n",
609 gtid, target_gtid, spin ));
/* Takes no arguments. The only state visible here is a function-local static
   flag `done`, initialised to FALSE. NOTE(review): presumably the loop spins
   until `done` becomes true -- confirm against the loop body. */
617 __kmp_infinite_loop(
void )
619 static int done = FALSE;
/* Size of the on-stack buffers used to assemble message format strings. */
626 #define MAX_MESSAGE 512
/*
 * Print one storage-map line to kmp_err.
 *
 *   gtid   - global thread id the range belongs to (used by the optional
 *            data-placement report)
 *   p1, p2 - start and end address of the range
 *   size   - size of the range in bytes
 *   format - printf-style description; it is embedded into the line's format
 *            string and then expanded with the variadic arguments
 *
 * Output is serialised with __kmp_stdio_lock. When KMP_PRINT_DATA_PLACEMENT is
 * enabled the routine additionally reports which memory node backs the pages
 * of the range.
 */
629 __kmp_print_storage_map_gtid(
int gtid,
void *p1,
void *p2,
size_t size,
char const *format, ...) {
630 char buffer[MAX_MESSAGE];
634 va_start( ap, format);
/* Bounded formatting: `format` is caller-supplied and of arbitrary length, so
   an unbounded sprintf could overrun buffer[MAX_MESSAGE]. snprintf truncates
   instead, the same way __kmp_warn builds its format string. */
snprintf( buffer, sizeof(buffer),
"OMP storage map: %p %p%8lu %s\n", p1, p2, (
unsigned long) size, format );
636 __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
637 __kmp_vprintf( kmp_err, buffer, ap );
638 #if KMP_PRINT_DATA_PLACEMENT
/* Placement is only reported for a well-formed range whose extent equals `size`.
   NOTE(review): this compares a pointer difference with a size_t. */
640 if(p1 <= p2 && (
char*)p2 - (
char*)p1 == size) {
641 if( __kmp_storage_map_verbose ) {
642 node = __kmp_get_host_node(p1);
644 __kmp_storage_map_verbose = FALSE;
648 int localProc = __kmp_get_cpu_from_gtid(gtid);
/* Round p1 down to its page start and p2 to the start of the last page in the range. */
650 p1 = (
void *)( (
size_t)p1 & ~((size_t)PAGE_SIZE - 1) );
651 p2 = (
void *)( ((
size_t) p2 - 1) & ~((
size_t)PAGE_SIZE - 1) );
653 __kmp_printf_no_lock(
" GTID %d localNode %d\n", gtid, localProc>>1);
655 __kmp_printf_no_lock(
" GTID %d\n", gtid);
/* NOTE(review): assigning through a cast ("(char*)p1 += ...") is not valid ISO C
   and relies on a compiler extension. */
663 (
char*)p1 += PAGE_SIZE;
664 }
while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
665 __kmp_printf_no_lock(
" %p-%p memNode %d\n", last,
666 (
char*)p1 - 1, lastNode);
669 __kmp_printf_no_lock(
" %p-%p memNode %d\n", p1,
670 (
char*)p1 + (PAGE_SIZE - 1), __kmp_get_host_node(p1));
672 __kmp_printf_no_lock(
" %p-%p memNode %d\n", p2,
673 (
char*)p2 + (PAGE_SIZE - 1), __kmp_get_host_node(p2));
679 __kmp_printf_no_lock(
" %s\n", KMP_I18N_STR( StorageMapWarning ) );
682 __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
/*
 * Print a warning of the form "OMP warning: <message>" to kmp_err.
 *
 *   format - printf-style message; it is embedded into a bounded format buffer
 *            and then expanded with the variadic arguments by __kmp_vprintf
 *
 * __kmp_generate_warnings is tested against kmp_warnings_off first
 * (presumably the function returns without printing in that case -- confirm).
 * Output is serialised with __kmp_stdio_lock.
 */
686 __kmp_warn(
char const * format, ... )
688 char buffer[MAX_MESSAGE];
691 if ( __kmp_generate_warnings == kmp_warnings_off ) {
695 va_start( ap, format );
/* snprintf bounds the assembled format string to the size of `buffer`. */
697 snprintf( buffer,
sizeof(buffer) ,
"OMP warning: %s\n", format );
698 __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock );
699 __kmp_vprintf( kmp_err, buffer, ap );
700 __kmp_release_bootstrap_lock( & __kmp_stdio_lock );
/*
 * Abort handling for the whole process. The visible steps are: take
 * __kmp_exit_lock, dump the debug buffer when __kmp_debug_buf is set, record
 * SIGABRT in __kmp_global.g.g_abort inside the KMP_OS_WINDOWS test, call
 * __kmp_infinite_loop(), and release __kmp_exit_lock.
 */
706 __kmp_abort_process()
710 __kmp_acquire_bootstrap_lock( & __kmp_exit_lock );
712 if ( __kmp_debug_buf ) {
713 __kmp_dump_debug_buffer();
/* KMP_OS_WINDOWS is a configuration macro tested in an ordinary `if`. */
716 if ( KMP_OS_WINDOWS ) {
719 __kmp_global.g.g_abort = SIGABRT;
737 __kmp_infinite_loop();
738 __kmp_release_bootstrap_lock( & __kmp_exit_lock );
/* Per-thread abort handling: the visible body hands control to __kmp_infinite_loop(). */
743 __kmp_abort_thread(
void )
747 __kmp_infinite_loop();
/*
 * Print storage-map lines for a thread descriptor and selected members.
 *
 *   thr  - thread descriptor to describe
 *   gtid - global thread id used in the printed labels
 *
 * Each call to __kmp_print_storage_map_gtid passes a start address, an end
 * address, the size of the region, and a "th_<gtid>..." label.
 */
758 __kmp_print_thread_storage_map( kmp_info_t *thr,
int gtid )
/* The descriptor as a whole. */
760 __kmp_print_storage_map_gtid( gtid, thr, thr + 1,
sizeof(kmp_info_t),
"th_%d", gtid );
/* NOTE(review): the end address of th_info is taken as the address of th_team,
   and that of th_local as the address of th_pri_head; this assumes those
   members are adjacent in the structure -- confirm against the declaration. */
762 __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team,
sizeof(kmp_desc_t),
763 "th_%d.th_info", gtid );
765 __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head,
sizeof(kmp_local_t),
766 "th_%d.th_local", gtid );
/* The whole barrier array, then one line per barrier type. */
768 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
769 sizeof(kmp_balign_t) * bs_last_barrier,
"th_%d.th_bar", gtid );
771 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier],
772 &thr->th.th_bar[bs_plain_barrier+1],
773 sizeof(kmp_balign_t),
"th_%d.th_bar[plain]", gtid);
775 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier],
776 &thr->th.th_bar[bs_forkjoin_barrier+1],
777 sizeof(kmp_balign_t),
"th_%d.th_bar[forkjoin]", gtid);
/* The reduction barrier entry exists only with KMP_FAST_REDUCTION_BARRIER. */
779 #if KMP_FAST_REDUCTION_BARRIER
780 __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier],
781 &thr->th.th_bar[bs_reduction_barrier+1],
782 sizeof(kmp_balign_t),
"th_%d.th_bar[reduction]", gtid);
783 #endif // KMP_FAST_REDUCTION_BARRIER
/*
 * Print storage-map lines for a team structure and selected members.
 *
 *   header  - label prefix used in every printed line
 *   team    - team structure to describe
 *   team_id - numeric id appended to the label prefix
 *   num_thr - element count used for the t_dispatch and t_threads arrays
 *
 * All lines are printed with gtid -1.
 */
792 __kmp_print_team_storage_map(
const char *header, kmp_team_t *team,
int team_id,
int num_thr )
/* Number of dispatch buffers described below: KMP_MAX_DISP_BUF when the team
   allows more than one thread, otherwise 2. */
794 int num_disp_buff = team->t.t_max_nproc > 1 ? KMP_MAX_DISP_BUF : 2;
795 __kmp_print_storage_map_gtid( -1, team, team + 1,
sizeof(kmp_team_t),
"%s_%d",
798 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier],
799 sizeof(kmp_balign_team_t) * bs_last_barrier,
"%s_%d.t_bar", header, team_id );
802 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1],
803 sizeof(kmp_balign_team_t),
"%s_%d.t_bar[plain]", header, team_id );
805 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1],
806 sizeof(kmp_balign_team_t),
"%s_%d.t_bar[forkjoin]", header, team_id );
808 #if KMP_FAST_REDUCTION_BARRIER
809 __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1],
810 sizeof(kmp_balign_team_t),
"%s_%d.t_bar[reduction]", header, team_id );
811 #endif // KMP_FAST_REDUCTION_BARRIER
813 __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
814 sizeof(kmp_disp_t) * num_thr,
"%s_%d.t_dispatch", header, team_id );
816 __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
817 sizeof(kmp_info_t *) * num_thr,
"%s_%d.t_threads", header, team_id );
819 __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff],
820 sizeof(dispatch_shared_info_t) * num_disp_buff,
"%s_%d.t_disp_buffer",
855 __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data,
856 sizeof(kmp_taskq_t),
"%s_%d.t_taskq", header, team_id );
/* Allocator start-up hook. Intentionally empty: it performs no work.
   Declared with an explicit (void) parameter list so the definition is a
   real prototype and calls with arguments are diagnosed. */
static void __kmp_init_allocator( void ) {}
/* Allocator shutdown hook. Intentionally empty: it performs no work.
   Declared with an explicit (void) parameter list so the definition is a
   real prototype and calls with arguments are diagnosed. */
static void __kmp_fini_allocator( void ) {}
/* Per-thread allocator shutdown hook. Intentionally empty: it performs no work.
   Declared with an explicit (void) parameter list so the definition is a
   real prototype and calls with arguments are diagnosed. */
static void __kmp_fini_allocator_thread( void ) {}
/* The code from here on follows a GUIDEDLL_EXPORTS test. */
865 #ifdef GUIDEDLL_EXPORTS
/* Put a bootstrap lock back into its initial state by re-running
   __kmp_init_bootstrap_lock() on it. */
870 __kmp_reset_lock( kmp_bootstrap_lock_t* lck ) {
872 __kmp_init_bootstrap_lock( lck );
/*
 * Reset the runtime's bootstrap locks during process detach.
 *
 *   gtid_req - gtid of the requesting thread; it is skipped by the scan
 *
 * The scan walks the thread table, ignoring empty slots, the requesting
 * thread and entries with a negative gtid, and queries each remaining thread
 * with __kmp_is_thread_alive(). Once thread_count reaches 0 the scan stops and
 * __kmp_forkjoin_lock and __kmp_stdio_lock are re-initialised.
 */
876 __kmp_reset_locks_on_process_detach(
int gtid_req ) {
893 for( i = 0; i < __kmp_threads_capacity; ++i ) {
/* NOTE(review): this test does not depend on i and could be hoisted out of the loop. */
894 if( !__kmp_threads )
continue;
895 kmp_info_t* th = __kmp_threads[ i ];
896 if( th == NULL )
continue;
897 int gtid = th->th.th_info.ds.ds_gtid;
898 if( gtid == gtid_req )
continue;
899 if( gtid < 0 )
continue;
901 int alive = __kmp_is_thread_alive( th, &exit_val );
/* Leave the enclosing loop once thread_count is zero. */
906 if( thread_count == 0 )
break;
/* Re-initialise the locks via __kmp_reset_lock(). */
913 __kmp_reset_lock( &__kmp_forkjoin_lock );
915 __kmp_reset_lock( &__kmp_stdio_lock );
/*
 * Windows DLL entry point; dispatches on the notification in fdwReason.
 *
 *   DLL_PROCESS_ATTACH / DLL_THREAD_ATTACH - traced only in the visible code
 *   DLL_PROCESS_DETACH - tests lpReserved, resets the bootstrap locks and shuts
 *                        the library down for the calling thread's gtid
 *   DLL_THREAD_DETACH  - ends the runtime's state for the calling thread
 */
922 DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) {
925 switch( fdwReason ) {
927 case DLL_PROCESS_ATTACH:
928 KA_TRACE( 10, (
"DllMain: PROCESS_ATTACH\n" ));
932 case DLL_PROCESS_DETACH:
933 KA_TRACE( 10, (
"DllMain: PROCESS_DETACH T#%d\n",
934 __kmp_gtid_get_specific() ));
/* NOTE(review): presumably the lock reset below is what the lpReserved test
   guards -- confirm the extent of the conditional. */
936 if( lpReserved != NULL )
963 __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() );
966 __kmp_internal_end_library( __kmp_gtid_get_specific() );
970 case DLL_THREAD_ATTACH:
971 KA_TRACE( 10, (
"DllMain: THREAD_ATTACH\n" ));
977 case DLL_THREAD_DETACH:
978 KA_TRACE( 10, (
"DllMain: THREAD_DETACH T#%d\n",
979 __kmp_gtid_get_specific() ));
981 __kmp_internal_end_thread( __kmp_gtid_get_specific() );
/*
 * Switch the library mode flag kept in bit 0 of __kmp_yield_init.
 *
 *   status - selects whether bit 0 is set or cleared (the test on it is not
 *            shown alongside the two assignments)
 *
 * The previous value of bit 0 is captured in old_status first (presumably it
 * is the return value -- confirm).
 */
997 __kmp_change_library(
int status )
1001 old_status = __kmp_yield_init & 1;
/* Set bit 0 ... */
1004 __kmp_yield_init |= 1;
/* ... or clear it. */
1007 __kmp_yield_init &= ~1;
/*
 * Entry to an ordered section inside a parallel region.
 *
 *   gtid_ref - pointer to the caller's global thread id
 *   cid_ref  - not referenced in the visible code
 *   loc_ref  - source location passed to the consistency checker
 *
 * With consistency checking enabled and an active root, an
 * ct_ordered_in_parallel entry is pushed. With BUILD_PARALLEL_ORDERED and a
 * non-serialized team, the thread waits until the team's ordered counter
 * equals its own tid.
 */
1020 __kmp_parallel_deo(
int *gtid_ref,
int *cid_ref,
ident_t *loc_ref )
1022 int gtid = *gtid_ref;
1023 #ifdef BUILD_PARALLEL_ORDERED
1024 kmp_team_t *team = __kmp_team_from_gtid( gtid );
1027 if( __kmp_env_consistency_check ) {
1028 if( __kmp_threads[gtid] -> th.th_root -> r.r_active )
1029 __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL );
1031 #ifdef BUILD_PARALLEL_ORDERED
1032 if( !team -> t.t_serialized ) {
/* Wait for this thread's turn: t_ordered.dt.t_value == tid. */
1036 KMP_WAIT_YIELD(&team -> t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL);
/*
 * Exit from an ordered section inside a parallel region.
 *
 *   gtid_ref - pointer to the caller's global thread id
 *   cid_ref  - not referenced in the visible code
 *   loc_ref  - source location passed to the consistency checker
 *
 * With consistency checking enabled and an active root, the
 * ct_ordered_in_parallel entry is popped. With BUILD_PARALLEL_ORDERED and a
 * non-serialized team, the team's ordered counter is advanced to the next tid.
 */
1047 __kmp_parallel_dxo(
int *gtid_ref,
int *cid_ref,
ident_t *loc_ref )
1049 int gtid = *gtid_ref;
1050 #ifdef BUILD_PARALLEL_ORDERED
1051 int tid = __kmp_tid_from_gtid( gtid );
1052 kmp_team_t *team = __kmp_team_from_gtid( gtid );
1055 if( __kmp_env_consistency_check ) {
1056 if( __kmp_threads[gtid] -> th.th_root -> r.r_active )
1057 __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref );
1059 #ifdef BUILD_PARALLEL_ORDERED
1060 if ( ! team -> t.t_serialized ) {
/* Hand the turn to thread (tid + 1), wrapping around at the team size. */
1065 team -> t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc );
/*
 * Entry to a `single` construct.
 *
 *   gtid    - global thread id of the caller
 *   id_ref  - source location; stored in the thread's th_ident
 *   push_ws - when nonzero (and the thread won the construct), the consistency
 *             checker pushes a ct_psingle workshare instead of only checking it
 *
 * Parallel initialisation is performed first if it has not happened yet. For a
 * non-serialized team the thread advances its own construct counter and tries
 * to install it in the team with a compare-and-store; `status` receives the
 * outcome of that operation.
 */
1081 __kmp_enter_single(
int gtid,
ident_t *id_ref,
int push_ws )
1087 if( ! TCR_4(__kmp_init_parallel) )
1088 __kmp_parallel_initialize();
1090 th = __kmp_threads[ gtid ];
1091 team = th -> th.th_team;
1094 th->th.th_ident = id_ref;
1096 if ( team -> t.t_serialized ) {
/* Remember the counter value before the increment; it is the value the
   compare-and-store expects to find in the team. */
1099 kmp_int32 old_this = th->th.th_local.this_construct;
1101 ++th->th.th_local.this_construct;
1106 status = KMP_COMPARE_AND_STORE_ACQ32(&team -> t.t_construct, old_this,
1107 th->th.th_local.this_construct);
1110 if( __kmp_env_consistency_check ) {
1111 if (status && push_ws) {
1112 __kmp_push_workshare( gtid, ct_psingle, id_ref );
1114 __kmp_check_workshare( gtid, ct_psingle, id_ref );
/* ITT notification for the start of the single region. */
1119 __kmp_itt_single_start( gtid );
/*
 * Exit from a `single` construct: notifies ITT and, with consistency checking
 * enabled, pops the ct_psingle workshare for thread `gtid`.
 */
1126 __kmp_exit_single(
int gtid )
1129 __kmp_itt_single_end( gtid );
1131 if( __kmp_env_consistency_check )
1132 __kmp_pop_workshare( gtid, ct_psingle, NULL );
/*
 * Gather phase of the linear barrier.
 *
 *   bt           - barrier type; selects the per-thread and per-team barrier state
 *   this_thr     - descriptor of the calling thread
 *   reduce       - reduction callback combining two reduce_data blocks
 *   itt_sync_obj - ITT object, present only in ITT builds
 *
 * A non-master thread signals its arrival by calling __kmp_release() on its own
 * b_arrived flag, addressed to thread 0 of the team. The master thread waits
 * for every other thread's b_arrived flag in tid order, folds that thread's
 * reduce_data into its own through `reduce`, and finally stores the new state
 * into the team's b_arrived.
 */
1140 __kmp_linear_barrier_gather(
enum barrier_type bt,
1141 kmp_info_t *this_thr,
1144 void (*reduce)(
void *,
void *)
1145 USE_ITT_BUILD_ARG(
void * itt_sync_obj)
1148 register kmp_team_t *team = this_thr -> th.th_team;
1149 register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
1150 register kmp_info_t **other_threads = team -> t.t_threads;
1152 KA_TRACE( 20, (
"__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
1153 gtid, team->t.t_id, tid, bt ) );
1155 KMP_DEBUG_ASSERT( this_thr == other_threads[this_thr->th.th_info.ds.ds_tid] );
/* Non-master threads: announce arrival to the master. */
1163 if ( ! KMP_MASTER_TID( tid )) {
1165 KA_TRACE( 20, (
"__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)"
1166 "arrived(%p): %u => %u\n",
1167 gtid, team->t.t_id, tid,
1168 __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
1169 &thr_bar -> b_arrived, thr_bar -> b_arrived,
1170 thr_bar -> b_arrived + KMP_BARRIER_STATE_BUMP
1179 __kmp_release( other_threads[0], &thr_bar -> b_arrived, kmp_release_fence );
/* Master thread: collect the arrivals of threads 1 .. nproc-1. */
1182 register kmp_balign_team_t *team_bar = & team -> t.t_bar[ bt ];
1183 register int nproc = this_thr -> th.th_team_nproc;
1185 register kmp_uint new_state;
/* The state every arriving thread is expected to reach. */
1188 new_state = team_bar -> b_arrived + KMP_BARRIER_STATE_BUMP;
1191 for (i = 1; i < nproc; i++) {
1192 #if KMP_CACHE_MANAGE
/* NOTE(review): other_threads[i+1] is one past the team on the last iteration
   unless the prefetch is guarded by an i+1 < nproc test -- confirm. */
1195 KMP_CACHE_PREFETCH( &other_threads[ i+1 ] -> th.th_bar[ bt ].bb.b_arrived );
1197 KA_TRACE( 20, (
"__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
1198 "arrived(%p) == %u\n",
1199 gtid, team->t.t_id, tid,
1200 __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
1201 &other_threads[i] -> th.th_bar[ bt ].bb.b_arrived,
1205 __kmp_wait_sleep( this_thr,
1206 & other_threads[ i ] -> th.th_bar[ bt ].bb.b_arrived,
1208 USE_ITT_BUILD_ARG( itt_sync_obj )
1213 KA_TRACE( 100, (
"__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
1214 gtid, team->t.t_id, tid,
1215 __kmp_gtid_from_tid( i, team ), team->t.t_id, i ) );
/* Combine thread i's partial result into the master's reduce_data
   (presumably only when a reduce callback was supplied -- confirm). */
1217 (*reduce)( this_thr -> th.th_local.reduce_data,
1218 other_threads[ i ] -> th.th_local.reduce_data );
/* Publish the team-wide arrival state. */
1225 team_bar -> b_arrived = new_state;
1226 KA_TRACE( 20, (
"__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d "
1227 "arrived(%p) = %u\n",
1228 gtid, team->t.t_id, tid, team->t.t_id,
1229 &team_bar -> b_arrived, new_state ) );
1232 KA_TRACE( 20, (
"__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
1233 gtid, team->t.t_id, tid, bt ) );
/*
 * Gather phase of the tree barrier.
 *
 *   bt           - barrier type; selects barrier state and the branching factor
 *   this_thr     - descriptor of the calling thread
 *   reduce       - reduction callback combining two reduce_data blocks
 *   itt_sync_obj - ITT object, present only in ITT builds
 *
 * Threads form a tree with branching factor 1 << branch_bits, where
 * branch_bits comes from __kmp_barrier_gather_branch_bits[bt]. The first child
 * of tid is (tid << branch_bits) + 1 and the parent of a non-master tid is
 * (tid - 1) >> branch_bits. A thread first waits for its children (folding
 * their reduce_data into its own), then a non-master thread releases its
 * parent, while the master updates the team's b_arrived.
 */
1238 __kmp_tree_barrier_gather(
enum barrier_type bt,
1239 kmp_info_t *this_thr,
1242 void (*reduce) (
void *,
void *)
1243 USE_ITT_BUILD_ARG(
void * itt_sync_obj )
1246 register kmp_team_t *team = this_thr -> th.th_team;
1247 register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
1248 register kmp_info_t **other_threads = team -> t.t_threads;
1249 register kmp_uint32 nproc = this_thr -> th.th_team_nproc;
1250 register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[ bt ];
1251 register kmp_uint32 branch_factor = 1 << branch_bits ;
1252 register kmp_uint32 child;
1253 register kmp_int32 child_tid;
1254 register kmp_uint new_state;
1256 KA_TRACE( 20, (
"__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
1257 gtid, team->t.t_id, tid, bt ) );
1259 KMP_DEBUG_ASSERT( this_thr == other_threads[this_thr->th.th_info.ds.ds_tid] );
/* tid of this thread's first child in the tree. */
1267 child_tid = (tid << branch_bits) + 1;
/* NOTE(review): child_tid is kmp_int32 while nproc is kmp_uint32, so this is a
   mixed signed/unsigned comparison. */
1269 if ( child_tid < nproc ) {
/* The state each child is expected to reach. */
1272 new_state = team -> t.t_bar[ bt ].b_arrived + KMP_BARRIER_STATE_BUMP;
1276 register kmp_info_t *child_thr = other_threads[ child_tid ];
1277 register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
1278 #if KMP_CACHE_MANAGE
/* Prefetch the next child's flag when there is a next child. */
1280 if ( child+1 <= branch_factor && child_tid+1 < nproc )
1281 KMP_CACHE_PREFETCH( &other_threads[ child_tid+1 ] -> th.th_bar[ bt ].bb.b_arrived );
1283 KA_TRACE( 20, (
"__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
1284 "arrived(%p) == %u\n",
1285 gtid, team->t.t_id, tid,
1286 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id, child_tid,
1287 &child_bar -> b_arrived, new_state ) );
1290 __kmp_wait_sleep( this_thr, &child_bar -> b_arrived, new_state, FALSE
1291 USE_ITT_BUILD_ARG( itt_sync_obj)
1296 KA_TRACE( 100, (
"__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
1297 gtid, team->t.t_id, tid,
1298 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
/* Combine the child's partial result into this thread's reduce_data
   (presumably only when a reduce callback was supplied -- confirm). */
1301 (*reduce)( this_thr -> th.th_local.reduce_data,
1302 child_thr -> th.th_local.reduce_data );
/* Continue while children remain within the branching factor and the team. */
1309 while ( child <= branch_factor && child_tid < nproc );
/* Non-master threads: report arrival to the parent. */
1312 if ( !KMP_MASTER_TID(tid) ) {
1314 register kmp_int32 parent_tid = (tid - 1) >> branch_bits;
1316 KA_TRACE( 20, (
"__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
1317 "arrived(%p): %u => %u\n",
1318 gtid, team->t.t_id, tid,
1319 __kmp_gtid_from_tid( parent_tid, team ), team->t.t_id, parent_tid,
1320 &thr_bar -> b_arrived, thr_bar -> b_arrived,
1321 thr_bar -> b_arrived + KMP_BARRIER_STATE_BUMP
1330 __kmp_release( other_threads[parent_tid], &thr_bar -> b_arrived, kmp_release_fence );
/* Master thread: the team's arrival state either takes new_state or is bumped
   in place (presumably the latter when new_state was never computed -- confirm). */
1337 team -> t.t_bar[ bt ].b_arrived = new_state;
1339 team -> t.t_bar[ bt ].b_arrived += KMP_BARRIER_STATE_BUMP;
1341 KA_TRACE( 20, (
"__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %u\n",
1342 gtid, team->t.t_id, tid, team->t.t_id,
1343 &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived ) );
1346 KA_TRACE( 20, (
"__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
1347 gtid, team->t.t_id, tid, bt ) );
/*
 * Gather phase of the hypercube-embedded tree barrier.
 *
 *   bt           - barrier type; selects barrier state and the branching factor
 *   this_thr     - descriptor of the calling thread
 *   reduce       - reduction callback combining two reduce_data blocks
 *   itt_sync_obj - ITT object, present only in ITT builds
 *
 * The algorithm proceeds level by level; at each level `offset` is multiplied
 * by the branching factor (1 << branch_bits). A thread whose tid has a nonzero
 * digit at the current level releases its parent, obtained by clearing the low
 * (level + branch_bits) bits of tid. Otherwise it waits for up to
 * branch_factor - 1 children spaced 1 << level apart and folds their
 * reduce_data into its own. The master finally updates the team's b_arrived.
 */
1352 __kmp_hyper_barrier_gather(
enum barrier_type bt,
1353 kmp_info_t *this_thr,
1356 void (*reduce) (
void *,
void *)
1357 USE_ITT_BUILD_ARG (
void * itt_sync_obj)
1360 register kmp_team_t *team = this_thr -> th.th_team;
1361 register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
1362 register kmp_info_t **other_threads = team -> t.t_threads;
/* KMP_BARRIER_UNUSED_STATE marks "no child was waited for yet". */
1363 register kmp_uint new_state = KMP_BARRIER_UNUSED_STATE;
1364 register kmp_uint32 num_threads = this_thr -> th.th_team_nproc;
1365 register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[ bt ];
1366 register kmp_uint32 branch_factor = 1 << branch_bits ;
1367 register kmp_uint32 offset;
1368 register kmp_uint32 level;
1370 KA_TRACE( 20, (
"__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n",
1371 gtid, team->t.t_id, tid, bt ) );
1373 KMP_DEBUG_ASSERT( this_thr == other_threads[this_thr->th.th_info.ds.ds_tid] );
/* One iteration per tree level. */
1381 for ( level=0, offset =1;
1382 offset < num_threads;
1383 level += branch_bits, offset <<= branch_bits )
1385 register kmp_uint32 child;
1386 register kmp_int32 child_tid;
/* A nonzero digit at this level means the thread is a child here: it reports
   to its parent. */
1388 if ( ((tid >> level) & (branch_factor - 1)) != 0 ) {
1389 register kmp_int32 parent_tid = tid & ~( (1 << (level + branch_bits)) -1 );
1391 KA_TRACE( 20, (
"__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) "
1392 "arrived(%p): %u => %u\n",
1393 gtid, team->t.t_id, tid,
1394 __kmp_gtid_from_tid( parent_tid, team ), team->t.t_id, parent_tid,
1395 &thr_bar -> b_arrived, thr_bar -> b_arrived,
1396 thr_bar -> b_arrived + KMP_BARRIER_STATE_BUMP
1406 __kmp_release( other_threads[parent_tid], &thr_bar -> b_arrived, kmp_release_fence );
/* Otherwise the thread is a parent at this level: wait for its children. */
1412 for ( child = 1, child_tid = tid + (1 << level);
1413 child < branch_factor && child_tid < num_threads;
1414 child++, child_tid += (1 << level) )
1416 register kmp_info_t *child_thr = other_threads[ child_tid ];
1417 register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
1418 #if KMP_CACHE_MANAGE
1419 register kmp_uint32 next_child_tid = child_tid + (1 << level);
1421 if ( child+1 < branch_factor && next_child_tid < num_threads )
1422 KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ] -> th.th_bar[ bt ].bb.b_arrived );
/* Compute the expected child state lazily, on the first child only. */
1425 if (new_state == KMP_BARRIER_UNUSED_STATE)
1426 new_state = team -> t.t_bar[ bt ].b_arrived + KMP_BARRIER_STATE_BUMP;
1428 KA_TRACE( 20, (
"__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) "
1429 "arrived(%p) == %u\n",
1430 gtid, team->t.t_id, tid,
1431 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id, child_tid,
1432 &child_bar -> b_arrived, new_state ) );
1435 __kmp_wait_sleep( this_thr, &child_bar -> b_arrived, new_state, FALSE
1436 USE_ITT_BUILD_ARG (itt_sync_obj)
1441 KA_TRACE( 100, (
"__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n",
1442 gtid, team->t.t_id, tid,
1443 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
/* Combine the child's partial result into this thread's reduce_data
   (presumably only when a reduce callback was supplied -- confirm). */
1446 (*reduce)( this_thr -> th.th_local.reduce_data,
1447 child_thr -> th.th_local.reduce_data );
/* Master thread: publish the team-wide arrival state. If no child was ever
   waited for, the state is bumped in place; otherwise new_state is stored. */
1454 if ( KMP_MASTER_TID(tid) ) {
1457 if (new_state == KMP_BARRIER_UNUSED_STATE)
1458 team -> t.t_bar[ bt ].b_arrived += KMP_BARRIER_STATE_BUMP;
1460 team -> t.t_bar[ bt ].b_arrived = new_state;
1462 KA_TRACE( 20, (
"__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %u\n",
1463 gtid, team->t.t_id, tid, team->t.t_id,
1464 &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived ) );
1467 KA_TRACE( 20, (
"__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n",
1468 gtid, team->t.t_id, tid, bt ) );
/*
 * Release phase of the linear barrier.
 *
 *   bt           - barrier type; selects the per-thread barrier state
 *   this_thr     - descriptor of the calling thread
 *   itt_sync_obj - ITT object, present only in ITT builds
 *
 * The master thread releases every other thread of the team in tid order by
 * calling __kmp_release() on that thread's b_go flag with an acquire fence;
 * with KMP_BARRIER_ICV_PUSH and propagate_icvs set it first initialises the
 * thread's implicit task and copies the master's ICVs to it. A non-master
 * thread waits for its own b_go flag to reach KMP_BARRIER_STATE_BUMP, performs
 * the ITT bookkeeping, checks for library shutdown on the fork/join barrier,
 * and resets b_go to KMP_INIT_BARRIER_STATE.
 */
1473 __kmp_linear_barrier_release(
enum barrier_type bt,
1474 kmp_info_t *this_thr,
1478 USE_ITT_BUILD_ARG(
void * itt_sync_obj)
1481 register kmp_bstate_t *thr_bar = &this_thr -> th.th_bar[ bt ].bb;
1482 register kmp_team_t *team;
/* Master thread: release the rest of the team. */
1484 if (KMP_MASTER_TID( tid )) {
1486 register kmp_uint32 nproc = this_thr -> th.th_team_nproc;
1487 register kmp_info_t **other_threads;
1489 team = __kmp_threads[ gtid ]-> th.th_team;
1490 KMP_DEBUG_ASSERT( team != NULL );
1491 other_threads = team -> t.t_threads;
1493 KA_TRACE( 20, (
"__kmp_linear_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
1494 gtid, team->t.t_id, tid, bt ) );
1498 for (i = 1; i < nproc; i++) {
1499 #if KMP_CACHE_MANAGE
/* NOTE(review): other_threads[i+1] is one past the team on the last iteration
   unless the prefetch is guarded by an i+1 < nproc test -- confirm. */
1502 KMP_CACHE_PREFETCH( &other_threads[ i+1 ]-> th.th_bar[ bt ].bb.b_go );
1505 #if KMP_BARRIER_ICV_PUSH
/* Push the master's ICVs (slot 0) into thread i's implicit task before releasing it. */
1506 if ( propagate_icvs ) {
1507 __kmp_init_implicit_task( team->t.t_ident,
1508 team->t.t_threads[i], team, i, FALSE );
1509 copy_icvs( &team->t.t_implicit_task_taskdata[i].td_icvs,
1510 &team->t.t_implicit_task_taskdata[0].td_icvs );
1512 #endif // KMP_BARRIER_ICV_PUSH
1514 KA_TRACE( 20, (
"__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) "
1515 "go(%p): %u => %u\n",
1516 gtid, team->t.t_id, tid,
1517 other_threads[i]->th.th_info.ds.ds_gtid, team->t.t_id, i,
1518 &other_threads[i]->th.th_bar[bt].bb.b_go,
1519 other_threads[i]->th.th_bar[bt].bb.b_go,
1520 other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP
1523 __kmp_release( other_threads[ i ],
1524 &other_threads[ i ]-> th.th_bar[ bt ].bb.b_go, kmp_acquire_fence );
/* Non-master threads: wait to be released. */
1530 KA_TRACE( 20, (
"__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n",
1531 gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
1533 __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE
1534 USE_ITT_BUILD_ARG(itt_sync_obj)
/* ITT bookkeeping for builds with OMP_30_ENABLED. */
1537 #if USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY
1538 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1540 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier, 0, -1 );
1542 __kmp_itt_task_starting( itt_sync_obj );
/* Library shutdown detected while waiting at the fork/join barrier. */
1544 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1547 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1548 if ( itt_sync_obj != NULL )
1549 __kmp_itt_task_finished( itt_sync_obj );
1556 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
/* ITT bookkeeping for builds without OMP_30_ENABLED. */
1562 #if USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY
1564 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1566 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1567 __kmp_itt_barrier_starting( gtid, itt_sync_obj );
/* tid and team are (re)read from the gtid after the wait. */
1571 tid = __kmp_tid_from_gtid( gtid );
1572 team = __kmp_threads[ gtid ]-> th.th_team;
1574 KMP_DEBUG_ASSERT( team != NULL );
/* Reset the go flag so it can be used by the next barrier. */
1576 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
1577 KA_TRACE( 20, (
"__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1578 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
1583 KA_TRACE( 20, (
"__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
1584 gtid, team->t.t_id, tid, bt ) );
/*
 * __kmp_tree_barrier_release: release phase of the tree-pattern barrier.
 *
 * bt           - barrier type; selects this_thr->th.th_bar[ bt ] and
 *                __kmp_barrier_release_branch_bits[ bt ].
 * this_thr     - descriptor of the calling thread.
 * itt_sync_obj - ITT sync object, supplied through USE_ITT_BUILD_ARG.
 * The body additionally uses gtid, tid and propagate_icvs.
 *
 * A thread whose tid is not the master tid waits on its own b_go flag for the
 * value KMP_BARRIER_STATE_BUMP and afterwards resets the flag to
 * KMP_INIT_BARRIER_STATE.  Children are released with __kmp_release(),
 * starting at child_tid = ( tid << branch_bits ) + 1 and continuing while
 * child <= branch_factor and child_tid < nproc.
 */
1589 __kmp_tree_barrier_release(
enum barrier_type bt,
1590 kmp_info_t *this_thr,
1594 USE_ITT_BUILD_ARG(
void * itt_sync_obj)
1598 register kmp_team_t *team;
1599 register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
1600 register kmp_uint32 nproc;
1601 register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[ bt ];
1602 register kmp_uint32 branch_factor = 1 << branch_bits ;
1603 register kmp_uint32 child;
1604 register kmp_int32 child_tid;
/* Non-master tids wait on their own b_go flag before doing anything else. */
1611 if ( ! KMP_MASTER_TID( tid )) {
1614 KA_TRACE( 20, (
"__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n",
1615 gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
1618 __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE
1619 USE_ITT_BUILD_ARG(itt_sync_obj)
1622 #if USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY
/* ITT notification path used when no sync object was passed in (or KMP_ITT_DEBUG). */
1623 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1625 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier, 0, -1 );
1627 __kmp_itt_task_starting( itt_sync_obj );
/* Tests g_done at the fork/join barrier (presumably to bail out during shutdown; confirm). */
1629 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1632 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1633 if ( itt_sync_obj != NULL )
1634 __kmp_itt_task_finished( itt_sync_obj );
1641 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1647 #if USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY
1649 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1651 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1652 __kmp_itt_barrier_starting( gtid, itt_sync_obj );
/* team and tid are re-read from the thread table after the wait. */
1655 team = __kmp_threads[ gtid ]-> th.th_team;
1656 KMP_DEBUG_ASSERT( team != NULL );
1657 tid = __kmp_tid_from_gtid( gtid );
/* Reset this thread's own go flag to its initial value. */
1659 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
1660 KA_TRACE( 20, (
"__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1661 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
1666 team = __kmp_threads[ gtid ]-> th.th_team;
1667 KMP_DEBUG_ASSERT( team != NULL );
1669 KA_TRACE( 20, (
"__kmp_tree_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
1670 gtid, team->t.t_id, tid, bt ) );
1673 nproc = this_thr -> th.th_team_nproc;
/* tid of the first child to release. */
1674 child_tid = ( tid << branch_bits ) + 1;
/* NOTE(review): child_tid is kmp_int32 and nproc is kmp_uint32, so the
   comparisons against nproc mix signedness; confirm this is intended. */
1676 if ( child_tid < nproc ) {
1677 register kmp_info_t **other_threads = team -> t.t_threads;
1682 register kmp_info_t *child_thr = other_threads[ child_tid ];
1683 register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
1684 #if KMP_CACHE_MANAGE
/* Prefetch the b_go flag of the following child, if there is one. */
1686 if ( child+1 <= branch_factor && child_tid+1 < nproc )
1687 KMP_CACHE_PREFETCH( &other_threads[ child_tid+1 ] -> th.th_bar[ bt ].bb.b_go );
1690 #if KMP_BARRIER_ICV_PUSH
/* With propagate_icvs set, initialise the child's implicit task and copy
   the ICVs of implicit task 0 into the child's slot before releasing it. */
1691 if ( propagate_icvs ) {
1692 __kmp_init_implicit_task( team->t.t_ident,
1693 team->t.t_threads[child_tid], team, child_tid, FALSE );
1694 copy_icvs( &team->t.t_implicit_task_taskdata[child_tid].td_icvs,
1695 &team->t.t_implicit_task_taskdata[0].td_icvs );
1697 #endif // KMP_BARRIER_ICV_PUSH
/* NOTE(review): the two adjacent format literals below concatenate to
   "...T#%d(%d:%d)go(%p)..." with no space before "go"; confirm whether a
   separating space was intended. */
1699 KA_TRACE( 20, (
"__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)"
1700 "go(%p): %u => %u\n",
1701 gtid, team->t.t_id, tid,
1702 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
1703 child_tid, &child_bar -> b_go, child_bar -> b_go,
1704 child_bar -> b_go + KMP_BARRIER_STATE_BUMP ) );
/* Release the child through its b_go flag. */
1707 __kmp_release( child_thr, &child_bar -> b_go, kmp_acquire_fence );
1712 while ( child <= branch_factor && child_tid < nproc );
1715 KA_TRACE( 20, (
"__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
1716 gtid, team->t.t_id, tid, bt ) );
/* KMP_REVERSE_HYPER_BAR is defined immediately before it is tested, so the
   variant of __kmp_hyper_barrier_release that directly follows is the one
   selected by the #ifdef. */
1720 #define KMP_REVERSE_HYPER_BAR
1721 #ifdef KMP_REVERSE_HYPER_BAR
/*
 * __kmp_hyper_barrier_release (reversed variant): release phase of the
 * hyper-pattern barrier in which levels and children are visited in
 * descending order.
 *
 * bt           - barrier type; selects this_thr->th.th_bar[ bt ] and
 *                __kmp_barrier_release_branch_bits[ bt ].
 * this_thr     - descriptor of the calling thread.
 * itt_sync_obj - ITT sync object, supplied through USE_ITT_BUILD_ARG.
 * The body additionally uses gtid, tid and propagate_icvs.
 *
 * A thread whose tid is not the master tid first waits on its own b_go flag
 * for KMP_BARRIER_STATE_BUMP and resets it to KMP_INIT_BARRIER_STATE.  The
 * level/offset pair is then counted up and walked back down, releasing the
 * children found at each level with __kmp_release().
 */
1723 __kmp_hyper_barrier_release(
enum barrier_type bt,
1724 kmp_info_t *this_thr,
1728 USE_ITT_BUILD_ARG(
void * itt_sync_obj)
1732 register kmp_team_t *team;
1733 register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
1734 register kmp_info_t **other_threads;
1735 register kmp_uint32 num_threads;
1736 register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[ bt ];
1737 register kmp_uint32 branch_factor = 1 << branch_bits;
1738 register kmp_uint32 child;
1739 register kmp_int32 child_tid;
1740 register kmp_uint32 offset;
1741 register kmp_uint32 level;
/* Non-master tids wait on their own b_go flag before doing anything else. */
1749 if ( ! KMP_MASTER_TID( tid )) {
1752 KA_TRACE( 20, (
"__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n",
1753 gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
1756 __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE
1757 USE_ITT_BUILD_ARG( itt_sync_obj )
1760 #if USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY
/* ITT notification path used when no sync object was passed in (or KMP_ITT_DEBUG). */
1761 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1763 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier, 0, -1 );
1765 __kmp_itt_task_starting( itt_sync_obj );
/* Tests g_done at the fork/join barrier (presumably to bail out during shutdown; confirm). */
1767 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1770 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1771 if ( itt_sync_obj != NULL )
1772 __kmp_itt_task_finished( itt_sync_obj );
1779 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1785 #if USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY
1787 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1789 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1790 __kmp_itt_barrier_starting( gtid, itt_sync_obj );
/* team and tid are re-read from the thread table after the wait. */
1793 team = __kmp_threads[ gtid ]-> th.th_team;
1794 KMP_DEBUG_ASSERT( team != NULL );
1795 tid = __kmp_tid_from_gtid( gtid );
/* Reset this thread's own go flag to its initial value. */
1797 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
1798 KA_TRACE( 20, (
"__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1799 gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
1804 team = __kmp_threads[ gtid ]-> th.th_team;
1805 KMP_DEBUG_ASSERT( team != NULL );
1807 KA_TRACE( 20, (
"__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n",
1808 gtid, team->t.t_id, tid, bt ) );
1811 num_threads = this_thr -> th.th_team_nproc;
1812 other_threads = team -> t.t_threads;
/* Count level/offset upwards; the loop has an empty body and stops once
   offset reaches num_threads or the branch bits of tid at this level are
   non-zero. */
1815 for ( level = 0, offset = 1;
1816 offset < num_threads && (((tid >> level) & (branch_factor-1)) == 0);
1817 level += branch_bits, offset <<= branch_bits );
/* Step level/offset back down by branch_bits per iteration. */
1820 for ( level -= branch_bits, offset >>= branch_bits;
1822 level -= branch_bits, offset >>= branch_bits )
/* NOTE(review): child and child_tid are declared again here with the same
   types as the function-level declarations above; this looks like
   shadowing. Confirm it is intentional. */
1824 register kmp_uint32 child;
1825 register kmp_int32 child_tid;
/* Start from at most branch_factor-1 children and iterate downwards,
   stepping child_tid by ( 1 << level ). */
1829 child = num_threads >> ((level==0)?level:level-1);
1830 for ( child = (child < branch_factor-1) ? child : branch_factor-1,
1831 child_tid = tid + (child << level);
1833 child--, child_tid -= (1 << level) )
/* Skip child slots that lie beyond the team size. */
1836 if ( child_tid >= num_threads )
continue;
1838 register kmp_info_t *child_thr = other_threads[ child_tid ];
1839 register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
1840 #if KMP_CACHE_MANAGE
1841 register kmp_uint32 next_child_tid = child_tid - (1 << level);
/* Prefetch the b_go flag of the next child to be visited, if any. */
1843 if ( child-1 >= 1 && next_child_tid < num_threads )
1844 KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ]->th.th_bar[ bt ].bb.b_go );
1847 #if KMP_BARRIER_ICV_PUSH
/* With propagate_icvs set, initialise the child's implicit task and copy
   the ICVs of implicit task 0 into the child's slot before releasing it. */
1848 if ( propagate_icvs ) {
1849 KMP_DEBUG_ASSERT( team != NULL );
1850 __kmp_init_implicit_task( team->t.t_ident,
1851 team->t.t_threads[child_tid], team, child_tid, FALSE );
1852 copy_icvs( &team->t.t_implicit_task_taskdata[child_tid].td_icvs,
1853 &team->t.t_implicit_task_taskdata[0].td_icvs );
1855 #endif // KMP_BARRIER_ICV_PUSH
/* NOTE(review): the two adjacent format literals below concatenate to
   "...T#%d(%d:%d)go(%p)..." with no space before "go"; confirm whether a
   separating space was intended. */
1857 KA_TRACE( 20, (
"__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)"
1858 "go(%p): %u => %u\n",
1859 gtid, team->t.t_id, tid,
1860 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
1861 child_tid, &child_bar -> b_go, child_bar -> b_go,
1862 child_bar -> b_go + KMP_BARRIER_STATE_BUMP ) );
/* Release the child through its b_go flag. */
1865 __kmp_release( child_thr, &child_bar -> b_go, kmp_acquire_fence );
1870 KA_TRACE( 20, (
"__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
1871 gtid, team->t.t_id, tid, bt ) );
/*
 * __kmp_hyper_barrier_release (non-reversed variant; presumably the
 * alternative branch of the KMP_REVERSE_HYPER_BAR conditional; confirm).
 *
 * bt             - barrier type; selects this_thr->th.th_bar[ bt ] and
 *                  __kmp_barrier_release_branch_bits[ bt ].
 * this_thr       - descriptor of the calling thread.
 * gtid           - global thread id; indexes __kmp_threads[].
 * tid            - team-local thread id; recomputed from gtid after the wait.
 * propagate_icvs - when non-zero (and KMP_BARRIER_ICV_PUSH is enabled) each
 *                  released child gets its implicit task initialised and the
 *                  ICVs of implicit task 0 copied into its slot.
 *
 * Levels are visited in ascending order and, at each level, children are
 * visited in ascending order and released with __kmp_release().
 */
1877 __kmp_hyper_barrier_release(
enum barrier_type bt, kmp_info_t *this_thr,
int gtid,
int tid,
int propagate_icvs )
1880 register kmp_team_t *team;
1881 register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
1882 register kmp_info_t **other_threads;
1883 register kmp_uint32 num_threads;
1884 register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[ bt ];
1885 register kmp_uint32 branch_factor = 1 << branch_bits;
1886 register kmp_uint32 child;
1887 register kmp_int32 child_tid;
1888 register kmp_uint32 offset;
1889 register kmp_uint32 level;
/* Non-master tids wait on their own b_go flag before doing anything else. */
1897 if ( ! KMP_MASTER_TID( tid )) {
1900 KA_TRACE( 20, (
"__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n",
1901 gtid, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP ) );
/* This variant passes NULL as the last argument of __kmp_wait_sleep. */
1904 __kmp_wait_sleep( this_thr, &thr_bar -> b_go, KMP_BARRIER_STATE_BUMP, TRUE, NULL );
1906 #if USE_ITT_BUILD && OMP_30_ENABLED && USE_ITT_NOTIFY
/* NOTE(review): itt_sync_obj is referenced below but is not among this
   variant's parameters (bt, this_thr, gtid, tid, propagate_icvs); confirm
   that this code still compiles when the ITT macros are enabled. */
1907 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1909 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier, 0, -1 );
1911 __kmp_itt_task_starting( itt_sync_obj );
/* Tests g_done at the fork/join barrier (presumably to bail out during shutdown; confirm). */
1913 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1916 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1917 if ( itt_sync_obj != NULL )
1918 __kmp_itt_task_finished( itt_sync_obj );
1925 if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) )
1931 #if USE_ITT_BUILD && !OMP_30_ENABLED && USE_ITT_NOTIFY
1933 if ( ( __itt_sync_create_ptr && itt_sync_obj == NULL ) || KMP_ITT_DEBUG ) {
1935 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1936 __kmp_itt_barrier_starting( gtid, itt_sync_obj );
/* team and tid are re-read from the thread table after the wait. */
1939 team = __kmp_threads[ gtid ]-> th.th_team;
1940 KMP_DEBUG_ASSERT( team != NULL );
1941 tid = __kmp_tid_from_gtid( gtid );
/* Reset this thread's own go flag to its initial value. */
1943 TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE);
1944 KA_TRACE( 20, (
"__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n",
1945 gtid, ( team != NULL ) ? team->t.t_id : -1, tid,
1946 &thr_bar->b_go, KMP_INIT_BARRIER_STATE ) );
1951 team = __kmp_threads[ gtid ]-> th.th_team;
1952 KMP_DEBUG_ASSERT( team != NULL );
1954 KA_TRACE( 20, (
"__kmp_hyper_barrier_release: T#%d(%d:%d) enter for barrier type %d\n",
1955 gtid, team->t.t_id, tid, bt ) );
/* Fallback: re-read tid and team from the thread table if team is NULL. */
1959 if ( team == NULL ) {
1961 tid = __kmp_tid_from_gtid( gtid );
1962 team = __kmp_threads[ gtid ]-> th.th_team;
1964 num_threads = this_thr -> th.th_team_nproc;
1965 other_threads = team -> t.t_threads;
/* Visit levels in ascending order while offset < num_threads. */
1968 for ( level = 0, offset = 1;
1969 offset < num_threads;
1970 level += branch_bits, offset <<= branch_bits )
1972 register kmp_uint32 child;
1973 register kmp_int32 child_tid;
/* Tests whether tid has non-zero branch bits at this level (presumably
   ends the level walk for this thread; confirm). */
1975 if (((tid >> level) & (branch_factor - 1)) != 0)
/* Children at this level are tid + k * ( 1 << level ) for k = 1 .. branch_factor-1,
   bounded by num_threads. */
1981 for ( child = 1, child_tid = tid + (1 << level);
1982 child < branch_factor && child_tid < num_threads;
1983 child++, child_tid += (1 << level) )
1985 register kmp_info_t *child_thr = other_threads[ child_tid ];
1986 register kmp_bstate_t *child_bar = & child_thr -> th.th_bar[ bt ].bb;
1987 #if KMP_CACHE_MANAGE
1989 register kmp_uint32 next_child_tid = child_tid + (1 << level);
/* Prefetch the b_go flag of the next child to be visited, if any. */
1991 if ( child+1 < branch_factor && next_child_tid < num_threads )
1992 KMP_CACHE_PREFETCH( &other_threads[ next_child_tid ]->th.th_bar[ bt ].bb.b_go );
1996 #if KMP_BARRIER_ICV_PUSH
/* With propagate_icvs set, initialise the child's implicit task and copy
   the ICVs of implicit task 0 into the child's slot before releasing it. */
1997 if ( propagate_icvs ) {
1998 KMP_DEBUG_ASSERT( team != NULL );
1999 __kmp_init_implicit_task( team->t.t_ident,
2000 team->t.t_threads[child_tid], team, child_tid, FALSE );
2001 copy_icvs( &team->t.t_implicit_task_taskdata[child_tid].td_icvs,
2002 &team->t.t_implicit_task_taskdata[0].td_icvs );
2004 #endif // KMP_BARRIER_ICV_PUSH
2006 KA_TRACE( 20, (
"__kmp_hyper_barrier_release: T#%d(%d:%d) releasing "
2007 "T#%d(%d:%d) go(%p): %u => %u\n",
2008 gtid, team->t.t_id, tid,
2009 __kmp_gtid_from_tid( child_tid, team ), team->t.t_id,
2010 child_tid, &child_bar -> b_go, child_bar -> b_go,
2011 child_bar -> b_go + KMP_BARRIER_STATE_BUMP ) );
/* Release the child through its b_go flag. */
2014 __kmp_release( child_thr, &child_bar -> b_go, kmp_acquire_fence );
2018 KA_TRACE( 20, (
"__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n",
2019 gtid, team->t.t_id, tid, bt ) );
/*
 * __kmp_barrier: barrier entry point for a team.
 *
 * bt          - barrier type; indexes the gather/release pattern and
 *               branch-bits tables.
 * gtid        - global thread id of the caller.
 * is_split    - when non-zero, the release phase is skipped here unless
 *               status == 1.
 * reduce_size - size argument accompanying the reduction (not referenced in
 *               the statements shown below).
 * reduce_data - stored into this_thr->th.th_local.reduce_data when reduce
 *               is non-NULL.
 * reduce      - reduction callback handed to the gather routine.
 *
 * For a non-serialized team the sequence is: optional tasking barrier,
 * refresh of the thread's blocktime settings, gather phase (linear, tree or
 * hyper, chosen from __kmp_barrier_gather_pattern / branch bits), task-team
 * wait and setup on the master tid, release phase (chosen the same way from
 * the release tables) and task-team sync.  status starts at 0 and is the
 * value reported in the final trace message.
 */
2031 __kmp_barrier(
enum barrier_type bt,
int gtid,
int is_split,
2032 size_t reduce_size,
void *reduce_data,
void (*reduce)(
void *,
void *) )
2034 register int tid = __kmp_tid_from_gtid( gtid );
2035 register kmp_info_t *this_thr = __kmp_threads[ gtid ];
2036 register kmp_team_t *team = this_thr -> th.th_team;
2037 register int status = 0;
2039 KA_TRACE( 15, (
"__kmp_barrier: T#%d(%d:%d) has arrived\n",
2040 gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid) ) );
/* Real barrier work is only needed when the team is not serialized. */
2042 if ( ! team->t.t_serialized ) {
2045 void * itt_sync_obj = NULL;
2047 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
2048 itt_sync_obj = __kmp_itt_barrier_object( gtid, bt, 1 );
/* Extra tasking barrier, used only in tskm_extra_barrier mode. */
2052 if ( __kmp_tasking_mode == tskm_extra_barrier ) {
2053 __kmp_tasking_barrier( team, this_thr, gtid );
2054 KA_TRACE( 15, (
"__kmp_barrier: T#%d(%d:%d) past tasking barrier\n",
2055 gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid) ) );
/* Copy the blocktime interval/set values for this tid into the thread
   descriptor; the source is the implicit task ICVs or the team's
   t_set_bt_* arrays depending on the build configuration. */
2066 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
2068 this_thr -> th.th_team_bt_intervals = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
2069 this_thr -> th.th_team_bt_set = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
2071 this_thr -> th.th_team_bt_intervals = team -> t.t_set_bt_intervals[tid];
2072 this_thr -> th.th_team_bt_set= team -> t.t_set_bt_set[tid];
2073 #endif // OMP_30_ENABLED
2077 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
2078 __kmp_itt_barrier_starting( gtid, itt_sync_obj );
/* Publish the reduction data pointer before gathering. */
2081 if ( reduce != NULL ) {
2083 this_thr -> th.th_local.reduce_data = reduce_data;
/* Gather phase: linear when the pattern is linear or branch bits are 0,
   otherwise tree or hyper. */
2085 if ( __kmp_barrier_gather_pattern[ bt ] == bp_linear_bar || __kmp_barrier_gather_branch_bits[ bt ] == 0 ) {
2086 __kmp_linear_barrier_gather( bt, this_thr, gtid, tid, reduce
2087 USE_ITT_BUILD_ARG( itt_sync_obj )
2089 }
else if ( __kmp_barrier_gather_pattern[ bt ] == bp_tree_bar ) {
2090 __kmp_tree_barrier_gather( bt, this_thr, gtid, tid, reduce
2091 USE_ITT_BUILD_ARG( itt_sync_obj )
2094 __kmp_hyper_barrier_gather( bt, this_thr, gtid, tid, reduce
2095 USE_ITT_BUILD_ARG( itt_sync_obj )
2103 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
2104 __kmp_itt_barrier_middle( gtid, itt_sync_obj );
/* Master tid: wait for the task team and set up the next one, unless
   tasking mode is tskm_immediate_exec. */
2109 if ( KMP_MASTER_TID( tid ) ) {
2113 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2114 __kmp_task_team_wait( this_thr, team
2115 USE_ITT_BUILD_ARG( itt_sync_obj )
2117 __kmp_task_team_setup( this_thr, team );
/* Release phase; always passes FALSE for propagate_icvs. */
2124 if ( status == 1 || ! is_split ) {
2125 if ( __kmp_barrier_release_pattern[ bt ] == bp_linear_bar || __kmp_barrier_release_branch_bits[ bt ] == 0 ) {
2126 __kmp_linear_barrier_release( bt, this_thr, gtid, tid, FALSE
2127 USE_ITT_BUILD_ARG( itt_sync_obj )
2129 }
else if ( __kmp_barrier_release_pattern[ bt ] == bp_tree_bar ) {
2130 __kmp_tree_barrier_release( bt, this_thr, gtid, tid, FALSE
2131 USE_ITT_BUILD_ARG( itt_sync_obj )
2134 __kmp_hyper_barrier_release( bt, this_thr, gtid, tid, FALSE
2135 USE_ITT_BUILD_ARG( itt_sync_obj )
2139 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2140 __kmp_task_team_sync( this_thr, team );
2148 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
2149 __kmp_itt_barrier_finished( gtid, itt_sync_obj );
/* Debug-only checks that neither the team nor the thread holds a task team
   (presumably the serialized-team path; confirm). */
2157 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2162 KMP_DEBUG_ASSERT( team->t.t_task_team == NULL );
2163 KMP_DEBUG_ASSERT( this_thr->th.th_task_team == NULL );
2168 KA_TRACE( 15, (
"__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n",
2169 gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid),
/*
 * __kmp_end_split_barrier: runs the release phase of a barrier separately
 * from the gather phase.
 *
 * bt   - barrier type; indexes the release pattern and branch-bits tables.
 * gtid - global thread id of the caller.
 *
 * For a non-serialized team, the master gtid calls the linear, tree or hyper
 * release routine (selected exactly as in __kmp_barrier, with FALSE for
 * propagate_icvs); __kmp_task_team_sync is called when the tasking mode is
 * not tskm_immediate_exec.
 */
2176 __kmp_end_split_barrier(
enum barrier_type bt,
int gtid )
2178 int tid = __kmp_tid_from_gtid( gtid );
2179 kmp_info_t *this_thr = __kmp_threads[ gtid ];
2180 kmp_team_t *team = this_thr -> th.th_team;
2182 if( ! team -> t.t_serialized ) {
2183 if( KMP_MASTER_GTID( gtid ) ) {
/* Linear release when the pattern is linear or branch bits are 0. */
2184 if ( __kmp_barrier_release_pattern[ bt ] == bp_linear_bar || __kmp_barrier_release_branch_bits[ bt ] == 0 ) {
2185 __kmp_linear_barrier_release( bt, this_thr, gtid, tid, FALSE
2190 }
else if ( __kmp_barrier_release_pattern[ bt ] == bp_tree_bar ) {
2191 __kmp_tree_barrier_release( bt, this_thr, gtid, tid, FALSE
2197 __kmp_hyper_barrier_release( bt, this_thr, gtid, tid, FALSE
2204 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2205 __kmp_task_team_sync( this_thr, team );
/*
 * __kmp_reserve_threads: decide how many threads a new team may use.
 *
 * root         - root the forking thread belongs to.
 * parent_team  - team of the forking thread.
 * master_tid   - tid of the forking thread within parent_team.
 * set_nthreads - number of threads requested.
 *
 * The request is narrowed in stages: a request of 1 and the
 * nesting-disabled / serial-library cases are handled first; with dynamic
 * adjustment enabled the count is adjusted according to
 * __kmp_global.g.g_dynamic_mode (load balance, thread limit or random); the
 * result is then clamped against __kmp_max_nth and against the capacity of
 * the __kmp_threads array (after trying __kmp_expand_threads).  A
 * CantFormThrTeam message is issued at most once, guarded by
 * __kmp_reserve_warn, and only when dynamic adjustment is off.
 *
 * The final statement returns new_nthreads.
 *
 * NOTE(review): some KC_TRACE calls pass master_tid for the "T#%d" field
 * while others pass __kmp_get_gtid(); confirm which id is intended.
 */
2224 __kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team,
2225 int master_tid,
int set_nthreads
2233 int use_rml_to_adjust_nth;
2234 KMP_DEBUG_ASSERT( __kmp_init_serial );
2235 KMP_DEBUG_ASSERT( root && parent_team );
/* A request for exactly one thread. */
2240 if ( set_nthreads == 1 ) {
2241 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d reserving 1 thread; requested %d threads\n",
2242 __kmp_get_gtid(), set_nthreads ));
/* Serializing case: nesting is off for the master while the root is already
   in a parallel region, or the library mode is library_serial. */
2245 if ( ( !get__nested_2(parent_team,master_tid) && (root->r.r_in_parallel
2249 ) ) || ( __kmp_library == library_serial ) ) {
2250 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d serializing team; requested %d threads\n",
2251 __kmp_get_gtid(), set_nthreads ));
/* Start from the requested count. */
2259 new_nthreads = set_nthreads;
2260 use_rml_to_adjust_nth = FALSE;
2261 if ( ! get__dynamic_2( parent_team, master_tid ) ) {
2264 #ifdef USE_LOAD_BALANCE
/* Dynamic mode: load balance. */
2265 else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) {
2266 new_nthreads = __kmp_load_balance_nproc( root, set_nthreads );
2267 if ( new_nthreads == 1 ) {
2268 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n",
2272 if ( new_nthreads < set_nthreads ) {
2273 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n",
2274 master_tid, new_nthreads ));
/* Dynamic mode: thread limit.  Available processors minus threads already
   in use, crediting 1 thread if the root is active or the hot team's size
   otherwise. */
2278 else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) {
2279 new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
2280 : root->r.r_hot_team->t.t_nproc);
2281 if ( new_nthreads <= 1 ) {
2282 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n",
2286 if ( new_nthreads < set_nthreads ) {
2287 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n",
2288 master_tid, new_nthreads ));
2291 new_nthreads = set_nthreads;
/* Dynamic mode: random.  Only applied for requests above 2; the result is
   in the range 1 .. set_nthreads. */
2294 else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) {
2295 if ( set_nthreads > 2 ) {
2296 new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] );
2297 new_nthreads = ( new_nthreads % set_nthreads ) + 1;
2298 if ( new_nthreads == 1 ) {
2299 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n",
2303 if ( new_nthreads < set_nthreads ) {
2304 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n",
2305 master_tid, new_nthreads ));
/* Clamp against __kmp_max_nth, using the same credit for threads this root
   already accounts for. */
2316 if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
2317 root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) {
2318 int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 :
2319 root->r.r_hot_team->t.t_nproc );
2320 if ( tl_nthreads <= 0 ) {
/* Warn once, and only when dynamic adjustment is off. */
2327 if ( ! get__dynamic_2( parent_team, master_tid )
2328 && ( ! __kmp_reserve_warn ) ) {
2329 __kmp_reserve_warn = 1;
2332 KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ),
2333 KMP_HNT( Unset_ALL_THREADS ),
2337 if ( tl_nthreads == 1 ) {
2338 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n",
2342 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n",
2343 master_tid, tl_nthreads ));
2344 new_nthreads = tl_nthreads;
/* Clamp against the capacity of the __kmp_threads array. */
2354 capacity = __kmp_threads_capacity;
2355 if ( TCR_PTR(__kmp_threads[0]) == NULL ) {
2358 if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
2359 root->r.r_hot_team->t.t_nproc ) > capacity ) {
/* Try to grow the array by exactly the number of missing slots. */
2363 int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 :
2364 root->r.r_hot_team->t.t_nproc ) - capacity;
2365 int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired);
2366 if ( slotsAdded < slotsRequired ) {
/* Expansion fell short: give up the slots that could not be added. */
2370 new_nthreads -= ( slotsRequired - slotsAdded );
2371 KMP_ASSERT( new_nthreads >= 1 );
/* Warn once, and only when dynamic adjustment is off. */
2376 if ( ! get__dynamic_2( parent_team, master_tid )
2377 && ( ! __kmp_reserve_warn ) ) {
2378 __kmp_reserve_warn = 1;
2379 if ( __kmp_tp_cached ) {
2382 KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
2383 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
2384 KMP_HNT( PossibleSystemLimitOnThreads ),
2391 KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ),
2392 KMP_HNT( SystemLimitOnThreads ),
2400 if ( new_nthreads == 1 ) {
2401 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n",
2402 __kmp_get_gtid(), set_nthreads ) );
2406 KC_TRACE( 10, (
"__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n",
2407 __kmp_get_gtid(), new_nthreads, set_nthreads ));
2408 return new_nthreads;
/*
 * __kmp_fork_team_threads: attach the master thread to a team and populate
 * the team's worker slots.
 *
 * root        - root the team belongs to.
 * team        - team being set up; team->t.t_nproc gives its size.
 * master_th   - descriptor of the master thread.
 * master_gtid - global thread id of the master; asserted equal to
 *               __kmp_get_gtid().
 *
 * The master's descriptor is pointed at the team (tid 0, dispatch slot 0,
 * not serialized).  When the team is not the root's hot team, slot 0 is set
 * to the master and every slot 1 .. t_nproc-1 is filled through
 * __kmp_allocate_thread, after which the worker's per-barrier b_arrived
 * counters are copied from the team's counters.
 */
2419 __kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team,
2420 kmp_info_t *master_th,
int master_gtid )
2424 KA_TRACE( 10, (
"__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) );
2425 KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() );
/* Point the master's descriptor at the new team. */
2429 master_th -> th.th_info .ds.ds_tid = 0;
2430 master_th -> th.th_team = team;
2431 master_th -> th.th_team_nproc = team -> t.t_nproc;
2432 master_th -> th.th_team_master = master_th;
2433 master_th -> th.th_team_serialized = FALSE;
2434 master_th -> th.th_dispatch = & team -> t.t_dispatch[ 0 ];
/* The worker slots are filled here only when the team is not the hot team. */
2437 if ( team != root->r.r_hot_team ) {
2440 team -> t.t_threads[ 0 ] = master_th;
2441 __kmp_initialize_info( master_th, team, 0, master_gtid );
/* Workers occupy slots 1 .. t_nproc-1. */
2444 for ( i=1 ; i < team->t.t_nproc ; i++ ) {
2447 team -> t.t_threads[ i ] = __kmp_allocate_thread( root, team, i );
2448 KMP_DEBUG_ASSERT( team->t.t_threads[i] );
2449 KMP_DEBUG_ASSERT( team->t.t_threads[i]->th.th_team == team );
2451 KA_TRACE( 20, (
"__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%u, plain=%u\n",
2452 __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0,
2453 __kmp_gtid_from_tid( i, team ), team->t.t_id, i,
2454 team->t.t_bar[ bs_forkjoin_barrier ].b_arrived,
2455 team->t.t_bar[ bs_plain_barrier ].b_arrived ) );
/* Align the worker's arrived counters with the team's for every barrier type. */
2459 kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar;
2460 for ( b = 0; b < bs_last_barrier; ++ b ) {
2461 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
2466 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
/* Place partitioning is done only in OMP 4.0 builds on Windows or Linux. */
2467 __kmp_partition_places( team );
/* Forward declaration of __kmp_alloc_argv_entries; it is called later in
   this file as __kmp_alloc_argv_entries( argc, team, TRUE ) before the
   team's t_argv is filled. */
2476 __kmp_alloc_argv_entries(
int argc, kmp_team_t *team,
int realloc );
/*
 * __kmp_fork_call (name as used by the trace messages in the body): starts a
 * parallel region on behalf of the thread identified by gtid.
 *
 * microtask - outlined region body to run.
 * ap        - variadic argument list holding argc pointers; it is a pointer
 *             to the list on x86_64 Linux (read as *ap) and the list itself
 *             elsewhere.
 * The body additionally uses loc, gtid, exec_master, argc and invoker.
 *
 * Visible outline:
 *   1. Make sure the parallel runtime is initialised and snapshot the
 *      master thread's state.
 *   2. A region that matches the teams-construct condition below is run on
 *      parent_team instead of a newly allocated team.
 *   3. Otherwise the thread count is chosen under __kmp_forkjoin_lock via
 *      __kmp_reserve_threads.
 *   4. nthreads == 1: the region is serialized and run by the caller.
 *   5. Otherwise a team is obtained from __kmp_allocate_team, initialised,
 *      populated by __kmp_fork_team_threads and started with
 *      __kmp_internal_fork; the master then invokes the microtask through
 *      team->t.t_invoke unless exec_master is zero.
 */
2488 microtask_t microtask,
2491 #
if KMP_ARCH_X86_64 && KMP_OS_LINUX
2501 int master_this_cons;
2502 int master_last_cons;
2504 kmp_team_t *parent_team;
2505 kmp_info_t *master_th;
2509 int master_set_numthreads;
2515 KA_TRACE( 20, (
"__kmp_fork_call: enter T#%d\n", gtid ));
2518 KMP_DEBUG_ASSERT( __kmp_init_serial );
/* Initialise the parallel part of the runtime on first use. */
2519 if( ! TCR_4(__kmp_init_parallel) )
2520 __kmp_parallel_initialize();
/* Snapshot the master thread's current state. */
2523 master_th = __kmp_threads[ gtid ];
2524 parent_team = master_th -> th.th_team;
2525 master_tid = master_th -> th.th_info.ds.ds_tid;
2526 master_this_cons = master_th -> th.th_local.this_construct;
2527 master_last_cons = master_th -> th.th_local.last_construct;
2528 root = master_th -> th.th_root;
2529 master_active = root -> r.r_active;
2530 master_set_numthreads = master_th -> th.th_set_nproc;
2533 level = parent_team->t.t_level;
2534 #endif // OMP_30_ENABLED
2536 teams_level = master_th->th.th_teams_level;
2540 master_th->th.th_ident = loc;
/* Teams-construct case: the thread has a team microtask, arguments were
   supplied, the microtask is not __kmp_teams_master and the nesting level
   equals teams_level.  The region is run on parent_team. */
2543 if ( master_th->th.th_team_microtask &&
2544 ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) {
2548 parent_team->t.t_ident = loc;
2549 parent_team->t.t_argc = argc;
2550 argv = (
void**)parent_team->t.t_argv;
/* Copies argc pointers from ap into argv in order; i only counts iterations. */
2551 for( i=argc-1; i >= 0; --i )
2553 #if KMP_ARCH_X86_64 && KMP_OS_LINUX
2554 *argv++ = va_arg( *ap,
void * );
2556 *argv++ = va_arg( ap,
void * );
/* parent_team is the thread's serial team: undo one serialization level and
   invoke the microtask directly. */
2559 if ( parent_team == master_th->th.th_serial_team ) {
2562 KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 );
2563 parent_team->t.t_serialized--;
2565 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv );
/* Otherwise reuse parent_team as the active team for this region. */
2568 parent_team->t.t_pkfn = microtask;
2569 parent_team->t.t_invoke = invoker;
2570 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
2571 parent_team->t.t_active_level ++;
2572 parent_team->t.t_level ++;
2575 if ( ( __itt_frame_begin_v3_ptr && __kmp_forkjoin_frames ) || KMP_ITT_DEBUG )
2576 __kmp_itt_region_forking( gtid );
2579 KF_TRACE( 10, (
"__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
2582 __kmp_internal_fork( loc, gtid, parent_team );
2583 KF_TRACE( 10, (
"__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) );
2586 KA_TRACE( 20, (
"__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
2587 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
/* A zero result from the invoker is treated as a fatal assertion failure. */
2589 if (! parent_team->t.t_invoke( gtid )) {
2590 KMP_ASSERT2( 0,
"cannot invoke microtask for MASTER thread" );
2592 KA_TRACE( 20, (
"__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
2593 gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );
2596 KA_TRACE( 20, (
"__kmp_fork_call: parallel exit T#%d\n", gtid ));
2602 #if OMP_30_ENABLED && KMP_DEBUG
2603 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2604 KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team );
2606 #endif // OMP_30_ENABLED
/* The thread count is chosen while holding __kmp_forkjoin_lock. */
2609 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
/* Tests whether the parent's active level has reached max_active_levels
   (presumably forces a serialized region; confirm). */
2612 if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) {
2616 #endif // OMP_30_ENABLED
/* Use the pending th_set_nproc request if there is one, otherwise the
   nproc setting for the master in parent_team. */
2619 nthreads = master_set_numthreads ?
2620 master_set_numthreads : get__nproc_2( parent_team, master_tid );
2621 nthreads = __kmp_reserve_threads( root, parent_team, master_tid, nthreads
2627 ,( ( ap == NULL && teams_level == 0 ) ||
2628 ( ap && teams_level > 0 && teams_level == level ) )
2632 KMP_DEBUG_ASSERT( nthreads > 0 );
/* The th_set_nproc request has been read above; clear it. */
2635 master_th -> th.th_set_nproc = 0;
/* Serialized region: a team of one, run by the calling thread. */
2639 if ( nthreads == 1 ) {
2641 #if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 )
/* Scratch argument array: a variable-length array on Linux x86/x86_64,
   alloca elsewhere. */
2642 void * args[ argc ];
2644 void * * args = (
void**) alloca( argc *
sizeof(
void * ) );
2647 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2648 KA_TRACE( 20, (
"__kmp_fork_call: T#%d serializing parallel region\n", gtid ));
/* exec_master selects whether and how the caller runs the microtask; values
   above 1 trip the KMP_ASSERT2 further down. */
2652 if ( exec_master == 0 ) {
2654 KA_TRACE( 20, (
"__kmp_fork_call: T#%d serial exit\n", gtid ));
2656 }
else if ( exec_master == 1 ) {
2658 master_th -> th.th_serial_team -> t.t_ident = loc;
2662 master_th -> th.th_serial_team -> t.t_level--;
2664 __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv );
2665 }
else if ( microtask == (microtask_t)__kmp_teams_master ) {
2666 KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team );
2667 team = master_th->th.th_team;
2669 team->t.t_invoke = invoker;
2670 __kmp_alloc_argv_entries( argc, team, TRUE );
2671 team->t.t_argc = argc;
2672 argv = (
void**) team->t.t_argv;
2674 for( i=argc-1; i >= 0; --i )
2676 #
if KMP_ARCH_X86_64 && KMP_OS_LINUX
2677 *argv++ = va_arg( *ap,
void * );
2679 *argv++ = va_arg( ap,
void * );
/* Alternative source of arguments: copy them from parent_team's t_argv. */
2682 for( i=0; i < argc; ++i )
2684 argv[i] = parent_team->t.t_argv[i];
2694 for( i=argc-1; i >= 0; --i )
2696 #
if KMP_ARCH_X86_64 && KMP_OS_LINUX
2697 *argv++ = va_arg( *ap,
void * );
2699 *argv++ = va_arg( ap,
void * );
2702 __kmp_invoke_microtask( microtask, gtid, 0, argc, args );
2708 KMP_ASSERT2( exec_master <= 1,
"__kmp_fork_call: unknown parameter exec_master" );
2711 KA_TRACE( 20, (
"__kmp_fork_call: T#%d serial exit\n", gtid ));
2720 KF_TRACE( 10, (
"__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n",
2721 parent_team->t.t_active_level, master_th, master_th->th.th_current_task,
2722 master_th->th.th_current_task->td_icvs.max_active_levels ) );
/* Mark the master's current task as not executing while the region runs. */
2725 master_th->th.th_current_task->td_flags.executing = 0;
2729 if ( !master_th->th.th_team_microtask || level > teams_level )
2733 KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel );
/* nproc ICV for the new team: taken from the nested-nth list entry for the
   next level when that entry exists and differs from the current value. */
2740 int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
2741 if ( ( level + 1 < __kmp_nested_nth.used ) &&
2742 ( __kmp_nested_nth.nth[level + 1] != nthreads_icv ) ) {
2743 nthreads_icv = __kmp_nested_nth.nth[level + 1];
/* proc_bind for this region and the proc_bind ICV for the new team. */
2753 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
2754 kmp_proc_bind_t proc_bind_icv;
2756 if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) {
2757 proc_bind = proc_bind_false;
2758 proc_bind_icv = proc_bind_default;
2761 proc_bind_icv = master_th->th.th_current_task->td_icvs.proc_bind;
2762 if ( proc_bind == proc_bind_default ) {
2767 proc_bind = proc_bind_icv;
2780 if ( ( level + 1 < __kmp_nested_proc_bind.used )
2781 && ( __kmp_nested_proc_bind.bind_types[level + 1] != proc_bind_icv ) ) {
2782 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
2785 proc_bind_icv = proc_bind_default;
/* Clear the per-region proc_bind request on the master thread. */
2792 master_th->th.th_set_proc_bind = proc_bind_default;
/* When either ICV needs overriding, build a modified copy of the current
   task's ICVs for the new team. */
2795 if ( ( nthreads_icv > 0 )
2797 || ( proc_bind_icv != proc_bind_default )
2801 kmp_internal_control_t new_icvs;
2802 copy_icvs( & new_icvs, & master_th->th.th_current_task->td_icvs );
2803 new_icvs.next = NULL;
2805 if ( nthreads_icv > 0 ) {
2806 new_icvs.nproc = nthreads_icv;
2810 if ( proc_bind_icv != proc_bind_default ) {
2811 new_icvs.proc_bind = proc_bind_icv;
2816 KF_TRACE( 10, (
"__kmp_fork_call: before __kmp_allocate_team\n" ) );
2817 team = __kmp_allocate_team(root, nthreads, nthreads,
2826 KF_TRACE( 10, (
"__kmp_fork_call: before __kmp_allocate_team\n" ) );
2827 team = __kmp_allocate_team(root, nthreads, nthreads,
2832 &master_th->th.th_current_task->td_icvs,
2834 parent_team->t.t_set_nproc[master_tid],
2835 parent_team->t.t_set_dynamic[master_tid],
2836 parent_team->t.t_set_nested[master_tid],
2837 parent_team->t.t_set_blocktime[master_tid],
2838 parent_team->t.t_set_bt_intervals[master_tid],
2839 parent_team->t.t_set_bt_set[master_tid],
2844 KF_TRACE( 10, (
"__kmp_fork_call: after __kmp_allocate_team - team = %p\n",
/* Record the master's identity and the region's entry points in the team. */
2848 team->t.t_master_tid = master_tid;
2849 team->t.t_master_this_cons = master_this_cons;
2850 team->t.t_master_last_cons = master_last_cons;
2852 team->t.t_parent = parent_team;
2853 TCW_SYNC_PTR(team->t.t_pkfn, microtask);
2854 team->t.t_invoke = invoker;
2855 team->t.t_ident = loc;
/* Levels are incremented relative to the parent unless this thread has a
   team microtask and level <= teams_level, in which case they are copied. */
2859 if ( !master_th->th.th_team_microtask || level > teams_level ) {
2861 team->t.t_level = parent_team->t.t_level + 1;
2862 team->t.t_active_level = parent_team->t.t_active_level + 1;
2866 team->t.t_level = parent_team->t.t_level;
2867 team->t.t_active_level = parent_team->t.t_active_level;
2870 team->t.t_sched = get__sched_2( parent_team, master_tid );
2872 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
/* With __kmp_inherit_fp_control set, save the master's x87 control word and
   masked MXCSR in the team. */
2873 if ( __kmp_inherit_fp_control ) {
2874 __kmp_store_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
2875 __kmp_store_mxcsr( &team->t.t_mxcsr );
2876 team->t.t_mxcsr &= KMP_X86_MXCSR_MASK;
2877 team->t.t_fp_control_saved = TRUE;
2880 team->t.t_fp_control_saved = FALSE;
/* Switch the master's task team to the new team's task team. */
2884 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
2889 KMP_DEBUG_ASSERT( master_th->th.th_task_team == parent_team->t.t_task_team );
2890 KA_TRACE( 20, (
"__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n",
2891 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
2892 parent_team, team->t.t_task_team, team ) );
2893 master_th->th.th_task_team = team->t.t_task_team;
2894 KMP_DEBUG_ASSERT( ( master_th->th.th_task_team == NULL ) || ( team == root->r.r_hot_team ) ) ;
2896 #endif // OMP_30_ENABLED
2898 KA_TRACE( 20, (
"__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
2899 gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc ));
2900 KMP_DEBUG_ASSERT( team != root->r.r_hot_team ||
2901 ( team->t.t_master_tid == 0 &&
2902 ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) ));
/* Fill the team's argv from ap, or from the parent team's argv. */
2906 argv = (
void**) team -> t.t_argv;
2910 for( i=argc-1; i >= 0; --i )
2912 #
if KMP_ARCH_X86_64 && KMP_OS_LINUX
2913 *argv++ = va_arg( *ap,
void * );
2915 *argv++ = va_arg( ap,
void * );
2919 for( i=0; i < argc; ++i )
2921 argv[i] = team->t.t_parent->t.t_argv[i];
/* Remember whether the root was already active, then mark it active. */
2927 team->t.t_master_active = master_active;
2928 if (!root -> r.r_active)
2929 root -> r.r_active = TRUE;
2931 __kmp_fork_team_threads( root, team, master_th, gtid );
/* Team construction is complete; drop the fork/join lock. */
2934 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
2938 if ( ( __itt_frame_begin_v3_ptr && __kmp_forkjoin_frames ) || KMP_ITT_DEBUG )
2939 __kmp_itt_region_forking( gtid );
2943 KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team );
2946 KF_TRACE( 10, (
"__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n", root, team, master_th, gtid ) );
2949 if ( __itt_stack_caller_create_ptr ) {
2950 team->t.t_stack_id = __kmp_itt_stack_caller_create();
2958 __kmp_internal_fork( loc, gtid, team );
2959 KF_TRACE( 10, (
"__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n", root, team, master_th, gtid ) );
/* With exec_master zero the master does not invoke the microtask here. */
2962 if (! exec_master) {
2963 KA_TRACE( 20, (
"__kmp_fork_call: parallel exit T#%d\n", gtid ));
2968 KA_TRACE( 20, (
"__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n",
2969 gtid, team->t.t_id, team->t.t_pkfn ) );
/* A zero result from the invoker is treated as a fatal assertion failure. */
2971 if (! team->t.t_invoke( gtid )) {
2972 KMP_ASSERT2( 0,
"cannot invoke microtask for MASTER thread" );
2974 KA_TRACE( 20, (
"__kmp_fork_call: T#%d(%d:0) done microtask = %p\n",
2975 gtid, team->t.t_id, team->t.t_pkfn ) );
2978 KA_TRACE( 20, (
"__kmp_fork_call: parallel exit T#%d\n", gtid ));
/*
 * __kmp_join_call: run by the master thread `gtid` to leave the team it is
 * currently executing in and go back to that team's parent.
 *
 *   loc   source location descriptor; stored in the master's th_ident.
 *   gtid  global thread id, used to find the master in __kmp_threads.
 *
 * Statements shown here: __kmp_internal_join() on the team, restoring the
 * master's tid / construct counters / dispatch pointer from values kept in
 * the team, then - with __kmp_forkjoin_lock held - freeing the team,
 * re-pointing the master at the parent team and restoring its task-team
 * state.
 */
2985 __kmp_join_call(
ident_t *loc,
int gtid
2992 kmp_team_t *parent_team;
2993 kmp_info_t *master_th;
2998 KA_TRACE( 20, (
"__kmp_join_call: enter T#%d\n", gtid ));
3001 master_th = __kmp_threads[ gtid ];
3002 root = master_th -> th.th_root;
3003 team = master_th -> th.th_team;
3004 parent_team = team->t.t_parent;
3006 master_th->th.th_ident = loc;
/* Debug-only check: the master's task team must be the team's task team. */
3008 #if OMP_30_ENABLED && KMP_DEBUG
3009 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3010 KA_TRACE( 20, (
"__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n",
3011 __kmp_gtid_from_thread( master_th ), team,
3012 team -> t.t_task_team, master_th->th.th_task_team) );
3013 KMP_DEBUG_ASSERT( master_th->th.th_task_team == team->t.t_task_team );
3015 #endif // OMP_30_ENABLED
/* Serialized team: level bookkeeping for a thread that has th_team_microtask set. */
3017 if( team->t.t_serialized ) {
3019 if ( master_th->th.th_team_microtask ) {
3021 int level = team->t.t_level;
3022 int tlevel = master_th->th.th_teams_level;
3023 if ( level == tlevel ) {
3027 }
else if ( level == tlevel + 1 ) {
3030 team->t.t_serialized++;
/* Kept so that root->r.r_active can be set from it further down. */
3038 master_active = team->t.t_master_active;
3046 __kmp_internal_join( loc, gtid, team );
3051 if ( __itt_stack_caller_create_ptr ) {
3052 __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id );
3055 if ( ( __itt_frame_end_v3_ptr && __kmp_forkjoin_frames ) || KMP_ITT_DEBUG )
3056 __kmp_itt_region_joined( gtid );
/* A non-__kmp_teams_master team exactly one level above th_teams_level. */
3060 if ( master_th->th.th_team_microtask &&
3062 team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
3063 team->t.t_level == master_th->th.th_teams_level + 1 ) {
3070 team->t.t_active_level --;
3071 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
/* Restore the master's per-thread state from the values stored in the team. */
3076 master_th -> th.th_info .ds.ds_tid = team -> t.t_master_tid;
3077 master_th -> th.th_local.this_construct = team -> t.t_master_this_cons;
3078 master_th -> th.th_local.last_construct = team -> t.t_master_last_cons;
3080 master_th -> th.th_dispatch =
3081 & parent_team -> t.t_dispatch[ team -> t.t_master_tid ];
/* Everything from here to the matching release runs under the fork/join lock. */
3087 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
3090 if ( !master_th->th.th_team_microtask || team->t.t_level > master_th->th.th_teams_level )
3094 KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel );
3096 KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 );
3099 KF_TRACE( 10, (
"__kmp_join_call1: T#%d, this_thread=%p team=%p\n",
3100 0, master_th, team ) );
3101 __kmp_pop_current_task_from_thread( master_th );
3102 #endif // OMP_30_ENABLED
3104 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
/* Take the place range recorded in the team back into the master. */
3108 master_th -> th.th_first_place = team -> t.t_first_place;
3109 master_th -> th.th_last_place = team -> t.t_last_place;
3112 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
/* Reload the x87 control word and MXCSR stored in the team, when inheritance is on and they were saved. */
3113 if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) {
3114 __kmp_clear_x87_fpu_status_word();
3115 __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word );
3116 __kmp_load_mxcsr( &team->t.t_mxcsr );
3120 if ( root -> r.r_active != master_active )
3121 root -> r.r_active = master_active;
3123 __kmp_free_team( root, team );
/* The parent team becomes the master's current team again. */
3131 master_th -> th.th_team = parent_team;
3132 master_th -> th.th_team_nproc = parent_team -> t.t_nproc;
3133 master_th -> th.th_team_master = parent_team -> t.t_threads[0];
3134 master_th -> th.th_team_serialized = parent_team -> t.t_serialized;
/*
 * A serialized parent that is neither the master's cached serial team nor the
 * root team replaces the cached serial team; the previous one is freed.
 */
3137 if( parent_team -> t.t_serialized &&
3138 parent_team != master_th->th.th_serial_team &&
3139 parent_team != root->r.r_root_team ) {
3140 __kmp_free_team( root, master_th -> th.th_serial_team );
3141 master_th -> th.th_serial_team = parent_team;
/* Switch back to the parent's task team and, if there is one, pick up its tt_state. */
3145 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3150 if ( ( master_th -> th.th_task_team = parent_team -> t.t_task_team ) != NULL ) {
3151 master_th -> th.th_task_state = master_th -> th.th_task_team -> tt.tt_state;
3153 KA_TRACE( 20, (
"__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
3154 __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team,
3162 master_th->th.th_current_task->td_flags.executing = 1;
3163 #endif // OMP_30_ENABLED
3165 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
3168 KA_TRACE( 20, (
"__kmp_join_call: exit T#%d\n", gtid ));
/*
 * __kmp_save_internal_controls: push a record holding the thread's current
 * internal control values onto its team's t_control_stack_top list, tagged
 * with the team's t_serialized depth.
 *
 *   thread  thread whose controls are recorded.
 *
 * The guards shown test whether the thread's team differs from its serial
 * team, whether t_serialized is greater than 1, and whether the stack is
 * empty or its top entry carries a different serial_nesting_level.
 */
3177 __kmp_save_internal_controls ( kmp_info_t * thread )
3180 if ( thread -> th.th_team != thread -> th.th_serial_team ) {
3183 if (thread -> th.th_team -> t.t_serialized > 1) {
3186 if (thread -> th.th_team -> t.t_control_stack_top == NULL) {
3189 if ( thread -> th.th_team -> t.t_control_stack_top -> serial_nesting_level !=
3190 thread -> th.th_team -> t.t_serialized ) {
3195 kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(
sizeof(kmp_internal_control_t));
/*
 * Fill the record.  NOTE(review): copy_icvs() and the per-field copies below
 * end at an OMP_30_ENABLED #endif - presumably they are the two alternative
 * variants of that conditional; confirm.
 */
3198 copy_icvs( control, & thread->th.th_current_task->td_icvs );
3200 control->nproc = thread->th.th_team->t.t_set_nproc[0];
3201 control->dynamic = thread->th.th_team->t.t_set_dynamic[0];
3202 control->nested = thread->th.th_team->t.t_set_nested[0];
3203 control->blocktime = thread->th.th_team->t.t_set_blocktime[0];
3204 control->bt_intervals = thread->th.th_team->t.t_set_bt_intervals[0];
3205 control->bt_set = thread->th.th_team->t.t_set_bt_set[0];
3206 #endif // OMP_30_ENABLED
3208 control->serial_nesting_level = thread->th.th_team->t.t_serialized;
/* Link the new record in as the new top of the stack. */
3210 control->next = thread -> th.th_team -> t.t_control_stack_top;
3211 thread -> th.th_team -> t.t_control_stack_top = control;
/*
 * __kmp_set_num_threads: set the nproc control of thread `gtid` to new_nth.
 *
 *   new_nth  requested thread count; values above __kmp_max_nth are clamped
 *            to __kmp_max_nth.
 *   gtid     global thread id of the calling thread.
 *
 * The previous controls are saved first via __kmp_save_internal_controls().
 * If the runtime is initialized for parallel work, the thread's root is not
 * active and the hot team holds more than new_nth threads, the surplus hot
 * team threads are released immediately.
 */
3218 __kmp_set_num_threads(
int new_nth,
int gtid )
3223 KF_TRACE( 10, (
"__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth ));
3224 KMP_DEBUG_ASSERT( __kmp_init_serial );
3228 else if (new_nth > __kmp_max_nth)
3229 new_nth = __kmp_max_nth;
3231 thread = __kmp_threads[gtid];
3233 __kmp_save_internal_controls( thread );
3235 set__nproc( thread, new_nth );
3242 root = thread->th.th_root;
3243 if ( __kmp_init_parallel && ( ! root->r.r_active )
3244 && ( root->r.r_hot_team->t.t_nproc > new_nth ) ) {
3245 kmp_team_t *hot_team = root->r.r_hot_team;
/* The hot team is shrunk while holding the fork/join lock. */
3248 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
/* An active task team on the hot team is deactivated and detached. */
3252 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
3253 kmp_task_team_t *task_team = hot_team->t.t_task_team;
3254 if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
3261 KMP_DEBUG_ASSERT( hot_team->t.t_nproc > 1 );
3262 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
3265 KA_TRACE( 20, (
"__kmp_set_num_threads: setting task_team %p to NULL\n",
3266 &hot_team->t.t_task_team ) );
3267 hot_team->t.t_task_team = NULL;
3270 KMP_DEBUG_ASSERT( task_team == NULL );
3273 #endif // OMP_30_ENABLED
/* Release threads with index in [new_nth, t_nproc) and record the new size. */
3278 for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) {
3279 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
3280 __kmp_free_thread( hot_team->t.t_threads[f] );
3281 hot_team->t.t_threads[f] = NULL;
3283 hot_team->t.t_nproc = new_nth;
3286 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
/* Tell the threads that stay in the hot team about the new team size. */
3291 for( f=0 ; f < new_nth; f++ ) {
3292 KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL );
3293 hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
/* Mark the hot team's size as changed (t_size_changed is set to -1). */
3297 hot_team -> t.t_size_changed = -1;
/*
 * __kmp_set_max_active_levels: set the max-active-levels control of thread
 * `gtid` after validating the requested value.
 *
 *   gtid               global thread id of the calling thread.
 *   max_active_levels  requested value.
 *
 * A negative value produces the ActiveLevelsNegative warning and, per the
 * trace message, the call is ignored.  A value above
 * KMP_MAX_ACTIVE_LEVELS_LIMIT produces the ActiveLevelsExceedLimit warning
 * and is replaced by the limit.  The previous controls are saved with
 * __kmp_save_internal_controls() before the new value is stored.
 */
3306 __kmp_set_max_active_levels(
int gtid,
int max_active_levels )
3310 KF_TRACE( 10, (
"__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
3311 KMP_DEBUG_ASSERT( __kmp_init_serial );
3314 if( max_active_levels < 0 ) {
3315 KMP_WARNING( ActiveLevelsNegative, max_active_levels );
3319 KF_TRACE( 10, (
"__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
/* Values up to and including the limit are accepted unchanged. */
3322 if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) {
3326 KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT );
3327 max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
3332 KF_TRACE( 10, (
"__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) );
3334 thread = __kmp_threads[ gtid ];
3336 __kmp_save_internal_controls( thread );
3338 set__max_active_levels( thread, max_active_levels );
/*
 * __kmp_get_max_active_levels: return the max_active_levels value stored in
 * the ICVs of the current task of thread `gtid`.
 *
 *   gtid  global thread id of the calling thread.
 *
 * The thread must have a current task (checked by a debug assertion).
 */
3344 __kmp_get_max_active_levels(
int gtid )
3348 KF_TRACE( 10, (
"__kmp_get_max_active_levels: thread %d\n", gtid ) );
3349 KMP_DEBUG_ASSERT( __kmp_init_serial );
3351 thread = __kmp_threads[ gtid ];
3352 KMP_DEBUG_ASSERT( thread -> th.th_current_task );
3353 KF_TRACE( 10, (
"__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n",
3354 gtid, thread -> th.th_current_task, thread -> th.th_current_task -> td_icvs.max_active_levels ) );
3355 return thread -> th.th_current_task -> td_icvs.max_active_levels;
/*
 * __kmp_set_schedule: store a schedule kind and chunk in the ICVs of the
 * current task of thread `gtid`.
 *
 *   gtid   global thread id of the calling thread.
 *   kind   requested schedule kind.
 *   chunk  requested chunk size.
 *
 * A kind outside the accepted ranges is reported (ScheduleKindOutOfRange) and
 * replaced by kmp_sched_default.  The previous controls are saved with
 * __kmp_save_internal_controls() before anything is changed.
 */
3360 __kmp_set_schedule(
int gtid, kmp_sched_t kind,
int chunk )
3365 KF_TRACE( 10, (
"__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (
int)kind, chunk ));
3366 KMP_DEBUG_ASSERT( __kmp_init_serial );
/*
 * Rejected: anything at or outside the outer bounds, and anything lying
 * between the end of the standard kinds and the start of the extended ones.
 */
3372 if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
3373 ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) )
3378 KMP_MSG( ScheduleKindOutOfRange, kind ),
3379 KMP_HNT( DefaultScheduleKindUsed,
"static, no chunk" ),
3382 kind = kmp_sched_default;
3386 thread = __kmp_threads[ gtid ];
3388 __kmp_save_internal_controls( thread );
/*
 * Translate the kind through __kmp_sch_map.  Static with a chunk below
 * KMP_DEFAULT_CHUNK is stored as kmp_sch_static.  The second index
 * expression places extended kinds after the standard ones in the table.
 */
3390 if ( kind < kmp_sched_upper_std ) {
3391 if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) {
3394 thread -> th.th_current_task -> td_icvs.sched.r_sched_type =
kmp_sch_static;
3396 thread -> th.th_current_task -> td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ];
3400 thread -> th.th_current_task -> td_icvs.sched.r_sched_type =
3401 __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ];
/* kmp_sched_auto stores KMP_DEFAULT_CHUNK; the other assignment stores the caller's chunk. */
3403 if ( kind == kmp_sched_auto ) {
3405 thread -> th.th_current_task -> td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
3407 thread -> th.th_current_task -> td_icvs.sched.chunk = chunk;
/*
 * __kmp_get_schedule: report the schedule stored in the ICVs of the current
 * task of thread `gtid`.
 *
 *   gtid   global thread id of the calling thread.
 *   kind   out: schedule kind corresponding to the stored r_sched_type.
 *   chunk  out: the stored chunk value.
 *
 * The switch folds several internal schedule types onto one reported kind
 * (e.g. both guided variants onto kmp_sched_guided).  A type with no mapping
 * ends in KMP_FATAL( UnknownSchedulingType ).
 */
3413 __kmp_get_schedule(
int gtid, kmp_sched_t * kind,
int * chunk )
3419 KF_TRACE( 10, (
"__kmp_get_schedule: thread %d\n", gtid ));
3420 KMP_DEBUG_ASSERT( __kmp_init_serial );
3422 thread = __kmp_threads[ gtid ];
3425 th_type = thread -> th.th_current_task -> td_icvs.sched.r_sched_type;
3427 switch ( th_type ) {
3429 case kmp_sch_static_greedy:
3430 case kmp_sch_static_balanced:
3431 *kind = kmp_sched_static;
3434 case kmp_sch_static_chunked:
3435 *kind = kmp_sched_static;
3437 case kmp_sch_dynamic_chunked:
3438 *kind = kmp_sched_dynamic;
3441 case kmp_sch_guided_iterative_chunked:
3442 case kmp_sch_guided_analytical_chunked:
3443 *kind = kmp_sched_guided;
3446 *kind = kmp_sched_auto;
3448 case kmp_sch_trapezoidal:
3449 *kind = kmp_sched_trapezoidal;
3457 KMP_FATAL( UnknownSchedulingType, th_type );
3461 *chunk = thread -> th.th_current_task -> td_icvs.sched.chunk;
/*
 * __kmp_get_ancestor_thread_num: thread number, at nesting level `level`, of
 * the ancestor of thread `gtid`.
 *
 *   gtid   global thread id of the calling thread.
 *   level  requested nesting level.
 *
 * Returns 0 for level 0, and -1 for a negative level or a level deeper than
 * the current team's t_level.  When level equals the current t_level the
 * caller's own tid is returned.  Otherwise the code walks up through
 * t_parent, using t_serialized to account for serialized levels that share
 * one team structure.
 */
3465 __kmp_get_ancestor_thread_num(
int gtid,
int level ) {
3471 KF_TRACE( 10, (
"__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level ));
3472 KMP_DEBUG_ASSERT( __kmp_init_serial );
3475 if( level == 0 )
return 0;
3476 if( level < 0 )
return -1;
3477 thr = __kmp_threads[ gtid ];
3478 team = thr->th.th_team;
3479 ii = team -> t.t_level;
3480 if( level > ii )
return -1;
/* Adjustment applied when the thread has th_team_microtask set and level <= th_teams_level. */
3483 if( thr->th.th_team_microtask ) {
3485 int tlevel = thr->th.th_teams_level;
3486 if( level <= tlevel ) {
3487 KMP_DEBUG_ASSERT( ii >= tlevel );
3489 if ( ii == tlevel ) {
3498 if( ii == level )
return __kmp_tid_from_gtid( gtid );
3500 dd = team -> t.t_serialized;
/* dd counts serialized levels left in the current team; ii is the level being visited. */
3504 for( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
3507 if( ( team -> t.t_serialized ) && ( !dd ) ) {
3508 team = team->t.t_parent;
3512 team = team->t.t_parent;
3513 dd = team -> t.t_serialized;
/* With more than one serialized level left the answer is 0, otherwise the team's master tid. */
3518 return ( dd > 1 ) ? ( 0 ) : ( team -> t.t_master_tid );
/*
 * __kmp_get_team_size: number of threads in the team that thread `gtid`
 * belongs to (directly or through an ancestor) at nesting level `level`.
 *
 *   gtid   global thread id of the calling thread.
 *   level  requested nesting level.
 *
 * Returns 1 for level 0, and -1 for a negative level or a level deeper than
 * the current team's t_level.  Otherwise the code walks up through t_parent,
 * using t_serialized to account for serialized levels that share one team
 * structure, and returns that team's t_nproc.
 */
3522 __kmp_get_team_size(
int gtid,
int level ) {
3528 KF_TRACE( 10, (
"__kmp_get_team_size: thread %d %d\n", gtid, level ));
3529 KMP_DEBUG_ASSERT( __kmp_init_serial );
3532 if( level == 0 )
return 1;
3533 if( level < 0 )
return -1;
3534 thr = __kmp_threads[ gtid ];
3535 team = thr->th.th_team;
3536 ii = team -> t.t_level;
3537 if( level > ii )
return -1;
/* Adjustment applied when the thread has th_team_microtask set and level <= th_teams_level. */
3540 if( thr->th.th_team_microtask ) {
3542 int tlevel = thr->th.th_teams_level;
3543 if( level <= tlevel ) {
3544 KMP_DEBUG_ASSERT( ii >= tlevel );
3546 if ( ii == tlevel ) {
/* dd counts serialized levels left in the current team; ii is the level being visited. */
3557 for( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- )
3560 if( team -> t.t_serialized && ( !dd ) ) {
3561 team = team->t.t_parent;
3565 team = team->t.t_parent;
3570 return team -> t.t_nproc;
3573 #endif // OMP_30_ENABLED
/*
 * __kmp_get_schedule_global: build a kmp_r_sched_t from the global schedule
 * settings.
 *
 * r_sched_type is taken from one of __kmp_static, __kmp_guided or
 * __kmp_sched (the conditions choosing between them precede each
 * assignment).  The chunk is __kmp_chunk, except that a value below
 * KMP_DEFAULT_CHUNK is replaced by KMP_DEFAULT_CHUNK.
 */
3576 __kmp_get_schedule_global() {
3580 kmp_r_sched_t r_sched;
3586 r_sched.r_sched_type = __kmp_static;
3588 r_sched.r_sched_type = __kmp_guided;
3590 r_sched.r_sched_type = __kmp_sched;
3593 if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) {
3594 r_sched.chunk = KMP_DEFAULT_CHUNK;
3596 r_sched.chunk = __kmp_chunk;
/*
 * __kmp_alloc_argv_entries: make sure team->t.t_argv can hold `argc` entries.
 *
 *   argc     number of argument slots needed.
 *   team     team whose t_argv / t_max_argc are updated (must be non-NULL).
 *   realloc  non-zero when the team already owns an argv array; in that case
 *            nothing is done if argc fits in the current t_max_argc, and the
 *            old array is freed before a new one is allocated.
 *
 * A heap array gets KMP_MIN_MALLOC_ARGV_ENTRIES slots when argc is at most
 * half of that, otherwise 2 * argc slots.
 */
3611 __kmp_alloc_argv_entries(
int argc, kmp_team_t *team,
int realloc )
3614 KMP_DEBUG_ASSERT( team );
3615 if( !realloc || argc > team -> t.t_max_argc ) {
3617 KA_TRACE( 100, (
"__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n",
3618 team->t.t_id, argc, ( realloc ) ? team->t.t_max_argc : 0 ));
3619 #if (KMP_PERF_V106 == KMP_ON)
/* The inline buffer lives inside the team structure and must never be freed. */
3621 if ( realloc && team -> t.t_argv != &team -> t.t_inline_argv[0] )
3622 __kmp_free( (
void *) team -> t.t_argv );
/* Small argument counts use the team's inline buffer instead of a heap array. */
3624 if ( argc <= KMP_INLINE_ARGV_ENTRIES ) {
3626 team -> t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
3627 KA_TRACE( 100, (
"__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n",
3628 team->t.t_id, team->t.t_max_argc ));
3629 team -> t.t_argv = &team -> t.t_inline_argv[0];
3630 if ( __kmp_storage_map ) {
3631 __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0],
3632 &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
3633 (
sizeof(
void *) * KMP_INLINE_ARGV_ENTRIES),
3634 "team_%d.t_inline_argv",
/* Larger counts get a page-allocated array. */
3639 team -> t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
3640 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
3641 KA_TRACE( 100, (
"__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
3642 team->t.t_id, team->t.t_max_argc ));
3643 team -> t.t_argv = (
void**) __kmp_page_allocate(
sizeof(
void*) * team->t.t_max_argc );
3644 if ( __kmp_storage_map ) {
3645 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
3646 sizeof(
void *) * team->t.t_max_argc,
"team_%d.t_argv",
/*
 * NOTE(review): the statements below repeat the heap path without any inline
 * buffer - presumably the variant built when KMP_PERF_V106 is not KMP_ON;
 * confirm against the preprocessor structure.
 */
3652 __kmp_free( (
void*) team -> t.t_argv );
3653 team -> t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ?
3654 KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc;
3655 KA_TRACE( 100, (
"__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n",
3656 team->t.t_id, team->t.t_max_argc ));
3657 team -> t.t_argv = __kmp_page_allocate(
sizeof(
void*) * team->t.t_max_argc );
3658 if ( __kmp_storage_map ) {
3659 __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc],
3660 sizeof(
void *) * team->t.t_max_argc,
"team_%d.t_argv", team->t.t_id );
/*
 * __kmp_allocate_team_arrays: allocate the per-team arrays of `team` for up
 * to `max_nth` threads and record max_nth in t_max_nproc.
 *
 *   team     team whose array pointers are (re)assigned.
 *   max_nth  capacity, in threads, of the per-thread arrays.
 *
 * The dispatch buffer array has KMP_MAX_DISP_BUF entries when max_nth > 1 and
 * 2 entries otherwise; each entry's buffer_index is set to its position.
 * With KMP_USE_POOLED_ALLOC all arrays are carved out of one __kmp_allocate()
 * block; otherwise every array is allocated separately.
 */
3668 __kmp_allocate_team_arrays(kmp_team_t *team,
int max_nth)
3671 int num_disp_buff = max_nth > 1 ? KMP_MAX_DISP_BUF : 2;
3672 #if KMP_USE_POOLED_ALLOC
/* Pooled variant: one block, then `ptr` is advanced past each sub-array in turn. */
3673 char *ptr = __kmp_allocate(max_nth *
3674 (
sizeof(kmp_info_t*) +
sizeof(dispatch_shared_info_t)*2
3675 +
sizeof(kmp_disp_t) +
sizeof(
int)*6
3678 +
sizeof(kmp_r_sched_t)
3679 +
sizeof(kmp_taskdata_t)
3683 team -> t.t_threads = (kmp_info_t**) ptr; ptr +=
sizeof(kmp_info_t*) * max_nth;
3684 team -> t.t_disp_buffer = (dispatch_shared_info_t*) ptr;
3685 ptr +=
sizeof(dispatch_shared_info_t) * num_disp_buff;
3686 team -> t.t_dispatch = (kmp_disp_t*) ptr; ptr +=
sizeof(kmp_disp_t) * max_nth;
3687 team -> t.t_set_nproc = (
int*) ptr; ptr +=
sizeof(int) * max_nth;
3688 team -> t.t_set_dynamic = (
int*) ptr; ptr +=
sizeof(int) * max_nth;
3689 team -> t.t_set_nested = (
int*) ptr; ptr +=
sizeof(int) * max_nth;
3690 team -> t.t_set_blocktime = (
int*) ptr; ptr +=
sizeof(int) * max_nth;
3691 team -> t.t_set_bt_intervals = (
int*) ptr; ptr +=
sizeof(int) * max_nth;
3692 team -> t.t_set_bt_set = (
int*) ptr;
3694 ptr +=
sizeof(int) * max_nth;
3696 team -> t.t_set_sched = (kmp_r_sched_t*) ptr;
3697 ptr +=
sizeof(kmp_r_sched_t) * max_nth;
3698 team -> t.t_implicit_task_taskdata = (kmp_taskdata_t*) ptr;
3699 ptr +=
sizeof(kmp_taskdata_t) * max_nth;
3700 # endif // OMP_30_ENABLED
/* Non-pooled variant: each array comes from its own __kmp_allocate() call. */
3703 team -> t.t_threads = (kmp_info_t**) __kmp_allocate(
sizeof(kmp_info_t*) * max_nth );
3704 team -> t.t_disp_buffer = (dispatch_shared_info_t*)
3705 __kmp_allocate(
sizeof(dispatch_shared_info_t) * num_disp_buff );
3706 team -> t.t_dispatch = (kmp_disp_t*) __kmp_allocate(
sizeof(kmp_disp_t) * max_nth );
3710 team -> t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate(
sizeof(kmp_taskdata_t) * max_nth );
3712 team -> t.t_set_nproc = (
int*) __kmp_allocate(
sizeof(
int) * max_nth );
3713 team -> t.t_set_dynamic = (
int*) __kmp_allocate(
sizeof(
int) * max_nth );
3714 team -> t.t_set_nested = (
int*) __kmp_allocate(
sizeof(
int) * max_nth );
3715 team -> t.t_set_blocktime = (
int*) __kmp_allocate(
sizeof(
int) * max_nth );
3716 team -> t.t_set_bt_intervals = (
int*) __kmp_allocate(
sizeof(
int) * max_nth );
3717 team -> t.t_set_bt_set = (
int*) __kmp_allocate(
sizeof(
int) * max_nth );
3718 # endif // OMP_30_ENABLED
3720 team->t.t_max_nproc = max_nth;
/* Number the dispatch buffers 0 .. num_disp_buff-1. */
3723 for(i = 0 ; i < num_disp_buff; ++i)
3724 team -> t.t_disp_buffer[i].buffer_index = i;
/*
 * __kmp_free_team_arrays: release the per-team arrays of `team` and clear the
 * corresponding pointers.
 *
 *   team  team whose arrays are freed.
 *
 * Any non-NULL th_disp_buffer held by a t_dispatch entry is freed first.
 * t_threads is always freed; the remaining arrays are freed individually
 * only when KMP_USE_POOLED_ALLOC is off.
 */
3728 __kmp_free_team_arrays(kmp_team_t *team) {
/* Per-thread dispatch buffers go first, while t_dispatch is still valid. */
3731 for ( i = 0; i < team->t.t_max_nproc; ++ i ) {
3732 if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) {
3733 __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer );
3734 team->t.t_dispatch[ i ].th_disp_buffer = NULL;
3737 __kmp_free(team->t.t_threads);
3738 #if !KMP_USE_POOLED_ALLOC
3739 __kmp_free(team->t.t_disp_buffer);
3740 __kmp_free(team->t.t_dispatch);
3744 __kmp_free(team->t.t_implicit_task_taskdata);
3746 __kmp_free(team->t.t_set_nproc);
3747 __kmp_free(team->t.t_set_dynamic);
3748 __kmp_free(team->t.t_set_nested);
3749 __kmp_free(team->t.t_set_blocktime);
3750 __kmp_free(team->t.t_set_bt_intervals);
3751 __kmp_free(team->t.t_set_bt_set);
3752 # endif // OMP_30_ENABLED
/* Clear the pointers so no stale addresses remain in the team. */
3754 team->t.t_threads = NULL;
3755 team->t.t_disp_buffer = NULL;
3756 team->t.t_dispatch = NULL;
3760 team->t.t_implicit_task_taskdata = 0;
3762 team->t.t_set_nproc = 0;
3763 team->t.t_set_dynamic = 0;
3764 team->t.t_set_nested = 0;
3765 team->t.t_set_blocktime = 0;
3766 team->t.t_set_bt_intervals = 0;
3767 team->t.t_set_bt_set = 0;
3768 #endif // OMP_30_ENABLED
/*
 * __kmp_reallocate_team_arrays: replace the per-team arrays of `team` with
 * new ones sized for `max_nth` threads, keeping the thread pointers.
 *
 *   team     team whose arrays are replaced.
 *   max_nth  new capacity passed to __kmp_allocate_team_arrays().
 *
 * Only t_threads is carried over: the first t_nproc pointers of the old
 * array are copied into the new one.  All other arrays are freed and
 * allocated afresh, so their previous contents are not preserved.
 */
3772 __kmp_reallocate_team_arrays(kmp_team_t *team,
int max_nth) {
/* Keep the old thread array alive until its contents have been copied. */
3773 kmp_info_t **oldThreads = team->t.t_threads;
3775 #if !KMP_USE_POOLED_ALLOC
3776 __kmp_free(team->t.t_disp_buffer);
3777 __kmp_free(team->t.t_dispatch);
3781 __kmp_free(team->t.t_implicit_task_taskdata);
3783 __kmp_free(team->t.t_set_nproc);
3784 __kmp_free(team->t.t_set_dynamic);
3785 __kmp_free(team->t.t_set_nested);
3786 __kmp_free(team->t.t_set_blocktime);
3787 __kmp_free(team->t.t_set_bt_intervals);
3788 __kmp_free(team->t.t_set_bt_set);
3789 # endif // OMP_30_ENABLED
3791 __kmp_allocate_team_arrays(team, max_nth);
3793 memcpy(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*));
3795 __kmp_free(oldThreads);
/*
 * __kmp_get_global_icvs: build a kmp_internal_control_t initialized from the
 * runtime's global default settings.
 *
 * The initializer is positional; the values shown come from
 * __kmp_global.g.g_dynamic, __kmp_dflt_team_nth, __kmp_dflt_blocktime,
 * __kmp_env_blocktime, __kmp_dflt_max_active_levels and the first entry of
 * __kmp_nested_proc_bind (asserted to be non-empty).
 * NOTE(review): the order must match the field order of
 * kmp_internal_control_t - confirm when that structure changes.
 */
3798 static kmp_internal_control_t
3799 __kmp_get_global_icvs(
void ) {
3802 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
3806 KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 );
3809 kmp_internal_control_t g_icvs = {
3812 __kmp_global.g.g_dynamic,
3813 __kmp_dflt_team_nth,
3816 __kmp_dflt_blocktime,
3818 __kmp_env_blocktime,
3820 __kmp_dflt_max_active_levels,
3824 __kmp_nested_proc_bind.bind_types[0],
/*
 * __kmp_get_x_global_icvs: build a kmp_internal_control_t from the control
 * values currently associated with thread 0 of `team`.
 *
 *   team  team to read the values from.
 *
 * Two variants are present.  One copies the ICVs of the current task of
 * team->t.t_threads[0] with copy_icvs() and sets serial_nesting_level to 0
 * and next to NULL.  The other uses a positional initializer fed from the
 * team's t_set_* arrays at index 0.
 * NOTE(review): presumably these are the two sides of the OMP_30_ENABLED
 * conditional that ends below; confirm.
 */
3832 static kmp_internal_control_t
3833 __kmp_get_x_global_icvs(
const kmp_team_t *team ) {
3836 kmp_internal_control_t gx_icvs;
3837 gx_icvs.serial_nesting_level = 0;
3838 copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs );
3839 gx_icvs.next = NULL;
3841 kmp_internal_control_t gx_icvs =
3844 team->t.t_set_nested[0],
3845 team->t.t_set_dynamic[0],
3846 team->t.t_set_nproc[0],
3847 team->t.t_set_blocktime[0],
3848 team->t.t_set_bt_intervals[0],
3849 team->t.t_set_bt_set[0],
3852 #endif // OMP_30_ENABLED
/*
 * __kmp_initialize_root: set up a root structure together with its root team
 * and its hot team.
 *
 *   root  root to initialize; must be non-NULL and must not have r_begin set.
 *
 * The root's flags and counters are reset, then two teams are obtained from
 * __kmp_allocate_team(): the root team (t_nproc 1, t_serialized 1) and the
 * hot team (t_nproc 1, parent = root team, all thread slots cleared).  Both
 * teams receive the global schedule returned by __kmp_get_schedule_global().
 */
3858 __kmp_initialize_root( kmp_root_t *root )
3861 kmp_team_t *root_team;
3862 kmp_team_t *hot_team;
3863 size_t disp_size, dispatch_size, bar_size;
3864 int hot_team_max_nth;
3866 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
3867 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3868 #endif // OMP_30_ENABLED
3869 KMP_DEBUG_ASSERT( root );
3870 KMP_ASSERT( ! root->r.r_begin );
/* Reset the root's own state. */
3873 __kmp_init_lock( &root->r.r_begin_lock );
3874 root -> r.r_begin = FALSE;
3875 root -> r.r_active = FALSE;
3876 root -> r.r_in_parallel = 0;
3877 root -> r.r_blocktime = __kmp_dflt_blocktime;
3878 root -> r.r_nested = __kmp_dflt_nested;
/* Root team. */
3882 KF_TRACE( 10, (
"__kmp_initialize_root: before root_team\n" ) );
3884 __kmp_allocate_team(
3889 __kmp_nested_proc_bind.bind_types[0],
3894 __kmp_dflt_team_nth_ub,
3895 __kmp_global.g.g_dynamic,
3897 __kmp_dflt_blocktime,
3899 __kmp_env_blocktime,
3904 KF_TRACE( 10, (
"__kmp_initialize_root: after root_team = %p\n", root_team ) );
3906 root -> r.r_root_team = root_team;
3907 root_team -> t.t_control_stack_top = NULL;
3910 root_team -> t.t_threads[0] = NULL;
3911 root_team -> t.t_nproc = 1;
3912 root_team -> t.t_serialized = 1;
3915 root_team -> t.t_sched.r_sched_type = r_sched.r_sched_type;
3916 root_team -> t.t_sched.chunk = r_sched.chunk;
3917 #endif // OMP_30_ENABLED
3918 KA_TRACE( 20, (
"__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
3919 root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
/* Hot team. */
3923 KF_TRACE( 10, (
"__kmp_initialize_root: before hot_team\n" ) );
3925 __kmp_allocate_team(
3928 __kmp_dflt_team_nth_ub * 2,
3930 __kmp_nested_proc_bind.bind_types[0],
3935 __kmp_dflt_team_nth_ub,
3936 __kmp_global.g.g_dynamic,
3938 __kmp_dflt_blocktime,
3940 __kmp_env_blocktime,
3944 KF_TRACE( 10, (
"__kmp_initialize_root: after hot_team = %p\n", hot_team ) );
3946 root -> r.r_hot_team = hot_team;
/*
 * NOTE(review): this resets root_team's control stack a second time; the hot
 * team's t_control_stack_top is not assigned in this routine.  Possibly
 * hot_team was intended - confirm before changing.
 */
3947 root_team -> t.t_control_stack_top = NULL;
3950 hot_team -> t.t_parent = root_team;
/* Clear every thread slot of the hot team up to its allocated capacity. */
3953 hot_team_max_nth = hot_team->t.t_max_nproc;
3954 for ( f = 0; f < hot_team_max_nth; ++ f ) {
3955 hot_team -> t.t_threads[ f ] = NULL;
3957 hot_team -> t.t_nproc = 1;
3960 hot_team -> t.t_sched.r_sched_type = r_sched.r_sched_type;
3961 hot_team -> t.t_sched.chunk = r_sched.chunk;
3962 #endif // OMP_30_ENABLED
3964 hot_team -> t.t_size_changed = 0;
/*
 * Node of the singly linked team list built by
 * __kmp_print_structure_team_accum() and consumed by __kmp_print_structure().
 */
3972 typedef struct kmp_team_list_item {
/* Team referenced by this node (read-only). */
3973 kmp_team_p
const * entry;
/* Next node in the list. */
3974 struct kmp_team_list_item * next;
3975 } kmp_team_list_item_t;
/* A list is handled through a pointer to its first node. */
3976 typedef kmp_team_list_item_t * kmp_team_list_t;
/*
 * __kmp_print_structure_team_accum: add `team` to `list`, after first adding
 * (recursively) its parent team and the team that follows it in the pool.
 *
 *   list  list to extend; must be non-NULL.
 *   team  team to add; a NULL team is checked for before any dereference.
 *
 * The list is searched for an existing entry for `team` first.  The insertion
 * point is then located by comparing t_id values, so entries are kept in t_id
 * order.  New nodes are obtained with KMP_INTERNAL_MALLOC.
 */
3980 __kmp_print_structure_team_accum(
3981 kmp_team_list_t list,
3982 kmp_team_p
const * team
3992 KMP_DEBUG_ASSERT( list != NULL );
3993 if ( team == NULL ) {
3997 __kmp_print_structure_team_accum( list, team->t.t_parent );
3998 __kmp_print_structure_team_accum( list, team->t.t_next_pool );
/* Look for an existing entry for this team. */
4002 while ( l->next != NULL && l->entry != team ) {
4005 if ( l->next != NULL ) {
/* Find the position that keeps the list ordered by t_id. */
4011 while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) {
4017 kmp_team_list_item_t * item =
4018 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof( kmp_team_list_item_t ) );
/*
 * __kmp_print_structure_team: print `title` followed by a one-line
 * description of `team`: its t_id in hex and its address, or " - (nil)" when
 * team is NULL.
 */
4027 __kmp_print_structure_team(
4029 kmp_team_p
const * team
4032 __kmp_printf(
"%s", title );
4033 if ( team != NULL ) {
4034 __kmp_printf(
"%2x %p\n", team->t.t_id, team );
4036 __kmp_printf(
" - (nil)\n" );
/*
 * __kmp_print_structure_thread: print `title` followed by a one-line
 * description of `thread`: its global thread id and its address, or
 * " - (nil)" when thread is NULL.
 */
4041 __kmp_print_structure_thread(
4043 kmp_info_p
const * thread
4046 __kmp_printf(
"%s", title );
4047 if ( thread != NULL ) {
4048 __kmp_printf(
"%2d %p\n", thread->th.th_info.ds.ds_gtid, thread );
4050 __kmp_printf(
" - (nil)\n" );
/*
 * __kmp_print_structure: dump the runtime's thread / root / team structure
 * through __kmp_printf().
 *
 * Sections printed, in order: the global thread table, every registered
 * thread, every root ("Ubers"), every team reachable from those threads and
 * roots, and the heads of the thread and team pools.  Teams are collected
 * into a temporary list with __kmp_print_structure_team_accum() while threads
 * and roots are printed; the list is freed before returning.
 */
4055 __kmp_print_structure(
4059 kmp_team_list_t list;
4062 list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof( kmp_team_list_item_t ) );
/* Section 1: raw contents of the __kmp_threads and __kmp_root tables. */
4066 __kmp_printf(
"\n------------------------------\nGlobal Thread Table\n------------------------------\n" );
4069 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
4070 __kmp_printf(
"%2d", gtid );
4071 if ( __kmp_threads != NULL ) {
4072 __kmp_printf(
" %p", __kmp_threads[ gtid ] );
4074 if ( __kmp_root != NULL ) {
4075 __kmp_printf(
" %p", __kmp_root[ gtid ] );
4077 __kmp_printf(
"\n" );
/* Section 2: per-thread details; each thread's teams are added to the list. */
4082 __kmp_printf(
"\n------------------------------\nThreads\n------------------------------\n" );
4083 if ( __kmp_threads != NULL ) {
4085 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
4086 kmp_info_t
const * thread = __kmp_threads[ gtid ];
4087 if ( thread != NULL ) {
4088 __kmp_printf(
"GTID %2d %p:\n", gtid, thread );
4089 __kmp_printf(
" Our Root: %p\n", thread->th.th_root );
4090 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team );
4091 __kmp_print_structure_team(
" Serial Team: ", thread->th.th_serial_team );
4092 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc );
4093 __kmp_print_structure_thread(
" Master: ", thread->th.th_team_master );
4094 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized );
4095 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc );
4097 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind );
4099 __kmp_print_structure_thread(
" Next in pool: ", thread->th.th_next_pool );
4100 __kmp_printf(
"\n" );
4101 __kmp_print_structure_team_accum( list, thread->th.th_team );
4102 __kmp_print_structure_team_accum( list, thread->th.th_serial_team );
4106 __kmp_printf(
"Threads array is not allocated.\n" );
/* Section 3: per-root details; each root's teams are added to the list. */
4110 __kmp_printf(
"\n------------------------------\nUbers\n------------------------------\n" );
4111 if ( __kmp_root != NULL ) {
4113 for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) {
4114 kmp_root_t
const * root = __kmp_root[ gtid ];
4115 if ( root != NULL ) {
4116 __kmp_printf(
"GTID %2d %p:\n", gtid, root );
4117 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team );
4118 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team );
4119 __kmp_print_structure_thread(
" Uber Thread: ", root->r.r_uber_thread );
4120 __kmp_printf(
" Active?: %2d\n", root->r.r_active );
4121 __kmp_printf(
" Nested?: %2d\n", root->r.r_nested );
4122 __kmp_printf(
" In Parallel: %2d\n", root->r.r_in_parallel );
4123 __kmp_printf(
"\n" );
4124 __kmp_print_structure_team_accum( list, root->r.r_root_team );
4125 __kmp_print_structure_team_accum( list, root->r.r_hot_team );
4129 __kmp_printf(
"Ubers array is not allocated.\n" );
/*
 * Section 4: every collected team.  The loop stops at the node whose next
 * pointer is NULL, so that last node is not printed.
 */
4132 __kmp_printf(
"\n------------------------------\nTeams\n------------------------------\n" );
4133 while ( list->next != NULL ) {
4134 kmp_team_p
const * team = list->entry;
4136 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team );
4137 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent );
4138 __kmp_printf(
" Master TID: %2d\n", team->t.t_master_tid );
4139 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc );
4140 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized );
4141 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc );
4142 for ( i = 0; i < team->t.t_nproc; ++ i ) {
4143 __kmp_printf(
" Thread %2d: ", i );
4144 __kmp_print_structure_thread(
"", team->t.t_threads[ i ] );
4146 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool );
4147 __kmp_printf(
"\n" );
/* Section 5: heads of the free thread pool and the free team pool. */
4152 __kmp_printf(
"\n------------------------------\nPools\n------------------------------\n" );
4153 __kmp_print_structure_thread(
"Thread pool: ", (kmp_info_t *)__kmp_thread_pool );
4154 __kmp_print_structure_team(
"Team pool: ", (kmp_team_t *)__kmp_team_pool );
4155 __kmp_printf(
"\n" );
/* Release the temporary list, node by node. */
4158 while ( list != NULL ) {
4159 kmp_team_list_item_t * item = list;
4161 KMP_INTERNAL_FREE( item );
/*
 * Table of 64 multipliers for the per-thread random number generator.
 * __kmp_init_random() selects one entry per thread (thread id modulo the
 * table size) and stores it in th_a; __kmp_get_random() then uses it as the
 * multiplier of its recurrence.
 */
4173 static const unsigned __kmp_primes[] = {
4174 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5,
4175 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b,
4176 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
4177 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b,
4178 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801,
4179 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
4180 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed,
4181 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b,
4182 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
4183 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7,
4184 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7,
4185 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
4186 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b,
4187 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b,
4188 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
4189 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f
/*
 * __kmp_get_random: produce the next pseudo-random value for `thread`.
 *
 *   thread  thread whose generator state (th_x, th_a) is used and updated.
 *
 * The result `r` is the upper 16 bits of the current state th_x.  The state
 * is then advanced with the recurrence x = x * th_a + 1 in unsigned
 * arithmetic.
 */
4196 __kmp_get_random( kmp_info_t * thread )
4198 unsigned x = thread -> th.th_x;
4199 unsigned short r = x>>16;
4201 thread -> th.th_x = x*thread->th.th_a+1;
4203 KA_TRACE(30, (
"__kmp_get_random: THREAD: %d, RETURN: %u\n",
4204 thread->th.th_info.ds.ds_tid, r) );
/*
 * __kmp_init_random: initialize the random number generator state of
 * `thread`.
 *
 *   thread  thread whose th_a and th_x are set.
 *
 * The seed is the thread's tid within its team (ds_tid).  The multiplier
 * th_a is the __kmp_primes entry at index seed modulo the table size, and the
 * initial state is th_x = (seed + 1) * th_a + 1.
 */
4212 __kmp_init_random( kmp_info_t * thread )
4214 unsigned seed = thread->th.th_info.ds.ds_tid;
4216 thread -> th.th_a = __kmp_primes[seed%(
sizeof(__kmp_primes)/
sizeof(__kmp_primes[0]))];
4217 thread -> th.th_x = (seed+1)*thread->th.th_a+1;
4218 KA_TRACE(30, (
"__kmp_init_random: THREAD: %u; A: %u\n", seed, thread -> th.th_a) );
/*
 * __kmp_reclaim_dead_roots: scan all slots of the thread table and unregister
 * roots whose thread is gone.
 *
 * A slot is reclaimed when it is an uber (root) gtid, its thread is no longer
 * running according to __kmp_still_running(), and its root is not active.
 * The values returned by __kmp_unregister_root_other_thread() are summed in
 * `r`.
 */
4225 __kmp_reclaim_dead_roots(
void) {
4228 for(i = 0; i < __kmp_threads_capacity; ++i) {
4229 if( KMP_UBER_GTID( i ) &&
4230 !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
4231 !__kmp_root[i]->r.r_active ) {
4232 r += __kmp_unregister_root_other_thread(i);
/*
 * __kmp_expand_threads: grow the __kmp_threads / __kmp_root tables.
 *
 *   nWish  number of additional slots the caller would like.
 *   nNeed  number of additional slots the caller requires.
 *
 * The upper bound is __kmp_tp_capacity when threadprivate caching is in use
 * (__kmp_tp_cached), otherwise __kmp_sys_max_nth.  The new capacity is found
 * by doubling the current one, capped at that bound, until it covers the
 * target.  Both tables live in one allocation: the thread pointers first,
 * the root pointers right after them.  Old contents are copied, new slots are
 * zeroed, and the globals are switched over while holding
 * __kmp_tp_cached_lock.  `added` accumulates the number of slots gained.
 */
4261 __kmp_expand_threads(
int nWish,
int nNeed) {
4264 int __kmp_actual_max_nth;
4268 #if KMP_OS_WINDOWS && !defined GUIDEDLL_EXPORTS
/* In this configuration slots of dead roots are reclaimed first. */
4271 added = __kmp_reclaim_dead_roots();
4289 int minimumRequiredCapacity;
4291 kmp_info_t **newThreads;
4292 kmp_root_t **newRoot;
4314 old_tp_cached = __kmp_tp_cached;
4315 __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth;
4316 KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity);
/* Reduce the target when the remaining headroom is smaller than requested. */
4320 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
4324 if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) {
4330 nTarget = __kmp_actual_max_nth - __kmp_threads_capacity;
4337 minimumRequiredCapacity = __kmp_threads_capacity + nTarget;
/* Double the capacity (capped at the maximum) until it is large enough. */
4339 newCapacity = __kmp_threads_capacity;
4342 newCapacity <= (__kmp_actual_max_nth >> 1) ?
4343 (newCapacity << 1) :
4344 __kmp_actual_max_nth;
4345 }
while(newCapacity < minimumRequiredCapacity);
/* One block holds both tables; newRoot points just past the thread pointers. */
4346 newThreads = (kmp_info_t**) __kmp_allocate((
sizeof(kmp_info_t*) +
sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE);
4347 newRoot = (kmp_root_t**) ((
char*)newThreads +
sizeof(kmp_info_t*) * newCapacity );
4348 memcpy(newThreads, __kmp_threads, __kmp_threads_capacity *
sizeof(kmp_info_t*));
4349 memcpy(newRoot, __kmp_root, __kmp_threads_capacity *
sizeof(kmp_root_t*));
4350 memset(newThreads + __kmp_threads_capacity, 0,
4351 (newCapacity - __kmp_threads_capacity) *
sizeof(kmp_info_t*));
4352 memset(newRoot + __kmp_threads_capacity, 0,
4353 (newCapacity - __kmp_threads_capacity) *
sizeof(kmp_root_t*));
/*
 * If threadprivate caching was switched on in the meantime and the new
 * capacity exceeds __kmp_tp_capacity, the new block is discarded.  The same
 * test is repeated after taking __kmp_tp_cached_lock.
 */
4355 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
4361 __kmp_free(newThreads);
4364 __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
4365 if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
4367 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
4368 __kmp_free(newThreads);
/* Switch the globals over through volatile-qualified stores. */
4374 *(kmp_info_t**
volatile*)&__kmp_threads = newThreads;
4375 *(kmp_root_t**
volatile*)&__kmp_root = newRoot;
4376 added += newCapacity - __kmp_threads_capacity;
4377 *(
volatile int*)&__kmp_threads_capacity = newCapacity;
4378 __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
/*
 * __kmp_register_root
 *
 * Sets up a root ("uber") thread: picks a free slot (gtid) in __kmp_threads[],
 * creates or reuses the kmp_root_t and kmp_info_t for that slot, gives the
 * thread a serial team, publishes it in __kmp_threads[] and initializes its
 * barrier state.  Everything between the acquire and release of
 * __kmp_forkjoin_lock below runs under that lock.
 *
 * initial_thread: non-zero allows slot 0 to be used; otherwise the slot
 *                 search starts at index 1.
 *
 * NOTE(review): the return statement is not visible here; presumably the
 * chosen gtid is returned -- confirm.
 */
4389 __kmp_register_root(
int initial_thread )
4391 kmp_info_t *root_thread;
4395 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
4396 KA_TRACE( 20, (
"__kmp_register_root: entered\n"));
4414 capacity = __kmp_threads_capacity;
/* Non-initial thread while slot 0 is still empty.  NOTE(review): the branch
 * body is not shown; presumably it reserves slot 0 by lowering `capacity`. */
4415 if ( ! initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) {
/* Out of slots and the thread arrays could not be grown: report
 * CantRegisterNewThread, with a hint that depends on __kmp_tp_cached. */
4420 if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) {
4421 if ( __kmp_tp_cached ) {
4424 KMP_MSG( CantRegisterNewThread ),
4425 KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ),
4426 KMP_HNT( PossibleSystemLimitOnThreads ),
4433 KMP_MSG( CantRegisterNewThread ),
4434 KMP_HNT( SystemLimitOnThreads ),
/* Linear scan for the first NULL entry; the loop itself has no upper bound,
 * the assertion after it checks the result against the capacity. */
4443 for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ );
4444 KA_TRACE( 1, (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid ));
4445 KMP_ASSERT( gtid < __kmp_threads_capacity );
4449 TCW_4(__kmp_nth, __kmp_nth + 1);
/* Switch __kmp_gtid_mode between 2 and 1 depending on whether the total
 * thread count has reached __kmp_tls_gtid_min (only when adjustment is on). */
4456 if ( __kmp_adjust_gtid_mode ) {
4457 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
4458 if ( TCR_4(__kmp_gtid_mode) != 2) {
4459 TCW_4(__kmp_gtid_mode, 2);
4463 if (TCR_4(__kmp_gtid_mode) != 1 ) {
4464 TCW_4(__kmp_gtid_mode, 1);
4469 #ifdef KMP_ADJUST_BLOCKTIME
/* If blocktime was not set via the environment and there are now more
 * threads than available processors, set __kmp_zero_bt. */
4472 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4473 if ( __kmp_nth > __kmp_avail_proc ) {
4474 __kmp_zero_bt = TRUE;
/* Allocate the root structure for this slot only if none exists yet. */
4480 if( ! ( root = __kmp_root[gtid] )) {
4481 root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate(
sizeof(kmp_root_t) );
4482 KMP_DEBUG_ASSERT( ! root->r.r_root_team );
4485 __kmp_initialize_root( root );
/* Reuse the root's existing uber thread descriptor if it has one, otherwise
 * allocate a fresh kmp_info_t. */
4488 if( root -> r.r_uber_thread ) {
4489 root_thread = root -> r.r_uber_thread;
4491 root_thread = (kmp_info_t*) __kmp_allocate(
sizeof(kmp_info_t) );
4492 if ( __kmp_storage_map ) {
4493 __kmp_print_thread_storage_map( root_thread, gtid );
4495 root_thread -> th.th_info .ds.ds_gtid = gtid;
4496 root_thread -> th.th_root = root;
4497 if( __kmp_env_consistency_check ) {
4498 root_thread -> th.th_cons = __kmp_allocate_cons_stack( gtid );
4501 __kmp_initialize_fast_memory( root_thread );
4505 KMP_DEBUG_ASSERT( root_thread -> th.th_local.bget_data == NULL );
4506 __kmp_initialize_bget( root_thread );
4508 __kmp_init_random( root_thread );
/* Create the one-thread serial team on first use, seeded from the global
 * defaults passed below. */
4512 if( ! root_thread -> th.th_serial_team ) {
4514 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
4515 #endif // OMP_30_ENABLED
4516 KF_TRACE( 10, (
"__kmp_register_root: before serial_team\n" ) );
4517 root_thread -> th.th_serial_team = __kmp_allocate_team( root, 1, 1,
4524 __kmp_dflt_team_nth_ub,
4525 __kmp_global.g.g_dynamic,
4527 __kmp_dflt_blocktime,
4529 __kmp_env_blocktime,
4533 KMP_ASSERT( root_thread -> th.th_serial_team );
4534 KF_TRACE( 10, (
"__kmp_register_root: after serial_team = %p\n",
4535 root_thread -> th.th_serial_team ) );
/* Publish the thread in the global table, then make it thread 0 of the root
 * team, the hot team and its own serial team. */
4538 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
4540 root -> r.r_root_team -> t.t_threads[0] = root_thread;
4541 root -> r.r_hot_team -> t.t_threads[0] = root_thread;
4542 root_thread -> th.th_serial_team -> t.t_threads[0] = root_thread;
4543 root -> r.r_uber_thread = root_thread;
4546 __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid );
4549 __kmp_gtid_set_specific( gtid );
4550 #ifdef KMP_TDATA_GTID
4553 __kmp_create_worker( gtid, root_thread, __kmp_stksize );
4554 KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid );
4555 TCW_4(__kmp_init_gtid, TRUE);
4557 KA_TRACE( 20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n",
4558 gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ),
4559 root -> r.r_hot_team -> t.t_id, 0, KMP_INIT_BARRIER_STATE,
4560 KMP_INIT_BARRIER_STATE ) );
/* Reset the thread's per-barrier arrival state for every barrier type. */
4563 for ( b = 0; b < bs_last_barrier; ++ b ) {
4564 root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE;
4567 KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE );
4570 #if KMP_OS_WINDOWS || KMP_OS_LINUX
/* Apply the initial affinity mask only once middle initialization is done. */
4571 if ( TCR_4(__kmp_init_middle) ) {
4572 __kmp_affinity_set_init_mask( gtid, TRUE );
4576 __kmp_root_counter ++;
4579 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
/*
 * __kmp_reset_root
 *
 * Releases the resources attached to `root`: detaches and frees its root
 * team and hot team, waits for task teams to be unreferenced when tasking is
 * enabled, reaps the uber thread and clears r_uber_thread and r_begin.
 * The root must not be active (asserted in debug builds).
 *
 * gtid: global thread id of the root's thread (not used in the visible lines).
 * root: root structure to reset.
 *
 * NOTE(review): the return statement is not visible; a caller stores the
 * result in an int, presumably `n` (the hot team's t_nproc) -- confirm.
 */
4588 __kmp_reset_root(
int gtid, kmp_root_t *root)
4590 kmp_team_t * root_team = root->r.r_root_team;
4591 kmp_team_t * hot_team = root->r.r_hot_team;
4592 int n = hot_team->t.t_nproc;
4595 KMP_DEBUG_ASSERT( ! root->r.r_active );
/* Clear the root's team pointers before the teams are freed. */
4597 root->r.r_root_team = NULL;
4598 root->r.r_hot_team = NULL;
4601 __kmp_free_team( root, root_team );
4602 __kmp_free_team( root, hot_team );
4609 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
4610 __kmp_wait_to_unref_task_teams();
/* NOTE(review): the LPVOID cast suggests this handle release is
 * Windows-only; the enclosing preprocessor guard is not visible. */
4616 KA_TRACE( 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
"\n",
4617 (LPVOID)&(root->r.r_uber_thread->th),
4618 root->r.r_uber_thread->th.th_info.ds.ds_thread ) );
4619 __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread );
/* One fewer active thread; then reap the uber thread (is_root = 1). */
4622 TCW_4(__kmp_nth, __kmp_nth - 1);
4623 __kmp_reap_thread( root->r.r_uber_thread, 1 );
4626 root->r.r_uber_thread = NULL;
4628 root -> r.r_begin = FALSE;
/*
 * __kmp_unregister_root_current_thread
 *
 * Unregisters the root identified by `gtid`: after sanity checks (gtid is an
 * uber gtid, the thread's th_root matches __kmp_root[gtid], the root is not
 * active) it takes __kmp_forkjoin_lock, resets the root, sets the
 * thread-specific gtid to KMP_GTID_DNE and releases the lock.
 *
 * gtid: global thread id of the root thread being unregistered.
 */
4634 __kmp_unregister_root_current_thread(
int gtid )
4636 kmp_root_t *root = __kmp_root[gtid];
4638 KA_TRACE( 1, (
"__kmp_unregister_root_current_thread: enter T#%d\n", gtid ));
4639 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
4640 KMP_ASSERT( KMP_UBER_GTID( gtid ));
4641 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
4642 KMP_ASSERT( root->r.r_active == FALSE );
4648 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
4652 __kmp_reset_root(gtid, root);
/* Forget the gtid in thread-specific storage (and in __kmp_gtid when
 * KMP_TDATA_GTID is defined). */
4655 __kmp_gtid_set_specific( KMP_GTID_DNE );
4656 #ifdef KMP_TDATA_GTID
4657 __kmp_gtid = KMP_GTID_DNE;
4661 KC_TRACE( 10, (
"__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid ));
4663 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
/*
 * __kmp_unregister_root_other_thread
 *
 * Resets the root registered under `gtid` after the same sanity checks as
 * the current-thread variant.  Unlike that variant, the visible lines take
 * no lock and do not touch thread-specific gtid storage.
 * NOTE(review): presumably the caller already holds the fork/join lock --
 * confirm against callers.
 *
 * gtid: global thread id of the root thread being unregistered.
 * The result of __kmp_reset_root is kept in `r`; NOTE(review): presumably
 * returned, the return statement is not visible.
 */
4671 __kmp_unregister_root_other_thread(
int gtid )
4673 kmp_root_t *root = __kmp_root[gtid];
4676 KA_TRACE( 1, (
"__kmp_unregister_root_other_thread: enter T#%d\n", gtid ));
4677 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
4678 KMP_ASSERT( KMP_UBER_GTID( gtid ));
4679 KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root );
4680 KMP_ASSERT( root->r.r_active == FALSE );
4682 r = __kmp_reset_root(gtid, root);
4683 KC_TRACE( 10, (
"__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid ));
/*
 * __kmp_task_info
 *
 * Debug helper: prints the calling thread's gtid and tid, its thread
 * descriptor and team pointers, its current task, and the td_parent of the
 * team's implicit task for this tid.  `steam` (the serial team) is read but
 * not used in the visible lines.
 */
4690 void __kmp_task_info() {
4692 kmp_int32 gtid = __kmp_entry_gtid();
4693 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
4694 kmp_info_t *this_thr = __kmp_threads[ gtid ];
4695 kmp_team_t *steam = this_thr -> th.th_serial_team;
4696 kmp_team_t *team = this_thr -> th.th_team;
4698 __kmp_printf(
"__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n",
4699 gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent );
4703 #endif // OMP_30_ENABLED
/*
 * __kmp_initialize_info
 *
 * (Re)binds thread descriptor `this_thr` to `team` as team member `tid`:
 * copies team-derived fields into the descriptor, initializes its implicit
 * task, resets per-thread construct counters, lazily allocates the
 * threadprivate common table and the dispatch buffer, and clears dispatch
 * and pool-link state.
 *
 * this_thr: thread descriptor; must already have a serial team (asserted).
 * team:     team to attach to; t_threads[0] must be set and have a root
 *           (asserted).
 * tid:      index of the thread within `team`.
 * gtid:     global thread id, used here for tracing and storage-map output.
 */
4709 __kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team,
int tid,
int gtid )
4714 KMP_DEBUG_ASSERT( this_thr != NULL );
4715 KMP_DEBUG_ASSERT( this_thr -> th.th_serial_team );
4716 KMP_DEBUG_ASSERT( team );
4717 KMP_DEBUG_ASSERT( team -> t.t_threads );
4718 KMP_DEBUG_ASSERT( team -> t.t_dispatch );
4719 KMP_DEBUG_ASSERT( team -> t.t_threads[0] );
4720 KMP_DEBUG_ASSERT( team -> t.t_threads[0] -> th.th_root );
4724 TCW_SYNC_PTR(this_thr->th.th_team, team);
4726 this_thr->th.th_info.ds.ds_tid = tid;
4727 this_thr->th.th_set_nproc = 0;
4729 this_thr->th.th_set_proc_bind = proc_bind_default;
4730 # if (KMP_OS_WINDOWS || KMP_OS_LINUX)
4731 this_thr->th.th_new_place = this_thr->th.th_current_place;
/* Root, team size, master and teams-construct fields are all taken from the
 * team and from its thread 0. */
4734 this_thr->th.th_root = team -> t.t_threads[0] -> th.th_root;
4737 this_thr->th.th_team_nproc = team -> t.t_nproc;
4738 this_thr->th.th_team_master = team -> t.t_threads[0];
4739 this_thr->th.th_team_serialized = team -> t.t_serialized;
4741 this_thr->th.th_team_microtask = team -> t.t_threads[0] -> th.th_team_microtask;
4742 this_thr->th.th_teams_level = team -> t.t_threads[0] -> th.th_teams_level;
4743 this_thr->th.th_set_nth_teams = team -> t.t_threads[0] -> th.th_set_nth_teams;
4745 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4748 KMP_DEBUG_ASSERT( team -> t.t_implicit_task_taskdata );
4749 this_thr->th.th_task_state = 0;
4751 KF_TRACE( 10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4752 tid, gtid, this_thr, this_thr->th.th_current_task ) );
/* The implicit task is initialized with the master thread's th_ident. */
4754 __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE );
4756 KF_TRACE( 10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4757 tid, gtid, this_thr, this_thr->th.th_current_task ) );
4759 #endif // OMP_30_ENABLED
4762 this_thr -> th.th_dispatch = &team -> t.t_dispatch[ tid ];
4764 this_thr->th.th_local.this_construct = 0;
4765 this_thr->th.th_local.last_construct = 0;
4768 this_thr->th.th_local.tv_data = 0;
/* Allocate the common table only once per thread descriptor. */
4771 if ( ! this_thr->th.th_pri_common ) {
4772 this_thr->th.th_pri_common = (
struct common_table *) __kmp_allocate(
sizeof(
struct common_table) );
4773 if ( __kmp_storage_map ) {
4774 __kmp_print_storage_map_gtid(
4775 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4776 sizeof(
struct common_table ),
"th_%d.th_pri_common\n", gtid
4779 this_thr->th.th_pri_head = NULL;
4784 volatile kmp_disp_t *dispatch = this_thr -> th.th_dispatch;
/* A team limited to one thread gets a single dispatch buffer; otherwise
 * KMP_MAX_DISP_BUF buffers are used. */
4788 size_t disp_size =
sizeof( dispatch_private_info_t ) *
4789 ( team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF );
4790 KD_TRACE( 10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) );
4791 KMP_ASSERT( dispatch );
4792 KMP_DEBUG_ASSERT( team -> t.t_dispatch );
4793 KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] );
4795 dispatch->th_disp_index = 0;
/* Allocate the dispatch buffer on first use.  NOTE(review): the memset
 * below presumably belongs to the reuse (else) path; the branch structure
 * is not visible. */
4797 if( ! dispatch -> th_disp_buffer ) {
4798 dispatch -> th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size );
4800 if ( __kmp_storage_map ) {
4801 __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ],
4802 &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 1 : KMP_MAX_DISP_BUF ],
4803 disp_size,
"th_%d.th_dispatch.th_disp_buffer "
4804 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4805 gtid, team->t.t_id, gtid );
4808 memset( & dispatch -> th_disp_buffer[0],
'\0', disp_size );
4811 dispatch -> th_dispatch_pr_current = 0;
4812 dispatch -> th_dispatch_sh_current = 0;
4814 dispatch -> th_deo_fcn = 0;
4815 dispatch -> th_dxo_fcn = 0;
4818 this_thr->th.th_next_pool = NULL;
4820 KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
4821 KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
/*
 * __kmp_allocate_thread
 *
 * Provides a worker thread descriptor for slot `new_tid` of `team`.
 * If __kmp_thread_pool is non-empty, its head is popped and re-initialized
 * for the team.  Otherwise a new kmp_info_t is allocated, given a gtid and a
 * serial team, initialized, and a worker is created for it via
 * __kmp_create_worker.
 *
 * root:    root the new thread's serial team is allocated under.
 * team:    team the thread will join.
 * new_tid: the thread's index within `team`.
 *
 * The caller must be a master thread (asserted in debug builds).
 * NOTE(review): return statements are not visible; presumably `new_thr` is
 * returned on both paths -- confirm.
 */
4834 __kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team,
int new_tid )
4836 kmp_team_t *serial_team;
4837 kmp_info_t *new_thr;
4840 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
4841 KMP_DEBUG_ASSERT( root && team );
4842 KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
/* Reuse path: pop the head of the thread pool. */
4846 if ( __kmp_thread_pool ) {
4848 new_thr = (kmp_info_t*)__kmp_thread_pool;
4849 __kmp_thread_pool = (
volatile kmp_info_t *) new_thr->th.th_next_pool;
/* The cached insertion point must not keep pointing at a thread that has
 * just left the pool. */
4850 if ( new_thr == __kmp_thread_pool_insert_pt ) {
4851 __kmp_thread_pool_insert_pt = NULL;
4853 TCW_4(new_thr->th.th_in_pool, FALSE);
4859 __kmp_thread_pool_nth--;
4861 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4862 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid ));
4863 KMP_ASSERT( ! new_thr -> th.th_team );
4864 KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity );
4865 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 );
4868 __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid );
4869 KMP_DEBUG_ASSERT( new_thr->th.th_serial_team );
4871 TCW_4(__kmp_nth, __kmp_nth + 1);
4873 #ifdef KMP_ADJUST_BLOCKTIME
/* Set __kmp_zero_bt when oversubscribed and blocktime was not set via the
 * environment. */
4876 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
4877 if ( __kmp_nth > __kmp_avail_proc ) {
4878 __kmp_zero_bt = TRUE;
4883 KF_TRACE( 10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4884 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid ));
/* New-thread path: the pool was empty, so every existing thread is active
 * and a free slot must remain in the thread table. */
4892 KMP_ASSERT( __kmp_nth == __kmp_all_nth );
4893 KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity );
/* Create the monitor thread once: the flag is tested again after taking
 * __kmp_monitor_lock so only one caller creates it. */
4899 if ( ! TCR_4( __kmp_init_monitor ) ) {
4900 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
4901 if ( ! TCR_4( __kmp_init_monitor ) ) {
4902 KF_TRACE( 10, (
"before __kmp_create_monitor\n" ) );
4903 TCW_4( __kmp_init_monitor, 1 );
4904 __kmp_create_monitor( & __kmp_monitor );
4905 KF_TRACE( 10, (
"after __kmp_create_monitor\n" ) );
4907 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
/* Find the first free gtid, starting at 1 (slot 0 is never handed to a
 * worker here). */
4911 for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) {
4912 KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity );
4916 new_thr = (kmp_info_t*) __kmp_allocate(
sizeof(kmp_info_t) );
4918 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4920 if ( __kmp_storage_map ) {
4921 __kmp_print_thread_storage_map( new_thr, new_gtid );
4927 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team );
4928 #endif // OMP_30_ENABLED
4929 KF_TRACE( 10, (
"__kmp_allocate_thread: before th_serial/serial_team\n" ) );
/* Give the new thread its own one-thread serial team, seeded from the
 * settings stored at index 0 of `team`. */
4930 new_thr -> th.th_serial_team = serial_team =
4931 (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
4938 team->t.t_set_nproc[0],
4939 team->t.t_set_dynamic[0],
4940 team->t.t_set_nested[0],
4941 team->t.t_set_blocktime[0],
4942 team->t.t_set_bt_intervals[0],
4943 team->t.t_set_bt_set[0],
4947 KMP_ASSERT ( serial_team );
4948 serial_team -> t.t_threads[0] = new_thr;
4949 KF_TRACE( 10, (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4953 __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
4956 __kmp_initialize_fast_memory( new_thr );
4960 KMP_DEBUG_ASSERT( new_thr -> th.th_local.bget_data == NULL );
4961 __kmp_initialize_bget( new_thr );
4964 __kmp_init_random( new_thr );
4967 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4968 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
/* Start every barrier's "go" flag from the initial barrier state. */
4970 new_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_go = KMP_INIT_BARRIER_STATE;
4971 new_thr->th.th_bar[ bs_plain_barrier ].bb.b_go = KMP_INIT_BARRIER_STATE;
4972 #if KMP_FAST_REDUCTION_BARRIER
4973 new_thr->th.th_bar[ bs_reduction_barrier ].bb.b_go = KMP_INIT_BARRIER_STATE;
4974 #endif // KMP_FAST_REDUCTION_BARRIER
4976 new_thr->th.th_spin_here = FALSE;
4977 new_thr->th.th_next_waiting = 0;
4979 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
/* No place information yet for a brand-new thread. */
4980 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4981 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4982 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4983 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4986 TCW_4(new_thr->th.th_in_pool, FALSE);
4987 new_thr->th.th_active_in_pool = FALSE;
4988 TCW_4(new_thr->th.th_active, TRUE);
/* Same gtid-mode and blocktime adjustments as on root registration. */
4999 if ( __kmp_adjust_gtid_mode ) {
5000 if ( __kmp_all_nth >= __kmp_tls_gtid_min ) {
5001 if ( TCR_4(__kmp_gtid_mode) != 2) {
5002 TCW_4(__kmp_gtid_mode, 2);
5006 if (TCR_4(__kmp_gtid_mode) != 1 ) {
5007 TCW_4(__kmp_gtid_mode, 1);
5012 #ifdef KMP_ADJUST_BLOCKTIME
5015 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
5016 if ( __kmp_nth > __kmp_avail_proc ) {
5017 __kmp_zero_bt = TRUE;
5023 KF_TRACE( 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr ));
5024 __kmp_create_worker( new_gtid, new_thr, __kmp_stksize );
5025 KF_TRACE( 10, (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr ));
5028 KA_TRACE( 20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid ));
/*
 * __kmp_reinitialize_team
 *
 * Refreshes the per-use state of an existing team: stores the source
 * location `loc` in t_ident, assigns a new team id from KMP_GEN_TEAM_ID(),
 * and installs the new control values.
 *
 * How the control values are installed depends on the build configuration:
 *  - KMP_BARRIER_ICV_PULL: the ICVs are copied into t_initial_icvs and into
 *    thread 0's implicit task.
 *  - KMP_BARRIER_ICV_PUSH: the ICVs are copied into thread 0's implicit task.
 *  - otherwise: each of the first new_nproc threads gets its implicit task
 *    initialized and the ICVs copied.
 * The t_set_* arrays are filled from the individual new_set_* / new_bt_*
 * parameters.  NOTE(review): the #if structure is only partly visible;
 * presumably new_icvs is the OMP_30_ENABLED form and the separate ints the
 * legacy form -- confirm.
 *
 * Parameters `team`, `new_nproc` and `loc` are used below, but their
 * declarations are not visible in this listing.
 */
5043 __kmp_reinitialize_team(
5047 kmp_internal_control_t * new_icvs,
5050 int new_set_nproc,
int new_set_dynamic,
int new_set_nested,
5051 int new_set_blocktime,
int new_bt_intervals,
int new_bt_set
5056 KMP_DEBUG_ASSERT( team && new_nproc && new_icvs );
5057 KMP_DEBUG_ASSERT( ( ! TCR_4(__kmp_init_parallel) ) || new_icvs->nproc );
5058 team->t.t_ident = loc;
5060 KMP_DEBUG_ASSERT( team && new_nproc && new_set_nproc );
5061 #endif // OMP_30_ENABLED
5063 team->t.t_id = KMP_GEN_TEAM_ID();
5065 #if KMP_BARRIER_ICV_PULL
/* Pull variant: keep a team-level copy of the ICVs and set up only the
 * master's implicit task here. */
5070 copy_icvs( &team->t.t_initial_icvs, new_icvs );
5078 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
5079 copy_icvs( &team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs );
5080 KF_TRACE( 10, (
"__kmp_reinitialize_team2: T#%d this_thread=%p team=%p\n",
5081 0, team->t.t_threads[0], team ) );
5083 #elif KMP_BARRIER_ICV_PUSH
/* Push variant: only the master's implicit task receives the ICVs here. */
5088 __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE );
5089 copy_icvs( &team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs );
5090 KF_TRACE( 10, (
"__kmp_reinitialize_team2: T#%d this_thread=%p team=%p\n",
5091 0, team->t.t_threads[0], team ) );
/* Default variant: initialize every one of the new_nproc slots. */
5097 for( f=0 ; f<new_nproc ; f++) {
5100 KF_TRACE( 10, (
"__kmp_reinitialize_team1: T#%d this_thread=%p team=%p\n",
5101 f, team->t.t_threads[f], team ) );
5102 __kmp_init_implicit_task( loc, team->t.t_threads[f], team, f, FALSE );
5103 copy_icvs( &team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs );
5104 KF_TRACE( 10, (
"__kmp_reinitialize_team2: T#%d this_thread=%p team=%p\n",
5105 f, team->t.t_threads[f], team ) );
5107 team -> t.t_set_nproc[f] = new_set_nproc;
5108 team -> t.t_set_dynamic[f] = new_set_dynamic;
5109 team -> t.t_set_nested[f] = new_set_nested;
5110 team -> t.t_set_blocktime[f] = new_set_blocktime;
5111 team -> t.t_set_bt_intervals[f] = new_bt_intervals;
5112 team -> t.t_set_bt_set[f] = new_bt_set;
5113 # endif // OMP_30_ENABLED
5116 #endif // KMP_BARRIER_ICV_PUSH || KMP_BARRIER_ICV_PULL
/*
 * __kmp_initialize_team
 *
 * Resets a team structure to its starting state for `new_nproc` threads:
 * clears master/serialization/pool-link fields, the microtask and invoke
 * pointers, saved FP control state (x86 only), construct counter, ordered
 * value, taskq, copyprivate data, copyin counter and control stack, then
 * delegates the remaining setup to __kmp_reinitialize_team.
 *
 * new_nproc must not exceed team->t.t_max_nproc and the team's thread array
 * must already exist (both asserted in debug builds).
 * Parameters `team` and `new_nproc` are used below, but their declarations
 * are not visible in this listing.
 */
5124 __kmp_initialize_team(
5128 kmp_internal_control_t * new_icvs,
5131 int new_set_nproc,
int new_set_dynamic,
int new_set_nested,
5132 int new_set_blocktime,
int new_bt_intervals,
int new_bt_set
5136 KMP_DEBUG_ASSERT( team );
5137 KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc );
5138 KMP_DEBUG_ASSERT( team->t.t_threads );
5141 team -> t.t_master_tid = 0;
5143 team -> t.t_serialized = 0;
5144 team -> t.t_nproc = new_nproc;
5147 team -> t.t_next_pool = NULL;
5150 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
5151 team -> t.t_invoke = NULL;
/* The team's schedule comes from the supplied ICVs. */
5155 team -> t.t_sched = new_icvs->sched;
5156 #endif // OMP_30_ENABLED
5158 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
5159 team -> t.t_fp_control_saved = FALSE;
5160 team -> t.t_x87_fpu_control_word = 0;
5161 team -> t.t_mxcsr = 0;
5164 team -> t.t_construct = 0;
5165 __kmp_init_lock( & team -> t.t_single_lock );
5167 team -> t.t_ordered .dt.t_value = 0;
5168 team -> t.t_master_active = FALSE;
5170 memset( & team -> t.t_taskq,
'\0',
sizeof( kmp_taskq_t ));
5173 team -> t.t_copypriv_data = NULL;
5175 team -> t.t_copyin_counter = 0;
5177 team -> t.t_control_stack_top = NULL;
/* Team id, ident and control values are handled by the reinitializer. */
5179 __kmp_reinitialize_team(
5185 new_set_nproc, new_set_dynamic, new_set_nested,
5186 new_set_blocktime, new_bt_intervals, new_bt_set
/*
 * __kmp_set_thread_affinity_mask_full_tmp
 *
 * When affinity is supported, optionally saves the current system affinity
 * into `old_mask` and then sets the system affinity to the full mask.
 *
 * old_mask: destination for the current mask, or NULL to skip saving.
 *
 * A non-zero status from the save is reported with the
 * ChangeThreadAffMaskError message (the reporting call itself is not
 * visible in this listing).
 */
5196 __kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask )
5198 if ( KMP_AFFINITY_CAPABLE() ) {
5200 if ( old_mask != NULL ) {
5201 status = __kmp_get_system_affinity( old_mask, TRUE );
5203 if ( status != 0 ) {
5206 KMP_MSG( ChangeThreadAffMaskError ),
5212 __kmp_set_system_affinity( __kmp_affinity_get_fullMask(), TRUE );
5217 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
/*
 * __kmp_partition_places
 *
 * Assigns a place partition [th_first_place, th_last_place] and a target
 * place (th_new_place) to the threads of `team`, according to the team's
 * t_proc_bind policy.  The master's partition and current place are the
 * starting point, and the team's own t_first_place / t_last_place are set
 * to the master's partition.
 *
 *  - proc_bind_default: only asserts that the team has one thread.
 *  - proc_bind_master:  every worker inherits the master's partition and is
 *    targeted at the master's place.
 *  - proc_bind_close:   threads keep the master's partition and are given
 *    places walked from the master's place.
 *  - proc_bind_spread:  each thread's partition is narrowed (first/last are
 *    set per thread).
 *
 * The place walk wraps from last_place to first_place and has a separate
 * branch for the highest mask index (__kmp_affinity_num_masks - 1).
 * NOTE(review): the statements that advance `place`, s_count and gap_ct are
 * not visible in this listing; the comments below describe only what is
 * shown.
 */
5226 __kmp_partition_places( kmp_team_t *team )
5231 kmp_info_t *master_th = team->t.t_threads[0];
5232 KMP_DEBUG_ASSERT( master_th != NULL );
5233 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
5234 int first_place = master_th->th.th_first_place;
5235 int last_place = master_th->th.th_last_place;
5236 int masters_place = master_th->th.th_current_place;
5237 team->t.t_first_place = first_place;
5238 team->t.t_last_place = last_place;
5240 KA_TRACE( 20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n",
5241 proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id,
5242 masters_place, first_place, last_place ) );
5244 switch ( proc_bind ) {
5246 case proc_bind_default:
5252 KMP_DEBUG_ASSERT( team->t.t_nproc == 1 );
5255 case proc_bind_master:
5258 int n_th = team->t.t_nproc;
/* Workers only (f starts at 1): same partition and place as the master. */
5259 for ( f = 1; f < n_th; f++ ) {
5260 kmp_info_t *th = team->t.t_threads[f];
5261 KMP_DEBUG_ASSERT( th != NULL );
5262 th->th.th_first_place = first_place;
5263 th->th.th_last_place = last_place;
5264 th->th.th_new_place = masters_place;
5266 KA_TRACE( 100, (
"__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n",
5267 __kmp_gtid_from_thread( team->t.t_threads[f] ),
5268 team->t.t_id, f, masters_place, first_place, last_place ) );
5273 case proc_bind_close:
5276 int n_th = team->t.t_nproc;
/* Partition size; the second formula handles a partition that wraps past
 * the end of the mask array (first_place > last_place). */
5278 if ( first_place <= last_place ) {
5279 n_places = last_place - first_place + 1;
5282 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
/* Enough places for every thread: workers get successive places after the
 * master's, each keeping the full partition. */
5284 if ( n_th <= n_places ) {
5285 int place = masters_place;
5286 for ( f = 1; f < n_th; f++ ) {
5287 kmp_info_t *th = team->t.t_threads[f];
5288 KMP_DEBUG_ASSERT( th != NULL );
5290 if ( place == last_place ) {
5291 place = first_place;
5293 else if ( place == __kmp_affinity_num_masks - 1) {
5299 th->th.th_first_place = first_place;
5300 th->th.th_last_place = last_place;
5301 th->th.th_new_place = place;
5303 KA_TRACE( 100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
5304 __kmp_gtid_from_thread( team->t.t_threads[f] ),
5305 team->t.t_id, f, place, first_place, last_place ) );
/* More threads than places: S threads per place, with the `rem` leftover
 * threads spread out using `gap`. */
5309 int S, rem, gap, s_count;
5310 S = n_th / n_places;
5312 rem = n_th - ( S * n_places );
5313 gap = rem > 0 ? n_places/rem : n_places;
5314 int place = masters_place;
5316 for ( f = 0; f < n_th; f++ ) {
5317 kmp_info_t *th = team->t.t_threads[f];
5318 KMP_DEBUG_ASSERT( th != NULL );
5320 th->th.th_first_place = first_place;
5321 th->th.th_last_place = last_place;
5322 th->th.th_new_place = place;
5325 if ( (s_count == S) && rem && (gap_ct == gap) ) {
5328 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
5330 if ( place == last_place ) {
5331 place = first_place;
5333 else if ( place == __kmp_affinity_num_masks - 1) {
5343 else if (s_count == S) {
5344 if ( place == last_place ) {
5345 place = first_place;
5347 else if ( place == __kmp_affinity_num_masks - 1) {
5357 KA_TRACE( 100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n",
5358 __kmp_gtid_from_thread( team->t.t_threads[f] ),
5359 team->t.t_id, f, th->th.th_new_place, first_place,
/* After a full pass the walk is expected to be back at the master's place. */
5362 KMP_DEBUG_ASSERT( place == masters_place );
5367 case proc_bind_spread:
5370 int n_th = team->t.t_nproc;
5372 if ( first_place <= last_place ) {
5373 n_places = last_place - first_place + 1;
5376 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
/* Enough places: each thread gets a sub-partition of about S places, with
 * the `rem` extra places handed out using `gap`. */
5378 if ( n_th <= n_places ) {
5379 int place = masters_place;
5380 int S = n_places/n_th;
5381 int s_count, rem, gap, gap_ct;
5382 rem = n_places - n_th*S;
5383 gap = rem ? n_th/rem : 1;
5385 for ( f = 0; f < n_th; f++ ) {
5386 kmp_info_t *th = team->t.t_threads[f];
5387 KMP_DEBUG_ASSERT( th != NULL );
/* The sub-partition starts at the current place, which is also the
 * thread's target place. */
5389 th->th.th_first_place = place;
5390 th->th.th_new_place = place;
5392 while (s_count < S) {
5393 if ( place == last_place ) {
5394 place = first_place;
5396 else if ( place == __kmp_affinity_num_masks - 1) {
5404 if (rem && (gap_ct == gap)) {
5405 if ( place == last_place ) {
5406 place = first_place;
5408 else if ( place == __kmp_affinity_num_masks - 1) {
/* ...and ends at the place reached after the walk above. */
5417 th->th.th_last_place = place;
5420 if ( place == last_place ) {
5421 place = first_place;
5423 else if ( place == __kmp_affinity_num_masks - 1) {
5430 KA_TRACE( 100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
5431 __kmp_gtid_from_thread( team->t.t_threads[f] ),
5432 team->t.t_id, f, th->th.th_new_place,
5433 th->th.th_first_place, th->th.th_last_place ) );
5435 KMP_DEBUG_ASSERT( place == masters_place );
/* More threads than places: every thread's partition is a single place
 * (first == last == new place), several threads sharing each place. */
5438 int S, rem, gap, s_count;
5439 S = n_th / n_places;
5441 rem = n_th - ( S * n_places );
5442 gap = rem > 0 ? n_places/rem : n_places;
5443 int place = masters_place;
5445 for ( f = 0; f < n_th; f++ ) {
5446 kmp_info_t *th = team->t.t_threads[f];
5447 KMP_DEBUG_ASSERT( th != NULL );
5449 th->th.th_first_place = place;
5450 th->th.th_last_place = place;
5451 th->th.th_new_place = place;
5454 if ( (s_count == S) && rem && (gap_ct == gap) ) {
5457 else if ( (s_count == S+1) && rem && (gap_ct == gap) ) {
5459 if ( place == last_place ) {
5460 place = first_place;
5462 else if ( place == __kmp_affinity_num_masks - 1) {
5472 else if (s_count == S) {
5473 if ( place == last_place ) {
5474 place = first_place;
5476 else if ( place == __kmp_affinity_num_masks - 1) {
5486 KA_TRACE( 100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n",
5487 __kmp_gtid_from_thread( team->t.t_threads[f] ),
5488 team->t.t_id, f, th->th.th_new_place,
5489 th->th.th_first_place, th->th.th_last_place) );
5491 KMP_DEBUG_ASSERT( place == masters_place );
5500 KA_TRACE( 20, (
"__kmp_partition_places: exit T#%d\n", team->t.t_id ) );
/*
 * __kmp_allocate_team
 *
 * Produces a team of `new_nproc` threads (capacity `max_nproc`) for `root`.
 * Three sources are tried, in this order:
 *
 *  1. The root's hot team, when the root is not active and new_nproc > 1.
 *     The hot team is shrunk (surplus threads freed), grown (new threads
 *     allocated, arrays reallocated if max_nproc is too small) or reused
 *     as is.
 *  2. The global team pool (__kmp_team_pool): a pooled team whose
 *     t_max_nproc is large enough is re-initialized; other pooled teams are
 *     reaped during the scan.
 *  3. A freshly allocated kmp_team_t with newly allocated arrays.
 *
 * root:          root the team belongs to.
 * new_nproc:     number of threads the team should have (>= 1).
 * max_nproc:     capacity to allocate for (>= new_nproc).
 * new_proc_bind: proc-bind policy stored into t_proc_bind.
 * new_icvs / new_set_* / new_bt_*: control values passed on to team
 *                (re)initialization.
 * `argc` is used below (argv entries are sized from it), but its
 * declaration is not visible in this listing.
 *
 * NOTE(review): return statements are not visible; presumably each path
 * returns `team` -- confirm.
 */
5507 __kmp_allocate_team( kmp_root_t *root,
int new_nproc,
int max_nproc,
5509 kmp_proc_bind_t new_proc_bind,
5512 kmp_internal_control_t *new_icvs,
5514 int new_set_nproc,
int new_set_dynamic,
int new_set_nested,
5515 int new_set_blocktime,
int new_bt_intervals,
int new_bt_set,
5524 KA_TRACE( 20, (
"__kmp_allocate_team: called\n"));
5525 KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 );
5526 KMP_DEBUG_ASSERT( max_nproc >= new_nproc );
/* ---- Path 1: use the root's hot team. ---- */
5533 if ( ! root->r.r_active && new_nproc > 1 ) {
5535 KMP_DEBUG_ASSERT( new_nproc == max_nproc );
5537 team = root -> r.r_hot_team;
5539 #if OMP_30_ENABLED && KMP_DEBUG
5540 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5541 KA_TRACE( 20, (
"__kmp_allocate_team: hot team task_team = %p before reinit\n",
5542 team -> t.t_task_team ));
/* Hot team is too large: shrink it. */
5547 if( team -> t.t_nproc > new_nproc ) {
5548 KA_TRACE( 20, (
"__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc ));
5551 team -> t.t_size_changed = 1;
/* An active task team is deactivated and detached before threads are
 * released. */
5554 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5555 kmp_task_team_t *task_team = team->t.t_task_team;
5556 if ( ( task_team != NULL ) && TCR_SYNC_4(task_team->tt.tt_active) ) {
5563 KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
5564 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
5567 KA_TRACE( 20, (
"__kmp_allocate_team: setting task_team %p to NULL\n",
5568 &team->t.t_task_team ) );
5569 team->t.t_task_team = NULL;
5572 KMP_DEBUG_ASSERT( task_team == NULL );
5575 #endif // OMP_30_ENABLED
/* Free the threads in slots [new_nproc, t_nproc) and clear their slots. */
5578 for( f = new_nproc ; f < team->t.t_nproc ; f++ ) {
5579 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5580 __kmp_free_thread( team->t.t_threads[ f ] );
5581 team -> t.t_threads[ f ] = NULL;
5584 team -> t.t_nproc = new_nproc;
5587 team -> t.t_sched = new_icvs->sched;
5589 __kmp_reinitialize_team( team, new_nproc,
5592 root->r.r_uber_thread->th.th_ident
5594 new_set_nproc, new_set_dynamic, new_set_nested,
5595 new_set_blocktime, new_bt_intervals, new_bt_set
/* Keep a surviving task team's counters in step with the new size. */
5600 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5601 kmp_task_team_t *task_team = team->t.t_task_team;
5602 if ( task_team != NULL ) {
5603 KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
5604 task_team->tt.tt_nproc = new_nproc;
5605 task_team->tt.tt_unfinished_threads = new_nproc;
5606 task_team->tt.tt_ref_ct = new_nproc - 1;
/* Remaining threads must see the reduced team size. */
5612 for( f = 0 ; f < new_nproc ; f++ ) {
5613 team -> t.t_threads[ f ] -> th.th_team_nproc = team->t.t_nproc;
5618 KF_TRACE( 10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n",
5619 0, team->t.t_threads[0], team ) );
5621 __kmp_push_current_task_to_thread( team -> t.t_threads[ 0 ], team, 0 );
5625 for ( f = 0; f < team->t.t_nproc; f++ ) {
5626 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
5627 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5632 team->t.t_proc_bind = new_proc_bind;
5633 # if KMP_OS_WINDOWS || KMP_OS_LINUX
5634 __kmp_partition_places( team );
/* Hot team is too small: grow it. */
5639 else if ( team -> t.t_nproc < new_nproc ) {
5641 kmp_affin_mask_t *old_mask;
5642 if ( KMP_AFFINITY_CAPABLE() ) {
5643 KMP_CPU_ALLOC(old_mask);
5647 KA_TRACE( 20, (
"__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc ));
5650 team -> t.t_size_changed = 1;
/* Grow the team's arrays first if their capacity is insufficient. */
5654 if(team -> t.t_max_nproc < new_nproc) {
5656 __kmp_reallocate_team_arrays(team, new_nproc);
5657 __kmp_reinitialize_team( team, new_nproc,
5662 new_set_nproc, new_set_dynamic, new_set_nested,
5663 new_set_blocktime, new_bt_intervals, new_bt_set
/* Widen this thread's affinity to the full mask while workers are
 * allocated; the saved mask is restored after the loop. */
5675 __kmp_set_thread_affinity_mask_full_tmp( old_mask );
5679 for( f = team->t.t_nproc ; f < new_nproc ; f++ ) {
5680 kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f );
5681 KMP_DEBUG_ASSERT( new_worker );
5682 team->t.t_threads[ f ] = new_worker;
5683 new_worker->th.th_team_nproc = team->t.t_nproc;
5685 KA_TRACE( 20, (
"__kmp_allocate_team: team %d init T#%d arrived: join=%u, plain=%u\n",
5686 team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f,
5687 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5688 team->t.t_bar[bs_plain_barrier].b_arrived ) );
/* Align the new worker's barrier arrival state with the team's. */
5692 kmp_balign_t * balign = new_worker->th.th_bar;
5693 for ( b = 0; b < bp_last_bar; ++ b ) {
5694 balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived;
5700 if ( KMP_AFFINITY_CAPABLE() ) {
5702 __kmp_set_system_affinity( old_mask, TRUE );
5703 KMP_CPU_FREE(old_mask);
5708 __kmp_initialize_team( team, new_nproc,
5711 root->r.r_uber_thread->th.th_ident
5713 new_set_nproc, new_set_dynamic, new_set_nested,
5714 new_set_blocktime, new_bt_intervals, new_bt_set
5719 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5720 kmp_task_team_t *task_team = team->t.t_task_team;
5721 if ( task_team != NULL ) {
5722 KMP_DEBUG_ASSERT( ! TCR_4(task_team->tt.tt_found_tasks) );
5723 task_team->tt.tt_nproc = new_nproc;
5724 task_team->tt.tt_unfinished_threads = new_nproc;
5725 task_team->tt.tt_ref_ct = new_nproc - 1;
/* Re-bind every member (old and new) to the resized team. */
5731 for( f = 0 ; f < team->t.t_nproc ; f++ )
5732 __kmp_initialize_info( team->t.t_threads[ f ], team, f,
5733 __kmp_gtid_from_tid( f, team ) );
5735 for ( f = 0; f < team->t.t_nproc; ++ f ) {
5736 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
5737 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
5742 team->t.t_proc_bind = new_proc_bind;
5743 # if KMP_OS_WINDOWS || KMP_OS_LINUX
5744 __kmp_partition_places( team );
/* Hot team already has the requested size: reuse it unchanged. */
5750 KA_TRACE( 20, (
"__kmp_allocate_team: reusing hot team\n" ));
/* A t_size_changed of -1 is turned into 1.  NOTE(review): the assignment
 * of 0 is presumably the else arm; the branch structure is not visible. */
5754 if ( team -> t.t_size_changed == -1 ) {
5755 team -> t.t_size_changed = 1;
5757 team -> t.t_size_changed = 0;
5763 team -> t.t_sched = new_icvs->sched;
5766 __kmp_reinitialize_team( team, new_nproc,
5769 root->r.r_uber_thread->th.th_ident
5771 new_set_nproc, new_set_dynamic, new_set_nested,
5772 new_set_blocktime, new_bt_intervals, new_bt_set
5777 KF_TRACE( 10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n",
5778 0, team->t.t_threads[0], team ) );
5779 __kmp_push_current_task_to_thread( team -> t.t_threads[ 0 ], team, 0 );
5783 # if (KMP_OS_WINDOWS || KMP_OS_LINUX)
/* Places are re-partitioned only when the proc-bind policy changed. */
5784 if ( team->t.t_proc_bind == new_proc_bind ) {
5785 KA_TRACE( 200, (
"__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n",
5786 team->t.t_id, new_proc_bind, team->t.t_first_place,
5787 team->t.t_last_place ) );
5790 team->t.t_proc_bind = new_proc_bind;
5791 __kmp_partition_places( team );
5794 if ( team->t.t_proc_bind != new_proc_bind ) {
5795 team->t.t_proc_bind = new_proc_bind;
5802 __kmp_alloc_argv_entries( argc, team, TRUE );
5803 team -> t.t_argc = argc;
5809 KF_TRACE( 10, (
" hot_team = %p\n", team ) );
5811 #if OMP_30_ENABLED && KMP_DEBUG
5812 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5813 KA_TRACE( 20, (
"__kmp_allocate_team: hot team task_team = %p after reinit\n",
5814 team -> t.t_task_team ));
/* ---- Path 2: look for a large-enough team in the team pool. ---- */
5825 for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; )
5828 if ( team->t.t_max_nproc >= max_nproc ) {
/* Unlink the team from the pool head and re-initialize it. */
5830 __kmp_team_pool = team->t.t_next_pool;
5833 __kmp_initialize_team( team, new_nproc,
5838 new_set_nproc, new_set_dynamic, new_set_nested,
5839 new_set_blocktime, new_bt_intervals, new_bt_set
5844 KA_TRACE( 20, (
"__kmp_allocate_team: setting task_team %p to NULL\n",
5845 &team->t.t_task_team ) );
5846 team -> t.t_task_team = NULL;
5850 __kmp_alloc_argv_entries( argc, team, TRUE );
5851 team -> t.t_argc = argc;
5853 KA_TRACE( 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5854 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5857 for ( b = 0; b < bs_last_barrier; ++ b) {
5858 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
5863 team->t.t_proc_bind = new_proc_bind;
5866 KA_TRACE( 20, (
"__kmp_allocate_team: using team from pool %d.\n", team->t.t_id ));
/* Pooled team is too small: reap it; the value returned by
 * __kmp_reap_team becomes both the loop cursor and the new pool head. */
5875 team = __kmp_reap_team( team );
5876 __kmp_team_pool = team;
/* ---- Path 3: nothing reusable, build a new team. ---- */
5881 team = (kmp_team_t*) __kmp_allocate(
sizeof( kmp_team_t ) );
5884 team -> t.t_max_nproc = max_nproc;
5888 __kmp_allocate_team_arrays( team, max_nproc );
5889 __kmp_initialize_team( team, new_nproc,
5894 new_set_nproc, new_set_dynamic, new_set_nested,
5895 new_set_blocktime, new_bt_intervals, new_bt_set
5900 KA_TRACE( 20, (
"__kmp_allocate_team: setting task_team %p to NULL\n",
5901 &team->t.t_task_team ) );
5902 team -> t.t_task_team = NULL;
5905 if ( __kmp_storage_map ) {
5906 __kmp_print_team_storage_map(
"team", team, team->t.t_id, new_nproc );
/* Unlike the reuse paths, the last argument here is FALSE. */
5910 __kmp_alloc_argv_entries( argc, team, FALSE );
5911 team -> t.t_argc = argc;
5913 KA_TRACE( 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5914 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
5917 for ( b = 0; b < bs_last_barrier; ++ b ) {
5918 team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE;
5923 team->t.t_proc_bind = new_proc_bind;
5928 KA_TRACE( 20, (
"__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id ));
/*
 * __kmp_free_team: release team, which was used under root.
 *
 * Visible steps: clear the microtask pointer (t_pkfn) and the copyin
 * counter; then, for a team that is not the hot team of the root
 * (r.r_hot_team), deactivate its task team when tasking is not
 * tskm_immediate_exec, clear t_parent, hand worker slots 1 .. t_nproc-1 to
 * __kmp_free_thread() (slot 0 is not touched by the loop), and push the team
 * onto the head of __kmp_team_pool.
 * NOTE(review): presumably every step after the hot-team check is nested
 * inside that branch -- confirm against the brace structure.
 */
5939 __kmp_free_team( kmp_root_t *root, kmp_team_t *team )
5942 KA_TRACE( 20, (
"__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id ));
/* Debug-only sanity checks on the arguments and on the team thread array. */
5945 KMP_DEBUG_ASSERT( root );
5946 KMP_DEBUG_ASSERT( team );
5947 KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc );
5948 KMP_DEBUG_ASSERT( team->t.t_threads );
/* Reset per-region state: no microtask, copyin counter back to zero. */
5951 TCW_SYNC_PTR(team->t.t_pkfn, NULL);
5952 team -> t.t_copyin_counter = 0;
/* The hot team of the root is treated differently from any other team. */
5956 if( team != root->r.r_hot_team ) {
/* With tasking enabled, mark an attached task team inactive and detach it. */
5959 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
5960 kmp_task_team_t *task_team = team->t.t_task_team;
5961 if ( task_team != NULL ) {
5968 KA_TRACE( 20, (
"__kmp_free_team: deactivating task_team %p\n",
5970 KMP_DEBUG_ASSERT( team->t.t_nproc > 1 );
5971 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
5973 team->t.t_task_team = NULL;
5979 team -> t.t_parent = NULL;
/* Release every worker (index 1 and up) and clear its slot in t_threads. */
5983 for ( f = 1; f < team->t.t_nproc; ++ f ) {
5984 KMP_DEBUG_ASSERT( team->t.t_threads[ f ] );
5985 __kmp_free_thread( team->t.t_threads[ f ] );
5986 team->t.t_threads[ f ] = NULL;
/* Push the team on the front of the global team pool list. */
5992 team -> t.t_next_pool = (kmp_team_t*) __kmp_team_pool;
5993 __kmp_team_pool = (
volatile kmp_team_t*) team;
/*
 * __kmp_reap_team: free the storage owned by team.
 *
 * Captures team->t.t_next_pool first, checks the team arrays in debug builds,
 * releases them through __kmp_free_team_arrays() and frees the t_argv buffer.
 * NOTE(review): a caller in this file assigns the result back to a team
 * pointer (team = __kmp_reap_team( team )), so next_pool is presumably the
 * return value -- confirm.
 */
6002 __kmp_reap_team( kmp_team_t *team )
/* NOTE(review): team is dereferenced here, before KMP_DEBUG_ASSERT( team ). */
6004 kmp_team_t *next_pool = team -> t.t_next_pool;
6006 KMP_DEBUG_ASSERT( team );
6007 KMP_DEBUG_ASSERT( team -> t.t_dispatch );
6008 KMP_DEBUG_ASSERT( team -> t.t_disp_buffer );
6009 KMP_DEBUG_ASSERT( team -> t.t_threads );
6012 KMP_DEBUG_ASSERT( team -> t.t_set_nproc );
6014 KMP_DEBUG_ASSERT( team -> t.t_argv );
6020 __kmp_free_team_arrays( team );
/*
 * With KMP_PERF_V106 on, t_argv is freed only when it does not point at the
 * inline buffer t_inline_argv. The second, unconditional __kmp_free is
 * presumably the alternative preprocessor branch -- confirm.
 */
6021 #if (KMP_PERF_V106 == KMP_ON)
6022 if ( team -> t.t_argv != &team -> t.t_inline_argv[0] )
6023 __kmp_free( (
void*) team -> t.t_argv );
6025 __kmp_free( (
void*) team -> t.t_argv );
/*
 * __kmp_free_thread: put worker this_th back on the free thread pool.
 *
 * Clears the th_team, th_root and th_dispatch links, then links the thread
 * into the singly linked list headed by __kmp_thread_pool, which is kept in
 * ascending ds_gtid order. __kmp_thread_pool_insert_pt remembers the most
 * recently inserted thread so that the scan can start there instead of at
 * the list head. Afterwards th_in_pool is set, __kmp_thread_pool_nth is
 * incremented and __kmp_nth is decremented.
 */
6061 __kmp_free_thread( kmp_info_t *this_th )
6066 KA_TRACE( 20, (
"__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
6067 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid ));
/* NOTE(review): the trace arguments above already read through this_th. */
6069 KMP_DEBUG_ASSERT( this_th );
/* Detach the thread from its team, root and dispatch structure. */
6073 TCW_PTR(this_th->th.th_team, NULL);
6074 TCW_PTR(this_th->th.th_root, NULL);
6075 TCW_PTR(this_th->th.th_dispatch, NULL);
/*
 * The cached insert point is only usable when its gtid is not greater than
 * the gtid being inserted; otherwise drop it so the scan restarts at the head.
 */
6081 gtid = this_th->th.th_info.ds.ds_gtid;
6082 if ( __kmp_thread_pool_insert_pt != NULL ) {
6083 KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL );
6084 if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) {
6085 __kmp_thread_pool_insert_pt = NULL;
/*
 * scan addresses the link to be rewritten: either the next-link of the cached
 * insert point or the pool head itself (presumably the else branch -- confirm).
 */
6096 if ( __kmp_thread_pool_insert_pt != NULL ) {
6097 scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool );
6100 scan = (kmp_info_t **)&__kmp_thread_pool;
/* Empty-bodied loop: advance past every pooled thread with a smaller gtid. */
6102 for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid );
6103 scan = &( (*scan)->th.th_next_pool ) );
/* Splice this_th in front of *scan and remember it as the new insert point. */
6109 TCW_PTR(this_th->th.th_next_pool, *scan);
6110 __kmp_thread_pool_insert_pt = *scan = this_th;
6111 KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL )
6112 || ( this_th->th.th_info.ds.ds_gtid
6113 < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) );
6114 TCW_4(this_th->th.th_in_pool, TRUE);
6115 __kmp_thread_pool_nth++;
6117 TCW_4(__kmp_nth, __kmp_nth - 1);
/*
 * When blocktime was not set through the environment and __kmp_avail_proc is
 * positive, clear __kmp_zero_bt once __kmp_nth no longer exceeds
 * __kmp_avail_proc.
 */
6119 #ifdef KMP_ADJUST_BLOCKTIME
6122 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6123 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6124 if ( __kmp_nth <= __kmp_avail_proc ) {
6125 __kmp_zero_bt = FALSE;
/*
 * __kmp_join_barrier: join barrier executed by thread gtid at the end of a
 * parallel region.
 *
 * Looks up the thread and its team, optionally runs the extra tasking
 * barrier, refreshes the blocktime settings of the thread from the team,
 * gathers the team on bs_forkjoin_barrier with the configured gather pattern
 * (linear, tree, otherwise hyper) and, on the master tid, waits for the task
 * team through __kmp_task_team_wait().
 */
6134 __kmp_join_barrier(
int gtid )
6136 register kmp_info_t *this_thr = __kmp_threads[ gtid ];
6137 register kmp_team_t *team;
6138 register kmp_uint count;
6139 register kmp_uint nproc;
6140 kmp_info_t *master_thread;
/* ITT sync object for this barrier; stays NULL unless ITT notification is active. */
6146 void * itt_sync_obj = NULL;
6148 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
6149 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
/* Cache team, team size, tid, team id and master thread of the caller. */
6156 team = this_thr -> th.th_team;
6158 nproc = this_thr -> th.th_team_nproc;
6159 KMP_DEBUG_ASSERT( nproc == team->t.t_nproc );
6160 tid = __kmp_tid_from_gtid(gtid);
6162 team_id = team -> t.t_id;
6165 master_thread = this_thr -> th.th_team_master;
/* Dump the runtime structures when the cached master is not slot 0 of the team. */
6167 if ( master_thread != team->t.t_threads[0] ) {
6168 __kmp_print_structure();
6171 KMP_DEBUG_ASSERT( master_thread == team->t.t_threads[0] );
6175 KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] );
6176 KMP_DEBUG_ASSERT( TCR_PTR(this_thr->th.th_team) );
6177 KMP_DEBUG_ASSERT( TCR_PTR(this_thr->th.th_root) );
6178 KMP_DEBUG_ASSERT( this_thr == team -> t.t_threads[tid] );
6180 KA_TRACE( 10, (
"__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n",
6181 gtid, team_id, tid ));
/* Extra tasking barrier, only in tskm_extra_barrier mode. */
6184 if ( __kmp_tasking_mode == tskm_extra_barrier ) {
6185 __kmp_tasking_barrier( team, this_thr, gtid );
/* NOTE(review): taking in the trace text below looks like a typo for tasking. */
6187 KA_TRACE( 10, (
"__kmp_join_barrier: T#%d(%d:%d) past taking barrier\n",
6188 gtid, team_id, tid ));
6191 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
6192 KA_TRACE( 20, (
"__kmp_join_barrier: T#%d, old team = %d, old task_team = %p, th_task_team = %p\n",
6193 __kmp_gtid_from_thread( this_thr ), team_id, team -> t.t_task_team,
6194 this_thr->th.th_task_team ) );
6195 KMP_DEBUG_ASSERT( this_thr->th.th_task_team == team->t.t_task_team );
/*
 * Unless blocktime is KMP_MAX_BLOCKTIME, copy bt_intervals and bt_set for this
 * tid into the thread. The two assignment pairs read from different sources
 * (implicit task ICVs versus the t_set_bt_* arrays) and are presumably the two
 * arms of the OMP_30_ENABLED conditional closed below -- confirm.
 */
6208 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
6210 this_thr -> th.th_team_bt_intervals = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
6211 this_thr -> th.th_team_bt_set = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
6213 this_thr -> th.th_team_bt_intervals = team -> t.t_set_bt_intervals[tid];
6214 this_thr -> th.th_team_bt_set= team -> t.t_set_bt_set[tid];
6215 #endif // OMP_30_ENABLED
/*
 * Master tid only: while __kmp_init_monitor is below 2, call __kmp_wait_sleep
 * on it with check value 2 (presumably waiting for the monitor thread to
 * finish starting up -- confirm).
 */
6230 if( KMP_MASTER_TID( tid ) && TCR_4(__kmp_init_monitor) < 2 ) {
6231 __kmp_wait_sleep( this_thr, (
volatile kmp_uint32*)&__kmp_init_monitor, 2, 0
6232 USE_ITT_BUILD_ARG( itt_sync_obj )
6238 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
6239 __kmp_itt_barrier_starting( gtid, itt_sync_obj );
/*
 * Gather phase. Linear is used when selected explicitly or when the gather
 * branch bits are 0; tree when selected; the remaining call is the hyper
 * gather.
 */
6242 if ( __kmp_barrier_gather_pattern[ bs_forkjoin_barrier ] == bp_linear_bar || __kmp_barrier_gather_branch_bits[ bs_forkjoin_barrier ] == 0 ) {
6243 __kmp_linear_barrier_gather( bs_forkjoin_barrier, this_thr, gtid, tid, NULL
6244 USE_ITT_BUILD_ARG( itt_sync_obj )
6246 }
else if ( __kmp_barrier_gather_pattern[ bs_forkjoin_barrier ] == bp_tree_bar ) {
6247 __kmp_tree_barrier_gather( bs_forkjoin_barrier, this_thr, gtid, tid, NULL
6248 USE_ITT_BUILD_ARG( itt_sync_obj )
6251 __kmp_hyper_barrier_gather( bs_forkjoin_barrier, this_thr, gtid, tid, NULL
6252 USE_ITT_BUILD_ARG( itt_sync_obj )
6257 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG )
6258 __kmp_itt_barrier_middle( gtid, itt_sync_obj );
/* After the gather the master waits on the task team when tasking is enabled. */
6271 if ( KMP_MASTER_TID( tid ) ) {
6272 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
6275 __kmp_task_team_wait( this_thr, team
6276 USE_ITT_BUILD_ARG( itt_sync_obj )
6283 if( KMP_MASTER_TID( tid )) {
6284 KA_TRACE( 15, (
"__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n",
6285 gtid, team_id, tid, nproc ));
6292 KA_TRACE( 10, (
"__kmp_join_barrier: T#%d(%d:%d) leaving\n",
6293 gtid, team_id, tid ));
/*
 * __kmp_fork_barrier: fork barrier executed by thread gtid before a parallel
 * region starts.
 *
 * gtid  global thread id of the caller.
 * tid   team-local id; only tid == 0 gets a team pointer up front, every
 *       other caller starts with team == NULL and re-reads team and tid
 *       after the release phase.
 *
 * The master performs ITT notification, debug checks on the b_go flags of
 * the workers, task team setup and a blocktime refresh. Then the release
 * phase runs with the configured pattern (linear, tree, otherwise hyper).
 * After release, a set g_done flag leads to the early-exit path; otherwise
 * ICVs are pulled (KMP_BARRIER_ICV_PULL), the task team is synchronized and
 * affinity placement is applied.
 */
6300 __kmp_fork_barrier(
int gtid,
int tid )
6302 kmp_info_t *this_thr = __kmp_threads[ gtid ];
6303 kmp_team_t *team = ( tid == 0 ) ? this_thr -> th.th_team : NULL;
6305 void * itt_sync_obj = NULL;
6308 KA_TRACE( 10, (
"__kmp_fork_barrier: T#%d(%d:%d) has arrived\n",
6309 gtid, ( team != NULL ) ? team->t.t_id : -1, tid ));
/* Master-only preparation before the workers are released. */
6312 if ( KMP_MASTER_TID( tid ) ) {
6314 #if USE_ITT_BUILD && USE_ITT_NOTIFY
6315 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
6316 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier, 1 );
6318 __kmp_itt_barrier_middle( gtid, itt_sync_obj );
6324 register kmp_info_t **other_threads = team -> t.t_threads;
/*
 * Debug check over workers 1 .. t_nproc-1: the b_go flag, ignoring the sleep
 * bit, must still hold KMP_INIT_BARRIER_STATE and the worker must already
 * point at this team.
 */
6330 for( i = 1; i < team -> t.t_nproc ; i++ ) {
6331 KA_TRACE( 500, (
"__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork "
6333 gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid,
6334 team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid,
6335 other_threads[i]->th.th_bar[ bs_forkjoin_barrier ].bb.b_go ) );
6337 KMP_DEBUG_ASSERT( ( TCR_4( other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go )
6338 & ~(KMP_BARRIER_SLEEP_STATE) )
6339 == KMP_INIT_BARRIER_STATE );
6340 KMP_DEBUG_ASSERT( other_threads[i]->th.th_team == team );
6346 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
6347 __kmp_task_team_setup( this_thr, team );
/*
 * Same blocktime refresh as in the join barrier: two alternative sources,
 * presumably selected by the OMP_30_ENABLED conditional closed below.
 */
6361 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
6363 this_thr -> th.th_team_bt_intervals = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals;
6364 this_thr -> th.th_team_bt_set = team -> t.t_implicit_task_taskdata[tid].td_icvs.bt_set;
6366 this_thr -> th.th_team_bt_intervals = team -> t.t_set_bt_intervals[tid];
6367 this_thr -> th.th_team_bt_set= team -> t.t_set_bt_set[tid];
6368 #endif // OMP_30_ENABLED
/*
 * Release phase. Linear is used when selected explicitly or when the release
 * branch bits are 0; tree when selected; the remaining call is the hyper
 * release.
 */
6372 if ( __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] == bp_linear_bar || __kmp_barrier_release_branch_bits[ bs_forkjoin_barrier ] == 0 ) {
6373 __kmp_linear_barrier_release( bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
6374 USE_ITT_BUILD_ARG( itt_sync_obj )
6376 }
else if ( __kmp_barrier_release_pattern[ bs_forkjoin_barrier ] == bp_tree_bar ) {
6377 __kmp_tree_barrier_release( bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
6378 USE_ITT_BUILD_ARG( itt_sync_obj )
6381 __kmp_hyper_barrier_release( bs_forkjoin_barrier, this_thr, gtid, tid, TRUE
6382 USE_ITT_BUILD_ARG( itt_sync_obj )
/*
 * Early exit when the library is shutting down (g_done set): a thread that
 * still has a task team either just clears the pointer (master tid) or drops
 * its reference with __kmp_unref_task_team (presumably the non-master branch
 * -- confirm), then reports the barrier as finished to ITT.
 */
6389 if ( TCR_4(__kmp_global.g.g_done) ) {
6392 if ( this_thr->th.th_task_team != NULL ) {
6393 if ( KMP_MASTER_TID( tid ) ) {
6394 TCW_PTR(this_thr->th.th_task_team, NULL);
6397 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
6402 #if USE_ITT_BUILD && USE_ITT_NOTIFY
6403 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
6404 if ( !KMP_MASTER_TID( tid ) ) {
6405 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
6407 __kmp_itt_barrier_finished( gtid, itt_sync_obj );
6411 KA_TRACE( 10, (
"__kmp_fork_barrier: T#%d is leaving early\n", gtid ));
/* Normal path: re-read team and tid now that the thread has been released. */
6422 team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team);
6423 KMP_DEBUG_ASSERT( team != NULL );
6424 tid = __kmp_tid_from_gtid( gtid );
/*
 * ICV pull: each non-master thread initializes its implicit task and copies
 * the initial ICVs of the team into its own implicit task data.
 */
6428 # if KMP_BARRIER_ICV_PULL
6434 if (! KMP_MASTER_TID( tid ) ) {
6439 __kmp_init_implicit_task( team->t.t_ident, team->t.t_threads[tid],
6441 copy_icvs( &team->t.t_implicit_task_taskdata[tid].td_icvs,
6442 &team->t.t_initial_icvs );
6444 # endif // KMP_BARRIER_ICV_PULL
6446 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
6447 __kmp_task_team_sync( this_thr, team );
/*
 * Affinity: with proc_bind_intel, balanced affinity is recomputed when the
 * team size changed; for any other binding except proc_bind_false and
 * proc_bind_disabled the thread is moved with __kmp_affinity_set_place unless
 * th_new_place already equals th_current_place (presumably the else branch of
 * that comparison -- confirm).
 */
6452 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
6453 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
6454 if ( proc_bind == proc_bind_intel ) {
6460 if( __kmp_affinity_type == affinity_balanced && team->t.t_size_changed ) {
6461 __kmp_balanced_affinity( tid, team->t.t_nproc );
6464 #if OMP_40_ENABLED && (KMP_OS_WINDOWS || KMP_OS_LINUX)
6466 else if ( ( proc_bind != proc_bind_false )
6467 && ( proc_bind != proc_bind_disabled )) {
6468 if ( this_thr->th.th_new_place == this_thr->th.th_current_place ) {
6469 KA_TRACE( 100, (
"__kmp_fork_barrier: T#%d already in correct place %d\n",
6470 __kmp_gtid_from_thread( this_thr ), this_thr->th.th_current_place ) );
6473 __kmp_affinity_set_place( gtid );
/* Non-master threads report the barrier as finished to ITT. */
6478 #if USE_ITT_BUILD && USE_ITT_NOTIFY
6479 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
6480 if ( !KMP_MASTER_TID( tid ) ) {
6481 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
6482 __kmp_itt_barrier_finished( gtid, itt_sync_obj );
6486 KA_TRACE( 10, (
"__kmp_fork_barrier: T#%d(%d:%d) is leaving\n",
6487 gtid, team->t.t_id, tid ));
/*
 * __kmp_launch_thread: main loop of a worker thread.
 *
 * Until __kmp_global.g.g_done is set the worker waits in the fork barrier,
 * and when it has a team with a non-NULL microtask (t_pkfn) it invokes the
 * team invoker t_invoke( gtid ) and then goes through the join barrier.
 * After the loop it drops any task team reference and destroys the common
 * data of the gtid.
 */
6495 __kmp_launch_thread( kmp_info_t *this_thr )
6497 int gtid = this_thr->th.th_info.ds.ds_gtid;
6499 kmp_team_t *(*
volatile pteam);
6502 KA_TRACE( 10, (
"__kmp_launch_thread: T#%d start\n", gtid ) );
/* Consistency checking needs a per-thread construct stack. */
6504 if( __kmp_env_consistency_check ) {
6505 this_thr -> th.th_cons = __kmp_allocate_cons_stack( gtid );
6509 while( ! TCR_4(__kmp_global.g.g_done) ) {
6510 KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] );
6514 KA_TRACE( 20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid ));
/* The tid is not known yet, so KMP_GTID_DNE is passed as the tid argument. */
6517 __kmp_fork_barrier( gtid, KMP_GTID_DNE );
/* pteam addresses th_team so that every use below re-reads the current team. */
6519 pteam = (kmp_team_t *(*))(& this_thr->th.th_team);
6522 if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) {
6524 if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) {
6526 KA_TRACE( 20, (
"__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
6527 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn ));
/*
 * x86 only: when FP control inheritance is on and the team saved its state,
 * clear the x87 status word and load the saved x87 control word and MXCSR.
 */
6529 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
6530 if ( __kmp_inherit_fp_control && (*pteam)->t.t_fp_control_saved ) {
6531 __kmp_clear_x87_fpu_status_word();
6532 __kmp_load_x87_fpu_control_word( &(*pteam)->t.t_x87_fpu_control_word );
6533 __kmp_load_mxcsr( &(*pteam)->t.t_mxcsr );
6537 rc = (*pteam) -> t.t_invoke( gtid );
6541 KA_TRACE( 20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
6542 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn ));
6546 __kmp_join_barrier( gtid );
/* The value read here is discarded. */
6549 TCR_SYNC_PTR(__kmp_global.g.g_done);
/* Shutdown path: release the task team reference and per-gtid common data. */
6552 if ( TCR_PTR( this_thr->th.th_task_team ) != NULL ) {
6553 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
6558 __kmp_common_destroy_gtid( gtid );
6560 KA_TRACE( 10, (
"__kmp_launch_thread: T#%d done\n", gtid ) );
/*
 * __kmp_internal_end_dest: per-thread cleanup entry point that receives the
 * thread-specific value as an opaque pointer.
 *
 * specific_gtid  the stored value; the gtid is recovered by subtracting 1,
 *                so the stored value is gtid + 1.
 *
 * For a non-negative uber gtid the thread-specific gtid is set again before
 * __kmp_internal_end_thread( gtid ) is called.
 * NOTE(review): presumably registered as a thread-specific key destructor --
 * confirm at the registration site.
 */
6571 __kmp_internal_end_dest(
void *specific_gtid )
/* Silence the Intel compiler warning about the pointer-to-int narrowing below. */
6573 #ifdef __INTEL_COMPILER
6574 #pragma warning( push )
6575 #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits
6578 int gtid = (kmp_intptr_t)specific_gtid - 1;
6579 #ifdef __INTEL_COMPILER
6580 #pragma warning( pop )
6583 KA_TRACE( 30, (
"__kmp_internal_end_dest: T#%d\n", gtid));
6597 if(gtid >= 0 && KMP_UBER_GTID(gtid))
6598 __kmp_gtid_set_specific( gtid );
6599 #ifdef KMP_TDATA_GTID
6602 __kmp_internal_end_thread( gtid );
/*
 * __kmp_internal_end_dtor: library destructor hook, compiled only for Unix
 * builds with GUIDEDLL_EXPORTS defined. Marked with the destructor attribute
 * and simply forwards to __kmp_internal_end_atexit().
 */
6605 #if KMP_OS_UNIX && GUIDEDLL_EXPORTS
6611 __attribute__(( destructor ))
6613 __kmp_internal_end_dtor(
void )
6615 __kmp_internal_end_atexit();
/*
 * __kmp_internal_end_fini: alternative shutdown entry point that forwards to
 * __kmp_internal_end_atexit().
 */
6619 __kmp_internal_end_fini(
void )
6621 __kmp_internal_end_atexit();
/*
 * __kmp_internal_end_atexit: process-exit handler. Shuts the library down
 * with __kmp_internal_end_library( -1 ), where the negative argument makes
 * that routine look up the gtid of the calling thread itself, and then
 * closes the console.
 */
6628 __kmp_internal_end_atexit(
void )
6630 KA_TRACE( 30, (
"__kmp_internal_end_atexit\n" ) );
6652 __kmp_internal_end_library( -1 );
6654 __kmp_close_console();
/*
 * __kmp_reap_thread (name taken from the trace text below): tear down one
 * thread descriptor and free it.
 *
 * Visible steps: release the thread from the fork barrier when blocktime is
 * finite, reap the worker, fix the pool counters, free fast memory and
 * suspend state, clear the __kmp_threads slot, then free the consistency
 * stack, th_pri_common, bget data, affinity mask, the serial team and finally
 * the descriptor itself.
 * NOTE(review): which of these steps depend on the second (is_root)
 * parameter cannot be seen here -- confirm against the full definition.
 */
6660 kmp_info_t * thread,
6668 KMP_DEBUG_ASSERT( thread != NULL );
6670 gtid = thread->th.th_info.ds.ds_gtid;
/* With a finite blocktime the thread may be parked at the fork barrier. */
6674 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
6676 KA_TRACE( 20, (
"__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) );
6680 &thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go,
6687 __kmp_reap_worker( thread );
/* Keep the active-in-pool counter consistent with the thread being removed. */
6702 if ( thread->th.th_active_in_pool ) {
6703 thread->th.th_active_in_pool = FALSE;
6704 KMP_TEST_THEN_DEC32(
6705 (kmp_int32 *) &__kmp_thread_pool_active_nth );
6706 KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 );
6710 KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 );
6711 --__kmp_thread_pool_nth;
6716 __kmp_free_fast_memory( thread );
6719 __kmp_suspend_uninitialize_thread( thread );
/* Remove the descriptor from the global thread table. */
6721 KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread );
6722 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
/* Same blocktime re-evaluation as in __kmp_free_thread. */
6727 #ifdef KMP_ADJUST_BLOCKTIME
6730 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
6731 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
6732 if ( __kmp_nth <= __kmp_avail_proc ) {
6733 __kmp_zero_bt = FALSE;
/* Free the optional per-thread resources, clearing each pointer afterwards. */
6739 if( __kmp_env_consistency_check ) {
6740 if ( thread->th.th_cons ) {
6741 __kmp_free_cons_stack( thread->th.th_cons );
6742 thread->th.th_cons = NULL;
6746 if ( thread->th.th_pri_common != NULL ) {
6747 __kmp_free( thread->th.th_pri_common );
6748 thread->th.th_pri_common = NULL;
6752 if ( thread->th.th_local.bget_data != NULL ) {
6753 __kmp_finalize_bget( thread );
6757 #if (KMP_OS_WINDOWS || KMP_OS_LINUX)
6758 if ( thread->th.th_affin_mask != NULL ) {
6759 KMP_CPU_FREE( thread->th.th_affin_mask );
6760 thread->th.th_affin_mask = NULL;
/* The serial team of the thread is reaped with it, then the descriptor goes. */
6764 __kmp_reap_team( thread->th.th_serial_team );
6765 thread->th.th_serial_team = NULL;
6766 __kmp_free( thread );
/*
 * __kmp_internal_end: common shutdown work shared by the library-level and
 * thread-level end routines.
 *
 * Unregisters the library, reclaims dead roots, scans for a root that is
 * still active and sets g_done. When an active root was found (the scan
 * index stopped below __kmp_threads_capacity) only the monitor thread is
 * reaped. The remaining statements reap the thread pool, the team pool and
 * the task teams, reset __kmp_init_common, reap the monitor and reset
 * __kmp_init_gtid.
 * NOTE(review): the remaining statements are presumably the alternative
 * branch of the active-root test -- confirm against the brace structure.
 */
6773 __kmp_internal_end(
void)
6778 __kmp_unregister_library();
6786 __kmp_reclaim_dead_roots();
/* Look for a root that is still active. */
6789 for( i=0 ; i<__kmp_threads_capacity ; i++ )
6791 if( __kmp_root[i] -> r.r_active )
6794 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6796 if ( i < __kmp_threads_capacity ) {
/* Monitor reaping is serialized by __kmp_monitor_lock. */
6814 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
6815 if ( TCR_4( __kmp_init_monitor ) ) {
6816 __kmp_reap_monitor( & __kmp_monitor );
6817 TCW_4( __kmp_init_monitor, 0 );
6819 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
6820 KA_TRACE( 10, (
"__kmp_internal_end: monitor reaped\n" ) );
/* Any root still present must be neither an uber thread nor active. */
6825 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
6826 if( __kmp_root[i] ) {
6827 KMP_ASSERT( ! KMP_UBER_GTID( i ) );
6828 KMP_ASSERT( ! __kmp_root[i] -> r.r_active );
/* Drain the free thread pool, reaping each pooled worker. */
6837 while ( __kmp_thread_pool != NULL ) {
6839 kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool;
6840 __kmp_thread_pool = thread->th.th_next_pool;
6842 thread->th.th_next_pool = NULL;
6843 thread->th.th_in_pool = FALSE;
6844 __kmp_reap_thread( thread, 0 );
6846 __kmp_thread_pool_insert_pt = NULL;
/* Drain the free team pool. */
6849 while ( __kmp_team_pool != NULL ) {
6851 kmp_team_t * team = (kmp_team_t *) __kmp_team_pool;
6852 __kmp_team_pool = team->t.t_next_pool;
6854 team->t.t_next_pool = NULL;
6855 __kmp_reap_team( team );
6859 __kmp_reap_task_teams( );
6862 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
6869 TCW_SYNC_4(__kmp_init_common, FALSE);
6871 KA_TRACE( 10, (
"__kmp_internal_end: all workers reaped\n" ) );
/* Reap the monitor thread on this path as well. */
6880 __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock );
6881 if ( TCR_4( __kmp_init_monitor ) ) {
6882 __kmp_reap_monitor( & __kmp_monitor );
6883 TCW_4( __kmp_init_monitor, 0 );
6885 __kmp_release_bootstrap_lock( & __kmp_monitor_lock );
6886 KA_TRACE( 10, (
"__kmp_internal_end: monitor reaped\n" ) );
6889 TCW_4(__kmp_init_gtid, FALSE);
/*
 * __kmp_internal_end_library: shut the whole library down.
 *
 * gtid_req  gtid of the caller when non-negative; otherwise the gtid is read
 *           from thread-specific storage.
 *
 * Nothing is done when an abort is in progress, when shutdown already
 * happened (g_done) or when serial initialization never ran. The gtid is
 * then classified (shutdown, monitor, not registered, uber/root). The same
 * abort and done checks are repeated under __kmp_initz_lock, and
 * __kmp_internal_end() runs with __kmp_forkjoin_lock held as well.
 */
6897 __kmp_internal_end_library(
int gtid_req )
/* Unlocked fast checks. */
6907 if( __kmp_global.g.g_abort ) {
6908 KA_TRACE( 11, (
"__kmp_internal_end_library: abort, exiting\n" ));
6912 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6913 KA_TRACE( 10, (
"__kmp_internal_end_library: already finished\n" ));
/* Classify the calling thread by its gtid. */
6922 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
6923 KA_TRACE( 10, (
"__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req ));
6924 if( gtid == KMP_GTID_SHUTDOWN ) {
6925 KA_TRACE( 10, (
"__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" ));
6927 }
else if( gtid == KMP_GTID_MONITOR ) {
6928 KA_TRACE( 10, (
"__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" ));
6930 }
else if( gtid == KMP_GTID_DNE ) {
6931 KA_TRACE( 10, (
"__kmp_internal_end_library: gtid not registered or system shutdown\n" ));
6933 }
else if( KMP_UBER_GTID( gtid )) {
/*
 * Root thread: a still-active root marks the runtime as aborted (g_abort = -1)
 * and done; the unregister call below is presumably the branch for an
 * inactive root -- confirm.
 */
6935 if( __kmp_root[gtid] -> r.r_active ) {
6936 __kmp_global.g.g_abort = -1;
6937 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6938 KA_TRACE( 10, (
"__kmp_internal_end_library: root still active, abort T#%d\n", gtid ));
6941 KA_TRACE( 10, (
"__kmp_internal_end_library: unregistering sibling T#%d\n", gtid ));
6942 __kmp_unregister_root_current_thread( gtid );
6949 #ifdef DUMP_DEBUG_ON_EXIT
6950 if ( __kmp_debug_buf )
6951 __kmp_dump_debug_buffer( );
/* Re-check abort and done under the initialization lock. */
6957 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
6960 if( __kmp_global.g.g_abort ) {
6961 KA_TRACE( 10, (
"__kmp_internal_end_library: abort, exiting\n" ));
6963 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6966 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
6967 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
/* Lock order: __kmp_initz_lock first, then __kmp_forkjoin_lock. */
6977 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
6980 __kmp_internal_end();
6982 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
6983 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
6985 KA_TRACE( 10, (
"__kmp_internal_end_library: exit\n" ) );
6987 #ifdef DUMP_DEBUG_ON_EXIT
6988 if ( __kmp_debug_buf )
6989 __kmp_dump_debug_buffer();
6993 __kmp_close_console();
6996 __kmp_fini_allocator();
/*
 * __kmp_internal_end_thread: shutdown processing for a single exiting thread.
 *
 * gtid_req  gtid of the caller when non-negative; otherwise the gtid is read
 *           from thread-specific storage.
 *
 * Follows the same outline as __kmp_internal_end_library, with two visible
 * differences: a worker (non-uber) thread only drops its task team
 * reference, and before __kmp_internal_end() runs the thread table is
 * scanned for remaining uber threads, in which case both locks are released
 * without ending the library.
 */
7001 __kmp_internal_end_thread(
int gtid_req )
/* Unlocked fast checks. */
7011 if( __kmp_global.g.g_abort ) {
7012 KA_TRACE( 11, (
"__kmp_internal_end_thread: abort, exiting\n" ));
7016 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
7017 KA_TRACE( 10, (
"__kmp_internal_end_thread: already finished\n" ));
/* Classify the calling thread by its gtid. */
7025 int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific();
7026 KA_TRACE( 10, (
"__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req ));
7027 if( gtid == KMP_GTID_SHUTDOWN ) {
7028 KA_TRACE( 10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" ));
7030 }
else if( gtid == KMP_GTID_MONITOR ) {
7031 KA_TRACE( 10, (
"__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" ));
7033 }
else if( gtid == KMP_GTID_DNE ) {
7034 KA_TRACE( 10, (
"__kmp_internal_end_thread: gtid not registered or system shutdown\n" ));
7037 }
else if( KMP_UBER_GTID( gtid )) {
/*
 * Root thread: a still-active root marks the runtime as aborted (g_abort = -1)
 * and done; the unregister call below is presumably the branch for an
 * inactive root -- confirm.
 */
7039 if( __kmp_root[gtid] -> r.r_active ) {
7040 __kmp_global.g.g_abort = -1;
7041 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
7042 KA_TRACE( 10, (
"__kmp_internal_end_thread: root still active, abort T#%d\n", gtid ));
7045 KA_TRACE( 10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid ));
7046 __kmp_unregister_root_current_thread( gtid );
/* Worker thread: only the task team reference is released. */
7050 KA_TRACE( 10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid ));
7054 kmp_info_t *this_thr = __kmp_threads[ gtid ];
7055 if (TCR_PTR(this_thr->th.th_task_team) != NULL) {
7056 __kmp_unref_task_team(this_thr->th.th_task_team, this_thr);
7061 KA_TRACE( 10, (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid ));
7065 #if defined GUIDEDLL_EXPORTS
7073 KA_TRACE( 10, (
"__kmp_internal_end_thread: exiting\n") );
/* Re-check abort and done under the initialization lock. */
7077 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7080 if( __kmp_global.g.g_abort ) {
7081 KA_TRACE( 10, (
"__kmp_internal_end_thread: abort, exiting\n" ));
7083 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7086 if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) {
7087 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
/* Lock order: __kmp_initz_lock first, then __kmp_forkjoin_lock. */
7099 __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock );
/* Any remaining uber thread keeps the library alive: release both locks. */
7101 for ( i = 0; i < __kmp_threads_capacity; ++ i ) {
7102 if ( KMP_UBER_GTID( i ) ) {
7103 KA_TRACE( 10, (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i ));
7104 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
7105 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7112 __kmp_internal_end();
7114 __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock );
7115 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7117 KA_TRACE( 10, (
"__kmp_internal_end_thread: exit\n" ) );
7119 #ifdef DUMP_DEBUG_ON_EXIT
7120 if ( __kmp_debug_buf )
7121 __kmp_dump_debug_buffer();
/*
 * Library registration state. The flag is set by
 * __kmp_register_library_startup to 0xCAFE0000 combined with the low 16 bits
 * of a time stamp; the string is the value published under the per-process
 * registration environment variable. Both are reset by
 * __kmp_unregister_library.
 */
7128 static long __kmp_registration_flag = 0;
7130 static char * __kmp_registration_str = NULL;
/*
 * __kmp_reg_status_name: build the name of the registration environment
 * variable for this process, __KMP_REGISTERED_LIB_ followed by the pid.
 * The string comes from __kmp_str_format; the callers in this file release
 * it with KMP_INTERNAL_FREE.
 */
7136 __kmp_reg_status_name() {
7142 return __kmp_str_format(
"__KMP_REGISTERED_LIB_%d", (
int) getpid() );
/*
 * __kmp_register_library_startup: detect a second copy of the runtime loaded
 * into the same process.
 *
 * A per-process environment variable (see __kmp_reg_status_name) is set to a
 * string built from the address and value of __kmp_registration_flag. When
 * the value read back equals that string, this copy owns the registration.
 * Otherwise the foreign value is split on the dash character into a flag
 * address, a flag value and a trailing part, and the address is probed to
 * decide whether the other copy is still alive. A live duplicate is
 * tolerated only when KMP_DUPLICATE_LIB_OK is set to a true value; a stale
 * entry is removed with __kmp_env_unset.
 */
7147 __kmp_register_library_startup(
7151 char * name = __kmp_reg_status_name();
7158 __kmp_initialize_system_tick();
7160 __kmp_read_system_time( & time.dtime );
/* Flag value: fixed 0xCAFE tag in the upper half, time bits in the lower half. */
7161 __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL );
7162 __kmp_registration_str =
7165 & __kmp_registration_flag,
7166 __kmp_registration_flag,
7170 KA_TRACE( 50, (
"__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) );
7174 char * value = NULL;
/*
 * NOTE(review): the third argument 0 presumably means do not overwrite an
 * existing value, which is what lets a second copy see the entry of the
 * first -- confirm against __kmp_env_set.
 */
7177 __kmp_env_set( name, __kmp_registration_str, 0 );
7179 value = __kmp_env_get( name );
7180 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
/* Foreign value: split it into address, value and trailing part. */
7189 char * tail = value;
7190 char * flag_addr_str = NULL;
7191 char * flag_val_str = NULL;
7192 char const * file_name = NULL;
7193 __kmp_str_split( tail,
'-', & flag_addr_str, & tail );
7194 __kmp_str_split( tail,
'-', & flag_val_str, & tail );
7196 if ( tail != NULL ) {
7197 long * flag_addr = 0;
/* NOTE(review): the sscanf results are not checked; only the parsed values are tested. */
7199 sscanf( flag_addr_str,
"%p", & flag_addr );
7200 sscanf( flag_val_str,
"%lx", & flag_val );
7201 if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name,
"" ) != 0 ) {
/* The other copy counts as alive when its flag address is mapped and still holds the value. */
7205 if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) {
7213 switch ( neighbor ) {
7218 file_name =
"unknown library";
/* A live duplicate is an error unless KMP_DUPLICATE_LIB_OK is true. */
7222 char * duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK" );
7223 if ( ! __kmp_str_match_true( duplicate_ok ) ) {
7227 KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ),
7228 KMP_HNT( DuplicateLibrary ),
7232 KMP_INTERNAL_FREE( duplicate_ok );
7233 __kmp_duplicate_library_ok = 1;
/* Stale entry: remove the variable. */
7238 __kmp_env_unset( name );
7241 KMP_DEBUG_ASSERT( 0 );
7246 KMP_INTERNAL_FREE( (
void *) value );
7249 KMP_INTERNAL_FREE( (
void *) name );
/*
 * __kmp_unregister_library: undo __kmp_register_library_startup.
 *
 * The registration variable is removed only when it still holds the string
 * published by this copy of the library. The registration string and the
 * temporary name and value strings are freed, and the registration state is
 * reset to its initial values.
 */
7255 __kmp_unregister_library(
void ) {
7257 char * name = __kmp_reg_status_name();
7258 char * value = __kmp_env_get( name );
7260 KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 );
7261 KMP_DEBUG_ASSERT( __kmp_registration_str != NULL );
/* Leave the variable alone when it belongs to another copy of the library. */
7262 if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) {
7264 __kmp_env_unset( name );
7267 KMP_INTERNAL_FREE( __kmp_registration_str );
7268 KMP_INTERNAL_FREE( value );
7269 KMP_INTERNAL_FREE( name );
7271 __kmp_registration_flag = 0;
7272 __kmp_registration_str = NULL;
/*
 * __kmp_do_serial_initialize: one-time serial initialization of the runtime.
 *
 * Order of the visible steps: type size checks, lock validation, allocator
 * setup, duplicate-library registration, reset of the abort and done flags,
 * initialization of the global, atomic and bootstrap locks, platform runtime
 * setup, default values for thread limits, blocktime, library mode,
 * scheduling and barrier patterns, environment processing, allocation of the
 * thread and root tables, registration of the initial root thread, and
 * installation of the atfork, atexit and signal handlers. Sets
 * __kmp_init_serial on completion.
 * The caller __kmp_serial_initialize holds __kmp_initz_lock around the call.
 */
7281 __kmp_do_serial_initialize(
void )
7286 KA_TRACE( 10, (
"__kmp_serial_initialize: enter\n" ) );
/* Debug-only checks of the fixed-width type sizes the runtime relies on. */
7288 KMP_DEBUG_ASSERT(
sizeof( kmp_int32 ) == 4 );
7289 KMP_DEBUG_ASSERT(
sizeof( kmp_uint32 ) == 4 );
7290 KMP_DEBUG_ASSERT(
sizeof( kmp_int64 ) == 8 );
7291 KMP_DEBUG_ASSERT(
sizeof( kmp_uint64 ) == 8 );
7292 KMP_DEBUG_ASSERT(
sizeof( kmp_intptr_t ) ==
sizeof(
void * ) );
7294 __kmp_validate_locks();
7297 __kmp_init_allocator();
7303 __kmp_register_library_startup( );
/* A set g_done flag means the library was shut down earlier and is starting again. */
7306 if( TCR_4(__kmp_global.g.g_done) ) {
7307 KA_TRACE( 10, (
"__kmp_do_serial_initialize: reinitialization of library\n" ) );
7310 __kmp_global.g.g_abort = 0;
7311 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
7314 #if KMP_USE_ADAPTIVE_LOCKS
7315 #if KMP_DEBUG_ADAPTIVE_LOCKS
7316 __kmp_init_speculative_stats();
/* Initialize the global, dispatch, debug, atomic and bootstrap locks. */
7319 __kmp_init_lock( & __kmp_global_lock );
7320 __kmp_init_queuing_lock( & __kmp_dispatch_lock );
7321 __kmp_init_lock( & __kmp_debug_lock );
7322 __kmp_init_atomic_lock( & __kmp_atomic_lock );
7323 __kmp_init_atomic_lock( & __kmp_atomic_lock_1i );
7324 __kmp_init_atomic_lock( & __kmp_atomic_lock_2i );
7325 __kmp_init_atomic_lock( & __kmp_atomic_lock_4i );
7326 __kmp_init_atomic_lock( & __kmp_atomic_lock_4r );
7327 __kmp_init_atomic_lock( & __kmp_atomic_lock_8i );
7328 __kmp_init_atomic_lock( & __kmp_atomic_lock_8r );
7329 __kmp_init_atomic_lock( & __kmp_atomic_lock_8c );
7330 __kmp_init_atomic_lock( & __kmp_atomic_lock_10r );
7331 __kmp_init_atomic_lock( & __kmp_atomic_lock_16r );
7332 __kmp_init_atomic_lock( & __kmp_atomic_lock_16c );
7333 __kmp_init_atomic_lock( & __kmp_atomic_lock_20c );
7334 __kmp_init_atomic_lock( & __kmp_atomic_lock_32c );
7335 __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock );
7336 __kmp_init_bootstrap_lock( & __kmp_exit_lock );
7337 __kmp_init_bootstrap_lock( & __kmp_monitor_lock );
7338 __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock );
7342 __kmp_runtime_initialize();
7348 __kmp_abort_delay = 0;
/*
 * Default team size upper bound: __kmp_xproc clamped to the range
 * KMP_MIN_NTH .. __kmp_sys_max_nth. The thread table capacity is derived
 * from it.
 */
7352 __kmp_dflt_team_nth_ub = __kmp_xproc;
7353 if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) {
7354 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
7356 if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) {
7357 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
7359 __kmp_max_nth = __kmp_sys_max_nth;
7360 __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub );
/* Default blocktime and the monitor wakeup and interval counts derived from it. */
7363 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
7364 __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
7365 __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups );
7367 __kmp_library = library_throughput;
7369 __kmp_static = kmp_sch_static_balanced;
7374 #endif // OMP_30_ENABLED
/*
 * Barrier defaults: every barrier type gets the default branch bits and
 * patterns; with KMP_FAST_REDUCTION_BARRIER the reduction barrier is
 * overridden with branch bits 1 and the hyper pattern through the temporary
 * macros below.
 */
7377 #if KMP_FAST_REDUCTION_BARRIER
7378 #define kmp_reduction_barrier_gather_bb ((int)1)
7379 #define kmp_reduction_barrier_release_bb ((int)1)
7380 #define kmp_reduction_barrier_gather_pat bp_hyper_bar
7381 #define kmp_reduction_barrier_release_pat bp_hyper_bar
7382 #endif // KMP_FAST_REDUCTION_BARRIER
7383 for ( i=bs_plain_barrier; i<bs_last_barrier; i++ ) {
7384 __kmp_barrier_gather_branch_bits [ i ] = __kmp_barrier_gather_bb_dflt;
7385 __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt;
7386 __kmp_barrier_gather_pattern [ i ] = __kmp_barrier_gather_pat_dflt;
7387 __kmp_barrier_release_pattern[ i ] = __kmp_barrier_release_pat_dflt;
7388 #if KMP_FAST_REDUCTION_BARRIER
7389 if( i == bs_reduction_barrier ) {
7390 __kmp_barrier_gather_branch_bits [ i ] = kmp_reduction_barrier_gather_bb;
7391 __kmp_barrier_release_branch_bits[ i ] = kmp_reduction_barrier_release_bb;
7392 __kmp_barrier_gather_pattern [ i ] = kmp_reduction_barrier_gather_pat;
7393 __kmp_barrier_release_pattern[ i ] = kmp_reduction_barrier_release_pat;
7395 #endif // KMP_FAST_REDUCTION_BARRIER
7397 #if KMP_FAST_REDUCTION_BARRIER
7398 #undef kmp_reduction_barrier_release_pat
7399 #undef kmp_reduction_barrier_gather_pat
7400 #undef kmp_reduction_barrier_release_bb
7401 #undef kmp_reduction_barrier_gather_bb
7402 #endif // KMP_FAST_REDUCTION_BARRIER
/*
 * Explicit overrides for barrier index 0 (gather) and index 1 (release).
 * NOTE(review): presumably guarded by a platform conditional -- confirm.
 */
7405 __kmp_barrier_gather_branch_bits [ 0 ] = 3;
7406 __kmp_barrier_release_branch_bits[ 1 ] = 1;
/*
 * NOTE(review): TRUE and FALSE are presumably the two arms of a build
 * conditional (for example debug versus release) -- confirm.
 */
7411 __kmp_env_checks = TRUE;
7413 __kmp_env_checks = FALSE;
7417 __kmp_foreign_tp = TRUE;
7419 __kmp_global.g.g_dynamic = FALSE;
7420 __kmp_global.g.g_dynamic_mode = dynamic_default;
/* Process the environment settings; they may override the defaults set above. */
7422 __kmp_env_initialize( NULL );
/* Optional diagnostic: dump the message catalog when KMP_DUMP_CATALOG is true. */
7425 char const * val = __kmp_env_get(
"KMP_DUMP_CATALOG" );
7426 if ( __kmp_str_match_true( val ) ) {
7427 kmp_str_buf_t buffer;
7428 __kmp_str_buf_init( & buffer );
7429 __kmp_i18n_dump_catalog( buffer );
7430 __kmp_printf(
"%s", buffer.str );
7431 __kmp_str_buf_free( & buffer );
7433 __kmp_env_free( & val );
7437 __kmp_tp_capacity = __kmp_default_tp_capacity(__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
/* The pools must be empty at this point; they are reset explicitly as well. */
7444 KMP_DEBUG_ASSERT( __kmp_thread_pool == NULL );
7445 KMP_DEBUG_ASSERT( __kmp_thread_pool_insert_pt == NULL );
7446 KMP_DEBUG_ASSERT( __kmp_team_pool == NULL );
7447 __kmp_thread_pool = NULL;
7448 __kmp_thread_pool_insert_pt = NULL;
7449 __kmp_team_pool = NULL;
/*
 * One allocation holds both tables: __kmp_threads first, and __kmp_root
 * starting right after __kmp_threads_capacity thread pointers. CACHE_LINE
 * extra bytes are added to the size.
 */
7454 size = (
sizeof(kmp_info_t*) +
sizeof(kmp_root_t*))*__kmp_threads_capacity + CACHE_LINE;
7455 __kmp_threads = (kmp_info_t**) __kmp_allocate( size );
7456 __kmp_root = (kmp_root_t**) ((
char*)__kmp_threads +
sizeof(kmp_info_t*) * __kmp_threads_capacity );
7459 KMP_DEBUG_ASSERT( __kmp_all_nth == 0 );
7460 KMP_DEBUG_ASSERT( __kmp_nth == 0 );
/* Register the calling thread as the initial root; it must get the initial uber gtid. */
7465 gtid = __kmp_register_root( TRUE );
7466 KA_TRACE( 10, (
"__kmp_do_serial_initialize T#%d\n", gtid ));
7467 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
7468 KMP_ASSERT( KMP_INITIAL_GTID( gtid ) );
7472 __kmp_common_initialize();
7476 __kmp_register_atfork();
/*
 * Builds without GUIDEDLL_EXPORTS register __kmp_internal_end_atexit with
 * atexit(); the fatal message reports a failure of that call (presumably
 * when rc is nonzero -- confirm).
 */
7479 #if ! defined GUIDEDLL_EXPORTS
7484 int rc = atexit( __kmp_internal_end_atexit );
7486 __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError,
"atexit()" ), KMP_ERR( rc ), __kmp_msg_null );
/*
 * NOTE(review): the FALSE and TRUE calls are presumably alternative arms of a
 * conditional that is not shown -- confirm.
 */
7491 #if KMP_HANDLE_SIGNALS
7498 __kmp_install_signals( FALSE );
7501 __kmp_install_signals( TRUE );
7506 __kmp_init_counter ++;
/* From here on __kmp_serial_initialize treats the library as initialized. */
7508 __kmp_init_serial = TRUE;
7510 if (__kmp_settings) {
7516 KA_TRACE( 10, (
"__kmp_do_serial_initialize: exit\n" ) );
/*
 * __kmp_serial_initialize: public entry point for serial initialization.
 *
 * __kmp_init_serial is tested once without the lock and once more after
 * acquiring __kmp_initz_lock; __kmp_do_serial_initialize() runs with the
 * lock held when the flag is still clear, and the lock is released
 * afterwards.
 */
7520 __kmp_serial_initialize(
void )
7522 if ( __kmp_init_serial ) {
7525 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
/* Second test under the lock: another thread may have initialized meanwhile. */
7526 if ( __kmp_init_serial ) {
7527 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7530 __kmp_do_serial_initialize();
7531 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
/*
 * Middle stage of runtime initialization (worker routine; takes no lock itself).
 * Steps visible here:
 *   - run serial initialization if __kmp_init_serial is not set;
 *   - on Windows/Linux, initialize affinity and set an initial mask for every
 *     registered thread slot;
 *   - settle __kmp_avail_proc and the default team size __kmp_dflt_team_nth;
 *   - push a changed default team size to threads that have no nproc set;
 *   - set __kmp_init_middle.
 * NOTE(review): declarations of i/j, the initialization/increment of j, and the
 * #else/#endif lines are not visible in this listing.
 */
7535 __kmp_do_middle_initialize(
void )
7538 int prev_dflt_team_nth;
7540 if( !__kmp_init_serial ) {
7541 __kmp_do_serial_initialize();
7544 KA_TRACE( 10, (
"__kmp_middle_initialize: enter\n" ) );
/* Remember the current default so the propagation loop below runs only when it changes. */
7550 prev_dflt_team_nth = __kmp_dflt_team_nth;
7552 #if KMP_OS_WINDOWS || KMP_OS_LINUX
7557 __kmp_affinity_initialize();
/* Give every non-NULL entry of __kmp_threads its initial affinity mask. */
7563 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
7564 if ( TCR_PTR( __kmp_threads[ i ] ) != NULL ) {
7565 __kmp_affinity_set_init_mask( i, TRUE );
7570 KMP_ASSERT( __kmp_xproc > 0 );
/* No available-processor count yet: fall back to __kmp_xproc. */
7571 if ( __kmp_avail_proc == 0 ) {
7572 __kmp_avail_proc = __kmp_xproc;
/*
 * Replace zero entries in the nested thread-count list with __kmp_avail_proc.
 * The same assignment also overwrites __kmp_dflt_team_nth and
 * __kmp_dflt_team_nth_ub.
 */
7577 while ( __kmp_nested_nth.used && ! __kmp_nested_nth.nth[ j ] ) {
7578 __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc;
/* Default team size still unset: use __kmp_ncores or __kmp_avail_proc, selected by KMP_DFLT_NTH_CORES. */
7582 if ( __kmp_dflt_team_nth == 0 ) {
7583 #ifdef KMP_DFLT_NTH_CORES
7587 __kmp_dflt_team_nth = __kmp_ncores;
7588 KA_TRACE( 20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n",
7589 __kmp_dflt_team_nth ) );
7594 __kmp_dflt_team_nth = __kmp_avail_proc;
7595 KA_TRACE( 20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n",
7596 __kmp_dflt_team_nth ) );
/* Clamp the default team size into [KMP_MIN_NTH, __kmp_sys_max_nth]. */
7600 if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) {
7601 __kmp_dflt_team_nth = KMP_MIN_NTH;
7603 if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) {
7604 __kmp_dflt_team_nth = __kmp_sys_max_nth;
7611 KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub );
/*
 * The default changed: apply it to each existing thread whose own nproc
 * setting is still 0. The two `continue` tests read the setting from
 * different places (task ICVs vs. team array) -- presumably alternatives
 * under a preprocessor switch that is not visible here; confirm.
 */
7613 if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) {
7620 for ( i = 0; i < __kmp_threads_capacity; i++ ) {
7621 kmp_info_t *thread = __kmp_threads[ i ];
7622 if ( thread == NULL )
continue;
7624 if ( thread->th.th_current_task->td_icvs.nproc != 0 )
continue;
7626 if ( thread->th.th_team->t.t_set_nproc[ thread->th.th_info.ds.ds_tid ] != 0 )
continue;
7629 set__nproc_p( __kmp_threads[ i ], __kmp_dflt_team_nth );
7632 KA_TRACE( 20, (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
7633 __kmp_dflt_team_nth) );
7635 #ifdef KMP_ADJUST_BLOCKTIME
/* With no blocktime set via the environment, flag zero blocktime when __kmp_nth exceeds __kmp_avail_proc. */
7638 if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) {
7639 KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 );
7640 if ( __kmp_nth > __kmp_avail_proc ) {
7641 __kmp_zero_bt = TRUE;
/* Mark the middle stage as done. */
7647 TCW_SYNC_4(__kmp_init_middle, TRUE);
7649 KA_TRACE( 10, (
"__kmp_do_middle_initialize: exit\n" ) );
/*
 * Public entry point for the middle stage of initialization.
 * Tests __kmp_init_middle without the lock and again after acquiring
 * __kmp_initz_lock, then runs __kmp_do_middle_initialize() with the lock held.
 * NOTE(review): the early-exit statements in the two `if` bodies are not
 * visible in this listing -- presumably returns; confirm.
 */
7653 __kmp_middle_initialize(
void )
/* Unlocked test of the "middle init done" flag. */
7655 if ( __kmp_init_middle ) {
7658 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
/* Repeat the test under the lock; release the lock if the flag is already set. */
7659 if ( __kmp_init_middle ) {
7660 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
7663 __kmp_do_middle_initialize();
7664 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
/*
 * Final (parallel) stage of runtime initialization.
 * Returns immediately when __kmp_init_parallel is already set (tested both
 * before and after taking __kmp_initz_lock). Otherwise, with the lock held, it
 * runs middle initialization if needed, records x86 floating-point control
 * state, installs signal handlers, initializes suspend support, resolves the
 * default dynamic mode, optionally prints version info, sets
 * __kmp_init_parallel and releases the lock.
 * NOTE(review): #else/#endif lines and closing braces are not visible in this
 * listing.
 */
7668 __kmp_parallel_initialize(
void )
7670 int gtid = __kmp_entry_gtid();
7673 if( TCR_4(__kmp_init_parallel) )
return;
7674 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
7675 if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock );
return; }
/* Runtime is flagged as done: trace and call __kmp_infinite_loop() instead of initializing. */
7678 if( TCR_4(__kmp_global.g.g_done) ) {
7679 KA_TRACE( 10, (
"__kmp_parallel_initialize: attempt to init while shutting down\n" ) );
7680 __kmp_infinite_loop();
/* Calls the unlocked worker directly because __kmp_initz_lock is already held here. */
7686 if( !__kmp_init_middle ) {
7687 __kmp_do_middle_initialize();
7691 KA_TRACE( 10, (
"__kmp_parallel_initialize: enter\n" ) );
7692 KMP_ASSERT( KMP_UBER_GTID( gtid ) );
7694 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
/* Store the current x87 control word and MXCSR into the __kmp_init_* globals; MXCSR is masked with KMP_X86_MXCSR_MASK. */
7699 __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word );
7700 __kmp_store_mxcsr( &__kmp_init_mxcsr );
7701 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
7705 # if KMP_HANDLE_SIGNALS
7707 __kmp_install_signals( TRUE );
7711 __kmp_suspend_initialize();
/*
 * Replace dynamic_default with a concrete mode: dynamic_load_balance when
 * USE_LOAD_BALANCE is defined; the dynamic_thread_limit assignment below is
 * presumably the #else branch -- confirm.
 */
7713 # if defined(USE_LOAD_BALANCE)
7714 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
7715 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
7718 if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) {
7719 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7723 if ( __kmp_version ) {
7724 __kmp_print_version_2();
/* Mark the parallel stage as done. */
7728 TCW_SYNC_4(__kmp_init_parallel, TRUE);
7731 KA_TRACE( 10, (
"__kmp_parallel_initialize: exit\n" ) );
7733 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
/*
 * Per-thread preparation performed before a microtask is invoked.
 * Zeroes the thread's construct counters and its dispatch buffer index, and,
 * when consistency checking is enabled, pushes a "parallel" record for gtid.
 * NOTE(review): the final parameter (`team`, used below) is declared on a line
 * that is not visible in this listing.
 */
7740 __kmp_run_before_invoked_task(
int gtid,
int tid, kmp_info_t *this_thr,
7743 kmp_disp_t *dispatch;
/* Reset both construct counters of the thread to 0. */
7748 this_thr->th.th_local.this_construct = 0;
7749 this_thr->th.th_local.last_construct = 0;
7750 #if KMP_CACHE_MANAGE
/* Prefetch the thread's fork/join barrier b_arrived field. */
7751 KMP_CACHE_PREFETCH( &this_thr -> th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived );
7753 dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
7754 KMP_DEBUG_ASSERT( dispatch );
7755 KMP_DEBUG_ASSERT( team -> t.t_dispatch );
/* Reset the thread's dispatch index to 0. */
7758 dispatch -> th_disp_index = 0;
7760 if( __kmp_env_consistency_check )
7761 __kmp_push_parallel( gtid, team->t.t_ident );
/*
 * Counterpart of __kmp_run_before_invoked_task(): when consistency checking is
 * enabled, pops the "parallel" record for gtid using the team's ident.
 * NOTE(review): the final parameter (`team`) is declared on a line that is not
 * visible in this listing.
 */
7767 __kmp_run_after_invoked_task(
int gtid,
int tid, kmp_info_t *this_thr,
7770 if( __kmp_env_consistency_check )
7771 __kmp_pop_parallel( gtid, team->t.t_ident );
/*
 * Runs the current team's microtask on the thread identified by gtid.
 * Sequence: before-hook, optional ITT stack "callee enter" notification,
 * __kmp_invoke_microtask() with the team's t_pkfn/t_argc/t_argv, optional ITT
 * "callee leave", after-hook.
 * NOTE(review): the declaration of `rc`, the return statement and any
 * preprocessor guards around the ITT calls are not visible in this listing.
 */
7775 __kmp_invoke_task_func(
int gtid )
7778 int tid = __kmp_tid_from_gtid( gtid );
7779 kmp_info_t *this_thr = __kmp_threads[ gtid ];
7780 kmp_team_t *team = this_thr -> th.th_team;
7782 __kmp_run_before_invoked_task( gtid, tid, this_thr, team );
/* ITT notification is issued only when the __itt_stack_caller_create_ptr hook is non-null. */
7784 if ( __itt_stack_caller_create_ptr ) {
7785 __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id );
/* Invoke the team's entry function with this thread's gtid/tid and the team's argument vector. */
7788 rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
7789 gtid, tid, (
int) team->t.t_argc, (
void **) team->t.t_argv );
7792 if ( __itt_stack_caller_create_ptr ) {
7793 __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id );
7796 __kmp_run_after_invoked_task( gtid, tid, this_thr, team );
/*
 * Executed by a thread acting as master for a `teams` construct.
 * Copies the thread's th_set_nth_teams into th_set_nproc, then forks a
 * parallel region via __kmp_fork_call() with __kmp_invoke_task_func as the
 * launcher and joins it with __kmp_join_call().
 * NOTE(review): several __kmp_fork_call() arguments are on lines not visible
 * in this listing, and the meaning of the trailing `1` passed to
 * __kmp_join_call() cannot be determined from here.
 */
7803 __kmp_teams_master( microtask_t microtask,
int gtid )
7806 kmp_info_t *this_thr = __kmp_threads[ gtid ];
7807 kmp_team_t *team = this_thr -> th.th_team;
7808 ident_t *loc = team->t.t_ident;
7811 int tid = __kmp_tid_from_gtid( gtid );
7812 KA_TRACE( 20, (
"__kmp_teams_master: T#%d, Tid %d, microtask %p\n",
7813 gtid, tid, microtask) );
/* The thread count recorded for the teams construct becomes the request for the fork below. */
7818 this_thr->th.th_set_nproc = this_thr->th.th_set_nth_teams;
7819 __kmp_fork_call( loc, gtid, TRUE,
7822 VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
7824 __kmp_join_call( loc, gtid, 1 );
/*
 * Launcher that calls __kmp_teams_master() for thread gtid, passing the
 * microtask stored in the thread's th_team_microtask field.
 * In debug builds, when the thread's team is not serialized, it first asserts
 * that the team's t_pkfn is __kmp_teams_master.
 * NOTE(review): any return statement or preprocessor guard around the
 * assertion is not visible in this listing.
 */
7829 __kmp_invoke_teams_master(
int gtid )
7832 if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized )
7833 KMP_DEBUG_ASSERT( (
void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (
void*)__kmp_teams_master );
7836 __kmp_teams_master( (microtask_t)__kmp_threads[gtid]->th.th_team_microtask, gtid );
/*
 * Records a requested thread count for thread gtid.
 *   id          - source location descriptor; not used in the visible lines.
 *   gtid        - index into __kmp_threads of the requesting thread.
 *   num_threads - requested count; stored in th_set_nproc only when > 0,
 *                 otherwise the field is left unchanged.
 */
7848 __kmp_push_num_threads(
ident_t *
id,
int gtid,
int num_threads )
7850 kmp_info_t *thr = __kmp_threads[gtid];
7852 if( num_threads > 0 )
7853 thr -> th.th_set_nproc = num_threads;
/*
 * Records the team count and per-team thread count requested for thread gtid.
 *   id          - source location descriptor; not used in the visible lines.
 *   num_teams   - stored in th_set_nproc when > 0; the `= 1` assignment below
 *                 is presumably the else branch (the `else` line is not
 *                 visible in this listing).
 *   num_threads - stored in th_set_nth_teams when > 0; otherwise (presumably
 *                 the else branch) middle initialization is ensured and the
 *                 value is derived from __kmp_avail_proc.
 */
7861 __kmp_push_num_teams(
ident_t *
id,
int gtid,
int num_teams,
int num_threads )
7863 kmp_info_t *thr = __kmp_threads[gtid];
7865 if( num_teams > 0 ) {
7866 thr -> th.th_set_nproc = num_teams;
7868 thr -> th.th_set_nproc = 1;
7872 if( num_threads > 0 ) {
7873 thr -> th.th_set_nth_teams = num_threads;
/* __kmp_avail_proc is read below, so middle initialization is run first if it has not happened. */
7875 if( !TCR_4(__kmp_init_middle) )
7876 __kmp_middle_initialize();
/*
 * Split the available processors across the teams.
 * NOTE(review): if both operands are integers this yields 0 whenever
 * th_set_nproc > __kmp_avail_proc -- confirm a zero request is handled downstream.
 */
7877 thr -> th.th_set_nth_teams = __kmp_avail_proc / thr -> th.th_set_nproc;
/*
 * Stores the requested proc_bind value in th_set_proc_bind of thread gtid.
 * The `id` argument is not used in the visible lines.
 */
7886 __kmp_push_proc_bind(
ident_t *
id,
int gtid, kmp_proc_bind_t proc_bind )
7888 kmp_info_t *thr = __kmp_threads[gtid];
7889 thr -> th.th_set_proc_bind = proc_bind;
/*
 * Prepares `team` for a new parallel region and enters the fork barrier.
 * Must be called by a thread for which KMP_MASTER_GTID(gtid) holds and whose
 * th_team is `team` (both are asserted). Resets the team's construct counter,
 * ordered value and dispatch-buffer indices before calling
 * __kmp_fork_barrier().
 * NOTE(review): declarations of i/f, the `else` line and any debug-only
 * preprocessor guards are not visible in this listing.
 */
7897 __kmp_internal_fork(
ident_t *
id,
int gtid, kmp_team_t *team )
7899 kmp_info_t *this_thr = __kmp_threads[gtid];
7905 KMP_DEBUG_ASSERT( team );
7906 KMP_DEBUG_ASSERT( this_thr -> th.th_team == team );
7907 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
/* Reset the team's construct counter and ordered value to 0. */
7910 team -> t.t_construct = 0;
7911 team -> t.t_ordered.dt.t_value = 0;
7914 KMP_DEBUG_ASSERT( team -> t.t_disp_buffer );
/*
 * Teams with t_max_nproc > 1 get buffer_index = i for all KMP_MAX_DISP_BUF
 * buffers; the single assignment to buffer 0 is presumably the else branch.
 */
7915 if ( team->t.t_max_nproc > 1 ) {
7917 for (i = 0; i < KMP_MAX_DISP_BUF; ++i)
7918 team -> t.t_disp_buffer[ i ].buffer_index = i;
7920 team -> t.t_disp_buffer[ 0 ].buffer_index = 0;
7924 KMP_ASSERT( this_thr -> th.th_team == team );
/* Debug check: every team member exists and agrees on the team size. */
7927 for( f=0 ; f<team->t.t_nproc ; f++ ) {
7928 KMP_DEBUG_ASSERT( team->t.t_threads[f] &&
7929 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc );
7934 __kmp_fork_barrier( gtid, 0 );
/*
 * Enters the join barrier for `team` on behalf of thread gtid.
 * Asserts that KMP_MASTER_GTID(gtid) holds and that the thread belongs to
 * `team`, prints diagnostics if the thread's recorded team size disagrees with
 * the team's t_nproc, then calls __kmp_join_barrier().
 * NOTE(review): preprocessor guards around the diagnostic section (if any) are
 * not visible in this listing.
 */
7939 __kmp_internal_join(
ident_t *
id,
int gtid, kmp_team_t *team )
7941 kmp_info_t *this_thr = __kmp_threads[gtid];
7943 KMP_DEBUG_ASSERT( team );
7944 KMP_DEBUG_ASSERT( this_thr -> th.th_team == team );
7945 KMP_ASSERT( KMP_MASTER_GTID(gtid) );
/* On a team-size mismatch, print the relevant fields and the runtime structures before the assertion below. */
7951 if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) {
7952 __kmp_printf(
"GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]);
7953 __kmp_printf(
"__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n",
7954 gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc);
7955 __kmp_print_structure();
7957 KMP_DEBUG_ASSERT( __kmp_threads[gtid] &&
7958 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc );
7961 __kmp_join_barrier( gtid );
/* The thread must still belong to the same team after the barrier. */
7964 KMP_ASSERT( this_thr -> th.th_team == team );
7971 #ifdef USE_LOAD_BALANCE
/*
 * Computes how many non-master threads of the root's hot team count as active.
 * With blocktime at KMP_MAX_BLOCKTIME the answer is t_nproc - 1; otherwise the
 * threads at indices 1..t_nproc-1 are inspected via their th_active field.
 * NOTE(review): the result for an active root (first `if`), the counter
 * declaration/increment and the final return are not visible in this listing.
 */
7978 __kmp_active_hot_team_nproc( kmp_root_t *root )
7982 kmp_team_t *hot_team;
7984 if ( root->r.r_active ) {
7987 hot_team = root->r.r_hot_team;
/* Maximum blocktime: report every hot-team thread except index 0. */
7988 if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) {
7989 return hot_team->t.t_nproc - 1;
/* Index 0 is skipped; only the remaining threads are examined. */
7996 for ( i = 1; i < hot_team->t.t_nproc; i++ ) {
7997 if ( hot_team->t.t_threads[i]->th.th_active ) {
/*
 * Chooses a team size for a new parallel region under the load-balance
 * dynamic mode.
 *   root      - root whose hot team is consulted.
 *   set_nproc - requested team size; also compared against the computed size.
 * The size is derived from __kmp_avail_proc, the system load reported by
 * __kmp_get_load_balance() and the threads this runtime already accounts for,
 * and is raised to KMP_MIN_NTH when it falls below that. If the load query
 * fails (negative result) the global dynamic mode is switched to
 * dynamic_thread_limit and a thread-limit formula is used instead.
 * NOTE(review): several declarations, the bodies of the `retval > set_nproc`
 * tests (presumably capping retval at set_nproc), all return statements and
 * preprocessor branches are not visible in this listing.
 */
8009 __kmp_load_balance_nproc( kmp_root_t *root,
int set_nproc )
8013 int hot_team_active;
8014 int team_curr_active;
8017 KB_TRACE( 20, (
"__kmp_load_balance_nproc: called root:%p set_nproc:%d\n",
8018 root, set_nproc ) );
8019 KMP_DEBUG_ASSERT( root );
/*
 * Two forms of the same "dynamic is on" assertion -- presumably alternatives
 * under a preprocessor switch not visible here; confirm.
 */
8021 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE );
8023 KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_set_dynamic[0] == TRUE );
8025 KMP_DEBUG_ASSERT( set_nproc > 1 );
8027 if ( set_nproc == 1) {
8028 KB_TRACE( 20, (
"__kmp_load_balance_nproc: serial execution.\n" ) );
/* Threads already attributable to this runtime: active pool threads + active hot-team threads + 1. */
8039 pool_active = TCR_4(__kmp_thread_pool_active_nth);
8040 hot_team_active = __kmp_active_hot_team_nproc( root );
8041 team_curr_active = pool_active + hot_team_active + 1;
8046 system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active );
8047 KB_TRACE( 30, (
"__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n",
8048 system_active, pool_active, hot_team_active ) );
/* Negative result: switch the global mode to dynamic_thread_limit, warn, and size the team from __kmp_nth instead. */
8050 if ( system_active < 0 ) {
8057 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
8058 KMP_WARNING( CantLoadBalUsing,
"KMP_DYNAMIC_MODE=thread limit" );
8063 retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1
8064 : root->r.r_hot_team->t.t_nproc);
8065 if ( retval > set_nproc ) {
8068 if ( retval < KMP_MIN_NTH ) {
8069 retval = KMP_MIN_NTH;
8072 KB_TRACE( 20, (
"__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) );
/* The reported system load is never allowed to be lower than this runtime's own active count. */
8082 if ( system_active < team_curr_active ) {
8083 system_active = team_curr_active;
/* Available processors minus the load not accounted for by this runtime's own threads. */
8085 retval = __kmp_avail_proc - system_active + team_curr_active;
8086 if ( retval > set_nproc ) {
8089 if ( retval < KMP_MIN_NTH ) {
8090 retval = KMP_MIN_NTH;
8093 KB_TRACE( 20, (
"__kmp_load_balance_nproc: exit. retval:%d\n", retval ) );
/*
 * Tears down runtime state in the reverse order of initialization:
 * parallel stage (signal handlers), middle stage (affinity), then serial stage
 * (runtime destroy), followed by freeing the root/thread tables, user locks,
 * the cpuinfo file name, the nested thread-count list and the message catalog.
 * NOTE(review): the declaration of f, #endif lines and closing braces are not
 * visible in this listing.
 */
8105 __kmp_cleanup(
void )
8109 KA_TRACE( 10, (
"__kmp_cleanup: enter\n" ) );
8111 if (TCR_4(__kmp_init_parallel)) {
8112 #if KMP_HANDLE_SIGNALS
8113 __kmp_remove_signals();
8115 TCW_4(__kmp_init_parallel, FALSE);
8118 if (TCR_4(__kmp_init_middle)) {
8119 #if KMP_OS_WINDOWS || KMP_OS_LINUX
8120 __kmp_affinity_uninitialize();
8122 TCW_4(__kmp_init_middle, FALSE);
8125 KA_TRACE( 10, (
"__kmp_cleanup: go serial cleanup\n" ) );
8127 if (__kmp_init_serial) {
8129 __kmp_runtime_destroy();
8131 __kmp_init_serial = FALSE;
/* Free every remaining root structure and clear its slot. */
8134 for ( f = 0; f < __kmp_threads_capacity; f++ ) {
8135 if ( __kmp_root[ f ] != NULL ) {
8136 __kmp_free( __kmp_root[ f ] );
8137 __kmp_root[ f ] = NULL;
/*
 * Only __kmp_threads is freed here. NOTE(review): presumably the __kmp_root
 * table lives in the same allocation as __kmp_threads, and __kmp_root itself
 * is reset on a line not visible in this listing -- confirm.
 */
8140 __kmp_free( __kmp_threads );
8143 __kmp_threads = NULL;
8145 __kmp_threads_capacity = 0;
8147 __kmp_cleanup_user_locks();
8149 #if KMP_OS_LINUX || KMP_OS_WINDOWS
8150 KMP_INTERNAL_FREE( (
void *) __kmp_cpuinfo_file );
8151 __kmp_cpuinfo_file = NULL;
8154 #if KMP_USE_ADAPTIVE_LOCKS
8155 #if KMP_DEBUG_ADAPTIVE_LOCKS
8156 __kmp_print_speculative_stats();
/* Release the nested thread-count list and reset its bookkeeping fields. */
8159 KMP_INTERNAL_FREE( __kmp_nested_nth.nth );
8160 __kmp_nested_nth.nth = NULL;
8161 __kmp_nested_nth.size = 0;
8162 __kmp_nested_nth.used = 0;
8164 __kmp_i18n_catclose();
8166 KA_TRACE( 10, (
"__kmp_cleanup: exit\n" ) );
/*
 * Consults the KMP_IGNORE_MPPBEG environment variable via getenv() and tests
 * its value with __kmp_str_match_false().
 * NOTE(review): the declaration of `env` and both return statements are not
 * visible in this listing, so the values returned for each case cannot be
 * confirmed from here.
 */
8173 __kmp_ignore_mppbeg(
void )
8177 if ((env = getenv(
"KMP_IGNORE_MPPBEG" )) != NULL) {
8178 if (__kmp_str_match_false( env ))
/*
 * Consults the KMP_IGNORE_MPPEND environment variable via getenv() and tests
 * its value with __kmp_str_match_false().
 * NOTE(review): the declaration of `env` and both return statements are not
 * visible in this listing, so the values returned for each case cannot be
 * confirmed from here.
 */
8186 __kmp_ignore_mppend(
void )
8190 if ((env = getenv(
"KMP_IGNORE_MPPEND" )) != NULL) {
8191 if (__kmp_str_match_false( env ))
/*
 * Marks the calling thread's root as "begun" (r_begin = TRUE).
 * The flag is tested without the lock, then again after acquiring the root's
 * r_begin_lock, and is set while that lock is held.
 * NOTE(review): declarations of gtid/root and the early return in the locked
 * re-check are not visible in this listing.
 */
8199 __kmp_internal_begin(
void )
8206 gtid = __kmp_entry_gtid();
8207 root = __kmp_threads[ gtid ] -> th.th_root;
8208 KMP_ASSERT( KMP_UBER_GTID( gtid ));
/* Already begun: nothing to do. */
8210 if( root->r.r_begin )
return;
8211 __kmp_acquire_lock( &root->r.r_begin_lock, gtid );
/* Re-test under the lock; the lock is released if the flag is already set. */
8212 if( root->r.r_begin ) {
8213 __kmp_release_lock( & root->r.r_begin_lock, gtid );
8217 root -> r.r_begin = TRUE;
8219 __kmp_release_lock( & root->r.r_begin_lock, gtid );
/*
 * Applies a user-requested library mode for the calling thread.
 * Warns (SetLibraryIncorrectCall) when the thread's root is inside a parallel
 * region. For each known mode it clears the pending th_set_nproc request and
 * sets the thread's nproc: 1 for library_serial, otherwise
 * __kmp_dflt_team_nth, or __kmp_dflt_team_nth_ub when that is 0. Unknown modes
 * are fatal. Finally delegates to __kmp_aux_set_library().
 * NOTE(review): local declarations, the `switch` header, `break`/`default`
 * lines and whatever follows the warning are not visible in this listing.
 */
8227 __kmp_user_set_library (
enum library_type arg)
8235 gtid = __kmp_entry_gtid();
8236 thread = __kmp_threads[ gtid ];
8238 root = thread -> th.th_root;
8240 KA_TRACE( 20, (
"__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial ));
8241 if (root->r.r_in_parallel) {
8242 KMP_WARNING( SetLibraryIncorrectCall );
8247 case library_serial :
/* Serial mode: drop any pending thread-count request and set nproc to 1. */
8248 thread -> th.th_set_nproc = 0;
8249 set__nproc_p( thread, 1 );
8251 case library_turnaround :
8252 thread -> th.th_set_nproc = 0;
8253 set__nproc_p( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
8255 case library_throughput :
8256 thread -> th.th_set_nproc = 0;
8257 set__nproc_p( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub );
8260 KMP_FATAL( UnknownLibraryType, arg );
8263 __kmp_aux_set_library ( arg );
/*
 * Sets the default stack size (__kmp_stksize) from `arg`.
 * Serial initialization is ensured first. The new value is applied only while
 * __kmp_init_parallel is not yet set, is clamped to
 * [__kmp_sys_min_stksize, KMP_MAX_STKSIZE], and __kmp_env_stksize is set to
 * TRUE. The update happens under __kmp_initz_lock.
 * NOTE(review): the declaration of `value` (presumably initialized from arg),
 * the rest of the alignment branch and any preprocessor guard around it are
 * not visible in this listing.
 */
8267 __kmp_aux_set_stacksize(
size_t arg )
8269 if (! __kmp_init_serial)
8270 __kmp_serial_initialize();
/* If arg is not a multiple of 0x1000, clear its low 12 bits. */
8273 if (arg & (0x1000 - 1)) {
8274 arg &= ~(0x1000 - 1);
8279 __kmp_acquire_bootstrap_lock( &__kmp_initz_lock );
/* The default is changed only while parallel initialization has not completed. */
8282 if (! TCR_4(__kmp_init_parallel)) {
8285 if (value < __kmp_sys_min_stksize )
8286 value = __kmp_sys_min_stksize ;
8287 else if (value > KMP_MAX_STKSIZE)
8288 value = KMP_MAX_STKSIZE;
8290 __kmp_stksize = value;
8292 __kmp_env_stksize = TRUE;
8295 __kmp_release_bootstrap_lock( &__kmp_initz_lock );
/*
 * Stores `arg` in the global __kmp_library and applies it through
 * __kmp_change_library(): TRUE for library_serial (after an informational
 * LibraryIsSerial message) and library_turnaround, FALSE for
 * library_throughput. Any other value is fatal (UnknownLibraryType).
 * NOTE(review): `break` and `default` lines are not visible in this listing.
 */
8301 __kmp_aux_set_library (
enum library_type arg)
8303 __kmp_library = arg;
8305 switch ( __kmp_library ) {
8306 case library_serial :
8308 KMP_INFORM( LibraryIsSerial );
8309 (void) __kmp_change_library( TRUE );
8312 case library_turnaround :
8313 (void) __kmp_change_library( TRUE );
8315 case library_throughput :
8316 (void) __kmp_change_library( FALSE );
8319 KMP_FATAL( UnknownLibraryType, arg );
/*
 * Sets the blocktime for `thread`.
 *   arg    - requested blocktime; clamped to
 *            [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME].
 *   thread - thread whose current team (slot `tid`) and serial team (slot 0)
 *            are updated.
 *   tid    - thread's index within its current team.
 * The thread's internal controls are saved first; then blocktime, the derived
 * interval count and the bt_set value are written to both teams.
 * NOTE(review): declarations of bt_intervals/bt_set and the value assigned to
 * bt_set are not visible in this listing.
 */
8327 __kmp_aux_set_blocktime (
int arg, kmp_info_t *thread,
int tid)
8329 int blocktime = arg;
8333 __kmp_save_internal_controls( thread );
8336 if (blocktime < KMP_MIN_BLOCKTIME)
8337 blocktime = KMP_MIN_BLOCKTIME;
8338 else if (blocktime > KMP_MAX_BLOCKTIME)
8339 blocktime = KMP_MAX_BLOCKTIME;
8341 set__blocktime_team( thread -> th.th_team, tid, blocktime );
8342 set__blocktime_team( thread -> th.th_serial_team, 0, blocktime );
/* Interval count is derived from the blocktime and __kmp_monitor_wakeups. */
8345 bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);
8347 set__bt_intervals_team( thread -> th.th_team, tid, bt_intervals );
8348 set__bt_intervals_team( thread -> th.th_serial_team, 0, bt_intervals );
8353 set__bt_set_team( thread -> th.th_team, tid, bt_set );
8354 set__bt_set_team( thread -> th.th_serial_team, 0, bt_set );
8355 KF_TRACE(10, (
"kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, bt_intervals=%d, monitor_updates=%d\n",
8356 __kmp_gtid_from_tid(tid, thread->th.th_team),
8357 thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, __kmp_monitor_wakeups ) );
/*
 * Ensures serial initialization has run, then passes `str` to
 * __kmp_env_initialize(). A further step is taken when __kmp_settings is set.
 * NOTE(review): the parameter list (which declares `str`) and the body of the
 * __kmp_settings branch are not visible in this listing.
 */
8361 __kmp_aux_set_defaults(
8365 if ( ! __kmp_init_serial ) {
8366 __kmp_serial_initialize();
8368 __kmp_env_initialize( str );
8370 if (__kmp_settings) {
/*
 * Selects the method used to implement a reduction.
 *   loc         - source location; its flags are tested for
 *                 KMP_IDENT_ATOMIC_REDUCE to decide whether the atomic method
 *                 is available.
 *   global_tid  - used to look up the current team size.
 *   num_vars    - number of reduction variables (used by some tuning branches).
 *   reduce_size - size of the reduction data (used by one tuning branch).
 *   reduce_data,
 *   reduce_func - the tree method is available only when both are non-null.
 *   lck         - only asserted non-null in the visible lines.
 * Selection order: start with critical_reduce_block; a team of one thread gets
 * empty_reduce_block; otherwise architecture/OS-specific tuning may choose the
 * atomic or a tree method; finally a method forced through
 * __kmp_force_reduction_method overrides the choice.
 * NOTE(review): the architecture-level #if/#elif lines, several
 * #else/#endif lines, `break`/`default` lines and the return statement are not
 * visible in this listing -- presumably `retval` is returned; confirm.
 */
8389 PACKED_REDUCTION_METHOD_T
8390 __kmp_determine_reduction_method(
ident_t *loc, kmp_int32 global_tid,
8391 kmp_int32 num_vars,
size_t reduce_size,
void *reduce_data,
void (*reduce_func)(
void *lhs_data,
void *rhs_data),
8392 kmp_critical_name *lck )
8400 PACKED_REDUCTION_METHOD_T retval;
8404 KMP_DEBUG_ASSERT( loc );
8405 KMP_DEBUG_ASSERT( lck );
/* Local helper macros (undefined again at the end of the function) describing which fast methods are available. */
8407 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) )
8408 #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) )
/* Default choice when no tuning branch below selects something else. */
8410 retval = critical_reduce_block;
8412 team_size = __kmp_get_team_num_threads( global_tid );
8414 if( team_size == 1 ) {
8416 retval = empty_reduce_block;
8420 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8421 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8425 #if KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_DARWIN
/* Two alternative cutoff values; the condition choosing between them is not visible in this listing. */
8427 #define REDUCTION_TEAMSIZE_CUTOFF 8
8429 #define REDUCTION_TEAMSIZE_CUTOFF 4
/*
 * Tree method available: teams up to the cutoff use atomic when available;
 * the tree-with-reduction-barrier assignment is presumably the else branch
 * for larger teams. Tree not available: use atomic when available.
 */
8431 if( tree_available ) {
8432 if( team_size <= REDUCTION_TEAMSIZE_CUTOFF ) {
8433 if ( atomic_available ) {
8434 retval = atomic_reduce_block;
8437 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8439 }
else if ( atomic_available ) {
8440 retval = atomic_reduce_block;
8443 #error "Unknown or unsupported OS"
8444 #endif // KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_DARWIN
8448 #if KMP_OS_LINUX || KMP_OS_WINDOWS
/* This branch uses atomic only for at most 2 reduction variables; otherwise retval is left unchanged. */
8464 if( atomic_available ) {
8465 if( num_vars <= 2 ) {
8466 retval = atomic_reduce_block;
/*
 * Alternative branch (its selecting condition is not visible here): atomic
 * for at most 3 variables, else tree with a plain barrier when reduce_size is
 * strictly between 9 and 2000 kmp_real64 elements.
 */
8473 if( atomic_available && ( num_vars <= 3 ) ) {
8474 retval = atomic_reduce_block;
8475 }
else if( tree_available ) {
8476 if( ( reduce_size > ( 9 *
sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 *
sizeof( kmp_real64 ) ) ) ) {
8477 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
8482 #error "Unknown or unsupported OS"
8486 #error "Unknown or unsupported architecture"
/* A forced method, when defined, replaces the tuned choice made above. */
8505 if( __kmp_force_reduction_method != reduction_method_not_defined ) {
8507 PACKED_REDUCTION_METHOD_T forced_retval;
8509 int atomic_available, tree_available;
8511 switch( ( forced_retval = __kmp_force_reduction_method ) )
8513 case critical_reduce_block:
/* A forced critical method still becomes empty_reduce_block for teams of at most one thread. */
8515 if( team_size <= 1 ) {
8516 forced_retval = empty_reduce_block;
8520 case atomic_reduce_block:
/* Forcing a method that is not available is an assertion failure. */
8521 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
8522 KMP_ASSERT( atomic_available );
8525 case tree_reduce_block:
8526 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
8527 KMP_ASSERT( tree_available );
8528 #if KMP_FAST_REDUCTION_BARRIER
8529 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
8537 retval = forced_retval;
8540 KA_TRACE(10, (
"reduction method selected=%08x\n", retval ) );
8542 #undef FAST_REDUCTION_TREE_METHOD_GENERATED
8543 #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED
8550 __kmp_get_reduce_method(
void ) {
8551 return ( ( __kmp_entry_thread() -> th.th_local.packed_reduction_method ) >> 8 );