// Forward declarations of file-local (static) helpers.
// __kmp_enable_tasking and __kmp_alloc_task_deque are called from __kmp_push_task further down.
49 static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
50 static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
51 static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
// Macro form: __kmp_static_delay( arg ) expands to nothing.
// NOTE(review): a function with the same name follows, so the two are presumably
// selected by a preprocessor conditional that is not visible here -- confirm.
54 # define __kmp_static_delay( arg )
// Function form: performs no delay, it only asserts on its argument.
58 __kmp_static_delay(
int arg )
// On x86-64 Linux the argument must be non-zero ...
61 # if KMP_ARCH_X86_64 && KMP_OS_LINUX
62 KMP_ASSERT( arg != 0 );
// ... NOTE(review): presumably the alternative branch for other targets; it only
// requires a non-negative argument.
64 KMP_ASSERT( arg >= 0 );
// __kmp_static_yield: takes a single int argument.
// NOTE(review): the body is not visible in this view; document its behavior after
// confirming against the complete file.
70 __kmp_static_yield(
int arg )
75 #ifdef BUILD_TIED_TASK_STACK
87 __kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data,
int threshold,
char *location )
89 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
90 kmp_taskdata_t **stack_top = task_stack -> ts_top;
91 kmp_int32 entries = task_stack -> ts_entries;
92 kmp_taskdata_t *tied_task;
94 KA_TRACE(threshold, (
"__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
95 "first_block = %p, stack_top = %p \n",
96 location, gtid, entries, task_stack->ts_first_block, stack_top ) );
98 KMP_DEBUG_ASSERT( stack_top != NULL );
99 KMP_DEBUG_ASSERT( entries > 0 );
101 while ( entries != 0 )
103 KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
105 if ( entries & TASK_STACK_INDEX_MASK == 0 )
107 kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
109 stack_block = stack_block -> sb_prev;
110 stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
117 tied_task = * stack_top;
119 KMP_DEBUG_ASSERT( tied_task != NULL );
120 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
122 KA_TRACE(threshold, (
"__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
123 "stack_top=%p, tied_task=%p\n",
124 location, gtid, entries, stack_top, tied_task ) );
126 KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
128 KA_TRACE(threshold, (
"__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
141 __kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
143 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
144 kmp_stack_block_t *first_block;
147 first_block = & task_stack -> ts_first_block;
148 task_stack -> ts_top = (kmp_taskdata_t **) first_block;
149 memset( (
void *) first_block,
'\0', TASK_STACK_BLOCK_SIZE *
sizeof(kmp_taskdata_t *));
152 task_stack -> ts_entries = TASK_STACK_EMPTY;
153 first_block -> sb_next = NULL;
154 first_block -> sb_prev = NULL;
165 __kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
167 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
168 kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
170 KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
172 while ( stack_block != NULL ) {
173 kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
175 stack_block -> sb_next = NULL;
176 stack_block -> sb_prev = NULL;
177 if (stack_block != & task_stack -> ts_first_block) {
178 __kmp_thread_free( thread, stack_block );
180 stack_block = next_block;
183 task_stack -> ts_entries = 0;
184 task_stack -> ts_top = NULL;
197 __kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
200 kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
201 tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
202 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
204 if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
208 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
209 KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
211 KA_TRACE(20, (
"__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
212 gtid, thread, tied_task ) );
214 * (task_stack -> ts_top) = tied_task;
217 task_stack -> ts_top++;
218 task_stack -> ts_entries++;
220 if ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK == 0 )
223 kmp_stack_block_t *stack_block =
224 (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
227 if ( stack_block -> sb_next != NULL )
229 task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
233 kmp_stack_block_t *new_block = (kmp_stack_block_t *)
234 __kmp_thread_calloc(thread,
sizeof(kmp_stack_block_t));
236 task_stack -> ts_top = & new_block -> sb_block[0];
237 stack_block -> sb_next = new_block;
238 new_block -> sb_prev = stack_block;
239 new_block -> sb_next = NULL;
241 KA_TRACE(30, (
"__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
242 gtid, tied_task, new_block ) );
245 KA_TRACE(20, (
"__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
258 __kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
261 kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
262 kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
263 kmp_taskdata_t *tied_task;
265 if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
269 KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
270 KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
272 KA_TRACE(20, (
"__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
275 if ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK == 0 )
277 kmp_stack_block_t *stack_block =
278 (kmp_stack_block_t *) (task_stack -> ts_top) ;
280 stack_block = stack_block -> sb_prev;
281 task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
285 task_stack -> ts_top--;
286 task_stack -> ts_entries--;
288 tied_task = * (task_stack -> ts_top );
290 KMP_DEBUG_ASSERT( tied_task != NULL );
291 KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
292 KMP_DEBUG_ASSERT( tied_task == ending_task );
294 KA_TRACE(20, (
"__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
303 __kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
305 kmp_info_t * thread = __kmp_threads[ gtid ];
306 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
307 kmp_task_team_t * task_team = thread->th.th_task_team;
308 kmp_int32 tid = __kmp_tid_from_gtid( gtid );
309 kmp_thread_data_t * thread_data;
311 KA_TRACE(20, (
"__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
314 if ( taskdata->td_flags.task_serial ) {
315 KA_TRACE(20, (
"__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
317 return TASK_NOT_PUSHED;
321 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
322 if ( ! KMP_TASKING_ENABLED( task_team, thread->th.th_task_state ) ) {
323 __kmp_enable_tasking( task_team, thread );
325 KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
326 KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
329 thread_data = & task_team -> tt.tt_threads_data[ tid ];
332 if (thread_data -> td.td_deque == NULL ) {
333 __kmp_alloc_task_deque( thread, thread_data );
337 if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
339 KA_TRACE(20, (
"__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
341 return TASK_NOT_PUSHED;
345 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
348 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
350 thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata;
352 thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
353 TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1);
355 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
357 KA_TRACE(20, (
"__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
358 "task=%p ntasks=%d head=%u tail=%u\n",
359 gtid, taskdata, thread_data->td.td_deque_ntasks,
360 thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );
362 return TASK_SUCCESSFULLY_PUSHED;
371 __kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
373 KF_TRACE( 10, (
"__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
374 "curtask_parent=%p\n",
375 0, this_thr, this_thr -> th.th_current_task,
376 this_thr -> th.th_current_task -> td_parent ) );
378 this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
380 KF_TRACE( 10, (
"__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
381 "curtask_parent=%p\n",
382 0, this_thr, this_thr -> th.th_current_task,
383 this_thr -> th.th_current_task -> td_parent ) );
// __kmp_push_current_task_to_thread: make one of the team's implicit tasks the
// current task of this_thr and set that implicit task's td_parent.
//
// this_thr: thread whose th_current_task is updated
// team:     team that owns the t_implicit_task_taskdata array
// tid:      index of the implicit task slot used for this thread
//
// NOTE(review): two assignment sequences are visible below (one for slot 0, one for
// slot tid); the condition that selects between them is not visible in this view.
394 __kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team,
int tid )
397 KF_TRACE( 10, (
"__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
399 tid, this_thr, this_thr->th.th_current_task,
400 team->t.t_implicit_task_taskdata[tid].td_parent ) );
// NOTE(review): this assertion comes after the trace arguments above have already
// dereferenced this_thr.
402 KMP_DEBUG_ASSERT (this_thr != NULL);
// Slot 0: unless it is already the current task, record the previous current task
// as its parent and make it current.
405 if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
406 team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
407 this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
// Slot tid: inherit the parent recorded in slot 0 and make slot tid current.
410 team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
411 this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
414 KF_TRACE( 10, (
"__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
416 tid, this_thr, this_thr->th.th_current_task,
417 team->t.t_implicit_task_taskdata[tid].td_parent ) );
// __kmp_task_start: bookkeeping performed when a thread begins executing an explicit task.
//
// gtid:         global thread id of the executing thread
// task:         task that is about to run
// current_task: task being suspended in favour of `task`
//
// Marks current_task as not executing, makes `task` the thread's current task and
// sets its started/executing flags.
428 __kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
430 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
431 kmp_info_t * thread = __kmp_threads[ gtid ];
433 KA_TRACE(10, (
"__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
434 gtid, taskdata, current_task) );
436 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
// The task being left is no longer the one executing.
441 current_task -> td_flags.executing = 0;
// Only when the tied task stack is built in: record tied tasks on the thread's stack.
444 #ifdef BUILD_TIED_TASK_STACK
445 if ( taskdata -> td_flags.tiedness == TASK_TIED )
447 __kmp_push_task_stack( gtid, thread, taskdata );
// The new task becomes the thread's current task.
452 thread -> th.th_current_task = taskdata;
// A task must not have been started before; flip it to started + executing.
454 KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
455 KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
456 taskdata -> td_flags.started = 1;
457 taskdata -> td_flags.executing = 1;
458 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
459 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
466 KA_TRACE(10, (
"__kmp_task_start(exit): T#%d task=%p\n",
480 __kmpc_omp_task_begin_if0(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
482 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
483 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
485 KA_TRACE(10, (
"__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
486 gtid, loc_ref, taskdata, current_task ) );
488 taskdata -> td_flags.task_serial = 1;
489 __kmp_task_start( gtid, task, current_task );
491 KA_TRACE(10, (
"__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
492 gtid, loc_ref, taskdata ) );
503 __kmpc_omp_task_begin(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
505 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
507 KA_TRACE(10, (
"__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
508 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) );
510 __kmp_task_start( gtid, task, current_task );
512 KA_TRACE(10, (
"__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
513 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
517 #endif // TASK_UNUSED
// __kmp_free_task: mark an explicit task as freed and release its storage.
//
// gtid:     global thread id of the calling thread (trace only in the visible code)
// taskdata: task to release; must be complete, not executing and not yet freed
// thread:   thread passed to the deallocation routine
527 __kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
529 KA_TRACE(30, (
"__kmp_free_task: T#%d freeing data from task %p\n",
// Sanity checks (debug builds): explicit, finished, not freed, and no children left
// unless the task was serialized.
533 KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
534 KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
535 KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
536 KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
537 KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
538 KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
// The flag is set before the memory is handed back.
540 taskdata->td_flags.freed = 1;
// NOTE(review): two release calls are visible; presumably only one of them is compiled
// in, chosen by a preprocessor conditional not visible here (compare the two
// allocation calls in __kmp_task_alloc) -- confirm.
543 __kmp_fast_free( thread, taskdata );
545 __kmp_thread_free( thread, taskdata );
548 KA_TRACE(20, (
"__kmp_free_task: T#%d freed task %p\n",
// __kmp_free_task_and_ancestors: free a finished explicit task and then walk up the
// parent chain, freeing each ancestor whose allocated-child count drops to zero.
//
// gtid:     global thread id of the calling thread
// taskdata: task that has finished
// thread:   thread passed on to __kmp_free_task
560 __kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
// children starts at 0, so in the serialized case the loop below is entered directly.
562 kmp_int32 children = 0;
563 kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
565 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
// Non-serialized case: atomically drop this task's own allocation count.
// NOTE(review): the "- 1" suggests KMP_TEST_THEN_DEC32 yields the value before the
// decrement -- confirm against its definition.
567 if ( !team_or_tasking_serialized ) {
568 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
569 KMP_DEBUG_ASSERT( children >= 0 );
// Free while the count just reached zero.
573 while ( children == 0 )
// Read the parent before the task's memory is released.
575 kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
577 KA_TRACE(20, (
"__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
578 "and freeing itself\n", gtid, taskdata) );
581 __kmp_free_task( gtid, taskdata, thread );
583 taskdata = parent_taskdata;
// NOTE(review): the action taken for a serialized team/tasking or an implicit-task
// parent is not visible in this view; presumably the walk stops here -- confirm.
587 if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
// Otherwise drop the parent's allocation count and loop again if it hit zero.
590 if ( !team_or_tasking_serialized ) {
592 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
593 KMP_DEBUG_ASSERT( children >= 0 );
597 KA_TRACE(20, (
"__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
598 "not freeing it yet\n", gtid, taskdata, children) );
// __kmp_task_finish: bookkeeping performed when an explicit task has finished running.
//
// gtid:         global thread id of the executing thread
// task:         task that has finished
// resumed_task: task to make current again; when NULL is passed for a task_serial
//               task, the finished task's parent is used
//
// Marks the task complete, updates the parent's incomplete-child count and the
// taskgroup count (non-serialized case), releases the task via
// __kmp_free_task_and_ancestors and restores resumed_task as the current task.
608 __kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
610 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
611 kmp_info_t * thread = __kmp_threads[ gtid ];
612 kmp_int32 children = 0;
614 KA_TRACE(10, (
"__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
615 gtid, taskdata, resumed_task) );
617 KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
// Only when the tied task stack is built in: remove tied tasks from the thread's stack.
620 #ifdef BUILD_TIED_TASK_STACK
621 if ( taskdata -> td_flags.tiedness == TASK_TIED )
623 __kmp_pop_task_stack( gtid, thread, taskdata );
// Flip the state from executing to complete.
627 KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
628 KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
629 taskdata -> td_flags.executing = 0;
630 taskdata -> td_flags.complete = 1;
631 KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
632 KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
// Counters are only maintained when neither the team nor tasking is serialized
// (mirrors the increments done in __kmp_task_alloc).
635 if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
637 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
638 KMP_DEBUG_ASSERT( children >= 0 );
// A task that belongs to a taskgroup also decrements that group's count.
640 if ( taskdata->td_taskgroup )
641 KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
645 KA_TRACE(20, (
"__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
646 gtid, taskdata, children) );
// Checks that tasking_ser implies task_serial.
650 KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
651 taskdata->td_flags.task_serial);
// Serialized task: the task to resume is the parent; a NULL argument is filled in.
652 if ( taskdata->td_flags.task_serial )
654 if (resumed_task == NULL) {
655 resumed_task = taskdata->td_parent;
// NOTE(review): the branch structure around the next two assertions is not fully
// visible in this view.
659 KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
663 KMP_DEBUG_ASSERT( resumed_task != NULL );
// Release the task (and any ancestors that become releasable).
667 __kmp_free_task_and_ancestors(gtid, taskdata, thread);
// Restore the resumed task as the thread's current, executing task.
669 __kmp_threads[ gtid ] -> th.th_current_task = resumed_task;
673 resumed_task->td_flags.executing = 1;
675 KA_TRACE(10, (
"__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
676 gtid, taskdata, resumed_task) );
688 __kmpc_omp_task_complete_if0(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
690 KA_TRACE(10, (
"__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
691 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
693 __kmp_task_finish( gtid, task, NULL );
695 KA_TRACE(10, (
"__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
696 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
707 __kmpc_omp_task_complete(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
709 KA_TRACE(10, (
"__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
710 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
712 __kmp_task_finish( gtid, task, NULL );
714 KA_TRACE(10, (
"__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
715 gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
718 #endif // TASK_UNUSED
// __kmp_init_implicit_task: initialize the implicit task slot `tid` of a team.
//
// loc_ref:       source location stored in the task's td_ident
// this_thr:      thread that will run the implicit task
// team:          team that owns the t_implicit_task_taskdata array
// tid:           index of the implicit task slot to initialize
// set_curr_task: NOTE(review): in the visible code this is only read by the entry
//                trace; presumably it also guards the
//                __kmp_push_current_task_to_thread call below -- confirm.
733 __kmp_init_implicit_task(
ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team,
int tid,
int set_curr_task )
735 kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
737 KF_TRACE(10, (
"__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
738 tid, team, task, set_curr_task ?
"TRUE" :
"FALSE" ) );
// Identity and taskwait bookkeeping.
740 task->td_task_id = KMP_GEN_TASK_ID();
741 task->td_team = team;
743 task->td_ident = loc_ref;
744 task->td_taskwait_ident = NULL;
745 task->td_taskwait_counter = 0;
746 task->td_taskwait_thread = 0;
// Implicit tasks are tied and always flagged task_serial.
748 task->td_flags.tiedness = TASK_TIED;
749 task->td_flags.tasktype = TASK_IMPLICIT;
751 task->td_flags.task_serial = 1;
752 task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
753 task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
// An implicit task is created already started and executing.
755 task->td_flags.started = 1;
756 task->td_flags.executing = 1;
757 task->td_flags.complete = 0;
758 task->td_flags.freed = 0;
// Child counters start at zero; no taskgroup is attached.
761 task->td_incomplete_child_tasks = 0;
762 task->td_allocated_child_tasks = 0;
764 task->td_taskgroup = NULL;
// Install the implicit task as the thread's current task.
766 __kmp_push_current_task_to_thread( this_thr, team, tid );
768 KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
769 KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
772 KF_TRACE(10, (
"__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
779 __kmp_round_up_to_val(
size_t size,
size_t val ) {
780 if ( size & ( val - 1 ) ) {
781 size &= ~ ( val - 1 );
782 if ( size <= KMP_SIZE_T_MAX - val ) {
// __kmp_task_alloc: allocate and initialize the bookkeeping (taskdata), the task
// structure and the shared-variable area of a new explicit task in one allocation.
//
// loc_ref:           source location stored in td_ident
// gtid:              global thread id of the allocating thread
// flags:             tiedness / final / merged_if0 / native flags requested for the task
// sizeof_kmp_task_t: size of the task structure that follows the taskdata
// sizeof_shareds:    size of the shared-variable area (may be 0)
// task_entry:        routine stored in task->routine
//
// The new task's parent is the allocating thread's current task.
803 __kmp_task_alloc(
ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
804 size_t sizeof_kmp_task_t,
size_t sizeof_shareds,
805 kmp_routine_entry_t task_entry )
808 kmp_taskdata_t *taskdata;
809 kmp_info_t *thread = __kmp_threads[ gtid ];
810 kmp_team_t *team = thread->th.th_team;
811 kmp_taskdata_t *parent_task = thread->th.th_current_task;
812 size_t shareds_offset;
// NOTE(review): the size_t arguments are printed with %ld in the traces of this function.
814 KA_TRACE(10, (
"__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
815 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
816 gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
817 sizeof_shareds, task_entry) );
// NOTE(review): the statements guarded by these two tests are not visible in this
// view; presumably they adjust `flags` for children of a final task -- confirm.
819 if ( parent_task->td_flags.final ) {
820 if (flags->merged_if0) {
// Layout: taskdata, then the task structure, then the shareds area, whose offset is
// rounded up to pointer size.
827 shareds_offset =
sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
828 shareds_offset = __kmp_round_up_to_val( shareds_offset,
sizeof(
void * ));
831 KA_TRACE(30, (
"__kmp_task_alloc: T#%d First malloc size: %ld\n",
832 gtid, shareds_offset) );
833 KA_TRACE(30, (
"__kmp_task_alloc: T#%d Second malloc size: %ld\n",
834 gtid, sizeof_shareds) );
// NOTE(review): two allocation calls are visible; presumably only one is compiled in,
// selected by a preprocessor conditional not visible here -- confirm.
838 taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
840 taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
843 task = KMP_TASKDATA_TO_TASK(taskdata);
// Alignment checks on both structures.
// NOTE(review): a double-sized and a _Quad-sized variant are both visible; presumably
// they are alternatives chosen per target -- confirm.
847 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (
sizeof(
double)-1) ) == 0 );
848 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (
sizeof(
double)-1) ) == 0 );
850 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (
sizeof(_Quad)-1) ) == 0 );
851 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (
sizeof(_Quad)-1) ) == 0 );
// The shareds pointer is aimed into the same allocation, or NULL when no shareds
// area was requested.
853 if (sizeof_shareds > 0) {
855 task->shareds = & ((
char *) taskdata)[ shareds_offset ];
857 KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (
sizeof(
void *)-1) ) == 0 );
859 task->shareds = NULL;
861 task->routine = task_entry;
// Identity, ownership and position in the task tree.
864 taskdata->td_task_id = KMP_GEN_TASK_ID();
865 taskdata->td_team = team;
866 taskdata->td_alloc_thread = thread;
867 taskdata->td_parent = parent_task;
868 taskdata->td_level = parent_task->td_level + 1;
869 taskdata->td_ident = loc_ref;
870 taskdata->td_taskwait_ident = NULL;
871 taskdata->td_taskwait_counter = 0;
872 taskdata->td_taskwait_thread = 0;
873 KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
// The new task starts with a copy of its parent's ICVs.
874 copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
// Flags taken from the caller's request.
876 taskdata->td_flags.tiedness = flags->tiedness;
877 taskdata->td_flags.final = flags->final;
878 taskdata->td_flags.merged_if0 = flags->merged_if0;
879 taskdata->td_flags.tasktype = TASK_EXPLICIT;
// Serialization flags derived from the runtime mode and the team.
882 taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
885 taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
// A task is serial when it is final or when team/tasking is serialized.
890 taskdata->td_flags.task_serial = ( taskdata->td_flags.final
891 || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
893 taskdata->td_flags.started = 0;
894 taskdata->td_flags.executing = 0;
895 taskdata->td_flags.complete = 0;
896 taskdata->td_flags.freed = 0;
898 taskdata->td_flags.native = flags->native;
// The allocation count starts at 1; __kmp_free_task_and_ancestors decrements it when
// the task finishes.
900 taskdata->td_incomplete_child_tasks = 0;
901 taskdata->td_allocated_child_tasks = 1;
// The task joins its parent's taskgroup (if any).
903 taskdata->td_taskgroup = parent_task->td_taskgroup;
// Counters are only maintained when neither the team nor tasking is serialized.
906 if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
907 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
909 if ( parent_task->td_taskgroup )
910 KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
// Only explicit parents track allocated children.
913 if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
914 KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
918 KA_TRACE(20, (
"__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
919 gtid, taskdata, taskdata->td_parent) );
926 __kmpc_omp_task_alloc(
ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
927 size_t sizeof_kmp_task_t,
size_t sizeof_shareds,
928 kmp_routine_entry_t task_entry )
931 kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
933 input_flags->native = FALSE;
936 KA_TRACE(10, (
"__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
937 "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
938 gtid, loc_ref, input_flags->tiedness ?
"tied " :
"untied",
939 sizeof_kmp_task_t, sizeof_shareds, task_entry) );
941 retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
942 sizeof_shareds, task_entry );
944 KA_TRACE(20, (
"__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
957 __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
959 kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
960 KA_TRACE(30, (
"__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
961 gtid, taskdata, current_task) );
963 __kmp_task_start( gtid, task, current_task );
969 #ifdef KMP_GOMP_COMPAT
970 if (taskdata->td_flags.native) {
971 ((void (*)(
void *))(*(task->routine)))(task->shareds);
976 (*(task->routine))(gtid, task);
979 __kmp_task_finish( gtid, task, current_task );
981 KA_TRACE(30, (
"__kmp_inovke_task(exit): T#%d completed task %p, resuming task %p\n",
982 gtid, taskdata, current_task) );
997 __kmpc_omp_task_parts(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
999 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1001 KA_TRACE(10, (
"__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1002 gtid, loc_ref, new_taskdata ) );
1007 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED )
1009 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1010 new_taskdata->td_flags.task_serial = 1;
1011 __kmp_invoke_task( gtid, new_task, current_task );
1014 KA_TRACE(10, (
"__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1015 "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1018 return TASK_CURRENT_NOT_QUEUED;
1033 __kmpc_omp_task(
ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1035 kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1038 KA_TRACE(10, (
"__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1039 gtid, loc_ref, new_taskdata ) );
1044 if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED )
1046 kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1047 new_taskdata -> td_flags.task_serial = 1;
1048 __kmp_invoke_task( gtid, new_task, current_task );
1051 KA_TRACE(10, (
"__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1052 gtid, loc_ref, new_taskdata ) );
1054 return TASK_CURRENT_NOT_QUEUED;
// __kmpc_omp_taskwait: wait until all child tasks of the current task have completed,
// executing other tasks in the meantime.
//
// loc_ref: source location recorded in the task's td_taskwait_ident
// gtid:    global thread id of the calling thread
//
// Returns TASK_CURRENT_NOT_QUEUED.
1062 __kmpc_omp_taskwait(
ident_t *loc_ref, kmp_int32 gtid )
1064 kmp_taskdata_t * taskdata;
1065 kmp_info_t * thread;
1066 int thread_finished = FALSE;
1068 KA_TRACE(10, (
"__kmpc_omp_taskwait(enter): T#%d loc=%p\n",
// Nothing to wait for when tasks are executed immediately.
// NOTE(review): taskdata is assigned only inside this branch but is passed to the
// exit trace at the end of the function.
1071 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1074 thread = __kmp_threads[ gtid ];
1075 taskdata = thread -> th.th_current_task;
// Record the taskwait on the task; the thread is stored as gtid + 1.
1079 taskdata->td_taskwait_counter += 1;
1080 taskdata->td_taskwait_ident = loc_ref;
1081 taskdata->td_taskwait_thread = gtid + 1;
// ITT instrumentation hooks. NOTE(review): presumably compiled only in ITT-enabled
// builds; the conditional is not visible here.
1084 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1085 if ( itt_sync_obj != NULL )
1086 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
// Serialized teams skip the wait loop; otherwise run tasks until no incomplete
// children remain.
1089 if ( ! taskdata->td_flags.team_serial ) {
1091 while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
1092 __kmp_execute_tasks( thread, gtid, &(taskdata->td_incomplete_child_tasks),
1093 0, FALSE, &thread_finished,
1097 __kmp_task_stealing_constraint );
1101 if ( itt_sync_obj != NULL )
1102 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
// Negating the stored value marks the taskwait as finished.
1106 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1109 KA_TRACE(10, (
"__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1110 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1112 return TASK_CURRENT_NOT_QUEUED;
// __kmpc_omp_taskyield: give the runtime a chance to execute other tasks at this point.
//
// loc_ref:  source location recorded in the task's td_taskwait_ident
// gtid:     global thread id of the calling thread
// end_part: only printed by the entry trace in the visible code
//
// Returns TASK_CURRENT_NOT_QUEUED.
1120 __kmpc_omp_taskyield(
ident_t *loc_ref, kmp_int32 gtid,
int end_part )
1122 kmp_taskdata_t * taskdata;
1123 kmp_info_t * thread;
1124 int thread_finished = FALSE;
1126 KA_TRACE(10, (
"__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1127 gtid, loc_ref, end_part) );
// Nothing to do when tasks are executed immediately.
// NOTE(review): taskdata is assigned only inside this branch but is passed to the
// exit trace at the end of the function.
1129 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1132 thread = __kmp_threads[ gtid ];
1133 taskdata = thread -> th.th_current_task;
// Uses the same taskwait bookkeeping fields as __kmpc_omp_taskwait.
1138 taskdata->td_taskwait_counter += 1;
1139 taskdata->td_taskwait_ident = loc_ref;
1140 taskdata->td_taskwait_thread = gtid + 1;
// ITT instrumentation hooks. NOTE(review): presumably compiled only in ITT-enabled
// builds; the conditional is not visible here.
1143 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1144 if ( itt_sync_obj != NULL )
1145 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
// A single call with a NULL wait location (no loop), unlike taskwait.
1147 if ( ! taskdata->td_flags.team_serial ) {
1148 __kmp_execute_tasks( thread, gtid, NULL, 0, FALSE, &thread_finished,
1152 __kmp_task_stealing_constraint );
1156 if ( itt_sync_obj != NULL )
1157 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
// Negating the stored value marks the yield as finished.
1161 taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1164 KA_TRACE(10, (
"__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1165 "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1167 return TASK_CURRENT_NOT_QUEUED;
1176 __kmpc_taskgroup(
ident* loc,
int gtid )
1178 kmp_info_t * thread = __kmp_threads[ gtid ];
1179 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1180 kmp_taskgroup_t * tg_new =
1181 (kmp_taskgroup_t *)__kmp_thread_malloc( thread,
sizeof( kmp_taskgroup_t ) );
1182 KA_TRACE(10, (
"__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1184 tg_new->parent = taskdata->td_taskgroup;
1185 taskdata->td_taskgroup = tg_new;
// __kmpc_end_taskgroup: wait until the current taskgroup's count reaches zero, then
// pop the taskgroup from the current task and free it.
//
// loc:  source location information (trace only)
// gtid: global thread id of the calling thread
//
// NOTE(review): the parameter is spelled `ident*` here while the rest of the file
// uses `ident_t *` -- confirm the two name the same type.
1194 __kmpc_end_taskgroup(
ident* loc,
int gtid )
1196 kmp_info_t * thread = __kmp_threads[ gtid ];
1197 kmp_taskdata_t * taskdata = thread->th.th_current_task;
1198 kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1199 int thread_finished = FALSE;
1201 KA_TRACE(10, (
"__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
// A matching __kmpc_taskgroup call must have installed a taskgroup.
1202 KMP_DEBUG_ASSERT( taskgroup != NULL );
// No waiting is needed when tasks are executed immediately.
1204 if ( __kmp_tasking_mode != tskm_immediate_exec ) {
// ITT instrumentation hooks. NOTE(review): presumably compiled only in ITT-enabled
// builds; the conditional is not visible here.
1207 void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1208 if ( itt_sync_obj != NULL )
1209 __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
// Serialized teams skip the wait loop; otherwise run tasks until the group's count
// drops to zero.
1212 if ( ! taskdata->td_flags.team_serial ) {
1213 while ( TCR_4(taskgroup->count) != 0 ) {
1214 __kmp_execute_tasks( thread, gtid, &(taskgroup->count),
1215 0, FALSE, &thread_finished,
1219 __kmp_task_stealing_constraint );
1224 if ( itt_sync_obj != NULL )
1225 __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1228 KMP_DEBUG_ASSERT( taskgroup->count == 0 );
// Restore the enclosing taskgroup and release this one.
1231 taskdata->td_taskgroup = taskgroup->parent;
1232 __kmp_thread_free( thread, taskgroup );
1234 KA_TRACE(10, (
"__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1243 __kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1244 kmp_int32 is_constrained )
1247 kmp_taskdata_t * taskdata;
1248 kmp_thread_data_t *thread_data;
1251 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1252 KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL );
1254 thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1256 KA_TRACE(10, (
"__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1257 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1258 thread_data->td.td_deque_tail) );
1260 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1261 KA_TRACE(10, (
"__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1262 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1263 thread_data->td.td_deque_tail) );
1267 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1269 if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1270 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1271 KA_TRACE(10, (
"__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1272 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1273 thread_data->td.td_deque_tail) );
1277 tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK;
1278 taskdata = thread_data -> td.td_deque[ tail ];
1280 if (is_constrained) {
1283 kmp_taskdata_t * current = thread->th.th_current_task;
1284 kmp_int32 level = current->td_level;
1285 kmp_taskdata_t * parent = taskdata->td_parent;
1286 while ( parent != current && parent->td_level > level ) {
1287 parent = parent->td_parent;
1288 KMP_DEBUG_ASSERT(parent != NULL);
1290 if ( parent != current ) {
1292 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1293 KA_TRACE(10, (
"__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1294 gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1295 thread_data->td.td_deque_tail) );
1300 thread_data -> td.td_deque_tail = tail;
1301 TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1303 __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1305 KA_TRACE(10, (
"__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1306 gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1307 thread_data->td.td_deque_tail) );
1309 task = KMP_TASKDATA_TO_TASK( taskdata );
// __kmp_steal_task: attempt to take one task from the deque of another thread.
//
// victim:             thread whose deque is examined
// gtid:               global thread id of the thread doing the stealing
// task_team:          task team whose tt_threads_data array holds the per-thread deques
// unfinished_threads: counter that is incremented (KMP_TEST_THEN_INC32) when
//                     *thread_finished was set on entry to the removal step
// thread_finished:    in/out flag; reset to FALSE when a task is taken
// is_constrained:     zero     -> take the entry at the head of the victim's deque;
//                     non-zero -> look at the tail entry and take it only if walking its
//                                 td_parent chain reaches the current task of thread gtid
//
// The return statements are not among the visible lines.  Callers compare the result
// against NULL, so presumably NULL is returned on each "could not steal" path and
// `task` on success -- confirm against the full file.
1320 __kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1321 volatile kmp_uint32 *unfinished_threads,
int *thread_finished,
1322 kmp_int32 is_constrained )
1325 kmp_taskdata_t * taskdata;
1326 kmp_thread_data_t *victim_td, *threads_data;
1327 kmp_int32 victim_tid, thread_tid;
1329 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1331 threads_data = task_team -> tt.tt_threads_data;
1332 KMP_DEBUG_ASSERT( threads_data != NULL );
// Locate the victim's slot in the per-thread array by its team-local thread id.
1334 victim_tid = victim->th.th_info.ds.ds_tid;
1335 victim_td = & threads_data[ victim_tid ];
1337 KA_TRACE(10, (
"__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1338 "head=%u tail=%u\n",
1339 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1340 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
// First check, made without holding the deque lock: nothing queued, or the victim
// is no longer attached to this task team.
1342 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1343 (TCR_PTR(victim->th.th_task_team) != task_team))
1345 KA_TRACE(10, (
"__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1346 "ntasks=%d head=%u tail=%u\n",
1347 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1348 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1352 __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
// Same condition re-evaluated now that the deque lock is held.
1355 if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1356 (TCR_PTR(victim->th.th_task_team) != task_team))
1358 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1359 KA_TRACE(10, (
"__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1360 "ntasks=%d head=%u tail=%u\n",
1361 gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1362 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1366 KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
// Unconstrained: take the head entry and advance head, wrapping with TASK_DEQUE_MASK.
1368 if ( !is_constrained ) {
1369 taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1371 victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
// Constrained: candidate is the entry just below tail (index wrapped with TASK_DEQUE_MASK).
1374 kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK;
1375 taskdata = victim_td -> td.td_deque[ tail ];
// Walk up the candidate's ancestors while they are nested deeper than the stealing
// thread's current task; the candidate is acceptable only if that walk ends at `current`.
1378 kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1379 kmp_int32 level = current->td_level;
1380 kmp_taskdata_t * parent = taskdata->td_parent;
1381 while ( parent != current && parent->td_level > level ) {
1382 parent = parent->td_parent;
1383 KMP_DEBUG_ASSERT(parent != NULL);
1385 if ( parent != current ) {
1387 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1388 KA_TRACE(10, (
"__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1389 "ntasks=%d head=%u tail=%u\n",
1390 gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1391 task_team, victim_td->td.td_deque_ntasks,
1392 victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1395 victim_td -> td.td_deque_tail = tail;
// A thread that had already counted itself as finished becomes active again:
// bump the shared counter and clear the flag while the deque lock is still held.
1397 if (*thread_finished) {
1401 kmp_uint32 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
1403 KA_TRACE(20, (
"__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1404 gtid, count + 1, task_team) );
1406 *thread_finished = FALSE;
1408 TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1410 __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
// Seven arguments follow, so the format needs seven conversions; the victim's id
// must be written as "T#%d" or every later argument is printed with the wrong specifier.
1412 KA_TRACE(10, (
"__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p "
1413 "ntasks=%d head=%u tail=%u\n",
1414 gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1415 victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1416 victim_td->td.td_deque_tail) );
1418 task = KMP_TASKDATA_TO_TASK( taskdata );
// __kmp_execute_tasks: run queued tasks on behalf of `thread`, first from its own
// deque and then by stealing from other threads of the same task team.
//
// thread:          the executing thread (asserted equal to __kmp_threads[gtid])
// spinner:         location polled between tasks; compared against `checker`
// thread_finished: in/out flag; set TRUE here after this thread decrements
//                  tt_unfinished_threads, and passed on to __kmp_steal_task
// itt_sync_obj:    object handed to the __kmp_itt_task_starting/finished hooks;
//                  filled from __kmp_itt_barrier_object() when NULL
// is_constrained:  forwarded unchanged to __kmp_remove_my_task / __kmp_steal_task
//
// `gtid`, `checker` and `final_spin` are used in the body but their parameter lines
// are not visible in this excerpt.  The return statements are likewise not visible;
// __kmp_tasking_barrier tests the result with `!`, so it is some truth value --
// presumably non-zero on the "spin condition satisfied" exits; confirm in the full file.
1434 __kmp_execute_tasks( kmp_info_t *thread,
1436 volatile kmp_uint *spinner,
1439 int *thread_finished,
1441 void * itt_sync_obj,
1443 kmp_int32 is_constrained )
1445 kmp_task_team_t * task_team;
1447 kmp_thread_data_t * threads_data;
1449 kmp_taskdata_t * current_task = thread -> th.th_current_task;
1450 volatile kmp_uint32 * unfinished_threads;
1451 kmp_int32 nthreads, last_stolen, k, tid;
1453 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1454 KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1456 task_team = thread -> th.th_task_team;
1457 KMP_DEBUG_ASSERT( task_team != NULL );
1459 KA_TRACE(15, (
"__kmp_execute_tasks(enter): T#%d final_spin=%d *thread_finished=%d\n",
1460 gtid, final_spin, *thread_finished) );
1462 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1463 KMP_DEBUG_ASSERT( threads_data != NULL );
1465 nthreads = task_team -> tt.tt_nproc;
1466 unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
1467 KMP_DEBUG_ASSERT( nthreads > 1 );
1468 KMP_DEBUG_ASSERT( TCR_4((
int)*unfinished_threads) >= 0 );
// Phase 1: drain this thread's own deque, invoking each task with the task that was
// current on entry as the one to resume.
1472 while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1473 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1474 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1475 if ( itt_sync_obj == NULL ) {
1477 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1479 __kmp_itt_task_starting( itt_sync_obj );
1482 __kmp_invoke_task( gtid, task, current_task );
1484 if ( itt_sync_obj != NULL )
1485 __kmp_itt_task_finished( itt_sync_obj );
// After each task: stop when there is no spinner, or when not in the final spin and
// the spinner has reached `checker`.
1493 if ((spinner == NULL) || ((!final_spin) && (TCR_4(*spinner) == checker))) {
1494 KA_TRACE(15, (
"__kmp_execute_tasks(exit #1): T#%d spin condition satisfied\n", gtid) );
1497 KMP_YIELD( __kmp_library == library_throughput );
// Own deque is empty: count this thread as finished exactly once.
// (The enclosing condition line is not visible -- presumably guarded by final_spin.)
1506 if (! *thread_finished) {
1507 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1508 KA_TRACE(20, (
"__kmp_execute_tasks(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
1509 gtid, count, task_team) );
1510 *thread_finished = TRUE;
1518 if ((spinner != NULL) && (TCR_4(*spinner) == checker)) {
1519 KA_TRACE(15, (
"__kmp_execute_tasks(exit #2): T#%d spin condition satisfied\n", gtid) );
// Phase 2: retry the victim recorded in td_deque_last_stolen, if any (-1 means none).
1525 tid = thread -> th.th_info.ds.ds_tid;
1526 last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1528 if (last_stolen != -1) {
1529 kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1531 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1532 thread_finished, is_constrained )) != NULL)
1534 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1535 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1536 if ( itt_sync_obj == NULL ) {
1538 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1540 __kmp_itt_task_starting( itt_sync_obj );
1543 __kmp_invoke_task( gtid, task, current_task );
1545 if ( itt_sync_obj != NULL )
1546 __kmp_itt_task_finished( itt_sync_obj );
1550 if ((spinner == NULL) || ((!final_spin) && (TCR_4(*spinner) == checker))) {
1551 KA_TRACE(15, (
"__kmp_execute_tasks(exit #3): T#%d spin condition satisfied\n",
1556 KMP_YIELD( __kmp_library == library_throughput );
// A stolen task may have queued new tasks on this thread's own deque.
1559 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1560 KA_TRACE(20, (
"__kmp_execute_tasks: T#%d stolen task spawned other tasks, restart\n",
// The remembered victim yielded nothing more: forget it.
1567 threads_data[ tid ].td.td_deque_last_stolen = -1;
1575 if (! *thread_finished) {
1576 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1577 KA_TRACE(20, (
"__kmp_execute_tasks(dec #2): T#%d dec unfinished_threads to %d "
1578 "task_team=%p\n", gtid, count, task_team) );
1579 *thread_finished = TRUE;
1588 if ((spinner != NULL) && (TCR_4(*spinner) == checker)) {
1589 KA_TRACE(15, (
"__kmp_execute_tasks(exit #4): T#%d spin condition satisfied\n",
// Phase 3: pick a victim index from a random number in [0, nthreads-2].
// The body of the `k >= own tid` branch is not visible -- presumably it shifts k
// so that this thread never selects itself; confirm in the full file.
1602 k = __kmp_get_random( thread ) % (nthreads - 1);
1603 if ( k >= thread -> th.th_info.ds.ds_tid ) {
1607 kmp_info_t *other_thread = threads_data[k].td.td_thr;
// In task-team mode with a finite blocktime, resume the chosen thread if it has a
// non-NULL th_sleep_loc.
1617 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1618 (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1619 (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
1621 __kmp_resume( __kmp_gtid_from_thread( other_thread ), NULL );
1633 while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1634 thread_finished, is_constrained )) != NULL)
1636 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1637 if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1638 if ( itt_sync_obj == NULL ) {
1640 itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1642 __kmp_itt_task_starting( itt_sync_obj );
1645 __kmp_invoke_task( gtid, task, current_task );
1647 if ( itt_sync_obj != NULL )
1648 __kmp_itt_task_finished( itt_sync_obj );
// Remember this victim so the next call tries it first (see phase 2).
1653 threads_data[ tid ].td.td_deque_last_stolen = k;
1658 if ((spinner == NULL) || ((!final_spin) && (TCR_4(*spinner) == checker))) {
1659 KA_TRACE(15, (
"__kmp_execute_tasks(exit #5): T#%d spin condition satisfied\n",
1663 KMP_YIELD( __kmp_library == library_throughput );
1667 if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1668 KA_TRACE(20, (
"__kmp_execute_tasks: T#%d stolen task spawned other tasks, restart\n",
1683 if (! *thread_finished) {
1684 kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1685 KA_TRACE(20, (
"__kmp_execute_tasks(dec #3): T#%d dec unfinished_threads to %d; "
1687 gtid, count, task_team) );
1688 *thread_finished = TRUE;
1697 if ((spinner != NULL) && (TCR_4(*spinner) == checker)) {
1698 KA_TRACE(15, (
"__kmp_execute_tasks(exit #6): T#%d spin condition satisfied\n",
// Nothing left in the own deque and no steal succeeded.
1705 KA_TRACE(15, (
"__kmp_execute_tasks(exit #7): T#%d can't find work\n", gtid) );
// __kmp_enable_tasking: make sure the task team's per-thread data array is set up
// and, when this call is the one that set it up, wake team threads that are asleep.
//
// task_team: task team to prepare (asserted non-NULL; tt_nproc must equal the
//            team's t_nproc)
// this_thr:  calling thread; its th_team supplies the team
1716 __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
1718 kmp_team_t *team = this_thr->th.th_team;
1719 kmp_thread_data_t *threads_data;
1720 int nthreads, i, is_init_thread;
1722 KA_TRACE( 10, (
"__kmp_enable_tasking(enter): T#%d\n",
1723 __kmp_gtid_from_thread( this_thr ) ) );
1725 KMP_DEBUG_ASSERT(task_team != NULL);
1726 KMP_DEBUG_ASSERT(team != NULL);
1728 nthreads = task_team->tt.tt_nproc;
1729 KMP_DEBUG_ASSERT(nthreads > 0);
1730 KMP_DEBUG_ASSERT(nthreads == team->t.t_nproc);
// Allocate or grow the array; the result tells whether this call did the setup.
1733 is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
// Some other call already did the setup: only a trace is visible here
// (presumably followed by an early return -- not shown in this excerpt).
1735 if (!is_init_thread) {
1737 KA_TRACE( 20, (
"__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
1738 __kmp_gtid_from_thread( this_thr ) ) );
1741 threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1742 KMP_DEBUG_ASSERT( threads_data != NULL );
// Waking is attempted only in task-team mode with a finite blocktime.
1744 if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1745 ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
1750 for (i = 0; i < nthreads; i++) {
1751 volatile kmp_uint *sleep_loc;
1752 kmp_info_t *thread = threads_data[i].td.td_thr;
// Entry for the calling thread itself (branch body not visible -- presumably skipped).
1754 if (i == this_thr->th.th_info.ds.ds_tid) {
// A non-NULL th_sleep_loc is treated as "this thread is sleeping": resume it on that location.
1764 if ( ( sleep_loc = (
volatile kmp_uint *)
1765 TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
1767 KF_TRACE( 50, (
"__kmp_enable_tasking: T#%d waking up thread T#%d\n",
1768 __kmp_gtid_from_thread( this_thr ),
1769 __kmp_gtid_from_thread( thread ) ) );
1770 __kmp_resume( __kmp_gtid_from_thread( thread ), sleep_loc );
1773 KF_TRACE( 50, (
"__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
1774 __kmp_gtid_from_thread( this_thr ),
1775 __kmp_gtid_from_thread( thread ) ) );
1780 KA_TRACE( 10, (
"__kmp_enable_tasking(exit): T#%d\n",
1781 __kmp_gtid_from_thread( this_thr ) ) );
// Head of a singly linked list (chained through tt.tt_next) of task team structures
// that have been released and can be handed out again by __kmp_allocate_task_team.
1820 static kmp_task_team_t *__kmp_free_task_teams = NULL;
// Bootstrap lock held around every push to and pop from the list above.
1822 static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
// __kmp_alloc_task_deque: initialise the deque lock of one per-thread entry and
// allocate its array of TASK_DEQUE_SIZE task pointers.
//
// thread:      used here only to print the caller's gtid in the trace message
// thread_data: entry to set up; asserted to have no deque yet and ntasks/head/tail
//              all equal to zero
1834 __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
1836 __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
1837 KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
// -1 is the "no previous victim" value tested by __kmp_execute_tasks.
1840 thread_data -> td.td_deque_last_stolen = -1;
1842 KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
1843 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
1844 KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
1846 KE_TRACE( 10, (
"__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
1847 __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
1851 thread_data -> td.td_deque = (kmp_taskdata_t **)
1852 __kmp_allocate( TASK_DEQUE_SIZE *
sizeof(kmp_taskdata_t *));
// __kmp_free_task_deque: release the task pointer array of one per-thread entry.
// Under the entry's deque lock, a non-NULL deque has its task count reset to 0,
// is freed, and the pointer is cleared.
//
// thread_data: entry whose deque is released
1862 __kmp_free_task_deque( kmp_thread_data_t *thread_data )
1864 __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1866 if ( thread_data -> td.td_deque != NULL ) {
1867 TCW_4(thread_data -> td.td_deque_ntasks, 0);
1868 __kmp_free( thread_data -> td.td_deque );
1869 thread_data -> td.td_deque = NULL;
1871 __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
// Only built with BUILD_TIED_TASK_STACK: also release a non-empty tied-task stack.
// NOTE(review): `gtid` is not a parameter and no declaration of it is visible in
// this function -- confirm this section compiles when the macro is defined.
1873 #ifdef BUILD_TIED_TASK_STACK
1875 if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
1876 __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
1878 #endif // BUILD_TIED_TASK_STACK
// __kmp_realloc_task_threads_data: make sure task_team->tt.tt_threads_data has one
// kmp_thread_data_t entry per thread of the team, allocating or growing it as needed.
//
// thread:    calling thread; its th_team supplies the t_threads[] array used to fill
//            td_thr in every entry
// task_team: task team whose array is set up
//
// Returns is_init_thread: TRUE when this call found tt_found_tasks still clear while
// holding tt_threads_lock and therefore performed the setup, FALSE otherwise.
// tt_found_tasks is tested once without the lock and again with it held.
// (The body of the unlocked test is not visible in this excerpt -- presumably an
// early return of FALSE; confirm in the full file.)
1892 __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
1894 kmp_thread_data_t ** threads_data_p;
1895 kmp_int32 nthreads, maxthreads;
1896 int is_init_thread = FALSE;
1898 if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
1903 threads_data_p = & task_team -> tt.tt_threads_data;
1904 nthreads = task_team -> tt.tt_nproc;
1905 maxthreads = task_team -> tt.tt_max_threads;
1909 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
1911 if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
1913 kmp_team_t *team = thread -> th.th_team;
1916 is_init_thread = TRUE;
// The array currently has room for maxthreads entries; grow it only when the team is larger.
1917 if ( maxthreads < nthreads ) {
1919 if ( *threads_data_p != NULL ) {
1920 kmp_thread_data_t *old_data = *threads_data_p;
1921 kmp_thread_data_t *new_data = NULL;
1923 KE_TRACE( 10, (
"__kmp_realloc_task_threads_data: T#%d reallocating "
1924 "threads data for task_team %p, new_size = %d, old_size = %d\n",
1925 __kmp_gtid_from_thread( thread ), task_team,
1926 nthreads, maxthreads ) );
1931 new_data = (kmp_thread_data_t *)
1932 __kmp_allocate( nthreads *
sizeof(kmp_thread_data_t) );
// Carry over all maxthreads existing entries.  The byte count must be based on the
// element type of the array (kmp_thread_data_t); sizing it by a pointer type would
// copy only the first few bytes of the old array and drop the remaining entries' state.
1934 memcpy( (
void *) new_data, (
void *) old_data,
1935 maxthreads *
sizeof(kmp_thread_data_t) );
// NOTE(review): when BUILD_TIED_TASK_STACK is defined, the loop below indexes
// *threads_data_p at [maxthreads, nthreads) before new_data is installed, i.e. it
// still addresses old_data -- confirm whether new_data[i] was intended.
1937 #ifdef BUILD_TIED_TASK_STACK
1939 for (i = maxthreads; i < nthreads; i++) {
1940 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
1941 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
1943 #endif // BUILD_TIED_TASK_STACK
// Publish the larger array, then release the old one.
1945 (*threads_data_p) = new_data;
1946 __kmp_free( old_data );
// No array yet: allocate a fresh one.
1949 KE_TRACE( 10, (
"__kmp_realloc_task_threads_data: T#%d allocating "
1950 "threads data for task_team %p, size = %d\n",
1951 __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
1955 *threads_data_p = (kmp_thread_data_t *)
1956 __kmp_allocate( nthreads *
sizeof(kmp_thread_data_t) );
1957 #ifdef BUILD_TIED_TASK_STACK
1959 for (i = 0; i < nthreads; i++) {
1960 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
1961 __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
1963 #endif // BUILD_TIED_TASK_STACK
1965 task_team -> tt.tt_max_threads = nthreads;
1969 KMP_DEBUG_ASSERT( *threads_data_p != NULL );
// Bind every entry to the matching team thread and discard a remembered steal
// victim whose index no longer exists in a team of this size.
1973 for (i = 0; i < nthreads; i++) {
1974 kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
1975 thread_data -> td.td_thr = team -> t.t_threads[i];
1977 if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
1981 thread_data -> td.td_deque_last_stolen = -1;
// Written after the array is complete: this is the flag the unlocked test at the
// top of the function reads.
1986 TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
1989 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
1990 return is_init_thread;
// __kmp_free_task_threads_data: under tt_threads_lock, free the deque of each of the
// tt_max_threads entries in the task team's per-thread array, then free the array
// itself and clear the pointer.  Does nothing when the array pointer is already NULL.
//
// task_team: task team whose per-thread array is released
// (The declaration of `i` is not visible in this excerpt.)
2000 __kmp_free_task_threads_data( kmp_task_team_t *task_team )
2002 __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2003 if ( task_team -> tt.tt_threads_data != NULL ) {
2005 for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2006 __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2008 __kmp_free( task_team -> tt.tt_threads_data );
2009 task_team -> tt.tt_threads_data = NULL;
2011 __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
// __kmp_allocate_task_team: obtain a task team structure for `team`, reusing one
// from the __kmp_free_task_teams list when available and allocating a new one
// otherwise, then (re)initialise its bookkeeping fields.
//
// thread: calling thread; used only for trace output.  The enter and exit traces
//         accept thread == NULL, so every trace in this function guards for it.
// team:   team the task team is created for; t_nproc sizes the counters
//
// Returns a pointer to the task team (the return statement itself and the
// declaration of `nthreads` are not visible in this excerpt).
2020 static kmp_task_team_t *
2021 __kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2023 kmp_task_team_t *task_team = NULL;
2026 KA_TRACE( 20, (
"__kmp_allocate_task_team: T#%d entering; team = %p\n",
2027 (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
// Unlocked peek at the free list, repeated under the lock before popping the head.
2029 if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2031 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2032 if (__kmp_free_task_teams != NULL) {
2033 task_team = __kmp_free_task_teams;
2034 TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2035 task_team -> tt.tt_next = NULL;
2037 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
// Nothing to reuse: allocate a fresh structure and initialise its threads lock.
2040 if (task_team == NULL) {
// Same NULL guard on `thread` as the enter/exit traces, so a NULL caller cannot be
// dereferenced by the trace arguments.
2041 KE_TRACE( 10, (
"__kmp_allocate_task_team: T#%d allocating "
2042 "task team for team %p\n",
2043 (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2047 task_team = (kmp_task_team_t *) __kmp_allocate(
sizeof(kmp_task_team_t) );
2048 __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
// Per-use state: no tasks found yet, all nthreads threads unfinished, team active,
// and a reference count of nthreads - 1.
2054 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2055 task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2057 task_team -> tt.tt_state = 0;
2058 TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2059 TCW_4( task_team -> tt.tt_active, TRUE );
2060 TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
2062 KA_TRACE( 20, (
"__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
2063 (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
// __kmp_free_task_team: push a task team onto the __kmp_free_task_teams list so it
// can be reused.  The structure is not deallocated here.
//
// thread:    calling thread, used only for trace output (may be NULL)
// task_team: task team to recycle; asserted to have tt_ref_ct == 0 and tt_next == NULL
2075 __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2077 KA_TRACE( 20, (
"__kmp_free_task_team: T#%d task_team = %p\n",
2078 thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2080 KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );
2083 __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2085 KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2086 task_team -> tt.tt_next = __kmp_free_task_teams;
// Clear the flag that __kmp_realloc_task_threads_data tests before doing its setup.
2087 TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2088 TCW_PTR(__kmp_free_task_teams, task_team);
2090 __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
// __kmp_reap_task_teams: deallocate every task team on the __kmp_free_task_teams
// list.  Under __kmp_task_team_lock each entry is unlinked, its per-thread data (if
// any) is released, and the structure itself is freed.
2101 __kmp_reap_task_teams(
void )
2103 kmp_task_team_t *task_team;
// Unlocked peek first; the list is only walked while holding the lock.
2105 if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2107 __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2108 while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2109 __kmp_free_task_teams = task_team -> tt.tt_next;
2110 task_team -> tt.tt_next = NULL;
2113 if ( task_team -> tt.tt_threads_data != NULL ) {
2114 __kmp_free_task_threads_data( task_team );
2116 __kmp_free( task_team );
2118 __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
// __kmp_unref_task_team: drop one reference to a task team on behalf of `thread`.
// The reference count is decremented with KMP_TEST_THEN_DEC32; when the new value is
// 0 the task team is handed to __kmp_free_task_team.  The thread's th_task_team
// pointer is then set to NULL.
//
// task_team: task team being dereferenced
// thread:    thread giving up its reference
// (The declaration of `ref_ct` is not visible in this excerpt.)
2130 __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
// KMP_TEST_THEN_DEC32 yields the value before the decrement, hence the "- 1".
2134 ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;
2136 KA_TRACE( 20, (
"__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
2137 __kmp_gtid_from_thread( thread ), task_team, ref_ct ) );
2140 if ( ref_ct == 0 ) {
2141 __kmp_free_task_team( thread, task_team );
2144 TCW_PTR( *((
volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
// __kmp_wait_to_unref_task_teams: walk the threads on __kmp_thread_pool and deal
// with any that still hold a task team pointer: dead threads are unreferenced
// directly, live ones are (with a finite blocktime) woken if they have a sleep
// location, and the caller yields between attempts.
// (The outer retry loop, the loop condition and the declarations of `thread`,
// `spins` and `exit_val` are not visible in this excerpt.)
2154 __kmp_wait_to_unref_task_teams(
void)
2160 KMP_INIT_YIELD( spins );
2170 for (thread = (kmp_info_t *)__kmp_thread_pool;
2172 thread = thread->th.th_next_pool)
2174 volatile kmp_uint *sleep_loc;
// This pool thread has already let go of its task team.
2178 if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2179 KA_TRACE( 10, (
"__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2180 __kmp_gtid_from_thread( thread ) ) );
// The thread is no longer alive, so drop its reference on its behalf.
2185 if (!__kmp_is_thread_alive(thread, &exit_val)) {
2186 if (TCR_PTR(thread->th.th_task_team) != NULL) {
2187 __kmp_unref_task_team( thread->th.th_task_team, thread );
2195 KA_TRACE( 10, (
"__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2196 __kmp_gtid_from_thread( thread ) ) );
// With a finite blocktime the thread may be asleep; resume it on its sleep location.
2198 if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
2200 if ( ( sleep_loc = (
volatile kmp_uint *) TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
// NOTE(review): both %d arguments below are the gtid of the thread being woken;
// the first looks like it was meant to identify the calling thread -- confirm.
2201 KA_TRACE( 10, (
"__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2202 __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
2203 __kmp_resume( __kmp_gtid_from_thread( thread ), sleep_loc );
// Yield when there are more threads than available processors, then apply the spin-count based yield.
2214 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2215 KMP_YIELD_SPIN( spins );
// __kmp_task_team_setup: give `team` a task team if it has none and has more than
// one thread, then set that task team's tt_state to the opposite of the calling
// thread's th_task_state.
//
// this_thr: calling thread (reported as "Master" in the trace)
// team:     team being prepared
2228 __kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team )
2230 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2232 if ( ( team->t.t_task_team == NULL ) && ( team->t.t_nproc > 1 ) ) {
2240 team->t.t_task_team = __kmp_allocate_task_team( this_thr, team );
// NOTE(review): `team` was already dereferenced above, so the NULL test inside the
// trace arguments can never select -1.
2241 KA_TRACE( 20, (
"__kmp_task_team_setup: Master T#%d created new "
2242 "task_team %p for team %d\n",
2243 __kmp_gtid_from_thread( this_thr ), team->t.t_task_team,
2244 ((team != NULL) ? team->t.t_id : -1)) );
// __kmp_task_team_sync flips th_task_state the same way (1 - state) and asserts
// that it then equals this tt_state.
2252 if ( team->t.t_task_team != NULL ) {
2255 team->t.t_task_team->tt.tt_state = 1 - this_thr->th.th_task_state;
// __kmp_task_team_sync: bring the calling thread's task team pointer in line with
// the team's current task team.
//
// If the thread still holds a task team that is no longer active, its reference is
// dropped via __kmp_unref_task_team.  The thread's th_task_team is then set to
// team->t.t_task_team and, when that is non-NULL, th_task_state is flipped
// (1 - state) and asserted equal to the task team's tt_state.
//
// this_thr: thread being synchronised
// team:     team whose t_task_team is adopted
// (Some branch lines between the visible statements are missing from this excerpt.)
2266 __kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2268 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2272 if ( this_thr->th.th_task_team != NULL ) {
// A deactivated task team is only expected on non-master threads here.
2273 if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
2274 KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
2275 __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
2280 KMP_DEBUG_ASSERT( this_thr->th.th_task_team == team->t.t_task_team );
2288 TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team);
2289 if ( this_thr->th.th_task_team != NULL ) {
2296 this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
2297 KMP_DEBUG_ASSERT( this_thr->th.th_task_state == TCR_4(team->t.t_task_team->tt.tt_state) );
// The format has three conversions (%d, %p, %d), so exactly three arguments are
// passed: the thread's gtid, the task team pointer it was just assigned, and the
// team id.  An extra pointer argument would be consumed by the final %d.
2299 KA_TRACE( 20, (
"__kmp_task_team_sync: Thread T#%d task team assigned pointer (%p) from Team #%d task team\n",
2300 __kmp_gtid_from_thread( this_thr ),
2301 this_thr->th.th_task_team, ((team != NULL) ? (team->t.t_id) : -1) ) );
// __kmp_task_team_wait: called by the thread reported as "Master" in the traces.
// When the team has a task team with tasking enabled, wait via __kmp_wait_sleep on
// tt_unfinished_threads with a check value of 0, mark the task team inactive, and
// detach it from both the calling thread and the team.
//
// this_thr:     calling thread
// itt_sync_obj: ITT synchronisation object (its use is on lines not visible here)
// `team` is used in the body but its parameter line is not visible in this excerpt.
2311 __kmp_task_team_wait( kmp_info_t *this_thr,
2314 ,
void * itt_sync_obj
2318 kmp_task_team_t *task_team = team->t.t_task_team;
2320 KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2321 KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2323 if ( ( task_team != NULL ) && KMP_TASKING_ENABLED( task_team, this_thr->th.th_task_state ) ) {
2324 KA_TRACE( 20, (
"__kmp_task_team_wait: Master T#%d waiting for all tasks: task_team = %p\n",
2325 __kmp_gtid_from_thread( this_thr ), task_team ) );
// The remaining arguments of this call are on lines missing from the excerpt.
2333 __kmp_wait_sleep( this_thr, &task_team->tt.tt_unfinished_threads, 0, TRUE
2345 KA_TRACE( 20, (
"__kmp_task_team_wait: Master T#%d deactivating task_team %p\n",
2346 __kmp_gtid_from_thread( this_thr ), task_team ) );
2347 KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
// tt_active is the flag __kmp_task_team_sync reads to decide whether a thread
// should drop its reference.
2348 TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2351 TCW_PTR(this_thr->th.th_task_team, NULL);
2352 team->t.t_task_team = NULL;
// __kmp_tasking_barrier: used only in tskm_extra_barrier mode (asserted).  Keeps
// calling __kmp_execute_tasks, spinning on the team's tt_unfinished_threads counter
// with a check value of 0, until that call yields a non-zero result.  While looping
// it aborts the thread if the runtime is shutting down with g_abort set.
//
// team:   team whose task team counter is waited on
// thread: calling thread
// gtid:   global thread id of the calling thread
// (The declaration of `flag` and the rest of the loop body are not visible here.)
2366 __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread,
int gtid )
2368 volatile kmp_uint32 *spin = &team->t.t_task_team->tt.tt_unfinished_threads;
2370 KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2373 KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
// NOTE(review): seven arguments are passed here; the visible definition of
// __kmp_execute_tasks ends with itt_sync_obj followed by is_constrained -- confirm
// the call matches the signature in every build configuration.
2375 while (! __kmp_execute_tasks( thread, gtid, spin, 0, TRUE, &flag, NULL ) ) {
2378 KMP_FSYNC_SPIN_PREPARE( spin );
2381 if( TCR_4(__kmp_global.g.g_done) ) {
2382 if( __kmp_global.g.g_abort )
2383 __kmp_abort_thread( );
2389 KMP_FSYNC_SPIN_ACQUIRED( (
void*) spin );
2393 #endif // OMP_30_ENABLED