Intel® OpenMP* Runtime Library
 All Classes Functions Variables Typedefs Enumerations Enumerator Groups Pages
kmp_tasking.c
1 /*
2  * kmp_tasking.c -- OpenMP 3.0 tasking support.
3  * $Revision: 42489 $
4  * $Date: 2013-07-08 11:00:09 -0500 (Mon, 08 Jul 2013) $
5  */
6 
7 /* <copyright>
8  Copyright (c) 1997-2013 Intel Corporation. All Rights Reserved.
9 
10  Redistribution and use in source and binary forms, with or without
11  modification, are permitted provided that the following conditions
12  are met:
13 
14  * Redistributions of source code must retain the above copyright
15  notice, this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright
17  notice, this list of conditions and the following disclaimer in the
18  documentation and/or other materials provided with the distribution.
19  * Neither the name of Intel Corporation nor the names of its
20  contributors may be used to endorse or promote products derived
21  from this software without specific prior written permission.
22 
23  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
29  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
30  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
33  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 
35 </copyright> */
36 
37 #include "kmp.h"
38 #include "kmp_i18n.h"
39 #include "kmp_itt.h"
40 
41 
42 #if OMP_30_ENABLED
43 
44 /* ------------------------------------------------------------------------ */
45 /* ------------------------------------------------------------------------ */
46 
47 
48 /* forward declaration */
49 static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr );
50 static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data );
51 static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team );
52 
#ifdef KMP_DEBUG

/*
 * __kmp_static_delay: debug-build sanity check on the delay argument.
 *
 * arg: value to validate.
 *
 * The function only asserts on arg; it performs no waiting itself.  In
 * non-debug builds the name is a macro that expands to nothing.
 */
static void
__kmp_static_delay(int arg)
{
    /* Work around weird code-gen bug that causes assert to trip */
#if KMP_ARCH_X86_64 && KMP_OS_LINUX
    KMP_ASSERT(arg != 0);
#else
    KMP_ASSERT(arg >= 0);
#endif
}

#else /* ! KMP_DEBUG */

# define __kmp_static_delay( arg ) /* nothing to do */

#endif /* KMP_DEBUG */
68 
/*
 * __kmp_static_yield: file-local wrapper that forwards its argument,
 * unchanged, to __kmp_yield().
 */
static void __kmp_static_yield(int arg)
{
    __kmp_yield(arg);
}
74 
75 #ifdef BUILD_TIED_TASK_STACK
76 
77 //---------------------------------------------------------------------------
78 // __kmp_trace_task_stack: print the tied tasks from the task stack in order
79 // from top do bottom
80 //
81 // gtid: global thread identifier for thread containing stack
82 // thread_data: thread data for task team thread containing stack
83 // threshold: value above which the trace statement triggers
84 // location: string identifying call site of this function (for trace)
85 
86 static void
87 __kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location )
88 {
89  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
90  kmp_taskdata_t **stack_top = task_stack -> ts_top;
91  kmp_int32 entries = task_stack -> ts_entries;
92  kmp_taskdata_t *tied_task;
93 
94  KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, "
95  "first_block = %p, stack_top = %p \n",
96  location, gtid, entries, task_stack->ts_first_block, stack_top ) );
97 
98  KMP_DEBUG_ASSERT( stack_top != NULL );
99  KMP_DEBUG_ASSERT( entries > 0 );
100 
101  while ( entries != 0 )
102  {
103  KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] );
104  // fix up ts_top if we need to pop from previous block
105  if ( entries & TASK_STACK_INDEX_MASK == 0 )
106  {
107  kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ;
108 
109  stack_block = stack_block -> sb_prev;
110  stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
111  }
112 
113  // finish bookkeeping
114  stack_top--;
115  entries--;
116 
117  tied_task = * stack_top;
118 
119  KMP_DEBUG_ASSERT( tied_task != NULL );
120  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
121 
122  KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, "
123  "stack_top=%p, tied_task=%p\n",
124  location, gtid, entries, stack_top, tied_task ) );
125  }
126  KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] );
127 
128  KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n",
129  location, gtid ) );
130 }
131 
132 //---------------------------------------------------------------------------
133 // __kmp_init_task_stack: initialize the task stack for the first time
134 // after a thread_data structure is created.
135 // It should not be necessary to do this again (assuming the stack works).
136 //
137 // gtid: global thread identifier of calling thread
138 // thread_data: thread data for task team thread containing stack
139 
140 static void
141 __kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
142 {
143  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
144  kmp_stack_block_t *first_block;
145 
146  // set up the first block of the stack
147  first_block = & task_stack -> ts_first_block;
148  task_stack -> ts_top = (kmp_taskdata_t **) first_block;
149  memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *));
150 
151  // initialize the stack to be empty
152  task_stack -> ts_entries = TASK_STACK_EMPTY;
153  first_block -> sb_next = NULL;
154  first_block -> sb_prev = NULL;
155 }
156 
157 
158 //---------------------------------------------------------------------------
159 // __kmp_free_task_stack: free the task stack when thread_data is destroyed.
160 //
161 // gtid: global thread identifier for calling thread
162 // thread_data: thread info for thread containing stack
163 
164 static void
165 __kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data )
166 {
167  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks;
168  kmp_stack_block_t *stack_block = & task_stack -> ts_first_block;
169 
170  KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY );
171  // free from the second block of the stack
172  while ( stack_block != NULL ) {
173  kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL;
174 
175  stack_block -> sb_next = NULL;
176  stack_block -> sb_prev = NULL;
177  if (stack_block != & task_stack -> ts_first_block) {
178  __kmp_thread_free( thread, stack_block ); // free the block, if not the first
179  }
180  stack_block = next_block;
181  }
182  // initialize the stack to be empty
183  task_stack -> ts_entries = 0;
184  task_stack -> ts_top = NULL;
185 }
186 
187 
188 //---------------------------------------------------------------------------
189 // __kmp_push_task_stack: Push the tied task onto the task stack.
190 // Grow the stack if necessary by allocating another block.
191 //
192 // gtid: global thread identifier for calling thread
193 // thread: thread info for thread containing stack
194 // tied_task: the task to push on the stack
195 
196 static void
197 __kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task )
198 {
199  // GEH - need to consider what to do if tt_threads_data not allocated yet
200  kmp_thread_data_t *thread_data = & thread -> th.th_task_team ->
201  tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
202  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
203 
204  if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) {
205  return; // Don't push anything on stack if team or team tasks are serialized
206  }
207 
208  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
209  KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
210 
211  KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n",
212  gtid, thread, tied_task ) );
213  // Store entry
214  * (task_stack -> ts_top) = tied_task;
215 
216  // Do bookkeeping for next push
217  task_stack -> ts_top++;
218  task_stack -> ts_entries++;
219 
220  if ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK == 0 )
221  {
222  // Find beginning of this task block
223  kmp_stack_block_t *stack_block =
224  (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE);
225 
226  // Check if we already have a block
227  if ( stack_block -> sb_next != NULL )
228  { // reset ts_top to beginning of next block
229  task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0];
230  }
231  else
232  { // Alloc new block and link it up
233  kmp_stack_block_t *new_block = (kmp_stack_block_t *)
234  __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t));
235 
236  task_stack -> ts_top = & new_block -> sb_block[0];
237  stack_block -> sb_next = new_block;
238  new_block -> sb_prev = stack_block;
239  new_block -> sb_next = NULL;
240 
241  KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n",
242  gtid, tied_task, new_block ) );
243  }
244  }
245  KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
246 }
247 
248 //---------------------------------------------------------------------------
249 // __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return
250 // the task, just check to make sure it matches the ending task passed in.
251 //
252 // gtid: global thread identifier for the calling thread
253 // thread: thread info structure containing stack
254 // tied_task: the task popped off the stack
255 // ending_task: the task that is ending (should match popped task)
256 
257 static void
258 __kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task )
259 {
260  // GEH - need to consider what to do if tt_threads_data not allocated yet
261  kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
262  kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ;
263  kmp_taskdata_t *tied_task;
264 
265  if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) {
266  return; // Don't pop anything from stack if team or team tasks are serialized
267  }
268 
269  KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL );
270  KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 );
271 
272  KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) );
273 
274  // fix up ts_top if we need to pop from previous block
275  if ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK == 0 )
276  {
277  kmp_stack_block_t *stack_block =
278  (kmp_stack_block_t *) (task_stack -> ts_top) ;
279 
280  stack_block = stack_block -> sb_prev;
281  task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE];
282  }
283 
284  // finish bookkeeping
285  task_stack -> ts_top--;
286  task_stack -> ts_entries--;
287 
288  tied_task = * (task_stack -> ts_top );
289 
290  KMP_DEBUG_ASSERT( tied_task != NULL );
291  KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED );
292  KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly
293 
294  KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) );
295  return;
296 }
297 #endif /* BUILD_TIED_TASK_STACK */
298 
299 //---------------------------------------------------
300 // __kmp_push_task: Add a task to the thread's deque
301 
302 static kmp_int32
303 __kmp_push_task(kmp_int32 gtid, kmp_task_t * task )
304 {
305  kmp_info_t * thread = __kmp_threads[ gtid ];
306  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
307  kmp_task_team_t * task_team = thread->th.th_task_team;
308  kmp_int32 tid = __kmp_tid_from_gtid( gtid );
309  kmp_thread_data_t * thread_data;
310 
311  KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) );
312 
313  // The first check avoids building task_team thread data if serialized
314  if ( taskdata->td_flags.task_serial ) {
315  KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n",
316  gtid, taskdata ) );
317  return TASK_NOT_PUSHED;
318  }
319 
320  // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode
321  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
322  if ( ! KMP_TASKING_ENABLED( task_team, thread->th.th_task_state ) ) {
323  __kmp_enable_tasking( task_team, thread );
324  }
325  KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE );
326  KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL );
327 
328  // Find tasking deque specific to encountering thread
329  thread_data = & task_team -> tt.tt_threads_data[ tid ];
330 
331  // No lock needed since only owner can allocate
332  if (thread_data -> td.td_deque == NULL ) {
333  __kmp_alloc_task_deque( thread, thread_data );
334  }
335 
336  // Check if deque is full
337  if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE )
338  {
339  KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n",
340  gtid, taskdata ) );
341  return TASK_NOT_PUSHED;
342  }
343 
344  // Lock the deque for the task push operation
345  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
346 
347  // Must have room since no thread can add tasks but calling thread
348  KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE );
349 
350  thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata
351  // Wrap index.
352  thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK;
353  TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count
354 
355  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
356 
357  KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: "
358  "task=%p ntasks=%d head=%u tail=%u\n",
359  gtid, taskdata, thread_data->td.td_deque_ntasks,
360  thread_data->td.td_deque_tail, thread_data->td.td_deque_head) );
361 
362  return TASK_SUCCESSFULLY_PUSHED;
363 }
364 
365 
366 //-----------------------------------------------------------------------------------------
367 // __kmp_pop_current_task_from_thread: set up current task from called thread when team ends
368 // this_thr: thread structure to set current_task in.
369 
370 void
371 __kmp_pop_current_task_from_thread( kmp_info_t *this_thr )
372 {
373  KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, "
374  "curtask_parent=%p\n",
375  0, this_thr, this_thr -> th.th_current_task,
376  this_thr -> th.th_current_task -> td_parent ) );
377 
378  this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent;
379 
380  KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, "
381  "curtask_parent=%p\n",
382  0, this_thr, this_thr -> th.th_current_task,
383  this_thr -> th.th_current_task -> td_parent ) );
384 }
385 
386 
387 //---------------------------------------------------------------------------------------
388 // __kmp_push_current_task_to_thread: set up current task in called thread for a new team
389 // this_thr: thread structure to set up
390 // team: team for implicit task data
391 // tid: thread within team to set up
392 
393 void
394 __kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid )
395 {
396  // current task of the thread is a parent of the new just created implicit tasks of new team
397  KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p "
398  "parent_task=%p\n",
399  tid, this_thr, this_thr->th.th_current_task,
400  team->t.t_implicit_task_taskdata[tid].td_parent ) );
401 
402  KMP_DEBUG_ASSERT (this_thr != NULL);
403 
404  if( tid == 0 ) {
405  if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) {
406  team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task;
407  this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ];
408  }
409  } else {
410  team -> t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent;
411  this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ];
412  }
413 
414  KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p "
415  "parent_task=%p\n",
416  tid, this_thr, this_thr->th.th_current_task,
417  team->t.t_implicit_task_taskdata[tid].td_parent ) );
418 }
419 
420 
421 //----------------------------------------------------------------------
422 // __kmp_task_start: bookkeeping for a task starting execution
423 // GTID: global thread id of calling thread
424 // task: task starting execution
425 // current_task: task suspending
426 
427 static void
428 __kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task )
429 {
430  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
431  kmp_info_t * thread = __kmp_threads[ gtid ];
432 
433  KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n",
434  gtid, taskdata, current_task) );
435 
436  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
437 
438  // mark currently executing task as suspended
439  // TODO: GEH - make sure root team implicit task is initialized properly.
440  // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 );
441  current_task -> td_flags.executing = 0;
442 
443  // Add task to stack if tied
444 #ifdef BUILD_TIED_TASK_STACK
445  if ( taskdata -> td_flags.tiedness == TASK_TIED )
446  {
447  __kmp_push_task_stack( gtid, thread, taskdata );
448  }
449 #endif /* BUILD_TIED_TASK_STACK */
450 
451  // mark starting task as executing and as current task
452  thread -> th.th_current_task = taskdata;
453 
454  KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 0 );
455  KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 0 );
456  taskdata -> td_flags.started = 1;
457  taskdata -> td_flags.executing = 1;
458  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
459  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
460 
461  // GEH TODO: shouldn't we pass some sort of location identifier here?
462  // APT: yes, we will pass location here.
463  // need to store current thread state (in a thread or taskdata structure)
464  // before setting work_state, otherwise wrong state is set after end of task
465 
466  KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n",
467  gtid, taskdata ) );
468 
469  return;
470 }
471 
472 
473 //----------------------------------------------------------------------
474 // __kmpc_omp_task_begin_if0: report that a given serialized task has started execution
475 // loc_ref: source location information; points to beginning of task block.
476 // gtid: global thread number.
477 // task: task thunk for the started task.
478 
479 void
480 __kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
481 {
482  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
483  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
484 
485  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n",
486  gtid, loc_ref, taskdata, current_task ) );
487 
488  taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred.
489  __kmp_task_start( gtid, task, current_task );
490 
491  KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n",
492  gtid, loc_ref, taskdata ) );
493 
494  return;
495 }
496 
#ifdef TASK_UNUSED
//----------------------------------------------------------------------
// __kmpc_omp_task_begin: report that a given task has started execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!
//
// loc_ref: source location information (only traced)
// gtid:    global thread number
// task:    task thunk for the started task

void
__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task )
{
    // The task being suspended is whatever the thread is running right now.
    kmp_taskdata_t *suspended = __kmp_threads[ gtid ]->th.th_current_task;

    KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), suspended ) );

    __kmp_task_start( gtid, task, suspended );

    KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
}
#endif // TASK_UNUSED
518 
519 
520 //-------------------------------------------------------------------------------------
521 // __kmp_free_task: free the current task space and the space for shareds
522 // gtid: Global thread ID of calling thread
523 // taskdata: task to free
524 // thread: thread data structure of caller
525 
526 static void
527 __kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
528 {
529  KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n",
530  gtid, taskdata) );
531 
532  // Check to make sure all flags and counters have the correct values
533  KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT );
534  KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 );
535  KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 );
536  KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 );
537  KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1);
538  KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 );
539 
540  taskdata->td_flags.freed = 1;
541  // deallocate the taskdata and shared variable blocks associated with this task
542  #if USE_FAST_MEMORY
543  __kmp_fast_free( thread, taskdata );
544  #else /* ! USE_FAST_MEMORY */
545  __kmp_thread_free( thread, taskdata );
546  #endif
547 
548  KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n",
549  gtid, taskdata) );
550 }
551 
552 //-------------------------------------------------------------------------------------
553 // __kmp_free_task_and_ancestors: free the current task and ancestors without children
554 //
555 // gtid: Global thread ID of calling thread
556 // taskdata: task to free
557 // thread: thread data structure of caller
558 
559 static void
560 __kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread )
561 {
562  kmp_int32 children = 0;
563  kmp_int32 team_or_tasking_serialized = taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser;
564 
565  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
566 
567  if ( !team_or_tasking_serialized ) {
568  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
569  KMP_DEBUG_ASSERT( children >= 0 );
570  }
571 
572  // Now, go up the ancestor tree to see if any ancestors can now be freed.
573  while ( children == 0 )
574  {
575  kmp_taskdata_t * parent_taskdata = taskdata -> td_parent;
576 
577  KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete "
578  "and freeing itself\n", gtid, taskdata) );
579 
580  // --- Deallocate my ancestor task ---
581  __kmp_free_task( gtid, taskdata, thread );
582 
583  taskdata = parent_taskdata;
584 
585  // Stop checking ancestors at implicit task or if tasking serialized
586  // instead of walking up ancestor tree to avoid premature deallocation of ancestors.
587  if ( team_or_tasking_serialized || taskdata -> td_flags.tasktype == TASK_IMPLICIT )
588  return;
589 
590  if ( !team_or_tasking_serialized ) {
591  // Predecrement simulated by "- 1" calculation
592  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1;
593  KMP_DEBUG_ASSERT( children >= 0 );
594  }
595  }
596 
597  KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; "
598  "not freeing it yet\n", gtid, taskdata, children) );
599 }
600 
601 //---------------------------------------------------------------------
602 // __kmp_task_finish: bookkeeping to do when a task finishes execution
603 // gtid: global thread ID for calling thread
604 // task: task to be finished
605 // resumed_task: task to be resumed. (may be NULL if task is serialized)
606 
607 static void
608 __kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task )
609 {
610  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
611  kmp_info_t * thread = __kmp_threads[ gtid ];
612  kmp_int32 children = 0;
613 
614  KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n",
615  gtid, taskdata, resumed_task) );
616 
617  KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT );
618 
619  // Pop task from stack if tied
620 #ifdef BUILD_TIED_TASK_STACK
621  if ( taskdata -> td_flags.tiedness == TASK_TIED )
622  {
623  __kmp_pop_task_stack( gtid, thread, taskdata );
624  }
625 #endif /* BUILD_TIED_TASK_STACK */
626 
627  KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 );
628  KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 );
629  taskdata -> td_flags.executing = 0; // suspend the finishing task
630  taskdata -> td_flags.complete = 1; // mark the task as completed
631  KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 );
632  KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 );
633 
634  // Only need to keep track of count if team parallel and tasking not serialized
635  if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
636  // Predecrement simulated by "- 1" calculation
637  children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1;
638  KMP_DEBUG_ASSERT( children >= 0 );
639 #if OMP_40_ENABLED
640  if ( taskdata->td_taskgroup )
641  KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) );
642 #endif
643  }
644 
645  KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n",
646  gtid, taskdata, children) );
647 
648  // bookkeeping for resuming task:
649  // GEH - note tasking_ser => task_serial
650  KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) ==
651  taskdata->td_flags.task_serial);
652  if ( taskdata->td_flags.task_serial )
653  {
654  if (resumed_task == NULL) {
655  resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent
656  }
657  else {
658  // verify resumed task passed in points to parent
659  KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent );
660  }
661  }
662  else {
663  KMP_DEBUG_ASSERT( resumed_task != NULL ); // verify that resumed task is passed as arguemnt
664  }
665 
666  // Free this task and then ancestor tasks if they have no children.
667  __kmp_free_task_and_ancestors(gtid, taskdata, thread);
668 
669  __kmp_threads[ gtid ] -> th.th_current_task = resumed_task; // restore current_task
670 
671  // TODO: GEH - make sure root team implicit task is initialized properly.
672  // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 );
673  resumed_task->td_flags.executing = 1; // resume previous task
674 
675  KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n",
676  gtid, taskdata, resumed_task) );
677 
678  return;
679 }
680 
681 //---------------------------------------------------------------------
682 // __kmpc_omp_task_complete_if0: report that a task has completed execution
683 // loc_ref: source location information; points to end of task block.
684 // gtid: global thread number.
685 // task: task thunk for the completed task.
686 
687 void
688 __kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
689 {
690  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n",
691  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
692 
693  __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume
694 
695  KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n",
696  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
697 
698  return;
699 }
700 
#ifdef TASK_UNUSED
//---------------------------------------------------------------------
// __kmpc_omp_task_complete: report that a task has completed execution
// NEVER GENERATED BY COMPILER, DEPRECATED!!!
//
// loc_ref: source location information (only traced)
// gtid:    global thread number
// task:    task thunk for the completed task

void
__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task )
{
    KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );

    // NULL: Not sure how to find task to resume
    __kmp_task_finish( gtid, task, NULL );

    KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n",
                  gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) );
}
#endif // TASK_UNUSED
719 
720 
721 //----------------------------------------------------------------------------------------------------
722 // __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread
723 //
724 // loc_ref: reference to source location of parallel region
725 // this_thr: thread data structure corresponding to implicit task
726 // team: team for this_thr
727 // tid: thread id of given thread within team
728 // set_curr_task: TRUE if need to push current task to thread
729 // NOTE: Routine does not set up the implicit task ICVS. This is assumed to have already been done elsewhere.
730 // TODO: Get better loc_ref. Value passed in may be NULL
731 
732 void
733 __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task )
734 {
735  kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ];
736 
737  KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n",
738  tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) );
739 
740  task->td_task_id = KMP_GEN_TASK_ID();
741  task->td_team = team;
742 // task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger)
743  task->td_ident = loc_ref;
744  task->td_taskwait_ident = NULL;
745  task->td_taskwait_counter = 0;
746  task->td_taskwait_thread = 0;
747 
748  task->td_flags.tiedness = TASK_TIED;
749  task->td_flags.tasktype = TASK_IMPLICIT;
750  // All implicit tasks are executed immediately, not deferred
751  task->td_flags.task_serial = 1;
752  task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
753  task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
754 
755  task->td_flags.started = 1;
756  task->td_flags.executing = 1;
757  task->td_flags.complete = 0;
758  task->td_flags.freed = 0;
759 
760  if (set_curr_task) { // only do this initialization the first time a thread is created
761  task->td_incomplete_child_tasks = 0;
762  task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task
763 #if OMP_40_ENABLED
764  task->td_taskgroup = NULL; // An implicit task does not have taskgroup
765 #endif
766  __kmp_push_current_task_to_thread( this_thr, team, tid );
767  } else {
768  KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0);
769  KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0);
770  }
771 
772  KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n",
773  tid, team, task ) );
774 }
775 
// Align `size` upward to the next multiple of `val`.
// `val` is expected to be a power of two: alignment is tested and applied by masking
// with (val - 1).  An already aligned size is returned unchanged.  If adding the
// padding would exceed KMP_SIZE_T_MAX, the size truncated down to the previous
// multiple of `val` is returned instead.
// Used to insert padding between structures co-allocated using a single malloc() call.
static size_t
__kmp_round_up_to_val( size_t size, size_t val )
{
    const size_t low_bits = val - 1;
    size_t       aligned;

    if ( ( size & low_bits ) == 0 ) {
        return size;                    // Already a multiple of val.
    }

    aligned = size & ~ low_bits;        // Drop the remainder ...
    return ( aligned <= KMP_SIZE_T_MAX - val )
        ? aligned + val                 // ... and step up when that cannot overflow.
        : aligned;
} // __kmp_round_up_to_val
788 
789 
790 //---------------------------------------------------------------------------------
791 // __kmp_task_alloc: Allocate the taskdata and task data structures for a task
792 //
793 // loc_ref: source location information
794 // gtid: global thread number.
795 // flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered.
796 // Converted from kmp_int32 to kmp_tasking_flags_t in routine.
797 // sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task.
798 // sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task.
799 // task_entry: Pointer to task code entry point generated by compiler.
800 // returns: a pointer to the allocated kmp_task_t structure (task).
801 
802 kmp_task_t *
803 __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags,
804  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
805  kmp_routine_entry_t task_entry )
806 {
807  kmp_task_t *task;
808  kmp_taskdata_t *taskdata;
809  kmp_info_t *thread = __kmp_threads[ gtid ];
810  kmp_team_t *team = thread->th.th_team;
811  kmp_taskdata_t *parent_task = thread->th.th_current_task;
812  size_t shareds_offset;
813 
814  KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) "
815  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
816  gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t,
817  sizeof_shareds, task_entry) );
818 
819  if ( parent_task->td_flags.final ) {
820  if (flags->merged_if0) {
821  }
822  flags->final = 1;
823  }
824 
825  // Calculate shared structure offset including padding after kmp_task_t struct
826  // to align pointers in shared struct
827  shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t;
828  shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * ));
829 
830  // Allocate a kmp_taskdata_t block and a kmp_task_t block.
831  KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n",
832  gtid, shareds_offset) );
833  KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n",
834  gtid, sizeof_shareds) );
835 
836  // Avoid double allocation here by combining shareds with taskdata
837  #if USE_FAST_MEMORY
838  taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds );
839  #else /* ! USE_FAST_MEMORY */
840  taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds );
841  #endif /* USE_FAST_MEMORY */
842 
843  task = KMP_TASKDATA_TO_TASK(taskdata);
844 
845  // Make sure task & taskdata are aligned appropriately
846 #if KMP_ARCH_X86
847  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 );
848  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 );
849 #else
850  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 );
851  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 );
852 #endif
853  if (sizeof_shareds > 0) {
854  // Avoid double allocation here by combining shareds with taskdata
855  task->shareds = & ((char *) taskdata)[ shareds_offset ];
856  // Make sure shareds struct is aligned to pointer size
857  KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 );
858  } else {
859  task->shareds = NULL;
860  }
861  task->routine = task_entry;
862  task->part_id = 0; // AC: Always start with 0 part id
863 
864  taskdata->td_task_id = KMP_GEN_TASK_ID();
865  taskdata->td_team = team;
866  taskdata->td_alloc_thread = thread;
867  taskdata->td_parent = parent_task;
868  taskdata->td_level = parent_task->td_level + 1; // increment nesting level
869  taskdata->td_ident = loc_ref;
870  taskdata->td_taskwait_ident = NULL;
871  taskdata->td_taskwait_counter = 0;
872  taskdata->td_taskwait_thread = 0;
873  KMP_DEBUG_ASSERT( taskdata->td_parent != NULL );
874  copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs );
875 
876  taskdata->td_flags.tiedness = flags->tiedness;
877  taskdata->td_flags.final = flags->final;
878  taskdata->td_flags.merged_if0 = flags->merged_if0;
879  taskdata->td_flags.tasktype = TASK_EXPLICIT;
880 
881  // GEH - TODO: fix this to copy parent task's value of tasking_ser flag
882  taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec );
883 
884  // GEH - TODO: fix this to copy parent task's value of team_serial flag
885  taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0;
886 
887  // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region
888  // tasks are not left until program termination to execute. Also, it helps locality to execute
889  // immediately.
890  taskdata->td_flags.task_serial = ( taskdata->td_flags.final
891  || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser );
892 
893  taskdata->td_flags.started = 0;
894  taskdata->td_flags.executing = 0;
895  taskdata->td_flags.complete = 0;
896  taskdata->td_flags.freed = 0;
897 
898  taskdata->td_flags.native = flags->native;
899 
900  taskdata->td_incomplete_child_tasks = 0;
901  taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children
902 #if OMP_40_ENABLED
903  taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task
904 #endif
905  // Only need to keep track of child task counts if team parallel and tasking not serialized
906  if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) {
907  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) );
908 #if OMP_40_ENABLED
909  if ( parent_task->td_taskgroup )
910  KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) );
911 #endif
912  // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated
913  if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) {
914  KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) );
915  }
916  }
917 
918  KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
919  gtid, taskdata, taskdata->td_parent) );
920 
921  return task;
922 }
923 
924 
925 kmp_task_t *
926 __kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
927  size_t sizeof_kmp_task_t, size_t sizeof_shareds,
928  kmp_routine_entry_t task_entry )
929 {
930  kmp_task_t *retval;
931  kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags;
932 
933  input_flags->native = FALSE;
934  // __kmp_task_alloc() sets up all other runtime flags
935 
936  KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) "
937  "sizeof_task=%ld sizeof_shared=%ld entry=%p\n",
938  gtid, loc_ref, input_flags->tiedness ? "tied " : "untied",
939  sizeof_kmp_task_t, sizeof_shareds, task_entry) );
940 
941  retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t,
942  sizeof_shareds, task_entry );
943 
944  KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) );
945 
946  return retval;
947 }
948 
949 //-----------------------------------------------------------
950 // __kmp_invoke_task: invoke the specified task
951 //
952 // gtid: global thread ID of caller
953 // task: the task to invoke
954 // current_task: the task to resume after task invokation
955 
956 static void
957 __kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task )
958 {
959  kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
960  KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n",
961  gtid, taskdata, current_task) );
962 
963  __kmp_task_start( gtid, task, current_task );
964 
965  //
966  // Invoke the task routine and pass in relevant data.
967  // Thunks generated by gcc take a different argument list.
968  //
969 #ifdef KMP_GOMP_COMPAT
970  if (taskdata->td_flags.native) {
971  ((void (*)(void *))(*(task->routine)))(task->shareds);
972  }
973  else
974 #endif /* KMP_GOMP_COMPAT */
975  {
976  (*(task->routine))(gtid, task);
977  }
978 
979  __kmp_task_finish( gtid, task, current_task );
980 
981  KA_TRACE(30, ("__kmp_inovke_task(exit): T#%d completed task %p, resuming task %p\n",
982  gtid, taskdata, current_task) );
983  return;
984 }
985 
986 //-----------------------------------------------------------------------
987 // __kmpc_omp_task_parts: Schedule a thread-switchable task for execution
988 //
989 // loc_ref: location of original task pragma (ignored)
990 // gtid: Global Thread ID of encountering thread
991 // new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task''
992 // Returns:
993 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
994 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
995 
996 kmp_int32
997 __kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
998 {
999  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1000 
1001  KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n",
1002  gtid, loc_ref, new_taskdata ) );
1003 
1004  /* Should we execute the new task or queue it? For now, let's just always try to
1005  queue it. If the queue fills up, then we'll execute it. */
1006 
1007  if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1008  { // Execute this task immediately
1009  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1010  new_taskdata->td_flags.task_serial = 1;
1011  __kmp_invoke_task( gtid, new_task, current_task );
1012  }
1013 
1014  KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: "
1015  "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref,
1016  new_taskdata ) );
1017 
1018  return TASK_CURRENT_NOT_QUEUED;
1019 }
1020 
1021 
1022 //---------------------------------------------------------------------
1023 // __kmpc_omp_task: Schedule a non-thread-switchable task for execution
1024 // loc_ref: location of original task pragma (ignored)
1025 // gtid: Global Thread ID of encountering thread
1026 // new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc()
1027 // returns:
1028 //
1029 // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later.
1030 // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later.
1031 
1032 kmp_int32
1033 __kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task)
1034 {
1035  kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
1036  kmp_int32 rc;
1037 
1038  KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n",
1039  gtid, loc_ref, new_taskdata ) );
1040 
1041  /* Should we execute the new task or queue it? For now, let's just always try to
1042  queue it. If the queue fills up, then we'll execute it. */
1043 
1044  if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer
1045  { // Execute this task immediately
1046  kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task;
1047  new_taskdata -> td_flags.task_serial = 1;
1048  __kmp_invoke_task( gtid, new_task, current_task );
1049  }
1050 
1051  KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
1052  gtid, loc_ref, new_taskdata ) );
1053 
1054  return TASK_CURRENT_NOT_QUEUED;
1055 }
1056 
1057 
1058 //-------------------------------------------------------------------------------------
1059 // __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete
1060 
1061 kmp_int32
1062 __kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid )
1063 {
1064  kmp_taskdata_t * taskdata;
1065  kmp_info_t * thread;
1066  int thread_finished = FALSE;
1067 
1068  KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n",
1069  gtid, loc_ref) );
1070 
1071  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1072  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1073 
1074  thread = __kmp_threads[ gtid ];
1075  taskdata = thread -> th.th_current_task;
1076 #if USE_ITT_BUILD
1077  // Note: These values are used by ITT events as well.
1078 #endif /* USE_ITT_BUILD */
1079  taskdata->td_taskwait_counter += 1;
1080  taskdata->td_taskwait_ident = loc_ref;
1081  taskdata->td_taskwait_thread = gtid + 1;
1082 
1083 #if USE_ITT_BUILD
1084  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1085  if ( itt_sync_obj != NULL )
1086  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1087 #endif /* USE_ITT_BUILD */
1088 
1089  if ( ! taskdata->td_flags.team_serial ) {
1090  // GEH: if team serialized, avoid reading the volatile variable below.
1091  while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) {
1092  __kmp_execute_tasks( thread, gtid, &(taskdata->td_incomplete_child_tasks),
1093  0, FALSE, &thread_finished,
1094 #if USE_ITT_BUILD
1095  itt_sync_obj,
1096 #endif /* USE_ITT_BUILD */
1097  __kmp_task_stealing_constraint );
1098  }
1099  }
1100 #if USE_ITT_BUILD
1101  if ( itt_sync_obj != NULL )
1102  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1103 #endif /* USE_ITT_BUILD */
1104 
1105  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1106  taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1107  }
1108 
1109  KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, "
1110  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1111 
1112  return TASK_CURRENT_NOT_QUEUED;
1113 }
1114 
1115 
1116 //-------------------------------------------------
1117 // __kmpc_omp_taskyield: switch to a different task
1118 
1119 kmp_int32
1120 __kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part )
1121 {
1122  kmp_taskdata_t * taskdata;
1123  kmp_info_t * thread;
1124  int thread_finished = FALSE;
1125 
1126  KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n",
1127  gtid, loc_ref, end_part) );
1128 
1129  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1130  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait?
1131 
1132  thread = __kmp_threads[ gtid ];
1133  taskdata = thread -> th.th_current_task;
1134  // Should we model this as a task wait or not?
1135 #if USE_ITT_BUILD
1136  // Note: These values are used by ITT events as well.
1137 #endif /* USE_ITT_BUILD */
1138  taskdata->td_taskwait_counter += 1;
1139  taskdata->td_taskwait_ident = loc_ref;
1140  taskdata->td_taskwait_thread = gtid + 1;
1141 
1142 #if USE_ITT_BUILD
1143  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1144  if ( itt_sync_obj != NULL )
1145  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1146 #endif /* USE_ITT_BUILD */
1147  if ( ! taskdata->td_flags.team_serial ) {
1148  __kmp_execute_tasks( thread, gtid, NULL, 0, FALSE, &thread_finished,
1149 #if USE_ITT_BUILD
1150  itt_sync_obj,
1151 #endif /* USE_ITT_BUILD */
1152  __kmp_task_stealing_constraint );
1153  }
1154 
1155 #if USE_ITT_BUILD
1156  if ( itt_sync_obj != NULL )
1157  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1158 #endif /* USE_ITT_BUILD */
1159 
1160  // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait?
1161  taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread;
1162  }
1163 
1164  KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, "
1165  "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) );
1166 
1167  return TASK_CURRENT_NOT_QUEUED;
1168 }
1169 
1170 
1171 #if OMP_40_ENABLED
1172 //-------------------------------------------------------------------------------------
1173 // __kmpc_taskgroup: Start a new taskgroup
1174 
1175 void
1176 __kmpc_taskgroup( ident* loc, int gtid )
1177 {
1178  kmp_info_t * thread = __kmp_threads[ gtid ];
1179  kmp_taskdata_t * taskdata = thread->th.th_current_task;
1180  kmp_taskgroup_t * tg_new =
1181  (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) );
1182  KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) );
1183  tg_new->count = 0;
1184  tg_new->parent = taskdata->td_taskgroup;
1185  taskdata->td_taskgroup = tg_new;
1186 }
1187 
1188 
1189 //-------------------------------------------------------------------------------------
1190 // __kmpc_end_taskgroup: Wait until all tasks generated by the current task
1191 // and its descendants are complete
1192 
1193 void
1194 __kmpc_end_taskgroup( ident* loc, int gtid )
1195 {
1196  kmp_info_t * thread = __kmp_threads[ gtid ];
1197  kmp_taskdata_t * taskdata = thread->th.th_current_task;
1198  kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup;
1199  int thread_finished = FALSE;
1200 
1201  KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) );
1202  KMP_DEBUG_ASSERT( taskgroup != NULL );
1203 
1204  if ( __kmp_tasking_mode != tskm_immediate_exec ) {
1205 #if USE_ITT_BUILD
1206  // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them
1207  void * itt_sync_obj = __kmp_itt_taskwait_object( gtid );
1208  if ( itt_sync_obj != NULL )
1209  __kmp_itt_taskwait_starting( gtid, itt_sync_obj );
1210 #endif /* USE_ITT_BUILD */
1211 
1212  if ( ! taskdata->td_flags.team_serial ) {
1213  while ( TCR_4(taskgroup->count) != 0 ) {
1214  __kmp_execute_tasks( thread, gtid, &(taskgroup->count),
1215  0, FALSE, &thread_finished,
1216 #if USE_ITT_BUILD
1217  itt_sync_obj,
1218 #endif /* USE_ITT_BUILD */
1219  __kmp_task_stealing_constraint );
1220  }
1221  }
1222 
1223 #if USE_ITT_BUILD
1224  if ( itt_sync_obj != NULL )
1225  __kmp_itt_taskwait_finished( gtid, itt_sync_obj );
1226 #endif /* USE_ITT_BUILD */
1227  }
1228  KMP_DEBUG_ASSERT( taskgroup->count == 0 );
1229 
1230  // Restore parent taskgroup for the current task
1231  taskdata->td_taskgroup = taskgroup->parent;
1232  __kmp_thread_free( thread, taskgroup );
1233 
1234  KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) );
1235 }
1236 #endif
1237 
1238 
1239 //------------------------------------------------------
1240 // __kmp_remove_my_task: remove a task from my own deque
1241 
1242 static kmp_task_t *
1243 __kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team,
1244  kmp_int32 is_constrained )
1245 {
1246  kmp_task_t * task;
1247  kmp_taskdata_t * taskdata;
1248  kmp_thread_data_t *thread_data;
1249  kmp_uint32 tail;
1250 
1251  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1252  KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition
1253 
1254  thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ];
1255 
1256  KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n",
1257  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1258  thread_data->td.td_deque_tail) );
1259 
1260  if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1261  KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1262  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1263  thread_data->td.td_deque_tail) );
1264  return NULL;
1265  }
1266 
1267  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1268 
1269  if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) {
1270  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1271  KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1272  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1273  thread_data->td.td_deque_tail) );
1274  return NULL;
1275  }
1276 
1277  tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1278  taskdata = thread_data -> td.td_deque[ tail ];
1279 
1280  if (is_constrained) {
1281  // we need to check if the candidate obeys task scheduling constraint:
1282  // only child of current task can be scheduled
1283  kmp_taskdata_t * current = thread->th.th_current_task;
1284  kmp_int32 level = current->td_level;
1285  kmp_taskdata_t * parent = taskdata->td_parent;
1286  while ( parent != current && parent->td_level > level ) {
1287  parent = parent->td_parent; // check generation up to the level of the current task
1288  KMP_DEBUG_ASSERT(parent != NULL);
1289  }
1290  if ( parent != current ) {
1291  // If the tail task is not a child, then no other childs can appear in the deque.
1292  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1293  KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n",
1294  gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1295  thread_data->td.td_deque_tail) );
1296  return NULL;
1297  }
1298  }
1299 
1300  thread_data -> td.td_deque_tail = tail;
1301  TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1);
1302 
1303  __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock );
1304 
1305  KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n",
1306  gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head,
1307  thread_data->td.td_deque_tail) );
1308 
1309  task = KMP_TASKDATA_TO_TASK( taskdata );
1310  return task;
1311 }
1312 
1313 
1314 //-----------------------------------------------------------
1315 // __kmp_steal_task: remove a task from another thread's deque
1316 // Assume that calling thread has already checked existence of
1317 // task_team thread_data before calling this routine.
1318 
1319 static kmp_task_t *
1320 __kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team,
1321  volatile kmp_uint32 *unfinished_threads, int *thread_finished,
1322  kmp_int32 is_constrained )
1323 {
1324  kmp_task_t * task;
1325  kmp_taskdata_t * taskdata;
1326  kmp_thread_data_t *victim_td, *threads_data;
1327  kmp_int32 victim_tid, thread_tid;
1328 
1329  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1330 
1331  threads_data = task_team -> tt.tt_threads_data;
1332  KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition
1333 
1334  victim_tid = victim->th.th_info.ds.ds_tid;
1335  victim_td = & threads_data[ victim_tid ];
1336 
1337  KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d "
1338  "head=%u tail=%u\n",
1339  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1340  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1341 
1342  if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition
1343  (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1344  {
1345  KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p "
1346  "ntasks=%d head=%u tail=%u\n",
1347  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1348  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1349  return NULL;
1350  }
1351 
1352  __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock );
1353 
1354  // Check again after we acquire the lock
1355  if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) ||
1356  (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen?
1357  {
1358  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1359  KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1360  "ntasks=%d head=%u tail=%u\n",
1361  gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks,
1362  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1363  return NULL;
1364  }
1365 
1366  KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL );
1367 
1368  if ( !is_constrained ) {
1369  taskdata = victim_td -> td.td_deque[ victim_td -> td.td_deque_head ];
1370  // Bump head pointer and Wrap.
1371  victim_td -> td.td_deque_head = ( victim_td -> td.td_deque_head + 1 ) & TASK_DEQUE_MASK;
1372  } else {
1373  // While we have postponed tasks let's steal from tail of the deque (smaller tasks)
1374  kmp_int32 tail = ( victim_td -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK; // Wrap index.
1375  taskdata = victim_td -> td.td_deque[ tail ];
1376  // we need to check if the candidate obeys task scheduling constraint:
1377  // only child of current task can be scheduled
1378  kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task;
1379  kmp_int32 level = current->td_level;
1380  kmp_taskdata_t * parent = taskdata->td_parent;
1381  while ( parent != current && parent->td_level > level ) {
1382  parent = parent->td_parent; // check generation up to the level of the current task
1383  KMP_DEBUG_ASSERT(parent != NULL);
1384  }
1385  if ( parent != current ) {
1386  // If the tail task is not a child, then no other childs can appear in the deque (?).
1387  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1388  KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p "
1389  "ntasks=%d head=%u tail=%u\n",
1390  gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ),
1391  task_team, victim_td->td.td_deque_ntasks,
1392  victim_td->td.td_deque_head, victim_td->td.td_deque_tail) );
1393  return NULL;
1394  }
1395  victim_td -> td.td_deque_tail = tail;
1396  }
1397  if (*thread_finished) {
1398  // We need to un-mark this victim as a finished victim. This must be done before
1399  // releasing the lock, or else other threads (starting with the master victim)
1400  // might be prematurely released from the barrier!!!
1401  kmp_uint32 count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads );
1402 
1403  KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n",
1404  gtid, count + 1, task_team) );
1405 
1406  *thread_finished = FALSE;
1407  }
1408  TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1);
1409 
1410  __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock );
1411 
1412  KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#d: task_team=%p "
1413  "ntasks=%d head=%u tail=%u\n",
1414  gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team,
1415  victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head,
1416  victim_td->td.td_deque_tail) );
1417 
1418  task = KMP_TASKDATA_TO_TASK( taskdata );
1419  return task;
1420 }
1421 
1422 
1423 //-----------------------------------------------------------------------------
1424 // __kmp_execute_tasks: Choose and execute tasks until either the condition
1425 // is satisfied (return true) or there are none left (return false).
1426 // final_spin is TRUE if this is the spin at the release barrier.
1427 // thread_finished indicates whether the thread is finished executing all
1428 // the tasks it has on its deque, and is at the release barrier.
1429 // spinner is the location on which to spin.
1430 // spinner == NULL means only execute a single task and return.
1431 // checker is the value to check to terminate the spin.
1432 
1433 int
1434 __kmp_execute_tasks( kmp_info_t *thread,
1435  kmp_int32 gtid,
1436  volatile kmp_uint *spinner,
1437  kmp_uint checker,
1438  int final_spin,
1439  int *thread_finished,
1440 #if USE_ITT_BUILD
1441  void * itt_sync_obj,
1442 #endif /* USE_ITT_BUILD */
1443  kmp_int32 is_constrained )
1444 {
1445  kmp_task_team_t * task_team;
1446  kmp_team_t * team;
1447  kmp_thread_data_t * threads_data;
1448  kmp_task_t * task;
1449  kmp_taskdata_t * current_task = thread -> th.th_current_task;
1450  volatile kmp_uint32 * unfinished_threads;
1451  kmp_int32 nthreads, last_stolen, k, tid;
1452 
1453  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
1454  KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] );
1455 
1456  task_team = thread -> th.th_task_team;
1457  KMP_DEBUG_ASSERT( task_team != NULL );
1458 
1459  KA_TRACE(15, ("__kmp_execute_tasks(enter): T#%d final_spin=%d *thread_finished=%d\n",
1460  gtid, final_spin, *thread_finished) );
1461 
1462  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1463  KMP_DEBUG_ASSERT( threads_data != NULL );
1464 
1465  nthreads = task_team -> tt.tt_nproc;
1466  unfinished_threads = &(task_team -> tt.tt_unfinished_threads);
1467  KMP_DEBUG_ASSERT( nthreads > 1 );
1468  KMP_DEBUG_ASSERT( TCR_4((int)*unfinished_threads) >= 0 );
1469 
1470  // Choose tasks from our own work queue.
1471  start:
1472  while (( task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained )) != NULL ) {
1473 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1474  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1475  if ( itt_sync_obj == NULL ) {
1476  // we are at fork barrier where we could not get the object reliably
1477  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1478  }
1479  __kmp_itt_task_starting( itt_sync_obj );
1480  }
1481 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1482  __kmp_invoke_task( gtid, task, current_task );
1483 #if USE_ITT_BUILD
1484  if ( itt_sync_obj != NULL )
1485  __kmp_itt_task_finished( itt_sync_obj );
1486 #endif /* USE_ITT_BUILD */
1487 
1488  // If this thread is only partway through the barrier and the condition
1489  // is met, then return now, so that the barrier gather/release pattern can proceed.
1490  // If this thread is in the last spin loop in the barrier, waiting to be
1491  // released, we know that the termination condition will not be satisified,
1492  // so don't waste any cycles checking it.
1493  if ((spinner == NULL) || ((!final_spin) && (TCR_4(*spinner) == checker))) {
1494  KA_TRACE(15, ("__kmp_execute_tasks(exit #1): T#%d spin condition satisfied\n", gtid) );
1495  return TRUE;
1496  }
1497  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1498  }
1499 
1500  // This thread's work queue is empty. If we are in the final spin loop
1501  // of the barrier, check and see if the termination condition is satisfied.
1502  if (final_spin) {
1503  // First, decrement the #unfinished threads, if that has not already
1504  // been done. This decrement might be to the spin location, and
1505  // result in the termination condition being satisfied.
1506  if (! *thread_finished) {
1507  kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1508  KA_TRACE(20, ("__kmp_execute_tasks(dec #1): T#%d dec unfinished_threads to %d task_team=%p\n",
1509  gtid, count, task_team) );
1510  *thread_finished = TRUE;
1511  }
1512 
1513  // It is now unsafe to reference thread->th.th_team !!!
1514  // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1515  // thread to pass through the barrier, where it might reset each thread's
1516  // th.th_team field for the next parallel region.
1517  // If we can steal more work, we know that this has not happened yet.
1518  if ((spinner != NULL) && (TCR_4(*spinner) == checker)) {
1519  KA_TRACE(15, ("__kmp_execute_tasks(exit #2): T#%d spin condition satisfied\n", gtid) );
1520  return TRUE;
1521  }
1522  }
1523 
1524  // Try to steal from the last place I stole from successfully.
1525  tid = thread -> th.th_info.ds.ds_tid;//__kmp_tid_from_gtid( gtid );
1526  last_stolen = threads_data[ tid ].td.td_deque_last_stolen;
1527 
1528  if (last_stolen != -1) {
1529  kmp_info_t *other_thread = threads_data[last_stolen].td.td_thr;
1530 
1531  while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1532  thread_finished, is_constrained )) != NULL)
1533  {
1534 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1535  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1536  if ( itt_sync_obj == NULL ) {
1537  // we are at fork barrier where we could not get the object reliably
1538  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1539  }
1540  __kmp_itt_task_starting( itt_sync_obj );
1541  }
1542 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1543  __kmp_invoke_task( gtid, task, current_task );
1544 #if USE_ITT_BUILD
1545  if ( itt_sync_obj != NULL )
1546  __kmp_itt_task_finished( itt_sync_obj );
1547 #endif /* USE_ITT_BUILD */
1548 
1549  // Check to see if this thread can proceed.
1550  if ((spinner == NULL) || ((!final_spin) && (TCR_4(*spinner) == checker))) {
1551  KA_TRACE(15, ("__kmp_execute_tasks(exit #3): T#%d spin condition satisfied\n",
1552  gtid) );
1553  return TRUE;
1554  }
1555 
1556  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1557  // If the execution of the stolen task resulted in more tasks being
1558  // placed on our run queue, then restart the whole process.
1559  if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1560  KA_TRACE(20, ("__kmp_execute_tasks: T#%d stolen task spawned other tasks, restart\n",
1561  gtid) );
1562  goto start;
1563  }
1564  }
1565 
1566  // Don't give priority to stealing from this thread anymore.
1567  threads_data[ tid ].td.td_deque_last_stolen = -1;
1568 
1569  // The victim's work queue is empty. If we are in the final spin loop
1570  // of the barrier, check and see if the termination condition is satisfied.
1571  if (final_spin) {
1572  // First, decrement the #unfinished threads, if that has not already
1573  // been done. This decrement might be to the spin location, and
1574  // result in the termination condition being satisfied.
1575  if (! *thread_finished) {
1576  kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1577  KA_TRACE(20, ("__kmp_execute_tasks(dec #2): T#%d dec unfinished_threads to %d "
1578  "task_team=%p\n", gtid, count, task_team) );
1579  *thread_finished = TRUE;
1580  }
1581 
1582  // If __kmp_tasking_mode != tskm_immediate_exec
1583  // then it is now unsafe to reference thread->th.th_team !!!
1584  // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1585  // thread to pass through the barrier, where it might reset each thread's
1586  // th.th_team field for the next parallel region.
1587  // If we can steal more work, we know that this has not happened yet.
1588  if ((spinner != NULL) && (TCR_4(*spinner) == checker)) {
1589  KA_TRACE(15, ("__kmp_execute_tasks(exit #4): T#%d spin condition satisfied\n",
1590  gtid) );
1591  return TRUE;
1592  }
1593  }
1594  }
1595 
1596  // Find a different thread to steal work from. Pick a random thread.
1597  // My initial plan was to cycle through all the threads, and only return
1598  // if we tried to steal from every thread, and failed. Arch says that's
1599  // not such a great idea.
1600  // GEH - need yield code in this loop for throughput library mode?
1601  new_victim:
1602  k = __kmp_get_random( thread ) % (nthreads - 1);
1603  if ( k >= thread -> th.th_info.ds.ds_tid ) {
1604  ++k; // Adjusts random distribution to exclude self
1605  }
1606  {
1607  kmp_info_t *other_thread = threads_data[k].td.td_thr;
1608  int first;
1609 
1610  // There is a slight chance that __kmp_enable_tasking() did not wake up
1611  // all threads waiting at the barrier. If this thread is sleeping,
1612  // then wake it up. Since we were going to pay the cache miss penalty
1613  // for referencing another thread's kmp_info_t struct anyway, the check
1614  // shouldn't cost too much performance at this point.
1615  // In extra barrier mode, tasks do not sleep at the separate tasking
1616  // barrier, so this isn't a problem.
1617  if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1618  (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) &&
1619  (TCR_PTR(other_thread->th.th_sleep_loc) != NULL))
1620  {
1621  __kmp_resume( __kmp_gtid_from_thread( other_thread ), NULL );
1622 
1623  // A sleeping thread should not have any tasks on its queue.
1624  // There is a slight possibility that it resumes, steals a task from
1625  // another thread, which spawns more tasks, all in the time that it takes
1626  // this thread to check => don't write an assertion that the victim's
1627  // queue is empty. Try stealing from a different thread.
1628  goto new_victim;
1629  }
1630 
1631  // Now try to steal work from the selected thread
1632  first = TRUE;
1633  while ((task = __kmp_steal_task( other_thread, gtid, task_team, unfinished_threads,
1634  thread_finished, is_constrained )) != NULL)
1635  {
1636 #if USE_ITT_BUILD && USE_ITT_NOTIFY
1637  if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) {
1638  if ( itt_sync_obj == NULL ) {
1639  // we are at fork barrier where we could not get the object reliably
1640  itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier );
1641  }
1642  __kmp_itt_task_starting( itt_sync_obj );
1643  }
1644 #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
1645  __kmp_invoke_task( gtid, task, current_task );
1646 #if USE_ITT_BUILD
1647  if ( itt_sync_obj != NULL )
1648  __kmp_itt_task_finished( itt_sync_obj );
1649 #endif /* USE_ITT_BUILD */
1650 
1651  // Try stealing from this victim again, in the future.
1652  if (first) {
1653  threads_data[ tid ].td.td_deque_last_stolen = k;
1654  first = FALSE;
1655  }
1656 
1657  // Check to see if this thread can proceed.
1658  if ((spinner == NULL) || ((!final_spin) && (TCR_4(*spinner) == checker))) {
1659  KA_TRACE(15, ("__kmp_execute_tasks(exit #5): T#%d spin condition satisfied\n",
1660  gtid) );
1661  return TRUE;
1662  }
1663  KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task
1664 
1665  // If the execution of the stolen task resulted in more tasks being
1666  // placed on our run queue, then restart the whole process.
1667  if (TCR_4(threads_data[ tid ].td.td_deque_ntasks) != 0) {
1668  KA_TRACE(20, ("__kmp_execute_tasks: T#%d stolen task spawned other tasks, restart\n",
1669  gtid) );
1670  goto start;
1671  }
1672  }
1673 
1674  // The victim's work queue is empty. If we are in the final spin loop
1675  // of the barrier, check and see if the termination condition is satisfied.
1676  // Going on and finding a new victim to steal from is expensive, as it
1677  // involves a lot of cache misses, so we definitely want to re-check the
1678  // termination condition before doing that.
1679  if (final_spin) {
1680  // First, decrement the #unfinished threads, if that has not already
1681  // been done. This decrement might be to the spin location, and
1682  // result in the termination condition being satisfied.
1683  if (! *thread_finished) {
1684  kmp_uint32 count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1;
1685  KA_TRACE(20, ("__kmp_execute_tasks(dec #3): T#%d dec unfinished_threads to %d; "
1686  "task_team=%p\n",
1687  gtid, count, task_team) );
1688  *thread_finished = TRUE;
1689  }
1690 
1691  // If __kmp_tasking_mode != tskm_immediate_exec,
1692  // then it is now unsafe to reference thread->th.th_team !!!
1693  // Decrementing task_team->tt.tt_unfinished_threads can allow the master
1694  // thread to pass through the barrier, where it might reset each thread's
1695  // th.th_team field for the next parallel region.
1696  // If we can steal more work, we know that this has not happened yet.
1697  if ((spinner != NULL) && (TCR_4(*spinner) == checker)) {
1698  KA_TRACE(15, ("__kmp_execute_tasks(exit #6): T#%d spin condition satisfied\n",
1699  gtid) );
1700  return TRUE;
1701  }
1702  }
1703  }
1704 
1705  KA_TRACE(15, ("__kmp_execute_tasks(exit #7): T#%d can't find work\n", gtid) );
1706  return FALSE;
1707 }
1708 
1709 
1710 //-----------------------------------------------------------------------------
1711 // __kmp_enable_tasking: Allocate task team and resume threads sleeping at the
1712 // next barrier so they can assist in executing enqueued tasks.
1713 // First thread in allocates the task team atomically.
1714 
1715 static void
1716 __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr )
1717 {
1718  kmp_team_t *team = this_thr->th.th_team;
1719  kmp_thread_data_t *threads_data;
1720  int nthreads, i, is_init_thread;
1721 
1722  KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n",
1723  __kmp_gtid_from_thread( this_thr ) ) );
1724 
1725  KMP_DEBUG_ASSERT(task_team != NULL);
1726  KMP_DEBUG_ASSERT(team != NULL);
1727 
1728  nthreads = task_team->tt.tt_nproc;
1729  KMP_DEBUG_ASSERT(nthreads > 0);
1730  KMP_DEBUG_ASSERT(nthreads == team->t.t_nproc);
1731 
1732  // Allocate or increase the size of threads_data if necessary
1733  is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team );
1734 
1735  if (!is_init_thread) {
1736  // Some other thread already set up the array.
1737  KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n",
1738  __kmp_gtid_from_thread( this_thr ) ) );
1739  return;
1740  }
1741  threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data);
1742  KMP_DEBUG_ASSERT( threads_data != NULL );
1743 
1744  if ( ( __kmp_tasking_mode == tskm_task_teams ) &&
1745  ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) )
1746  {
1747  // Release any threads sleeping at the barrier, so that they can steal
1748  // tasks and execute them. In extra barrier mode, tasks do not sleep
1749  // at the separate tasking barrier, so this isn't a problem.
1750  for (i = 0; i < nthreads; i++) {
1751  volatile kmp_uint *sleep_loc;
1752  kmp_info_t *thread = threads_data[i].td.td_thr;
1753 
1754  if (i == this_thr->th.th_info.ds.ds_tid) {
1755  continue;
1756  }
1757  // Since we haven't locked the thread's suspend mutex lock at this
1758  // point, there is a small window where a thread might be putting
1759  // itself to sleep, but hasn't set the th_sleep_loc field yet.
1760  // To work around this, __kmp_execute_tasks() periodically checks
1761  // see if other threads are sleeping (using the same random
1762  // mechanism that is used for task stealing) and awakens them if
1763  // they are.
1764  if ( ( sleep_loc = (volatile kmp_uint *)
1765  TCR_PTR( thread -> th.th_sleep_loc) ) != NULL )
1766  {
1767  KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n",
1768  __kmp_gtid_from_thread( this_thr ),
1769  __kmp_gtid_from_thread( thread ) ) );
1770  __kmp_resume( __kmp_gtid_from_thread( thread ), sleep_loc );
1771  }
1772  else {
1773  KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n",
1774  __kmp_gtid_from_thread( this_thr ),
1775  __kmp_gtid_from_thread( thread ) ) );
1776  }
1777  }
1778  }
1779 
1780  KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n",
1781  __kmp_gtid_from_thread( this_thr ) ) );
1782 }
1783 
1784 
1785 /* ------------------------------------------------------------------------ */
1786 /*
1787  * Utility routines for "task teams". A task team (kmp_task_team_t) is kind of
1788  * like a shadow of the kmp_team_t data struct, with a different lifetime.
1789  * After a child thread checks into a barrier and calls __kmp_release() from
1790  * the particular variant of __kmp_<barrier_kind>_barrier_gather(), it can no
1791  * longer assume that the kmp_team_t structure is intact (at any moment, the
1792  * master thread may exit the barrier code and free the team data structure,
1793  * and return the threads to the thread pool).
1794  *
1795  * This does not work with the tasking code, as the thread is still
1796  * expected to participate in the execution of any tasks that may have been
1797  * spawned by a member of the team, and the thread still needs access
1798  * to each thread in the team, so that it can steal work from it.
1799  *
1800  * Enter the existence of the kmp_task_team_t struct. It employs a reference
1801  * counting mechanism, and is allocated by the master thread before calling
1802  * __kmp_<barrier_kind>_release, and is then released by the last thread to
1803  * exit __kmp_<barrier_kind>_release at the next barrier. I.e. the lifetimes
1804  * of the kmp_task_team_t structs for consecutive barriers can overlap
1805  * (and will, unless the master thread is the last thread to exit the barrier
1806  * release phase, which is not typical).
1807  *
1808  * The existence of such a struct is useful outside the context of tasking,
1809  * but for now, I'm trying to keep it specific to the OMP_30_ENABLED macro,
1810  * so that any performance differences show up when comparing the 2.5 vs. 3.0
1811  * libraries.
1812  *
1813  * We currently use the existence of the threads array as an indicator that
1814  * tasks were spawned since the last barrier. If the structure is to be
1815  * useful outside the context of tasking, then this will have to change, but
1816  * not setting the field minimizes the performance impact of tasking on
1817  * barriers, when no explicit tasks were spawned (pushed, actually).
1818  */
1819 
1820 static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures
1821 // Lock for task team data structures
1822 static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock );
1823 
1824 
1825 //------------------------------------------------------------------------------
1826 // __kmp_alloc_task_deque:
1827 // Allocates a task deque for a particular thread, and initialize the necessary
1828 // data structures relating to the deque. This only happens once per thread
1829 // per task team since task teams are recycled.
1830 // No lock is needed during allocation since each thread allocates its own
1831 // deque.
1832 
1833 static void
1834 __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data )
1835 {
1836  __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock );
1837  KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL );
1838 
1839  // Initialize last stolen task field to "none"
1840  thread_data -> td.td_deque_last_stolen = -1;
1841 
1842  KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 );
1843  KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 );
1844  KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 );
1845 
1846  KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n",
1847  __kmp_gtid_from_thread( thread ), TASK_DEQUE_SIZE, thread_data ) );
1848  // Allocate space for task deque, and zero the deque
1849  // Cannot use __kmp_thread_calloc() because threads not around for
1850  // kmp_reap_task_team( ).
1851  thread_data -> td.td_deque = (kmp_taskdata_t **)
1852  __kmp_allocate( TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *));
1853 }
1854 
1855 
1856 //------------------------------------------------------------------------------
1857 // __kmp_free_task_deque:
1858 // Deallocates a task deque for a particular thread.
1859 // Happens at library deallocation so don't need to reset all thread data fields.
1860 
1861 static void
1862 __kmp_free_task_deque( kmp_thread_data_t *thread_data )
1863 {
1864  __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
1865 
1866  if ( thread_data -> td.td_deque != NULL ) {
1867  TCW_4(thread_data -> td.td_deque_ntasks, 0);
1868  __kmp_free( thread_data -> td.td_deque );
1869  thread_data -> td.td_deque = NULL;
1870  }
1871  __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
1872 
1873 #ifdef BUILD_TIED_TASK_STACK
1874  // GEH: Figure out what to do here for td_susp_tied_tasks
1875  if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
1876  __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
1877  }
1878 #endif // BUILD_TIED_TASK_STACK
1879 }
1880 
1881 
1882 //------------------------------------------------------------------------------
1883 // __kmp_realloc_task_threads_data:
1884 // Allocates a threads_data array for a task team, either by allocating an initial
1885 // array or enlarging an existing array. Only the first thread to get the lock
1886 // allocs or enlarges the array and re-initializes the array elements.
1887 // That thread returns "TRUE", the rest return "FALSE".
1888 // Assumes that the new array size is given by task_team -> tt.tt_nproc.
1889 // The current size is given by task_team -> tt.tt_max_threads.
1890 
1891 static int
1892 __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team )
1893 {
1894  kmp_thread_data_t ** threads_data_p;
1895  kmp_int32 nthreads, maxthreads;
1896  int is_init_thread = FALSE;
1897 
1898  if ( TCR_4(task_team -> tt.tt_found_tasks) ) {
1899  // Already reallocated and initialized.
1900  return FALSE;
1901  }
1902 
1903  threads_data_p = & task_team -> tt.tt_threads_data;
1904  nthreads = task_team -> tt.tt_nproc;
1905  maxthreads = task_team -> tt.tt_max_threads;
1906 
1907  // All threads must lock when they encounter the first task of the implicit task
1908  // region to make sure threads_data fields are (re)initialized before used.
1909  __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
1910 
1911  if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) {
1912  // first thread to enable tasking
1913  kmp_team_t *team = thread -> th.th_team;
1914  int i;
1915 
1916  is_init_thread = TRUE;
1917  if ( maxthreads < nthreads ) {
1918 
1919  if ( *threads_data_p != NULL ) {
1920  kmp_thread_data_t *old_data = *threads_data_p;
1921  kmp_thread_data_t *new_data = NULL;
1922 
1923  KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating "
1924  "threads data for task_team %p, new_size = %d, old_size = %d\n",
1925  __kmp_gtid_from_thread( thread ), task_team,
1926  nthreads, maxthreads ) );
1927  // Reallocate threads_data to have more elements than current array
1928  // Cannot use __kmp_thread_realloc() because threads not around for
1929  // kmp_reap_task_team( ). Note all new array entries are initialized
1930  // to zero by __kmp_allocate().
1931  new_data = (kmp_thread_data_t *)
1932  __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
1933  // copy old data to new data
1934  memcpy( (void *) new_data, (void *) old_data,
1935  maxthreads * sizeof(kmp_taskdata_t *) );
1936 
1937 #ifdef BUILD_TIED_TASK_STACK
1938  // GEH: Figure out if this is the right thing to do
1939  for (i = maxthreads; i < nthreads; i++) {
1940  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
1941  __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
1942  }
1943 #endif // BUILD_TIED_TASK_STACK
1944  // Install the new data and free the old data
1945  (*threads_data_p) = new_data;
1946  __kmp_free( old_data );
1947  }
1948  else {
1949  KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating "
1950  "threads data for task_team %p, size = %d\n",
1951  __kmp_gtid_from_thread( thread ), task_team, nthreads ) );
1952  // Make the initial allocate for threads_data array, and zero entries
1953  // Cannot use __kmp_thread_calloc() because threads not around for
1954  // kmp_reap_task_team( ).
1955  *threads_data_p = (kmp_thread_data_t *)
1956  __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) );
1957 #ifdef BUILD_TIED_TASK_STACK
1958  // GEH: Figure out if this is the right thing to do
1959  for (i = 0; i < nthreads; i++) {
1960  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
1961  __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data );
1962  }
1963 #endif // BUILD_TIED_TASK_STACK
1964  }
1965  task_team -> tt.tt_max_threads = nthreads;
1966  }
1967  else {
1968  // If array has (more than) enough elements, go ahead and use it
1969  KMP_DEBUG_ASSERT( *threads_data_p != NULL );
1970  }
1971 
1972  // initialize threads_data pointers back to thread_info structures
1973  for (i = 0; i < nthreads; i++) {
1974  kmp_thread_data_t *thread_data = & (*threads_data_p)[i];
1975  thread_data -> td.td_thr = team -> t.t_threads[i];
1976 
1977  if ( thread_data -> td.td_deque_last_stolen >= nthreads) {
1978  // The last stolen field survives across teams / barrier, and the number
1979  // of threads may have changed. It's possible (likely?) that a new
1980  // parallel region will exhibit the same behavior as the previous region.
1981  thread_data -> td.td_deque_last_stolen = -1;
1982  }
1983  }
1984 
1985  KMP_MB();
1986  TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE);
1987  }
1988 
1989  __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
1990  return is_init_thread;
1991 }
1992 
1993 
1994 //------------------------------------------------------------------------------
1995 // __kmp_free_task_threads_data:
1996 // Deallocates a threads_data array for a task team, including any attached
1997 // tasking deques. Only occurs at library shutdown.
1998 
1999 static void
2000 __kmp_free_task_threads_data( kmp_task_team_t *task_team )
2001 {
2002  __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2003  if ( task_team -> tt.tt_threads_data != NULL ) {
2004  int i;
2005  for (i = 0; i < task_team->tt.tt_max_threads; i++ ) {
2006  __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] );
2007  }
2008  __kmp_free( task_team -> tt.tt_threads_data );
2009  task_team -> tt.tt_threads_data = NULL;
2010  }
2011  __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2012 }
2013 
2014 
2015 //------------------------------------------------------------------------------
2016 // __kmp_allocate_task_team:
2017 // Allocates a task team associated with a specific team, taking it from
2018 // the global task team free list if possible. Also initializes data structures.
2019 
2020 static kmp_task_team_t *
2021 __kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team )
2022 {
2023  kmp_task_team_t *task_team = NULL;
2024  int nthreads;
2025 
2026  KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n",
2027  (thread ? __kmp_gtid_from_thread( thread ) : -1), team ) );
2028 
2029  if (TCR_PTR(__kmp_free_task_teams) != NULL) {
2030  // Take a task team from the task team pool
2031  __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2032  if (__kmp_free_task_teams != NULL) {
2033  task_team = __kmp_free_task_teams;
2034  TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next);
2035  task_team -> tt.tt_next = NULL;
2036  }
2037  __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2038  }
2039 
2040  if (task_team == NULL) {
2041  KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating "
2042  "task team for team %p\n",
2043  __kmp_gtid_from_thread( thread ), team ) );
2044  // Allocate a new task team if one is not available.
2045  // Cannot use __kmp_thread_malloc() because threads not around for
2046  // kmp_reap_task_team( ).
2047  task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) );
2048  __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock );
2049  //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory
2050  //task_team -> tt.tt_max_threads = 0;
2051  //task_team -> tt.tt_next = NULL;
2052  }
2053 
2054  TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2055  task_team -> tt.tt_nproc = nthreads = team->t.t_nproc;
2056 
2057  task_team -> tt.tt_state = 0;
2058  TCW_4( task_team -> tt.tt_unfinished_threads, nthreads );
2059  TCW_4( task_team -> tt.tt_active, TRUE );
2060  TCW_4( task_team -> tt.tt_ref_ct, nthreads - 1);
2061 
2062  KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p\n",
2063  (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team ) );
2064  return task_team;
2065 }
2066 
2067 
2068 //------------------------------------------------------------------------------
2069 // __kmp_free_task_team:
2070 // Frees the task team associated with a specific thread, and adds it
2071 // to the global task team free list.
2072 //
2073 
2074 static void
2075 __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team )
2076 {
2077  KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n",
2078  thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) );
2079 
2080  KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_ref_ct) == 0 );
2081 
2082  // Put task team back on free list
2083  __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock );
2084 
2085  KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL );
2086  task_team -> tt.tt_next = __kmp_free_task_teams;
2087  TCW_4(task_team -> tt.tt_found_tasks, FALSE);
2088  TCW_PTR(__kmp_free_task_teams, task_team);
2089 
2090  __kmp_release_bootstrap_lock( & __kmp_task_team_lock );
2091 }
2092 
2093 
2094 //------------------------------------------------------------------------------
2095 // __kmp_reap_task_teams:
2096 // Free all the task teams on the task team free list.
2097 // Should only be done during library shutdown.
2098 // Cannot do anything that needs a thread structure or gtid since they are already gone.
2099 
2100 void
2101 __kmp_reap_task_teams( void )
2102 {
2103  kmp_task_team_t *task_team;
2104 
2105  if ( TCR_PTR(__kmp_free_task_teams) != NULL ) {
2106  // Free all task_teams on the free list
2107  __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock );
2108  while ( ( task_team = __kmp_free_task_teams ) != NULL ) {
2109  __kmp_free_task_teams = task_team -> tt.tt_next;
2110  task_team -> tt.tt_next = NULL;
2111 
2112  // Free threads_data if necessary
2113  if ( task_team -> tt.tt_threads_data != NULL ) {
2114  __kmp_free_task_threads_data( task_team );
2115  }
2116  __kmp_free( task_team );
2117  }
2118  __kmp_release_bootstrap_lock( &__kmp_task_team_lock );
2119  }
2120 }
2121 
2122 
2123 //------------------------------------------------------------------------------
2124 // __kmp_unref_task_team:
2125 // Remove one thread from referencing the task team structure by
2126 // decreasing the reference count and deallocate task team if no more
2127 // references to it.
2128 //
2129 void
2130 __kmp_unref_task_team( kmp_task_team_t *task_team, kmp_info_t *thread )
2131 {
2132  kmp_uint ref_ct;
2133 
2134  ref_ct = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& task_team->tt.tt_ref_ct) ) - 1;
2135 
2136  KA_TRACE( 20, ( "__kmp_unref_task_team: T#%d task_team = %p ref_ct = %d\n",
2137  __kmp_gtid_from_thread( thread ), task_team, ref_ct ) );
2138 
2139 
2140  if ( ref_ct == 0 ) {
2141  __kmp_free_task_team( thread, task_team );
2142  }
2143 
2144  TCW_PTR( *((volatile kmp_task_team_t **)(&thread->th.th_task_team)), NULL );
2145 }
2146 
2147 
2148 //------------------------------------------------------------------------------
2149 // __kmp_wait_to_unref_task_teams:
2150 // Some threads could still be in the fork barrier release code, possibly
2151 // trying to steal tasks. Wait for each thread to unreference its task team.
2152 //
2153 void
2154 __kmp_wait_to_unref_task_teams(void)
2155 {
2156  kmp_info_t *thread;
2157  kmp_uint32 spins;
2158  int done;
2159 
2160  KMP_INIT_YIELD( spins );
2161 
2162 
2163  for (;;) {
2164  done = TRUE;
2165 
2166  // TODO: GEH - this may be is wrong because some sync would be necessary
2167  // in case threads are added to the pool during the traversal.
2168  // Need to verify that lock for thread pool is held when calling
2169  // this routine.
2170  for (thread = (kmp_info_t *)__kmp_thread_pool;
2171  thread != NULL;
2172  thread = thread->th.th_next_pool)
2173  {
2174  volatile kmp_uint *sleep_loc;
2175 #if KMP_OS_WINDOWS
2176  DWORD exit_val;
2177 #endif
2178  if ( TCR_PTR(thread->th.th_task_team) == NULL ) {
2179  KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n",
2180  __kmp_gtid_from_thread( thread ) ) );
2181  continue;
2182  }
2183 #if KMP_OS_WINDOWS
2184  // TODO: GEH - add this check for Linux* OS / OS X* as well?
2185  if (!__kmp_is_thread_alive(thread, &exit_val)) {
2186  if (TCR_PTR(thread->th.th_task_team) != NULL) {
2187  __kmp_unref_task_team( thread->th.th_task_team, thread );
2188  }
2189  continue;
2190  }
2191 #endif
2192 
2193  done = FALSE; // Because th_task_team pointer is not NULL for this thread
2194 
2195  KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n",
2196  __kmp_gtid_from_thread( thread ) ) );
2197 
2198  if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) {
2199  // If the thread is sleeping, awaken it.
2200  if ( ( sleep_loc = (volatile kmp_uint *) TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) {
2201  KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n",
2202  __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) );
2203  __kmp_resume( __kmp_gtid_from_thread( thread ), sleep_loc );
2204  }
2205  }
2206  }
2207  if (done) {
2208  break;
2209  }
2210 
2211  // If we are oversubscribed,
2212  // or have waited a bit (and library mode is throughput), yield.
2213  // Pause is in the following code.
2214  KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2215  KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput
2216  }
2217 
2218 
2219 }
2220 
2221 
2222 //------------------------------------------------------------------------------
2223 // __kmp_task_team_setup: Create a task_team for the current team, but use
2224 // an already created, unused one if it already exists.
2225 // This may be called by any thread, but only for teams with # threads >1.
2226 
2227 void
2228 __kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team )
2229 {
2230  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2231 
2232  if ( ( team->t.t_task_team == NULL ) && ( team->t.t_nproc > 1 ) ) {
2233  // Allocate a new task team, which will be propagated to
2234  // all of the worker threads after the barrier. As they
2235  // spin in the barrier release phase, then will continue
2236  // to use the previous task team struct, until they receive
2237  // the signal to stop checking for tasks (they can't safely
2238  // reference the kmp_team_t struct, which could be reallocated
2239  // by the master thread).
2240  team->t.t_task_team = __kmp_allocate_task_team( this_thr, team );
2241  KA_TRACE( 20, ( "__kmp_task_team_setup: Master T#%d created new "
2242  "task_team %p for team %d\n",
2243  __kmp_gtid_from_thread( this_thr ), team->t.t_task_team,
2244  ((team != NULL) ? team->t.t_id : -1)) );
2245  }
2246  else {
2247  // All threads have reported in, and no tasks were spawned
2248  // for this release->gather region. Leave the old task
2249  // team struct in place for the upcoming region. No task
2250  // teams are formed for serialized teams.
2251  }
2252  if ( team->t.t_task_team != NULL ) {
2253  // Toggle the state flag so that we can tell which side of
2254  // the barrier we are on.
2255  team->t.t_task_team->tt.tt_state = 1 - this_thr->th.th_task_state;
2256  }
2257 }
2258 
2259 
2260 //------------------------------------------------------------------------------
2261 // __kmp_task_team_sync: Propagation of task team data from team to threads
2262 // which happens just after the release phase of a team barrier. This may be
2263 // called by any thread, but only for teams with # threads > 1.
2264 
2265 void
2266 __kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team )
2267 {
2268  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2269 
2270  // On the rare chance that this thread never saw that the task
2271  // team was no longer active, then unref/deallocate it now.
2272  if ( this_thr->th.th_task_team != NULL ) {
2273  if ( ! TCR_SYNC_4( this_thr->th.th_task_team->tt.tt_active ) ) {
2274  KMP_DEBUG_ASSERT( ! KMP_MASTER_TID( __kmp_tid_from_gtid( __kmp_gtid_from_thread( this_thr ) ) ) );
2275  __kmp_unref_task_team( this_thr->th.th_task_team, this_thr );
2276  } else {
2277  //
2278  // We are re-using a task team that was never enabled.
2279  //
2280  KMP_DEBUG_ASSERT( this_thr->th.th_task_team == team->t.t_task_team );
2281  }
2282  }
2283 
2284  //
2285  // It is now safe to propagate the task team pointer from the
2286  // team struct to the current thread.
2287  //
2288  TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team);
2289  if ( this_thr->th.th_task_team != NULL ) {
2290  //
2291  // Toggle the th_task_state field, instead of reading it from
2292  // the task team. Reading the tt_state field at this point
2293  // causes a 30% regression on EPCC parallel - toggling it
2294  // is much cheaper.
2295  //
2296  this_thr->th.th_task_state = 1 - this_thr->th.th_task_state;
2297  KMP_DEBUG_ASSERT( this_thr->th.th_task_state == TCR_4(team->t.t_task_team->tt.tt_state) );
2298  }
2299  KA_TRACE( 20, ( "__kmp_task_team_sync: Thread T#%d task team assigned pointer (%p) from Team #%d task team\n",
2300  __kmp_gtid_from_thread( this_thr ), &this_thr->th.th_task_team,
2301  this_thr->th.th_task_team, ((team != NULL) ? (team->t.t_id) : -1) ) );
2302 }
2303 
2304 
2305 //------------------------------------------------------------------------------
2306 // __kmp_task_team_wait: Master thread waits for outstanding tasks after
2307 // the barrier gather phase. Only called by master thread if #threads
2308 // in team > 1 !
2309 
2310 void
2311 __kmp_task_team_wait( kmp_info_t *this_thr,
2312  kmp_team_t *team
2313 #if USE_ITT_BUILD
2314  , void * itt_sync_obj
2315 #endif /* USE_ITT_BUILD */
2316  )
2317 {
2318  kmp_task_team_t *task_team = team->t.t_task_team;
2319 
2320  KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec );
2321  KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team );
2322 
2323  if ( ( task_team != NULL ) && KMP_TASKING_ENABLED( task_team, this_thr->th.th_task_state ) ) {
2324  KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d waiting for all tasks: task_team = %p\n",
2325  __kmp_gtid_from_thread( this_thr ), task_team ) );
2326  //
2327  // All worker threads might have dropped through to the
2328  // release phase, but could still be executing tasks.
2329  // Wait here for all tasks to complete. To avoid memory
2330  // contention, only the master thread checks for the
2331  // termination condition.
2332  //
2333  __kmp_wait_sleep( this_thr, &task_team->tt.tt_unfinished_threads, 0, TRUE
2334 #if USE_ITT_BUILD
2335  , itt_sync_obj
2336 #endif /* USE_ITT_BUILD */
2337  );
2338 
2339  //
2340  // Kill the old task team, so that the worker threads will
2341  // stop referencing it while spinning. They will
2342  // deallocate it when the reference count reaches zero.
2343  // The master thread is not included in the ref count.
2344  //
2345  KA_TRACE( 20, ( "__kmp_task_team_wait: Master T#%d deactivating task_team %p\n",
2346  __kmp_gtid_from_thread( this_thr ), task_team ) );
2347  KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 );
2348  TCW_SYNC_4( task_team->tt.tt_active, FALSE );
2349  KMP_MB();
2350 
2351  TCW_PTR(this_thr->th.th_task_team, NULL);
2352  team->t.t_task_team = NULL;
2353  }
2354 }
2355 
2356 
2357 //------------------------------------------------------------------------------
2358 // __kmp_tasking_barrier:
2359 // Internal function to execute all tasks prior to a regular barrier or a
2360 // join barrier. It is a full barrier itself, which unfortunately turns
2361 // regular barriers into double barriers and join barriers into 1 1/2
2362 // barriers.
2363 // This routine may only be called when __kmp_tasking_mode == tskm_extra_barrier.
2364 
2365 void
2366 __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid )
2367 {
2368  volatile kmp_uint32 *spin = &team->t.t_task_team->tt.tt_unfinished_threads;
2369  int flag = FALSE;
2370  KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier );
2371 
2372 #if USE_ITT_BUILD
2373  KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL );
2374 #endif /* USE_ITT_BUILD */
2375  while (! __kmp_execute_tasks( thread, gtid, spin, 0, TRUE, &flag, NULL ) ) {
2376 #if USE_ITT_BUILD
2377  // TODO: What about itt_sync_obj??
2378  KMP_FSYNC_SPIN_PREPARE( spin );
2379 #endif /* USE_ITT_BUILD */
2380 
2381  if( TCR_4(__kmp_global.g.g_done) ) {
2382  if( __kmp_global.g.g_abort )
2383  __kmp_abort_thread( );
2384  break;
2385  }
2386  KMP_YIELD( TRUE ); // GH: We always yield here
2387  }
2388 #if USE_ITT_BUILD
2389  KMP_FSYNC_SPIN_ACQUIRED( (void*) spin );
2390 #endif /* USE_ITT_BUILD */
2391 }
2392 
2393 #endif // OMP_30_ENABLED
2394