53 #include "kmp_error.h"
54 #if KMP_OS_WINDOWS && KMP_ARCH_X86
61 #ifdef KMP_STATIC_STEAL_ENABLED
// Loop-dispatch state record, templated on the loop index type T.
// NOTE(review): this definition follows an #ifdef KMP_STATIC_STEAL_ENABLED line,
// so it is presumably the variant used when static stealing is enabled - confirm
// against the full preprocessor structure. Most member lines are not visible here.
64 template<
typename T >
65 struct dispatch_private_infoXX_template {
// Unsigned and signed counterparts of T, as provided by traits_t.
66 typedef typename traits_t< T >::unsigned_t UT;
67 typedef typename traits_t< T >::signed_t ST;
// Counter of type T; elsewhere in this file a thread compares its own value
// with the value held by a steal victim before taking work from it.
74 T static_steal_counter;
// Anonymous nested struct declared with KMP_ALIGN( 32 ); its members are not visible here.
84 struct KMP_ALIGN( 32 ) {
// Second definition of the dispatch state record with the same name and template
// parameter as the earlier one in this file.
// NOTE(review): presumably the alternative preprocessor branch (static stealing
// disabled) - confirm; the member list is not visible in this excerpt.
101 template<
typename T >
102 struct dispatch_private_infoXX_template {
// Unsigned and signed counterparts of T, as provided by traits_t.
103 typedef typename traits_t< T >::unsigned_t UT;
104 typedef typename traits_t< T >::signed_t ST;
// Typed view of a private dispatch buffer. The dispatch-init code in this file
// build-asserts that its size equals sizeof( dispatch_private_info ), and the
// code reaches it by reinterpret_cast from the untyped buffer pointers.
127 template<
typename T >
128 struct KMP_ALIGN_CACHE dispatch_private_info_template {
// Union overlaying the T-typed record (p) with the 64-bit record (p64).
130 union KMP_ALIGN_CACHE private_info_tmpl {
131 dispatch_private_infoXX_template< T > p;
132 dispatch_private_info64_t p64;
// Incremented by the ordered-exit hook and reset to zero by the finish routines.
136 kmp_uint32 ordered_bumped;
// Array of KMP_MAX_ORDERED-3 kmp_int32 entries; not referenced in the visible code
// (presumably padding - confirm).
137 kmp_int32 ordered_dummy[KMP_MAX_ORDERED-3];
// Pointer to another untyped private dispatch record; usage is not visible here.
138 dispatch_private_info * next;
// Set by the dispatch-init code to sizeof of the unsigned index type.
140 kmp_uint32 type_size;
// Construct kind recorded for consistency checking; ct_none means nothing was pushed.
141 enum cons_type pushed_ws;
// Dispatch counters shared between threads, templated on the unsigned index type UT.
// All three counters are declared volatile.
146 template<
typename UT >
147 struct dispatch_shared_infoXX_template {
// Updated through test_then_inc_acq / test_then_add / compare_and_swap by the
// dynamic and guided paths of the dispatch-next code in this file.
150 volatile UT iteration;
// No use of num_done is visible in this excerpt.
151 volatile UT num_done;
// Waited on by the ordered-entry hook and incremented by the ordered-exit hook
// and the finish routines.
152 volatile UT ordered_iteration;
// Array of KMP_MAX_ORDERED-1 UT entries; not referenced in the visible code
// (presumably padding - confirm).
153 UT ordered_dummy[KMP_MAX_ORDERED-1];
// Typed view of a shared dispatch buffer. The dispatch-init code in this file
// build-asserts that its size equals sizeof( dispatch_shared_info ).
157 template<
typename UT >
158 struct dispatch_shared_info_template {
// Union overlaying the UT-typed counters (s) with the 64-bit record (s64).
160 union shared_info_tmpl {
161 dispatch_shared_infoXX_template< UT > s;
162 dispatch_shared_info64_t s64;
// The dispatch-init code spins until this equals the index of the buffer it
// wants to use before publishing the buffer as current.
164 volatile kmp_uint32 buffer_index;
// Delay helper called from the spin loop of __kmp_wait_yield in this file.
// Only an argument check is visible in this excerpt: when building for
// KMP_ARCH_X86_64 on KMP_OS_LINUX, arg is asserted to be non-negative.
// NOTE(review): return type and the rest of the body are not visible here.
171 __kmp_static_delay(
int arg )
174 #if KMP_ARCH_X86_64 && KMP_OS_LINUX
176 KMP_ASSERT( arg >= 0 );
// Takes a single int argument.
// NOTE(review): only the parameter list is visible in this excerpt; the return
// type and body need to be checked in the full file before documenting further.
181 __kmp_static_yield(
int arg )
186 #undef USE_TEST_LOCKS
// Type-dispatched wrappers around the KMP_TEST_THEN_ADD32 / KMP_TEST_THEN_ADD64 macros.
// The primary template only executes KMP_ASSERT(0), so calling it for a type
// without a dedicated version below trips the assertion.
189 template<
typename T >
190 static __forceinline T
191 test_then_add(
volatile T *p, T d ) { KMP_ASSERT(0); };
// kmp_int32 version: result of KMP_TEST_THEN_ADD32( p, d ) is stored in r.
// NOTE(review): the declaration of r and the return are not visible in this excerpt.
194 __forceinline kmp_int32
195 test_then_add< kmp_int32 >(
volatile kmp_int32 *p, kmp_int32 d )
198 r = KMP_TEST_THEN_ADD32( p, d );
// kmp_int64 version: same shape, using KMP_TEST_THEN_ADD64.
203 __forceinline kmp_int64
204 test_then_add< kmp_int64 >(
volatile kmp_int64 *p, kmp_int64 d )
207 r = KMP_TEST_THEN_ADD64( p, d );
// Type-dispatched wrappers around the KMP_TEST_THEN_INC_ACQ32 / KMP_TEST_THEN_INC_ACQ64 macros.
// The primary template only executes KMP_ASSERT(0); the typed versions below do the work.
212 template<
typename T >
213 static __forceinline T
214 test_then_inc_acq(
volatile T *p ) { KMP_ASSERT(0); };
// kmp_int32 version: result of KMP_TEST_THEN_INC_ACQ32( p ) is stored in r.
// NOTE(review): the declaration of r and the return are not visible in this excerpt.
217 __forceinline kmp_int32
218 test_then_inc_acq< kmp_int32 >(
volatile kmp_int32 *p )
221 r = KMP_TEST_THEN_INC_ACQ32( p );
// kmp_int64 version: same shape, using KMP_TEST_THEN_INC_ACQ64.
226 __forceinline kmp_int64
227 test_then_inc_acq< kmp_int64 >(
volatile kmp_int64 *p )
230 r = KMP_TEST_THEN_INC_ACQ64( p );
// Type-dispatched wrappers around the KMP_TEST_THEN_INC32 / KMP_TEST_THEN_INC64 macros.
// The primary template only executes KMP_ASSERT(0); the typed versions below do the work.
235 template<
typename T >
236 static __forceinline T
237 test_then_inc(
volatile T *p ) { KMP_ASSERT(0); };
// kmp_int32 version: result of KMP_TEST_THEN_INC32( p ) is stored in r.
// NOTE(review): the declaration of r and the return are not visible in this excerpt.
240 __forceinline kmp_int32
241 test_then_inc< kmp_int32 >(
volatile kmp_int32 *p )
244 r = KMP_TEST_THEN_INC32( p );
// kmp_int64 version: same shape, using KMP_TEST_THEN_INC64.
249 __forceinline kmp_int64
250 test_then_inc< kmp_int64 >(
volatile kmp_int64 *p )
253 r = KMP_TEST_THEN_INC64( p );
// Type-dispatched wrappers around KMP_COMPARE_AND_STORE_REL32 / KMP_COMPARE_AND_STORE_REL64.
// Arguments: p is the target location, c the compare value, s the value to store.
// Every version returns kmp_int32 regardless of the operand width.
// The primary template only executes KMP_ASSERT(0).
258 template<
typename T >
259 static __forceinline kmp_int32
260 compare_and_swap(
volatile T *p, T c, T s ) { KMP_ASSERT(0); };
// kmp_int32 version: returns the macro result directly.
263 __forceinline kmp_int32
264 compare_and_swap< kmp_int32 >(
volatile kmp_int32 *p, kmp_int32 c, kmp_int32 s )
266 return KMP_COMPARE_AND_STORE_REL32( p, c, s );
// kmp_int64 version: returns the macro result directly.
270 __forceinline kmp_int32
271 compare_and_swap< kmp_int64 >(
volatile kmp_int64 *p, kmp_int64 c, kmp_int64 s )
273 return KMP_COMPARE_AND_STORE_REL64( p, c, s );
// Spin-wait helper: repeatedly loads *spinner and evaluates pred( value, checker )
// until the predicate returns non-zero.
//   spinner - location to poll
//   pred    - predicate taking ( loaded value, checker )
//   obj     - extra argument present only through USE_ITT_BUILD_ARG, forwarded to
//             the KMP_FSYNC_* macros
// NOTE(review): the checker parameter line, the return type and the return
// statement are not visible in this excerpt; r is presumably returned - confirm.
289 template<
typename UT >
292 __kmp_wait_yield(
volatile UT * spinner,
294 kmp_uint32 (* pred)( UT, UT )
295 USE_ITT_BUILD_ARG(
void * obj)
// Local copies of the arguments used inside the loop.
299 register volatile UT * spin = spinner;
300 register UT check = checker;
301 register kmp_uint32 spins;
302 register kmp_uint32 (*f) ( UT, UT ) = pred;
// Set up the sync-tracing macro and the yield counter before spinning.
305 KMP_FSYNC_SPIN_INIT( obj, (
void*) spin );
306 KMP_INIT_YIELD( spins );
// Two spellings of the same loop header: with ITT on Windows the load goes
// through an explicit volatile cast, otherwise spin is dereferenced directly.
308 #if USE_ITT_BUILD && defined (USE_ITT) && KMP_OS_WINDOWS
311 while(!f(r = *(
volatile UT *)spin, check))
313 while(!f(r = *spin, check))
// Loop body: notify the sync-tracing macro, delay, then yield when there are
// more threads ( __kmp_nth ) than available processors ( __kmp_avail_proc ).
316 KMP_FSYNC_SPIN_PREPARE( obj );
322 __kmp_static_delay(TRUE);
327 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
328 KMP_YIELD_SPIN( spins );
// Reported once the predicate has been satisfied.
330 KMP_FSYNC_SPIN_ACQUIRED( obj );
// Predicate for __kmp_wait_yield: non-zero when value equals checker.
334 template<
typename UT >
335 static kmp_uint32 __kmp_eq( UT value, UT checker) {
336 return value == checker;
// Comparison predicate with the ( value, checker ) signature: non-zero when
// value differs from checker.
339 template<
typename UT >
340 static kmp_uint32 __kmp_neq( UT value, UT checker) {
341 return value != checker;
// Comparison predicate with the ( value, checker ) signature: non-zero when
// value is strictly less than checker.
344 template<
typename UT >
345 static kmp_uint32 __kmp_lt( UT value, UT checker) {
346 return value < checker;
// Predicate for __kmp_wait_yield: non-zero when value is greater than or equal
// to checker. The ordered hooks in this file use it to wait for ordered_iteration
// to reach a lower bound.
349 template<
typename UT >
350 static kmp_uint32 __kmp_ge( UT value, UT checker) {
351 return value >= checker;
// Comparison predicate with the ( value, checker ) signature: non-zero when
// value is less than or equal to checker.
354 template<
typename UT >
355 static kmp_uint32 __kmp_le( UT value, UT checker) {
356 return value <= checker;
// Ordered-entry hook that the dispatch-init code installs as th_deo_fcn when the
// loop is not ordered ( pr->ordered == 0 ). It does no waiting; it only feeds
// the consistency checker.
//   gtid_ref - pointer to the global thread id (must be non-null)
//   cid_ref  - not used in the visible lines
//   loc_ref  - source location passed on to __kmp_push_sync
364 __kmp_dispatch_deo_error(
int *gtid_ref,
int *cid_ref,
ident_t *loc_ref )
368 KMP_DEBUG_ASSERT( gtid_ref );
// Only active when consistency checking is enabled: if the root is active and a
// workshare was recorded for the current dispatch buffer, register an
// ordered-in-loop sync construct.
370 if ( __kmp_env_consistency_check ) {
371 th = __kmp_threads[*gtid_ref];
372 if ( th -> th.th_root -> r.r_active
373 && ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) ) {
374 __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL );
// Ordered-entry hook assigned to th_deo_fcn by the dispatch-init code in this file.
// In a non-serialized team it blocks until the shared ordered_iteration counter
// has reached this thread's ordered_lower bound.
//   gtid_ref - pointer to the global thread id
//   cid_ref  - not used in the visible lines
//   loc_ref  - source location used for consistency-check reporting
379 template<
typename UT >
381 __kmp_dispatch_deo(
int *gtid_ref,
int *cid_ref,
ident_t *loc_ref )
383 typedef typename traits_t< UT >::signed_t ST;
384 dispatch_private_info_template< UT > * pr;
386 int gtid = *gtid_ref;
388 kmp_info_t *th = __kmp_threads[ gtid ];
389 KMP_DEBUG_ASSERT( th -> th.th_dispatch );
391 KD_TRACE(100, (
"__kmp_dispatch_deo: T#%d called\n", gtid ) );
// With consistency checking on, fetch the current private buffer and register
// an ordered-in-loop sync construct if a workshare was recorded for it.
392 if ( __kmp_env_consistency_check ) {
393 pr =
reinterpret_cast< dispatch_private_info_template< UT >*
>
394 ( th -> th.th_dispatch -> th_dispatch_pr_current );
395 if ( pr -> pushed_ws != ct_none ) {
396 __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL );
// The wait below is only needed when the team is not serialized.
400 if ( ! th -> th.th_team -> t.t_serialized ) {
401 dispatch_shared_info_template< UT > * sh =
reinterpret_cast< dispatch_shared_info_template< UT >*
>
402 ( th -> th.th_dispatch -> th_dispatch_sh_current );
// pr was already loaded above when consistency checking is on; load it here otherwise.
405 if ( ! __kmp_env_consistency_check ) {
406 pr =
reinterpret_cast< dispatch_private_info_template< UT >*
>
407 ( th -> th.th_dispatch -> th_dispatch_pr_current );
409 lower = pr->u.p.ordered_lower;
// Outside GOMP-compat builds, a non-zero ordered_bumped at entry is reported
// as a CnsMultipleNesting consistency error.
411 #if ! defined( KMP_GOMP_COMPAT )
412 if ( __kmp_env_consistency_check ) {
413 if ( pr->ordered_bumped ) {
414 struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
415 __kmp_error_construct2(
416 kmp_i18n_msg_CnsMultipleNesting,
417 ct_ordered_in_pdo, loc_ref,
418 & p->stack_data[ p->w_top ]
// Debug trace of the counter and the bound before waiting.
429 buff = __kmp_str_format(
430 "__kmp_dispatch_deo: T#%%d before wait: ordered_iter:%%%s lower:%%%s\n",
431 traits_t< UT >::spec, traits_t< UT >::spec );
432 KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
433 __kmp_str_free( &buff );
// Spin until ordered_iteration >= lower.
437 __kmp_wait_yield< UT >( &sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
438 USE_ITT_BUILD_ARG( NULL )
// Debug trace of the same values after the wait.
445 buff = __kmp_str_format(
446 "__kmp_dispatch_deo: T#%%d after wait: ordered_iter:%%%s lower:%%%s\n",
447 traits_t< UT >::spec, traits_t< UT >::spec );
448 KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
449 __kmp_str_free( &buff );
453 KD_TRACE(100, (
"__kmp_dispatch_deo: T#%d returned\n", gtid ) );
// Ordered-exit hook that the dispatch-init code installs as th_dxo_fcn when the
// loop is not ordered ( pr->ordered == 0 ). It only feeds the consistency checker.
//   gtid_ref - pointer to the global thread id
//   cid_ref  - not used in the visible lines
//   loc_ref  - source location passed on to __kmp_pop_sync
457 __kmp_dispatch_dxo_error(
int *gtid_ref,
int *cid_ref,
ident_t *loc_ref )
// With consistency checking on, pop the ordered-in-loop sync construct if a
// workshare was recorded for the current dispatch buffer.
461 if ( __kmp_env_consistency_check ) {
462 th = __kmp_threads[*gtid_ref];
463 if ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) {
464 __kmp_pop_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref );
// Ordered-exit hook assigned to th_dxo_fcn by the dispatch-init code in this file.
// In a non-serialized team it bumps the private ordered_bumped count and
// increments the shared ordered_iteration counter.
//   gtid_ref - pointer to the global thread id
//   cid_ref  - not used in the visible lines
//   loc_ref  - source location used for consistency-check reporting
469 template<
typename UT >
471 __kmp_dispatch_dxo(
int *gtid_ref,
int *cid_ref,
ident_t *loc_ref )
473 typedef typename traits_t< UT >::signed_t ST;
474 dispatch_private_info_template< UT > * pr;
476 int gtid = *gtid_ref;
478 kmp_info_t *th = __kmp_threads[ gtid ];
479 KMP_DEBUG_ASSERT( th -> th.th_dispatch );
481 KD_TRACE(100, (
"__kmp_dispatch_dxo: T#%d called\n", gtid ) );
// With consistency checking on, fetch the current private buffer and pop the
// ordered-in-loop sync construct if a workshare was recorded for it.
482 if ( __kmp_env_consistency_check ) {
483 pr =
reinterpret_cast< dispatch_private_info_template< UT >*
>
484 ( th -> th.th_dispatch -> th_dispatch_pr_current );
485 if ( pr -> pushed_ws != ct_none ) {
486 __kmp_pop_sync( gtid, ct_ordered_in_pdo, loc_ref );
// The counter update below only happens when the team is not serialized.
490 if ( ! th -> th.th_team -> t.t_serialized ) {
491 dispatch_shared_info_template< UT > * sh =
reinterpret_cast< dispatch_shared_info_template< UT >*
>
492 ( th -> th.th_dispatch -> th_dispatch_sh_current );
// pr was already loaded above when consistency checking is on; load it here otherwise.
494 if ( ! __kmp_env_consistency_check ) {
495 pr =
reinterpret_cast< dispatch_private_info_template< UT >*
>
496 ( th -> th.th_dispatch -> th_dispatch_pr_current );
499 KMP_FSYNC_RELEASING( & sh->u.s.ordered_iteration );
// Outside GOMP-compat builds, a non-zero ordered_bumped at this point is
// reported as a CnsMultipleNesting consistency error.
500 #if ! defined( KMP_GOMP_COMPAT )
501 if ( __kmp_env_consistency_check ) {
502 if ( pr->ordered_bumped != 0 ) {
503 struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons;
505 __kmp_error_construct2(
506 kmp_i18n_msg_CnsMultipleNesting,
507 ct_ordered_in_pdo, loc_ref,
508 & p->stack_data[ p->w_top ]
// Record that this thread has passed one more ordered section.
516 pr->ordered_bumped += 1;
518 KD_TRACE(1000, (
"__kmp_dispatch_dxo: T#%d bumping ordered ordered_bumped=%d\n",
519 gtid, pr->ordered_bumped ) );
// Increment the shared counter through its signed alias; threads blocked in the
// ordered-entry hook poll this counter.
524 test_then_inc< ST >( (
volatile ST *) & sh->u.s.ordered_iteration );
528 KD_TRACE(100, (
"__kmp_dispatch_dxo: T#%d returned\n", gtid ) );
// Helper taking a long double x and an unsigned exponent-like argument y and
// returning long double. Callers in this file multiply its result by a trip
// count and compare it against a target ratio.
// NOTE(review): presumably computes x raised to the power y - the computation
// itself is not visible in this excerpt; confirm in the full file.
532 template<
typename UT >
533 static __forceinline
long double
534 __kmp_pow(
long double x, UT y) {
// Debug-build precondition: x must lie strictly between 0 and 1.
537 KMP_DEBUG_ASSERT(x > 0.0 && x < 1.0);
// Helper for the analytical guided schedule; returns the unsigned counterpart of T.
//   base - floating-point base passed to __kmp_pow
//   idx  - unsigned index passed to __kmp_pow
// NOTE(review): the first parameter (used below as tc) and the conversion of x
// to the return value are not visible in this excerpt.
553 template<
typename T >
554 static __inline
typename traits_t< T >::unsigned_t
555 __kmp_dispatch_guided_remaining(
557 typename traits_t< T >::floating_t base,
558 typename traits_t< T >::unsigned_t idx
567 typedef typename traits_t< T >::unsigned_t UT;
// Scale tc by __kmp_pow( base, idx ), computed in long double.
569 long double x = tc * __kmp_pow< UT >(base, idx);
// Tunables for the iterative guided schedule. The dispatch-init code in this file
// stores guided_int_param * nproc * ( chunk + 1 ) in parm2 and
// guided_flt_param / nproc (as a double) in parm3.
581 static int guided_int_param = 2;
582 static double guided_flt_param = 0.5;
// Loop-dispatch initialisation routine, templated on the loop index type T.
// NOTE(review): the line carrying the function name is not visible in this
// excerpt; the trace strings below identify it as __kmp_dispatch_init. Only the
// st and chunk parameters are visible; gtid, loc, schedule, lb and ub are used in
// the body, so they are presumably parameters too - confirm in the full file.
// Visible work: resolve the effective schedule, compute per-schedule parameters
// into the private dispatch buffer, install the ordered hooks, wait for the
// shared buffer to become available and publish both buffers as current.
586 template<
typename T >
594 typename traits_t< T >::signed_t st,
595 typename traits_t< T >::signed_t chunk,
598 typedef typename traits_t< T >::unsigned_t UT;
599 typedef typename traits_t< T >::signed_t ST;
600 typedef typename traits_t< T >::floating_t DBL;
// Width in bytes of the unsigned index type; recorded in pr->type_size below.
601 static const int ___kmp_size_type =
sizeof( UT );
607 kmp_uint32 my_buffer_index;
608 dispatch_private_info_template< T > * pr;
609 dispatch_shared_info_template< UT >
volatile * sh;
// The typed views must have exactly the size of the untyped buffers they are cast from.
611 KMP_BUILD_ASSERT(
sizeof( dispatch_private_info_template< T > ) ==
sizeof( dispatch_private_info ) );
612 KMP_BUILD_ASSERT(
sizeof( dispatch_shared_info_template< UT > ) ==
sizeof( dispatch_shared_info ) );
// Make sure the parallel part of the runtime has been initialised.
614 if ( ! TCR_4( __kmp_init_parallel ) )
615 __kmp_parallel_initialize();
621 buff = __kmp_str_format(
622 "__kmp_dispatch_init: T#%%d called: schedule:%%d chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n",
623 traits_t< ST >::spec, traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
624 KD_TRACE(10, ( buff, gtid, schedule, chunk, lb, ub, st ) );
625 __kmp_str_free( &buff );
// Look up the calling thread and its team; active is true when the team is not
// serialized. The source location is recorded on the thread.
629 th = __kmp_threads[ gtid ];
630 team = th -> th.th_team;
631 active = ! team -> t.t_serialized;
632 th->th.th_ident = loc;
// Use the start of the thread's private dispatch buffer array.
// NOTE(review): presumably the serialized-team path - the enclosing condition is not visible.
635 pr =
reinterpret_cast< dispatch_private_info_template< T >*
>
636 ( th -> th.th_dispatch -> th_disp_buffer );
638 KMP_DEBUG_ASSERT( th->th.th_dispatch ==
639 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );
// Take the next buffer index for this thread (post-increment of th_disp_index).
641 my_buffer_index = th->th.th_dispatch->th_disp_index ++;
// Select the private and shared buffers for that index, wrapping at KMP_MAX_DISP_BUF.
644 pr =
reinterpret_cast< dispatch_private_info_template< T > *
>
645 ( &th -> th.th_dispatch -> th_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] );
646 sh =
reinterpret_cast< dispatch_shared_info_template< UT >
volatile *
>
647 ( &team -> t.t_disp_buffer[ my_buffer_index % KMP_MAX_DISP_BUF ] );
657 pr->type_size = ___kmp_size_type;
// Schedule resolution. The conditions selecting between these assignments are
// mostly not visible in this excerpt.
665 schedule = __kmp_static;
// A runtime schedule is replaced by the schedule and chunk stored on the team
// or returned by __kmp_get_schedule_global().
667 if ( schedule == kmp_sch_runtime ) {
670 schedule = team -> t.t_sched.r_sched_type;
673 schedule = __kmp_guided;
675 schedule = __kmp_static;
678 chunk = team -> t.t_sched.chunk;
680 kmp_r_sched_t r_sched = __kmp_get_schedule_global();
682 schedule = r_sched.r_sched_type;
683 chunk = r_sched.chunk;
690 buff = __kmp_str_format(
691 "__kmp_dispatch_init: T#%%d new: schedule:%%d chunk:%%%s\n",
692 traits_t< ST >::spec );
693 KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
694 __kmp_str_free( &buff );
699 schedule = __kmp_guided;
702 chunk = KMP_DEFAULT_CHUNK;
709 schedule = __kmp_auto;
714 buff = __kmp_str_format(
715 "__kmp_dispatch_init: kmp_sch_auto: T#%%d new: schedule:%%d chunk:%%%s\n",
716 traits_t< ST >::spec );
717 KD_TRACE(10, ( buff, gtid, schedule, chunk ) );
718 __kmp_str_free( &buff );
722 #endif // OMP_30_ENABLED
// With more than 1<<20 threads the analytical guided schedule is replaced by
// the iterative one and a warning is issued.
725 if ( team->t.t_nproc > 1<<20 && schedule == kmp_sch_guided_analytical_chunked ) {
726 schedule = kmp_sch_guided_iterative_chunked;
727 KMP_WARNING( DispatchManyThreads );
729 pr->u.p.parm1 = chunk;
732 "unknown scheduling type" );
// Consistency check reporting a prohibited zero loop increment; the test that
// triggers it is not visible in this excerpt.
736 if ( __kmp_env_consistency_check ) {
738 __kmp_error_construct(
739 kmp_i18n_msg_CnsLoopIncrZeroProhibited,
740 ( pr->ordered ? ct_pdo_ordered : ct_pdo ), loc
// Trip count computation; only fragments of the per-stride cases are visible.
745 tc = ( ub - lb + st );
760 }
else if ( ub < lb ) {
770 pr->u.p.last_upper = ub + st;
// Install the ordered entry/exit hooks: the *_error variants when the loop is
// not ordered, the real hooks (after resetting the ordered bookkeeping) otherwise.
// NOTE(review): the else separating the two groups is not visible here.
776 if ( pr->ordered == 0 ) {
777 th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo_error;
778 th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo_error;
780 pr->ordered_bumped = 0;
782 pr->u.p.ordered_lower = 1;
783 pr->u.p.ordered_upper = 0;
785 th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo< UT >;
786 th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo< UT >;
// Consistency checking: either push the workshare or only check it; after the
// check-only call nothing is recorded as pushed ( ct_none ).
790 if ( __kmp_env_consistency_check ) {
791 enum cons_type ws = pr->ordered ? ct_pdo_ordered : ct_pdo;
793 __kmp_push_workshare( gtid, ws, loc );
796 __kmp_check_workshare( gtid, ws, loc );
797 pr->pushed_ws = ct_none;
// Per-schedule parameter setup.
801 switch ( schedule ) {
802 #if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
805 T nproc = team->t.t_nproc;
808 KD_TRACE(100, (
"__kmp_dispatch_init: T#%d kmp_sch_static_steal case\n", gtid ) );
// Static steal: ntc is the number of chunks (tc / chunk rounded up). When there
// are at least as many chunks as threads, each thread gets small_chunk chunks
// and the first extras threads get one more; count and ub hold this thread's
// chunk range. Otherwise the schedule is changed to static balanced.
810 ntc = (tc % chunk ? 1 : 0) + tc / chunk;
811 if ( nproc > 1 && ntc >= nproc ) {
812 T
id = __kmp_tid_from_gtid(gtid);
813 T small_chunk, extras;
815 small_chunk = ntc / nproc;
816 extras = ntc % nproc;
818 init =
id * small_chunk + (
id < extras ?
id : extras );
819 pr->u.p.count = init;
820 pr->u.p.ub = init + small_chunk + (
id < extras ? 1 : 0 );
828 KD_TRACE(100, (
"__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_balanced\n",
830 schedule = kmp_sch_static_balanced;
// Static balanced: iterations are split into nproc contiguous ranges, with the
// first tc % nproc threads getting one extra iteration. parm1 is later read by
// the dispatch-next code as the last-chunk flag.
836 case kmp_sch_static_balanced:
838 T nproc = team->t.t_nproc;
841 KD_TRACE(100, (
"__kmp_dispatch_init: T#%d kmp_sch_static_balanced case\n",
845 T
id = __kmp_tid_from_gtid(gtid);
851 pr->u.p.parm1 = (
id == tc - 1);
854 pr->u.p.parm1 = FALSE;
858 T small_chunk = tc / nproc;
859 T extras = tc % nproc;
860 init =
id * small_chunk + (
id < extras ?
id : extras);
861 limit = init + small_chunk - (
id < extras ? 0 : 1);
862 pr->u.p.parm1 = (
id == nproc - 1);
868 pr->u.p.parm1 = TRUE;
872 pr->u.p.parm1 = FALSE;
// Convert the iteration range [init, limit] into loop bounds; the second form
// scales by the stride and clamps the upper bound against ub.
877 pr->u.p.lb = lb + init;
878 pr->u.p.ub = lb + limit;
880 T ub_tmp = lb + limit * st;
881 pr->u.p.lb = lb + init * st;
884 pr->u.p.ub = ( ub_tmp + st > ub ? ub : ub_tmp );
886 pr->u.p.ub = ( ub_tmp + st < ub ? ub : ub_tmp );
890 pr->u.p.ordered_lower = init;
891 pr->u.p.ordered_upper = limit;
// Guided (iterative): when ( 2 * chunk + 1 ) * nproc covers the whole trip count
// the schedule becomes dynamic chunked; parm2 and parm3 are derived from the
// guided_int_param / guided_flt_param tunables.
895 case kmp_sch_guided_iterative_chunked :
897 int nproc = team->t.t_nproc;
898 KD_TRACE(100,(
"__kmp_dispatch_init: T#%d kmp_sch_guided_iterative_chunked case\n",gtid));
901 if ( (2UL * chunk + 1 ) * nproc >= tc ) {
903 schedule = kmp_sch_dynamic_chunked;
906 pr->u.p.parm2 = guided_int_param * nproc * ( chunk + 1 );
907 *(
double*)&pr->u.p.parm3 = guided_flt_param / nproc;
910 KD_TRACE(100,(
"__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",gtid));
911 schedule = kmp_sch_static_greedy;
913 KD_TRACE(100,(
"__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));
// Guided (analytical): same small-loop shortcut to dynamic chunked, otherwise
// search for the chunk index (cross) at which __kmp_pow( x, idx ) drops to the
// target ratio, with x = 1 - 0.5 / nproc.
918 case kmp_sch_guided_analytical_chunked:
920 int nproc = team->t.t_nproc;
921 KD_TRACE(100, (
"__kmp_dispatch_init: T#%d kmp_sch_guided_analytical_chunked case\n", gtid));
924 if ( (2UL * chunk + 1 ) * nproc >= tc ) {
926 schedule = kmp_sch_dynamic_chunked;
// On 32-bit Windows the floating-point control word is saved here and restored
// further down with the same 0x30000 mask.
931 #if KMP_OS_WINDOWS && KMP_ARCH_X86
944 unsigned int oldFpcw = _control87(0,0x30000);
947 long double target = ((
long double)chunk * 2 + 1) * nproc / tc;
954 x = (
long double)1.0 - (
long double)0.5 / nproc;
// Debug check that parm3 is suitably aligned before x is stored into it as DBL.
965 ptrdiff_t natural_alignment = (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1;
967 KMP_DEBUG_ASSERT( ( ( (ptrdiff_t)&pr->u.p.parm3 ) & ( natural_alignment ) ) == 0 );
972 *(DBL*)&pr->u.p.parm3 = x;
// Grow right until the power falls to the target (bounded by 1<<27), then
// bisect between left and right.
985 p = __kmp_pow< UT >(x,right);
990 }
while(p>target && right < (1<<27));
997 while ( left + 1 < right ) {
998 mid = (left + right) / 2;
999 if ( __kmp_pow< UT >(x,mid) > target ) {
// cross must be the first index whose power is at or below the target.
1008 KMP_ASSERT(cross && __kmp_pow< UT >(x, cross - 1) > target && __kmp_pow< UT >(x, cross) <= target);
1011 pr->u.p.parm2 = cross;
// On 32-bit x86 Linux/Windows builds without KMP_I8 the base is re-read from
// parm3 instead of using x directly.
1014 #if ( ( KMP_OS_LINUX || KMP_OS_WINDOWS ) && KMP_ARCH_X86 ) && ( ! defined( KMP_I8 ) )
1015 #define GUIDED_ANALYTICAL_WORKAROUND (*( DBL * )&pr->u.p.parm3)
1017 #define GUIDED_ANALYTICAL_WORKAROUND (x)
1020 pr->u.p.count = tc - __kmp_dispatch_guided_remaining(tc, GUIDED_ANALYTICAL_WORKAROUND, cross) - cross * chunk;
1021 #if KMP_OS_WINDOWS && KMP_ARCH_X86
1023 _control87(oldFpcw,0x30000);
1027 KD_TRACE(100, (
"__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",
1029 schedule = kmp_sch_static_greedy;
// Static greedy: with more than one thread, parm1 is tc divided by the team
// size, rounded up. The single-thread alternative is not visible here.
1035 case kmp_sch_static_greedy:
1036 KD_TRACE(100,(
"__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid));
1037 pr->u.p.parm1 = ( team -> t.t_nproc > 1 ) ?
1038 ( tc + team->t.t_nproc - 1 ) / team->t.t_nproc :
1041 case kmp_sch_static_chunked :
1042 case kmp_sch_dynamic_chunked :
1043 KD_TRACE(100,(
"__kmp_dispatch_init: T#%d kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid));
// Trapezoidal: four parameters are derived from tc, the team size and the
// chunk-related parm1, then stored in parm1..parm4.
// NOTE(review): the initial value of parm1 and the clamping branches are not
// fully visible in this excerpt.
1045 case kmp_sch_trapezoidal :
1049 T parm1, parm2, parm3, parm4;
1050 KD_TRACE(100, (
"__kmp_dispatch_init: T#%d kmp_sch_trapezoidal case\n", gtid ) );
1055 parm2 = ( tc / (2 * team->t.t_nproc) );
1066 }
else if ( parm1 > parm2 ) {
1071 parm3 = ( parm2 + parm1 );
1072 parm3 = ( 2 * tc + parm3 - 1) / parm3;
1079 parm4 = ( parm3 - 1 );
1080 parm4 = ( parm2 - parm1 ) / parm4;
1087 pr->u.p.parm1 = parm1;
1088 pr->u.p.parm2 = parm2;
1089 pr->u.p.parm3 = parm3;
1090 pr->u.p.parm4 = parm4;
1098 KMP_MSG( UnknownSchedTypeDetected ),
1099 KMP_HNT( GetNewerLibrary ),
// Record the schedule that was finally chosen.
1105 pr->schedule = schedule;
// Wait until the shared buffer index equals my_buffer_index, i.e. until the
// shared buffer selected above is the one currently open for use.
1109 KD_TRACE(100, (
"__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d sh->buffer_index:%d\n",
1110 gtid, my_buffer_index, sh->buffer_index) );
1111 __kmp_wait_yield< kmp_uint32 >( & sh->buffer_index, my_buffer_index, __kmp_eq< kmp_uint32 >
1112 USE_ITT_BUILD_ARG( NULL )
1117 KD_TRACE(100, (
"__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d sh->buffer_index:%d\n",
1118 gtid, my_buffer_index, sh->buffer_index) );
// Publish the buffers as the current ones; the ordered hooks and the finish
// routines read them back through these two pointers.
1120 th -> th.th_dispatch -> th_dispatch_pr_current = (dispatch_private_info_t*) pr;
1121 th -> th.th_dispatch -> th_dispatch_sh_current = (dispatch_shared_info_t*) sh;
1123 if ( pr->ordered ) {
1124 __kmp_itt_ordered_init( gtid );
1132 buff = __kmp_str_format(
1133 "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s lb:%%%s ub:%%%s" \
1134 " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s" \
1135 " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n",
1136 traits_t< UT >::spec, traits_t< T >::spec, traits_t< T >::spec,
1137 traits_t< ST >::spec, traits_t< UT >::spec, traits_t< UT >::spec,
1138 traits_t< UT >::spec, traits_t< UT >::spec, traits_t< T >::spec,
1139 traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec );
1140 KD_TRACE(10, ( buff,
1141 gtid, pr->schedule, pr->ordered, pr->u.p.lb, pr->u.p.ub,
1142 pr->u.p.st, pr->u.p.tc, pr->u.p.count,
1143 pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1,
1144 pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4 ) );
1145 __kmp_str_free( &buff );
// Static-steal bookkeeping for index types narrower than 8 bytes: p points at
// this thread's static_steal_counter. What is done through p is not visible here.
// NOTE(review): the comment on the #endif below mentions USE_STEALING, which the
// visible #if condition does not test - confirm and align the comment.
1148 #if ( KMP_STATIC_STEAL_ENABLED )
1149 if ( ___kmp_size_type < 8 ) {
1158 volatile T * p = &pr->u.p.static_steal_counter;
1162 #endif // ( KMP_STATIC_STEAL_ENABLED && USE_STEALING )
// Finish handling for one iteration of an ordered, dynamically dispatched loop.
// In a non-serialized team it either clears a pending ordered_bumped count or
// waits for the shared ordered_iteration counter to reach ordered_lower and then
// increments that counter.
//   gtid - global thread id
//   loc  - source location (not used in the visible lines)
// NOTE(review): the else that separates the reset from the wait/increment path
// is not visible in this excerpt.
1172 template<
typename UT >
1174 __kmp_dispatch_finish(
int gtid,
ident_t *loc )
1176 typedef typename traits_t< UT >::signed_t ST;
1177 kmp_info_t *th = __kmp_threads[ gtid ];
1179 KD_TRACE(100, (
"__kmp_dispatch_finish: T#%d called\n", gtid ) );
// Nothing below applies to a serialized team.
1180 if ( ! th -> th.th_team -> t.t_serialized ) {
// Typed views of the current private and shared dispatch buffers.
1182 dispatch_private_info_template< UT > * pr =
1183 reinterpret_cast< dispatch_private_info_template< UT >*
>
1184 ( th->th.th_dispatch->th_dispatch_pr_current );
1185 dispatch_shared_info_template< UT >
volatile * sh =
1186 reinterpret_cast< dispatch_shared_info_template< UT >volatile*
>
1187 ( th->th.th_dispatch->th_dispatch_sh_current );
1188 KMP_DEBUG_ASSERT( pr );
1189 KMP_DEBUG_ASSERT( sh );
1190 KMP_DEBUG_ASSERT( th->th.th_dispatch ==
1191 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );
// An ordered section was already passed for this iteration: just clear the count.
1193 if ( pr->ordered_bumped ) {
1194 KD_TRACE(1000, (
"__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
1196 pr->ordered_bumped = 0;
1198 UT lower = pr->u.p.ordered_lower;
1204 buff = __kmp_str_format(
1205 "__kmp_dispatch_finish: T#%%d before wait: ordered_iteration:%%%s lower:%%%s\n",
1206 traits_t< UT >::spec, traits_t< UT >::spec );
1207 KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
1208 __kmp_str_free( &buff );
// Spin until ordered_iteration >= lower.
1212 __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
1213 USE_ITT_BUILD_ARG(NULL)
1220 buff = __kmp_str_format(
1221 "__kmp_dispatch_finish: T#%%d after wait: ordered_iteration:%%%s lower:%%%s\n",
1222 traits_t< UT >::spec, traits_t< UT >::spec );
1223 KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) );
1224 __kmp_str_free( &buff );
// Advance the shared counter by one through its signed alias.
1228 test_then_inc< ST >( (
volatile ST *) & sh->u.s.ordered_iteration );
1231 KD_TRACE(100, (
"__kmp_dispatch_finish: T#%d returned\n", gtid ) );
1234 #ifdef KMP_GOMP_COMPAT
// Chunk-level variant of the finish handling; it appears after an
// #ifdef KMP_GOMP_COMPAT line in this file.
// In a non-serialized team it works out how many ordered iterations of the
// current chunk ( ordered_lower .. ordered_upper ) have not been bumped yet, waits
// for the shared ordered_iteration counter to reach ordered_lower and then adds
// the outstanding amount to that counter.
//   gtid - global thread id
//   loc  - source location (not used in the visible lines)
// NOTE(review): the else arms between the visible statements are not shown in
// this excerpt.
1236 template<
typename UT >
1238 __kmp_dispatch_finish_chunk(
int gtid,
ident_t *loc )
1240 typedef typename traits_t< UT >::signed_t ST;
1241 kmp_info_t *th = __kmp_threads[ gtid ];
1243 KD_TRACE(100, (
"__kmp_dispatch_finish_chunk: T#%d called\n", gtid ) );
// Nothing below applies to a serialized team.
1244 if ( ! th -> th.th_team -> t.t_serialized ) {
// Typed views of the current private and shared dispatch buffers.
1246 dispatch_private_info_template< UT > * pr =
1247 reinterpret_cast< dispatch_private_info_template< UT >*
>
1248 ( th->th.th_dispatch->th_dispatch_pr_current );
1249 dispatch_shared_info_template< UT >
volatile * sh =
1250 reinterpret_cast< dispatch_shared_info_template< UT >volatile*
>
1251 ( th->th.th_dispatch->th_dispatch_sh_current );
1252 KMP_DEBUG_ASSERT( pr );
1253 KMP_DEBUG_ASSERT( sh );
1254 KMP_DEBUG_ASSERT( th->th.th_dispatch ==
1255 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );
// inc is the number of iterations in the chunk.
1258 UT lower = pr->u.p.ordered_lower;
1259 UT upper = pr->u.p.ordered_upper;
1260 UT inc = upper - lower + 1;
// Every iteration of the chunk already bumped the counter: just clear the count.
// NOTE(review): the trace text below names __kmp_dispatch_finish although this
// is the _chunk variant; it is a runtime string and is left unchanged here.
1262 if ( pr->ordered_bumped == inc ) {
1263 KD_TRACE(1000, (
"__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n",
1265 pr->ordered_bumped = 0;
// Otherwise only the iterations not yet bumped remain to be added.
1267 inc -= pr->ordered_bumped;
1273 buff = __kmp_str_format(
1274 "__kmp_dispatch_finish_chunk: T#%%d before wait: " \
1275 "ordered_iteration:%%%s lower:%%%s upper:%%%s\n",
1276 traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
1277 KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower, upper ) );
1278 __kmp_str_free( &buff );
// Spin until ordered_iteration >= lower.
1282 __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT >
1283 USE_ITT_BUILD_ARG(NULL)
1287 KD_TRACE(1000, (
"__kmp_dispatch_finish_chunk: T#%d resetting ordered_bumped to zero\n",
1289 pr->ordered_bumped = 0;
1295 buff = __kmp_str_format(
1296 "__kmp_dispatch_finish_chunk: T#%%d after wait: " \
1297 "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n",
1298 traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec );
1299 KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper ) );
1300 __kmp_str_free( &buff );
// Advance the shared counter by the outstanding amount through its signed alias.
1304 test_then_add< ST >( (
volatile ST *) & sh->u.s.ordered_iteration, inc);
1308 KD_TRACE(100, (
"__kmp_dispatch_finish_chunk: T#%d returned\n", gtid ) );
1313 template<
typename T >
1315 __kmp_dispatch_next(
1316 ident_t *loc,
int gtid, kmp_int32 *p_last, T *p_lb, T *p_ub,
typename traits_t< T >::signed_t *p_st
1319 typedef typename traits_t< T >::unsigned_t UT;
1320 typedef typename traits_t< T >::signed_t ST;
1321 typedef typename traits_t< T >::floating_t DBL;
1322 static const int ___kmp_size_type =
sizeof( UT );
1325 dispatch_private_info_template< T > * pr;
1326 kmp_info_t * th = __kmp_threads[ gtid ];
1327 kmp_team_t * team = th -> th.th_team;
1333 buff = __kmp_str_format(
1334 "__kmp_dispatch_next: T#%%d called p_lb:%%%s p_ub:%%%s p_st:%%%s p_last: %%p\n",
1335 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
1336 KD_TRACE(1000, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last ) );
1337 __kmp_str_free( &buff );
1341 if ( team -> t.t_serialized ) {
1343 pr =
reinterpret_cast< dispatch_private_info_template< T >*
>
1344 ( th -> th.th_dispatch -> th_disp_buffer );
1345 KMP_DEBUG_ASSERT( pr );
1347 if ( (status = (pr->u.p.tc != 0)) == 0 ) {
1353 if ( __kmp_env_consistency_check ) {
1354 if ( pr->pushed_ws != ct_none ) {
1355 pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
1358 }
else if ( pr->nomerge ) {
1361 UT limit, trip, init;
1363 T chunk = pr->u.p.parm1;
1365 KD_TRACE(100, (
"__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", gtid ) );
1367 init = chunk * pr->u.p.count++;
1368 trip = pr->u.p.tc - 1;
1370 if ( (status = (init <= trip)) == 0 ) {
1373 if ( p_st != 0 ) *p_st = 0;
1374 if ( __kmp_env_consistency_check ) {
1375 if ( pr->pushed_ws != ct_none ) {
1376 pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
1381 limit = chunk + init - 1;
1384 if ( (last = (limit >= trip)) != 0 ) {
1387 pr->u.p.last_upper = pr->u.p.ub;
1397 *p_lb = start + init;
1398 *p_ub = start + limit;
1400 *p_lb = start + init * incr;
1401 *p_ub = start + limit * incr;
1404 if ( pr->ordered ) {
1405 pr->u.p.ordered_lower = init;
1406 pr->u.p.ordered_upper = limit;
1411 buff = __kmp_str_format(
1412 "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
1413 traits_t< UT >::spec, traits_t< UT >::spec );
1414 KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
1415 __kmp_str_free( &buff );
1426 pr->u.p.last_upper = *p_ub;
1440 buff = __kmp_str_format(
1441 "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s " \
1442 "p_ub:%%%s p_st:%%%s p_last:%%p returning:%%d\n",
1443 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
1444 KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, *p_st, p_last, status) );
1445 __kmp_str_free( &buff );
1451 dispatch_shared_info_template< UT > *sh;
1454 UT limit, trip, init;
1456 KMP_DEBUG_ASSERT( th->th.th_dispatch ==
1457 &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] );
1459 pr =
reinterpret_cast< dispatch_private_info_template< T >*
>
1460 ( th->th.th_dispatch->th_dispatch_pr_current );
1461 KMP_DEBUG_ASSERT( pr );
1462 sh =
reinterpret_cast< dispatch_shared_info_template< UT >*
>
1463 ( th->th.th_dispatch->th_dispatch_sh_current );
1464 KMP_DEBUG_ASSERT( sh );
1466 if ( pr->u.p.tc == 0 ) {
1470 switch (pr->schedule) {
1471 #if ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
1474 T chunk = pr->u.p.parm1;
1476 KD_TRACE(100, (
"__kmp_dispatch_next: T#%d kmp_sch_static_steal case\n", gtid) );
1478 trip = pr->u.p.tc - 1;
1480 if ( ___kmp_size_type > 4 ) {
1483 init = ( pr->u.p.count )++;
1484 status = ( init < pr->u.p.ub );
1496 union_i4 vold, vnew;
1497 vold.b = *(
volatile kmp_int64 * )(&pr->u.p.count);
1500 while( ! KMP_COMPARE_AND_STORE_ACQ64(
1501 (
volatile kmp_int64* )&pr->u.p.count,
1502 *VOLATILE_CAST(kmp_int64 *)&vold.b,
1503 *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {
1505 vold.b = *(
volatile kmp_int64 * )(&pr->u.p.count);
1510 init = vnew.p.count;
1511 status = ( init < vnew.p.ub ) ;
1515 kmp_info_t **other_threads = team->t.t_threads;
1516 int while_limit = 10;
1517 int while_index = 0;
1521 while ( ( !status ) && ( while_limit != ++while_index ) ) {
1522 union_i4 vold, vnew;
1523 kmp_int32 remaining;
1524 T victimIdx = pr->u.p.parm4;
1525 T oldVictimIdx = victimIdx;
1526 dispatch_private_info_template< T > * victim;
1530 victimIdx = team->t.t_nproc - 1;
1534 victim =
reinterpret_cast< dispatch_private_info_template< T >*
>
1535 ( other_threads[victimIdx]->th.th_dispatch->th_dispatch_pr_current );
1536 }
while ( (victim == NULL || victim == pr) && oldVictimIdx != victimIdx );
1539 ( (*(
volatile T * )&victim->u.p.static_steal_counter) !=
1540 (*(
volatile T * )&pr->u.p.static_steal_counter) ) ) {
1546 if ( oldVictimIdx == victimIdx ) {
1549 pr->u.p.parm4 = victimIdx;
1552 vold.b = *(
volatile kmp_int64 * )( &victim->u.p.count );
1555 KMP_DEBUG_ASSERT( (vnew.p.ub - 1) * chunk <= trip );
1556 if ( vnew.p.count >= vnew.p.ub || (remaining = vnew.p.ub - vnew.p.count) < 4 ) {
1559 vnew.p.ub -= (remaining >> 2);
1560 KMP_DEBUG_ASSERT((vnew.p.ub - 1) * chunk <= trip);
1561 #pragma warning( push )
1563 #pragma warning( disable: 186 )
1564 KMP_DEBUG_ASSERT(vnew.p.ub >= 0);
1565 #pragma warning( pop )
1567 if ( KMP_COMPARE_AND_STORE_ACQ64(
1568 (
volatile kmp_int64 * )&victim->u.p.count,
1569 *VOLATILE_CAST(kmp_int64 *)&vold.b,
1570 *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) {
1579 init = vold.p.count;
1581 pr->u.p.count = init + 1;
1582 pr->u.p.ub = vnew.p.count;
1585 vold.p.count = init + 1;
1587 *(
volatile kmp_int64 * )(&pr->u.p.count) = vold.b;
1588 #endif // KMP_ARCH_X86
1599 if ( p_st != 0 ) *p_st = 0;
1601 start = pr->u.p.parm2;
1603 limit = chunk + init - 1;
1606 KMP_DEBUG_ASSERT(init <= trip);
1607 if ( (last = (limit >= trip)) != 0 )
1612 if ( p_st != 0 ) *p_st = incr;
1615 *p_lb = start + init;
1616 *p_ub = start + limit;
1618 *p_lb = start + init * incr;
1619 *p_ub = start + limit * incr;
1622 if ( pr->ordered ) {
1623 pr->u.p.ordered_lower = init;
1624 pr->u.p.ordered_upper = limit;
1629 buff = __kmp_str_format(
1630 "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
1631 traits_t< UT >::spec, traits_t< UT >::spec );
1632 KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
1633 __kmp_str_free( &buff );
1640 #endif // ( KMP_STATIC_STEAL_ENABLED && KMP_ARCH_X86_64 )
1641 case kmp_sch_static_balanced:
1643 KD_TRACE(100, (
"__kmp_dispatch_next: T#%d kmp_sch_static_balanced case\n", gtid) );
1644 if ( (status = !pr->u.p.count) != 0 ) {
1648 last = pr->u.p.parm1;
1655 pr->u.p.lb = pr->u.p.ub + pr->u.p.st;
1657 if ( pr->ordered ) {
1662 buff = __kmp_str_format(
1663 "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
1664 traits_t< UT >::spec, traits_t< UT >::spec );
1665 KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
1666 __kmp_str_free( &buff );
1672 case kmp_sch_static_greedy:
1673 case kmp_sch_static_chunked:
1677 KD_TRACE(100, (
"__kmp_dispatch_next: T#%d kmp_sch_static_[affinity|chunked] case\n",
1679 parm1 = pr->u.p.parm1;
1681 trip = pr->u.p.tc - 1;
1682 init = parm1 * (pr->u.p.count + __kmp_tid_from_gtid(gtid));
1684 if ( (status = (init <= trip)) != 0 ) {
1687 limit = parm1 + init - 1;
1689 if ( (last = (limit >= trip)) != 0 )
1695 if ( p_st != 0 ) *p_st = incr;
1697 pr->u.p.count += team->t.t_nproc;
1700 *p_lb = start + init;
1701 *p_ub = start + limit;
1704 *p_lb = start + init * incr;
1705 *p_ub = start + limit * incr;
1708 if ( pr->ordered ) {
1709 pr->u.p.ordered_lower = init;
1710 pr->u.p.ordered_upper = limit;
1715 buff = __kmp_str_format(
1716 "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
1717 traits_t< UT >::spec, traits_t< UT >::spec );
1718 KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
1719 __kmp_str_free( &buff );
1727 case kmp_sch_dynamic_chunked:
1729 T chunk = pr->u.p.parm1;
1731 KD_TRACE(100, (
"__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n",
1734 init = chunk * test_then_inc_acq< ST >((
volatile ST *) & sh->u.s.iteration );
1735 trip = pr->u.p.tc - 1;
1737 if ( (status = (init <= trip)) == 0 ) {
1740 if ( p_st != 0 ) *p_st = 0;
1743 limit = chunk + init - 1;
1746 if ( (last = (limit >= trip)) != 0 )
1751 if ( p_st != 0 ) *p_st = incr;
1754 *p_lb = start + init;
1755 *p_ub = start + limit;
1757 *p_lb = start + init * incr;
1758 *p_ub = start + limit * incr;
1761 if ( pr->ordered ) {
1762 pr->u.p.ordered_lower = init;
1763 pr->u.p.ordered_upper = limit;
1768 buff = __kmp_str_format(
1769 "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
1770 traits_t< UT >::spec, traits_t< UT >::spec );
1771 KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
1772 __kmp_str_free( &buff );
1780 case kmp_sch_guided_iterative_chunked:
1782 T chunkspec = pr->u.p.parm1;
1784 (
"__kmp_dispatch_next: T#%d kmp_sch_guided_chunked iterative case\n",gtid));
1789 init = sh->u.s.iteration;
1790 remaining = trip - init;
1791 if ( remaining <= 0 ) {
1796 if ( remaining < pr->u.p.parm2 ) {
1799 init = test_then_add<ST>( (ST*)&sh->u.s.iteration, (ST)chunkspec );
1800 remaining = trip - init;
1801 if (remaining <= 0) {
1806 if ( remaining > chunkspec ) {
1807 limit = init + chunkspec - 1;
1810 limit = init + remaining - 1;
1815 limit = init + (UT)( remaining * *(
double*)&pr->u.p.parm3 );
1816 if ( compare_and_swap<ST>( (ST*)&sh->u.s.iteration, (ST)init, (ST)limit ) ) {
1823 if ( status != 0 ) {
1828 if ( p_last != NULL )
1830 *p_lb = start + init * incr;
1831 *p_ub = start + limit * incr;
1832 if ( pr->ordered ) {
1833 pr->u.p.ordered_lower = init;
1834 pr->u.p.ordered_upper = limit;
1839 buff = __kmp_str_format(
1840 "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
1841 traits_t< UT >::spec, traits_t< UT >::spec );
1842 KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
1843 __kmp_str_free( &buff );
1856 case kmp_sch_guided_analytical_chunked:
1858 T chunkspec = pr->u.p.parm1;
1860 #if KMP_OS_WINDOWS && KMP_ARCH_X86
1863 unsigned int oldFpcw;
1866 KD_TRACE(100, (
"__kmp_dispatch_next: T#%d kmp_sch_guided_chunked analytical case\n",
1871 KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
1872 KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * team->t.t_nproc < trip);
1875 chunkIdx = test_then_inc_acq< ST >((
volatile ST *) & sh->u.s.iteration );
1876 if ( chunkIdx >= pr->u.p.parm2 ) {
1879 init = chunkIdx * chunkspec + pr->u.p.count;
1881 if ( (status = (init > 0 && init <= trip)) != 0 ) {
1882 limit = init + chunkspec -1;
1884 if ( (last = (limit >= trip)) != 0 )
1893 #if KMP_OS_WINDOWS && KMP_ARCH_X86
1898 oldFpcw = _control87(0,0x30000);
1903 init = __kmp_dispatch_guided_remaining< T >(
1904 trip, *( DBL * )&pr->u.p.parm3, chunkIdx );
1905 KMP_DEBUG_ASSERT(init);
1909 limit = trip - __kmp_dispatch_guided_remaining< T >(
1910 trip, *( DBL * )&pr->u.p.parm3, chunkIdx + 1 );
1911 KMP_ASSERT(init <= limit);
1912 if ( init < limit ) {
1913 KMP_DEBUG_ASSERT(limit <= trip);
1920 #if KMP_OS_WINDOWS && KMP_ARCH_X86
1922 if ( oldFpcw & fpcwSet != 0 )
1923 _control87(oldFpcw,0x30000);
1925 if ( status != 0 ) {
1930 if ( p_last != NULL )
1932 *p_lb = start + init * incr;
1933 *p_ub = start + limit * incr;
1934 if ( pr->ordered ) {
1935 pr->u.p.ordered_lower = init;
1936 pr->u.p.ordered_upper = limit;
1941 buff = __kmp_str_format(
1942 "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
1943 traits_t< UT >::spec, traits_t< UT >::spec );
1944 KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
1945 __kmp_str_free( &buff );
1958 case kmp_sch_trapezoidal:
1961 T parm2 = pr->u.p.parm2;
1962 T parm3 = pr->u.p.parm3;
1963 T parm4 = pr->u.p.parm4;
1964 KD_TRACE(100, (
"__kmp_dispatch_next: T#%d kmp_sch_trapezoidal case\n",
1967 index = test_then_inc< ST >( (
volatile ST *) & sh->u.s.iteration );
1969 init = ( index * ( (2*parm2) - (index-1)*parm4 ) ) / 2;
1970 trip = pr->u.p.tc - 1;
1972 if ( (status = (index < parm3 && init <= trip)) == 0 ) {
1975 if ( p_st != 0 ) *p_st = 0;
1978 limit = ( (index+1) * ( 2*parm2 - index*parm4 ) ) / 2 - 1;
1981 if ( (last = (limit >= trip)) != 0 )
1984 if ( p_last != 0 ) {
1987 if ( p_st != 0 ) *p_st = incr;
1990 *p_lb = start + init;
1991 *p_ub = start + limit;
1993 *p_lb = start + init * incr;
1994 *p_ub = start + limit * incr;
1997 if ( pr->ordered ) {
1998 pr->u.p.ordered_lower = init;
1999 pr->u.p.ordered_upper = limit;
2004 buff = __kmp_str_format(
2005 "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n",
2006 traits_t< UT >::spec, traits_t< UT >::spec );
2007 KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) );
2008 __kmp_str_free( &buff );
2018 if ( status == 0 ) {
2021 num_done = test_then_inc< ST >( (
volatile ST *) & sh->u.s.num_done );
2026 buff = __kmp_str_format(
2027 "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n",
2028 traits_t< UT >::spec );
2029 KD_TRACE(100, ( buff, gtid, sh->u.s.num_done ) );
2030 __kmp_str_free( &buff );
2034 if ( num_done == team->t.t_nproc-1 ) {
2039 sh->u.s.num_done = 0;
2040 sh->u.s.iteration = 0;
2043 if ( pr->ordered ) {
2044 sh->u.s.ordered_iteration = 0;
2049 sh -> buffer_index += KMP_MAX_DISP_BUF;
2050 KD_TRACE(100, (
"__kmp_dispatch_next: T#%d change buffer_index:%d\n",
2051 gtid, sh->buffer_index) );
2056 if ( __kmp_env_consistency_check ) {
2057 if ( pr->pushed_ws != ct_none ) {
2058 pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc );
2062 th -> th.th_dispatch -> th_deo_fcn = NULL;
2063 th -> th.th_dispatch -> th_dxo_fcn = NULL;
2064 th -> th.th_dispatch -> th_dispatch_sh_current = NULL;
2065 th -> th.th_dispatch -> th_dispatch_pr_current = NULL;
2069 pr->u.p.last_upper = pr->u.p.ub;
2078 buff = __kmp_str_format(
2079 "__kmp_dispatch_next: T#%%d normal case: " \
2080 "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p returning:%%d\n",
2081 traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec );
2082 KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last, status ) );
2083 __kmp_str_free( &buff );
2113 kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk )
2115 KMP_DEBUG_ASSERT( __kmp_init_serial );
2116 __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
2123 kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk )
2125 KMP_DEBUG_ASSERT( __kmp_init_serial );
2126 __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true );
2134 kmp_int64 lb, kmp_int64 ub,
2135 kmp_int64 st, kmp_int64 chunk )
2137 KMP_DEBUG_ASSERT( __kmp_init_serial );
2138 __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
2146 kmp_uint64 lb, kmp_uint64 ub,
2147 kmp_int64 st, kmp_int64 chunk )
2149 KMP_DEBUG_ASSERT( __kmp_init_serial );
2150 __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true );
2167 kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st )
2169 return __kmp_dispatch_next< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st );
2177 kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st )
2179 return __kmp_dispatch_next< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st );
2187 kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st )
2189 return __kmp_dispatch_next< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st );
2197 kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st )
2199 return __kmp_dispatch_next< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st );
2211 __kmp_dispatch_finish< kmp_uint32 >( gtid, loc );
2220 __kmp_dispatch_finish< kmp_uint64 >( gtid, loc );
2229 __kmp_dispatch_finish< kmp_uint32 >( gtid, loc );
2238 __kmp_dispatch_finish< kmp_uint64 >( gtid, loc );
/* Equality predicate: yields non-zero when value == checker, zero otherwise.
   Its signature matches the `pred` callback type accepted by __kmp_wait_yield_4. */
2245 kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker) {
2246 return value == checker;
/* Inequality predicate: yields non-zero when value != checker, zero otherwise.
   Its signature matches the `pred` callback type accepted by __kmp_wait_yield_4. */
2249 kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker) {
2250 return value != checker;
/* Less-than predicate: yields non-zero when value < checker, zero otherwise.
   Its signature matches the `pred` callback type accepted by __kmp_wait_yield_4. */
2253 kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker) {
2254 return value < checker;
/* Greater-or-equal predicate: yields non-zero when value >= checker, zero otherwise.
   Its signature matches the `pred` callback type accepted by __kmp_wait_yield_4. */
2257 kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker) {
2258 return value >= checker;
/* Less-or-equal predicate: yields non-zero when value <= checker, zero otherwise.
   Its signature matches the `pred` callback type accepted by __kmp_wait_yield_4. */
2261 kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker) {
2262 return value <= checker;
/* Equality predicate on kmp_uint64 operands: yields non-zero when value == checker.
   The result type is kmp_uint32, matching the `pred` callback of __kmp_wait_yield_8. */
2264 kmp_uint32 __kmp_eq_8( kmp_uint64 value, kmp_uint64 checker) {
2265 return value == checker;
/* Inequality predicate on kmp_uint64 operands: yields non-zero when value != checker.
   The result type is kmp_uint32, matching the `pred` callback of __kmp_wait_yield_8. */
2268 kmp_uint32 __kmp_neq_8( kmp_uint64 value, kmp_uint64 checker) {
2269 return value != checker;
/* Less-than predicate on kmp_uint64 operands: yields non-zero when value < checker.
   The result type is kmp_uint32, matching the `pred` callback of __kmp_wait_yield_8. */
2272 kmp_uint32 __kmp_lt_8( kmp_uint64 value, kmp_uint64 checker) {
2273 return value < checker;
/* Greater-or-equal predicate on kmp_uint64 operands: yields non-zero when value >= checker.
   The result type is kmp_uint32, matching the `pred` callback of __kmp_wait_yield_8. */
2276 kmp_uint32 __kmp_ge_8( kmp_uint64 value, kmp_uint64 checker) {
2277 return value >= checker;
/* Less-or-equal predicate on kmp_uint64 operands: yields non-zero when value <= checker.
   The result type is kmp_uint32, matching the `pred` callback of __kmp_wait_yield_8. */
2280 kmp_uint32 __kmp_le_8( kmp_uint64 value, kmp_uint64 checker) {
2281 return value <= checker;
/*
 * Spin-wait on a 32-bit location: repeatedly reads *spinner and calls
 * pred( value_read, checker ) until the predicate returns non-zero.
 *
 *   spinner - location that is polled on every iteration.
 *   pred    - comparison callback; the loop ends once it returns non-zero.
 *
 * NOTE(review): `checker` and `obj` are used below, but their declarations (and the
 * function's return type / return statement) are on lines not visible here —
 * confirm against the full definition.
 */
2285 __kmp_wait_yield_4(
volatile kmp_uint32 * spinner,
2287 kmp_uint32 (* pred)( kmp_uint32, kmp_uint32 )
/* Local copies of the arguments; the loop below works only on these. */
2292 register volatile kmp_uint32 * spin = spinner;
2293 register kmp_uint32 check = checker;
2294 register kmp_uint32 spins;
2295 register kmp_uint32 (*f) ( kmp_uint32, kmp_uint32 ) = pred;
2296 register kmp_uint32 r;
2298 KMP_FSYNC_SPIN_INIT( obj, (
void*) spin );
2299 KMP_INIT_YIELD( spins );
/* r holds the most recent value read from *spin (through TCR_4); keep polling
   while the predicate rejects it. */
2301 while(!f(r = TCR_4(*spin), check)) {
2302 KMP_FSYNC_SPIN_PREPARE( obj );
/* Delay helper defined earlier in this file; its effect is not visible here. */
2308 __kmp_static_delay(TRUE);
/* Presumably yields when there are more threads (__kmp_nth) than available
   processors (__kmp_avail_proc); the KMP_YIELD* macros are defined elsewhere — TODO confirm. */
2312 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2313 KMP_YIELD_SPIN( spins );
2315 KMP_FSYNC_SPIN_ACQUIRED( obj );
/*
 * Spin-wait on a 64-bit location: repeatedly reads *spinner and calls
 * pred( value_read, checker ) until the predicate returns non-zero.
 *
 *   spinner - location that is polled on every iteration.
 *   pred    - comparison callback on kmp_uint64 operands returning kmp_uint32;
 *             the loop ends once it returns non-zero.
 *
 * NOTE(review): `checker` and `obj` are used below, but their declarations (and the
 * function's return type / return statement) are on lines not visible here —
 * confirm against the full definition.
 */
2320 __kmp_wait_yield_8(
volatile kmp_uint64 * spinner,
2322 kmp_uint32 (* pred)( kmp_uint64, kmp_uint64 )
/* Local copies of the arguments; the loop below works only on these. */
2327 register volatile kmp_uint64 * spin = spinner;
2328 register kmp_uint64 check = checker;
2329 register kmp_uint32 spins;
2330 register kmp_uint32 (*f) ( kmp_uint64, kmp_uint64 ) = pred;
2331 register kmp_uint64 r;
2333 KMP_FSYNC_SPIN_INIT( obj, (
void*) spin );
2334 KMP_INIT_YIELD( spins );
/* Two spellings of the same loop head follow: one with an explicit volatile cast
   on the read, one reading *spin directly. Unlike the 32-bit variant, neither goes
   through TCR_4. NOTE(review): the #else/#endif separating them are on lines not
   visible here. */
2336 #if USE_ITT_BUILD && defined( USE_ITT ) && KMP_OS_WINDOWS
2338 while(!f(r = *(
volatile kmp_uint64 *)spin, check))
2340 while(!f(r = *spin, check))
2343 KMP_FSYNC_SPIN_PREPARE( obj );
/* Delay helper defined earlier in this file; its effect is not visible here. */
2349 __kmp_static_delay(TRUE);
/* Presumably yields when there are more threads (__kmp_nth) than available
   processors (__kmp_avail_proc); the KMP_YIELD* macros are defined elsewhere — TODO confirm. */
2354 KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc );
2355 KMP_YIELD_SPIN( spins );
2357 KMP_FSYNC_SPIN_ACQUIRED( obj );
2363 #ifdef KMP_GOMP_COMPAT
/*
 * Wrapper that forwards loc, gtid, schedule, lb, ub, st and chunk to
 * __kmp_dispatch_init< kmp_int32 >. It follows the #ifdef KMP_GOMP_COMPAT that precedes it.
 * NOTE(review): the end of the call is on a line not visible here; push_ws is
 * presumably passed as the final argument — confirm.
 */
2366 __kmp_aux_dispatch_init_4(
ident_t *loc, kmp_int32 gtid,
enum sched_type schedule,
2367 kmp_int32 lb, kmp_int32 ub, kmp_int32 st,
2368 kmp_int32 chunk,
int push_ws )
2370 __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk,
/*
 * Wrapper that forwards loc, gtid, schedule, lb, ub, st and chunk to
 * __kmp_dispatch_init< kmp_uint32 >. The bounds are kmp_uint32, while the stride
 * and chunk are kmp_int32.
 * NOTE(review): the end of the call is on a line not visible here; push_ws is
 * presumably passed as the final argument — confirm.
 */
2375 __kmp_aux_dispatch_init_4u(
ident_t *loc, kmp_int32 gtid,
enum sched_type schedule,
2376 kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st,
2377 kmp_int32 chunk,
int push_ws )
2379 __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk,
/*
 * Wrapper that forwards loc, gtid, schedule, lb, ub, st and chunk to
 * __kmp_dispatch_init< kmp_int64 >.
 * NOTE(review): the end of the call is on a line not visible here; push_ws is
 * presumably passed as the final argument — confirm.
 */
2384 __kmp_aux_dispatch_init_8(
ident_t *loc, kmp_int32 gtid,
enum sched_type schedule,
2385 kmp_int64 lb, kmp_int64 ub, kmp_int64 st,
2386 kmp_int64 chunk,
int push_ws )
2388 __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk,
/*
 * Wrapper that forwards loc, gtid, schedule, lb, ub, st and chunk to
 * __kmp_dispatch_init< kmp_uint64 >. The bounds are kmp_uint64, while the stride
 * and chunk are kmp_int64.
 * NOTE(review): the end of the call is on a line not visible here; push_ws is
 * presumably passed as the final argument — confirm.
 */
2393 __kmp_aux_dispatch_init_8u(
ident_t *loc, kmp_int32 gtid,
enum sched_type schedule,
2394 kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st,
2395 kmp_int64 chunk,
int push_ws )
2397 __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk,
/* Forwards to __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc ); note the
   argument order is swapped relative to this wrapper's (loc, gtid) parameters.
   This wrapper uses the kmp_uint32 instantiation, the same one the _4u wrapper calls. */
2402 __kmp_aux_dispatch_fini_chunk_4(
ident_t *loc, kmp_int32 gtid )
2404 __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );
/* Forwards to __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc ); note the
   argument order is swapped relative to this wrapper's (loc, gtid) parameters.
   This wrapper uses the kmp_uint64 instantiation, the same one the _8u wrapper calls. */
2408 __kmp_aux_dispatch_fini_chunk_8(
ident_t *loc, kmp_int32 gtid )
2410 __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );
/* Forwards to __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc ); note the
   argument order is swapped relative to this wrapper's (loc, gtid) parameters. */
2414 __kmp_aux_dispatch_fini_chunk_4u(
ident_t *loc, kmp_int32 gtid )
2416 __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc );
/* Forwards to __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc ); note the
   argument order is swapped relative to this wrapper's (loc, gtid) parameters. */
2420 __kmp_aux_dispatch_fini_chunk_8u(
ident_t *loc, kmp_int32 gtid )
2422 __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc );