LLVM OpenMP* Runtime Library
kmp_sched.cpp
/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
   it may change values between parallel regions. __kmp_max_nth
   is the largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif

static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
static inline void check_loc(ident_t *&loc) {
  if (loc == NULL)
    loc = &loc_stub; // may need to report location info to ittnotify
}

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      check_loc(loc);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
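  // Illustrative zero-trip instance for the branch above (not from the
  // source): for(i = 1; i < 1; ++i) is lowered with *plower = 1, *pupper = 0,
  // incr = 1; the test (*pupper < *plower) fires, *plastiter is cleared, and
  // the routine returns with the bounds still covering the (empty) space.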

  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    if (th->th.th_team->t.t_serialized > 1) {
      tid = 0;
      team = th->th.th_team;
    } else {
      tid = th->th.th_team->t.t_master_tid;
      team = th->th.th_team->t.t_parent;
    }
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }
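  // Worked instance (illustrative): *plower = 0, *pupper = 9, incr = 2 gives
  // trip_count = (9 - 0) / 2 + 1 = 5, i.e. iterations {0, 2, 4, 6, 8}. The
  // division is done in the unsigned type UT because upper - lower may not
  // fit in the signed type.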

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        // set bounds so non-active threads execute no iterations
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
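  // Worked instance (illustrative) for the balanced branch above:
  // trip_count = 10, nth = 4 gives small_chunk = 2 and extras = 2, so the
  // threads receive 3, 3, 2, 2 iterations. The greedy branch instead gives
  // each thread a block of ceil(10 / 4) = 3 iterations and clips the final
  // thread's block at the original upper bound (leaving it just 1).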
  case kmp_sch_static_chunked: {
    ST span;
    UT nchunks;
    if (chunk < 1)
      chunk = 1;
    else if ((UT)chunk > trip_count)
      chunk = trip_count;
    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
    span = chunk * incr;
    if (nchunks < nth) {
      *pstride = span * nchunks;
      if (tid < nchunks) {
        *plower = *plower + (span * tid);
        *pupper = *plower + span - incr;
      } else {
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
    } else {
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
    }
    if (plastiter != NULL)
      *plastiter = (tid == (nchunks - 1) % nth);
    break;
  }
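  // Worked instance (illustrative): trip_count = 10, chunk = 2, nth = 4,
  // incr = 1 gives nchunks = 5 and span = 2; thread 0 starts with [0,1],
  // thread 1 with [2,3], and so on, each thread advancing by *pstride = 8 to
  // its next chunk. The last (5th) chunk belongs to tid (5 - 1) % 4 = 0.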
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
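  // Note on the chunk adjustment above: (span + chunk - 1) & ~(chunk - 1)
  // rounds span up to a multiple of chunk, and the bitmask form is exact
  // only when chunk is a power of two. Worked instance (illustrative):
  // span = 13, chunk = 8 gives (13 + 7) & ~7 = 16.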
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
  if (ompt_enabled.ompt_callback_dispatch) {
    ompt_dispatch_t dispatch_type;
    ompt_data_t instance = ompt_data_none;
    ompt_dispatch_chunk_t dispatch_chunk;
    if (ompt_work_type == ompt_work_sections) {
      dispatch_type = ompt_dispatch_section;
      instance.ptr = codeptr;
    } else {
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupper, incr);
      dispatch_type = (ompt_work_type == ompt_work_distribute)
                          ? ompt_dispatch_distribute_chunk
                          : ompt_dispatch_ws_loop_chunk;
      instance.ptr = &dispatch_chunk;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
        &(team_info->parallel_data), &(task_info->task_data), dispatch_type,
        instance);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}
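
// A minimal sketch (illustrative, not part of this file) of how a compiler
// typically lowers "#pragma omp for schedule(static)" over i = 0..99 onto
// the 32-bit entry point defined later in this file; the variable names and
// body() are hypothetical:
//
//   kmp_int32 last = 0, lb = 0, ub = 99, st = 1;
//   __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lb, &ub,
//                            &st, /*incr=*/1, /*chunk=*/0);
//   for (kmp_int32 i = lb; i <= ub; ++i)
//     body(i);
//   __kmpc_for_static_fini(&loc, gtid);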

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                       ,
                                       void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only primary threads of some teams get single iteration, other threads
    // get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
    if (ompt_enabled.ompt_callback_dispatch) {
      ompt_data_t instance = ompt_data_none;
      ompt_dispatch_chunk_t dispatch_chunk;
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupperDist, incr);
      instance.ptr = &dispatch_chunk;
      ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
          &(team_info->parallel_data), &(task_info->task_data),
          ompt_dispatch_distribute_chunk, instance);
    }
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}
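
// Worked instance (illustrative) of the two-level split above: with
// trip_count = 100, nteams = 4 and balanced scheduling, each team first
// receives 25 iterations (*plower..*pupperDist); the inner switch then
// divides the team's 25 iterations among its nth threads, exactly as in
// __kmp_for_static_init.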

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and
  // stride for next chunks calculation.
  // Last iteration flag set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static,chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}
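
// Worked instance (illustrative) for dist_schedule(static, chunk): with
// lower = 0, upper = 99, incr = 1, chunk = 10 and nteams = 4, team 1 gets
// its first chunk [10,19] and advances by *p_st = 40 to [50,59], and so on;
// the last chunk is number (99 / 10) = 9, owned by team 9 % 4 = 1, so
// *p_last is set for team 1.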

//------------------------------------------------------------------------------
extern "C" {

void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_CODEPTR_ARG , OMPT_GET_RETURN_ADDRESS(0)
#else
#define OMPT_CODEPTR_ARG
#endif
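// With OMPT enabled, "chunk OMPT_CODEPTR_ARG" in the calls below expands to
// "chunk, OMPT_GET_RETURN_ADDRESS(0)", passing the caller's code address as
// the extra codeptr argument; otherwise the macro expands to nothing.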

void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}

void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
// Transfer call to template< type T >
// __kmp_team_static_init( ident_t *loc, int gtid,
//     int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

} // extern "C"