LLVM OpenMP* Runtime Library
kmp_wait_release.h
1 /*
2  * kmp_wait_release.h -- Wait/Release implementation
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_WAIT_RELEASE_H
14 #define KMP_WAIT_RELEASE_H
15 
16 #include "kmp.h"
17 #include "kmp_itt.h"
18 #include "kmp_stats.h"
19 #if OMPT_SUPPORT
20 #include "ompt-specific.h"
21 #endif
22 
36 struct flag_properties {
37  unsigned int type : 16;
38  unsigned int reserved : 16;
39 };
40 
41 template <enum flag_type FlagType> struct flag_traits {};
42 
43 template <> struct flag_traits<flag32> {
44  typedef kmp_uint32 flag_t;
45  static const flag_type t = flag32;
46  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
47  static inline flag_t test_then_add4(volatile flag_t *f) {
48  return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
49  }
50  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
51  return KMP_TEST_THEN_OR32(f, v);
52  }
53  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
54  return KMP_TEST_THEN_AND32(f, v);
55  }
56 };
57 
58 template <> struct flag_traits<atomic_flag64> {
59  typedef kmp_uint64 flag_t;
60  static const flag_type t = atomic_flag64;
61  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
62  static inline flag_t test_then_add4(volatile flag_t *f) {
63  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
64  }
65  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
66  return KMP_TEST_THEN_OR64(f, v);
67  }
68  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
69  return KMP_TEST_THEN_AND64(f, v);
70  }
71 };
72 
73 template <> struct flag_traits<flag64> {
74  typedef kmp_uint64 flag_t;
75  static const flag_type t = flag64;
76  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
77  static inline flag_t test_then_add4(volatile flag_t *f) {
78  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
79  }
80  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
81  return KMP_TEST_THEN_OR64(f, v);
82  }
83  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
84  return KMP_TEST_THEN_AND64(f, v);
85  }
86 };
87 
88 template <> struct flag_traits<flag_oncore> {
89  typedef kmp_uint64 flag_t;
90  static const flag_type t = flag_oncore;
91  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
92  static inline flag_t test_then_add4(volatile flag_t *f) {
93  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
94  }
95  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
96  return KMP_TEST_THEN_OR64(f, v);
97  }
98  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
99  return KMP_TEST_THEN_AND64(f, v);
100  }
101 };
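// A minimal worked example of the "+4" release step defined by the traits
// above, assuming the KMP_BARRIER_* bit layout from kmp.h (sleep state in the
// low bit, "go" generations counted in steps of KMP_BARRIER_STATE_BUMP == 4):
//
//   flag value 0x8 (generation 2, sleep bit clear)
//   test_then_add4  ->  0xC (generation 3), low status bits untouched
//
// Releasing a flag therefore advances the generation without clobbering the
// sleep bit that set_sleeping()/unset_sleeping() manage below.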
102 
104 template <flag_type FlagType> class kmp_flag {
105 protected:
106  flag_properties t;
107  kmp_info_t *waiting_threads[1];
108  kmp_uint32 num_waiting_threads;
109  std::atomic<bool> *sleepLoc;
110 
111 public:
112  typedef flag_traits<FlagType> traits_type;
113  kmp_flag() : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(nullptr) {}
114  kmp_flag(int nwaiters)
115  : t({FlagType, 0U}), num_waiting_threads(nwaiters), sleepLoc(nullptr) {}
116  kmp_flag(std::atomic<bool> *sloc)
117  : t({FlagType, 0U}), num_waiting_threads(0), sleepLoc(sloc) {}
119  flag_type get_type() { return (flag_type)(t.type); }
120 
123  kmp_info_t *get_waiter(kmp_uint32 i) {
124  KMP_DEBUG_ASSERT(i < num_waiting_threads);
125  return waiting_threads[i];
126  }
128  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
131  void set_waiter(kmp_info_t *thr) {
132  waiting_threads[0] = thr;
134  }
135  enum barrier_type get_bt() { return bs_last_barrier; }
136 };
137 
139 template <typename PtrType, flag_type FlagType, bool Sleepable>
140 class kmp_flag_native : public kmp_flag<FlagType> {
141 protected:
142  volatile PtrType *loc;
143  PtrType checker;
144  typedef flag_traits<FlagType> traits_type;
145 
146 public:
147  typedef PtrType flag_t;
148  kmp_flag_native(volatile PtrType *p) : kmp_flag<FlagType>(), loc(p) {}
149  kmp_flag_native(volatile PtrType *p, kmp_info_t *thr)
150  : kmp_flag<FlagType>(1), loc(p) {
151  this->waiting_threads[0] = thr;
152  }
153  kmp_flag_native(volatile PtrType *p, PtrType c)
154  : kmp_flag<FlagType>(), loc(p), checker(c) {}
155  kmp_flag_native(volatile PtrType *p, PtrType c, std::atomic<bool> *sloc)
156  : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
157  virtual ~kmp_flag_native() {}
158  void *operator new(size_t size) { return __kmp_allocate(size); }
159  void operator delete(void *p) { __kmp_free(p); }
160  volatile PtrType *get() { return loc; }
161  void *get_void_p() { return RCAST(void *, CCAST(PtrType *, loc)); }
162  void set(volatile PtrType *new_loc) { loc = new_loc; }
163  PtrType load() { return *loc; }
164  void store(PtrType val) { *loc = val; }
166  virtual bool done_check() {
167  if (Sleepable && !(this->sleepLoc))
168  return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
169  checker;
170  else
171  return traits_type::tcr(*(this->get())) == checker;
172  }
175  virtual bool done_check_val(PtrType old_loc) { return old_loc == checker; }
181  virtual bool notdone_check() {
182  return traits_type::tcr(*(this->get())) != checker;
183  }
186  void internal_release() {
187  (void)traits_type::test_then_add4((volatile PtrType *)this->get());
188  }
192  PtrType set_sleeping() {
193  if (this->sleepLoc) {
194  this->sleepLoc->store(true);
195  return *(this->get());
196  }
197  return traits_type::test_then_or((volatile PtrType *)this->get(),
198  KMP_BARRIER_SLEEP_STATE);
199  }
203  void unset_sleeping() {
204  if (this->sleepLoc) {
205  this->sleepLoc->store(false);
206  return;
207  }
208  traits_type::test_then_and((volatile PtrType *)this->get(),
209  ~KMP_BARRIER_SLEEP_STATE);
210  }
213  bool is_sleeping_val(PtrType old_loc) {
214  if (this->sleepLoc)
215  return this->sleepLoc->load();
216  return old_loc & KMP_BARRIER_SLEEP_STATE;
217  }
219  bool is_sleeping() {
220  if (this->sleepLoc)
221  return this->sleepLoc->load();
222  return is_sleeping_val(*(this->get()));
223  }
224  bool is_any_sleeping() {
225  if (this->sleepLoc)
226  return this->sleepLoc->load();
227  return is_sleeping_val(*(this->get()));
228  }
229  kmp_uint8 *get_stolen() { return NULL; }
230 };
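// How done_check() treats the sleep bit for a Sleepable native flag, with
// hypothetical values under the same KMP_BARRIER_SLEEP_STATE assumption:
//
//   checker == 0x8, *loc == 0x9  (released value 0x8 with the sleep bit set)
//   Sleepable && !sleepLoc : (*loc & ~KMP_BARRIER_SLEEP_STATE) == 0x8 -> done
//   otherwise              : *loc (0x9) != 0x8                        -> not done
//
// A sleepable wait keeps its sleep bookkeeping inside the flag word itself,
// while an external sleepLoc leaves the flag word holding only the value.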
231 
233 template <typename PtrType, flag_type FlagType, bool Sleepable>
234 class kmp_flag_atomic : public kmp_flag<FlagType> {
235 protected:
236  std::atomic<PtrType> *loc;
237  PtrType checker;
238 public:
239  typedef flag_traits<FlagType> traits_type;
240  typedef PtrType flag_t;
241  kmp_flag_atomic(std::atomic<PtrType> *p) : kmp_flag<FlagType>(), loc(p) {}
242  kmp_flag_atomic(std::atomic<PtrType> *p, kmp_info_t *thr)
243  : kmp_flag<FlagType>(1), loc(p) {
244  this->waiting_threads[0] = thr;
245  }
246  kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c)
247  : kmp_flag<FlagType>(), loc(p), checker(c) {}
248  kmp_flag_atomic(std::atomic<PtrType> *p, PtrType c, std::atomic<bool> *sloc)
249  : kmp_flag<FlagType>(sloc), loc(p), checker(c) {}
251  std::atomic<PtrType> *get() { return loc; }
253  void *get_void_p() { return RCAST(void *, loc); }
255  void set(std::atomic<PtrType> *new_loc) { loc = new_loc; }
257  PtrType load() { return loc->load(std::memory_order_acquire); }
259  void store(PtrType val) { loc->store(val, std::memory_order_release); }
261  bool done_check() {
262  if (Sleepable && !(this->sleepLoc))
263  return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
264  else
265  return this->load() == checker;
266  }
269  bool done_check_val(PtrType old_loc) { return old_loc == checker; }
275  bool notdone_check() { return this->load() != checker; }
278  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
282  PtrType set_sleeping() {
283  if (this->sleepLoc) {
284  this->sleepLoc->store(true);
285  return *(this->get());
286  }
287  return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
288  }
292  void unset_sleeping() {
293  if (this->sleepLoc) {
294  this->sleepLoc->store(false);
295  return;
296  }
297  KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
298  }
301  bool is_sleeping_val(PtrType old_loc) {
302  if (this->sleepLoc)
303  return this->sleepLoc->load();
304  return old_loc & KMP_BARRIER_SLEEP_STATE;
305  }
307  bool is_sleeping() {
308  if (this->sleepLoc)
309  return this->sleepLoc->load();
310  return is_sleeping_val(this->load());
311  }
312  bool is_any_sleeping() {
313  if (this->sleepLoc)
314  return this->sleepLoc->load();
315  return is_sleeping_val(this->load());
316  }
317  kmp_uint8 *get_stolen() { return NULL; }
318 };
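// Note on the memory ordering used by the atomic variant: load() is an
// acquire and store() is a release, so a waiter that observes the checker
// value via done_check() also observes whatever the releasing thread wrote
// before the corresponding store(). A sketch of the intended pairing (names
// illustrative only):
//
//   // releasing thread                // waiting thread
//   payload = 42;                      while (!flag.done_check())
//   flag.store(checker);                 ;
//                                      use(payload);   // guaranteed to see 42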
319 
320 #if OMPT_SUPPORT
321 OMPT_NOINLINE
322 static void __ompt_implicit_task_end(kmp_info_t *this_thr,
323  ompt_state_t ompt_state,
324  ompt_data_t *tId) {
325  int ds_tid = this_thr->th.th_info.ds.ds_tid;
326  if (ompt_state == ompt_state_wait_barrier_implicit) {
327  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
328 #if OMPT_OPTIONAL
329  void *codeptr = NULL;
330  if (ompt_enabled.ompt_callback_sync_region_wait) {
331  ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
332  ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
333  codeptr);
334  }
335  if (ompt_enabled.ompt_callback_sync_region) {
336  ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
337  ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
338  codeptr);
339  }
340 #endif
341  if (!KMP_MASTER_TID(ds_tid)) {
342  if (ompt_enabled.ompt_callback_implicit_task) {
343  int flags = this_thr->th.ompt_thread_info.parallel_flags;
344  flags = (flags & ompt_parallel_league) ? ompt_task_initial
345  : ompt_task_implicit;
346  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
347  ompt_scope_end, NULL, tId, 0, ds_tid, flags);
348  }
349  // return to idle state
350  this_thr->th.ompt_thread_info.state = ompt_state_idle;
351  } else {
352  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
353  }
354  }
355 }
356 #endif
357 
358 /* Spin wait loop that first does pause/yield, then sleep. A thread that calls
359  __kmp_wait_* must make certain that another thread calls __kmp_release
360  to wake it back up to prevent deadlocks!
361 
362  NOTE: We may not belong to a team at this point. */
363 template <class C, bool final_spin, bool Cancellable = false,
364  bool Sleepable = true>
365 static inline bool
366 __kmp_wait_template(kmp_info_t *this_thr,
367  C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
368 #if USE_ITT_BUILD && USE_ITT_NOTIFY
369  volatile void *spin = flag->get();
370 #endif
371  kmp_uint32 spins;
372  int th_gtid;
373  int tasks_completed = FALSE;
374 #if !KMP_USE_MONITOR
375  kmp_uint64 poll_count;
376  kmp_uint64 hibernate_goal;
377 #else
378  kmp_uint32 hibernate;
379 #endif
380  kmp_uint64 time;
381 
382  KMP_FSYNC_SPIN_INIT(spin, NULL);
383  if (flag->done_check()) {
384  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
385  return false;
386  }
387  th_gtid = this_thr->th.th_info.ds.ds_gtid;
388  if (Cancellable) {
389  kmp_team_t *team = this_thr->th.th_team;
390  if (team && team->t.t_cancel_request == cancel_parallel)
391  return true;
392  }
393 #if KMP_OS_UNIX
394  if (final_spin)
395  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
396 #endif
397  KA_TRACE(20,
398  ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
399 #if KMP_STATS_ENABLED
400  stats_state_e thread_state = KMP_GET_THREAD_STATE();
401 #endif
402 
403 /* OMPT Behavior:
404 THIS function is called from
405  __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
406  these have join / fork behavior
407 
408  In these cases, we don't change the state or trigger events in THIS
409 function.
410  Events are triggered in the calling code (__kmp_barrier):
411 
412  state := ompt_state_overhead
413  barrier-begin
414  barrier-wait-begin
415  state := ompt_state_wait_barrier
416  call join-barrier-implementation (finally arrive here)
417  {}
418  call fork-barrier-implementation (finally arrive here)
419  {}
420  state := ompt_state_overhead
421  barrier-wait-end
422  barrier-end
423  state := ompt_state_work_parallel
424 
425 
426  __kmp_fork_barrier (after thread creation, before executing implicit task)
427  call fork-barrier-implementation (finally arrive here)
428  {} // worker arrive here with state = ompt_state_idle
429 
430 
431  __kmp_join_barrier (implicit barrier at end of parallel region)
432  state := ompt_state_barrier_implicit
433  barrier-begin
434  barrier-wait-begin
435  call join-barrier-implementation (finally arrive here
436 final_spin=FALSE)
437  {
438  }
439  __kmp_fork_barrier (implicit barrier at end of parallel region)
440  call fork-barrier-implementation (finally arrive here final_spin=TRUE)
441 
442  Worker after task-team is finished:
443  barrier-wait-end
444  barrier-end
445  implicit-task-end
446  idle-begin
447  state := ompt_state_idle
448 
449  Before leaving, if state = ompt_state_idle
450  idle-end
451  state := ompt_state_overhead
452 */
453 #if OMPT_SUPPORT
454  ompt_state_t ompt_entry_state;
455  ompt_data_t *tId;
456  if (ompt_enabled.enabled) {
457  ompt_entry_state = this_thr->th.ompt_thread_info.state;
458  if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
459  KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
460  ompt_lw_taskteam_t *team = NULL;
461  if (this_thr->th.th_team)
462  team = this_thr->th.th_team->t.ompt_serialized_team_info;
463  if (team) {
464  tId = &(team->ompt_task_info.task_data);
465  } else {
466  tId = OMPT_CUR_TASK_DATA(this_thr);
467  }
468  } else {
469  tId = &(this_thr->th.ompt_thread_info.task_data);
470  }
471  if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
472  this_thr->th.th_task_team == NULL)) {
473  // implicit task is done. Either no taskqueue, or task-team finished
474  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
475  }
476  }
477 #endif
478 
479  KMP_INIT_YIELD(spins); // Setup for waiting
480  KMP_INIT_BACKOFF(time);
481 
482  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
483  __kmp_pause_status == kmp_soft_paused) {
484 #if KMP_USE_MONITOR
485 // The worker threads cannot rely on the team struct existing at this point.
486 // Use the bt values cached in the thread struct instead.
487 #ifdef KMP_ADJUST_BLOCKTIME
488  if (__kmp_pause_status == kmp_soft_paused ||
489  (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
490  // Force immediate suspend if not set by user and more threads than
491  // available procs
492  hibernate = 0;
493  else
494  hibernate = this_thr->th.th_team_bt_intervals;
495 #else
496  hibernate = this_thr->th.th_team_bt_intervals;
497 #endif /* KMP_ADJUST_BLOCKTIME */
498 
499  /* If the blocktime is nonzero, we want to make sure that we spin wait for
500  the entirety of the specified #intervals, plus up to one interval more.
501  This increment makes certain that this thread doesn't go to sleep too
502  soon. */
503  if (hibernate != 0)
504  hibernate++;
505 
506  // Add in the current time value.
507  hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
508  KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
509  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
510  hibernate - __kmp_global.g.g_time.dt.t_value));
511 #else
512  if (__kmp_pause_status == kmp_soft_paused) {
513  // Force immediate suspend
514  hibernate_goal = KMP_NOW();
515  } else
516  hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
517  poll_count = 0;
518  (void)poll_count;
519 #endif // KMP_USE_MONITOR
520  }
521 
522  KMP_MB();
523 
524  // Main wait spin loop
525  while (flag->notdone_check()) {
526  kmp_task_team_t *task_team = NULL;
527  if (__kmp_tasking_mode != tskm_immediate_exec) {
528  task_team = this_thr->th.th_task_team;
529  /* If the thread's task team pointer is NULL, it means one of 3 things:
530  1) A newly-created thread is first being released by
531  __kmp_fork_barrier(), and its task team has not been set up yet.
532  2) All tasks have been executed to completion.
533  3) Tasking is off for this region. This could be because we are in a
534  serialized region (perhaps the outer one), or else tasking was manually
535  disabled (KMP_TASKING=0). */
536  if (task_team != NULL) {
537  if (TCR_SYNC_4(task_team->tt.tt_active)) {
538  if (KMP_TASKING_ENABLED(task_team)) {
539  flag->execute_tasks(
540  this_thr, th_gtid, final_spin,
541  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
542  } else
543  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
544  } else {
545  KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
546 #if OMPT_SUPPORT
547  // task team is done now; other cases should have been caught above
548  if (final_spin && ompt_enabled.enabled)
549  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
550 #endif
551  this_thr->th.th_task_team = NULL;
552  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
553  }
554  } else {
555  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
556  } // if
557  } // if
558 
559  KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
560  if (TCR_4(__kmp_global.g.g_done)) {
561  if (__kmp_global.g.g_abort)
562  __kmp_abort_thread();
563  break;
564  }
565 
566  // If we are oversubscribed, or have waited a bit (and
567  // KMP_LIBRARY=throughput), then yield
568  KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
569 
570 #if KMP_STATS_ENABLED
571  // Check if thread has been signalled to idle state
572  // This indicates that the logical "join-barrier" has finished
573  if (this_thr->th.th_stats->isIdle() &&
574  KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
575  KMP_SET_THREAD_STATE(IDLE);
576  KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
577  }
578 #endif
579  // Check if the barrier surrounding this wait loop has been cancelled
580  if (Cancellable) {
581  kmp_team_t *team = this_thr->th.th_team;
582  if (team && team->t.t_cancel_request == cancel_parallel)
583  break;
584  }
585 
586  // For a hidden helper thread, a NULL task_team means the main thread has
587  // not yet released the barrier. We cannot simply wait here: once the main
588  // thread releases all child barriers, the hidden helper threads would
589  // still be sleeping, so follow-up setup such as task-team synchronization
590  // would be skipped and this thread would be left without a task team.
591  // Usually that is harmless, but in one corner case, when the first task
592  // encountered is an untied task, the check in __kmp_task_alloc crashes
593  // because it dereferences the task team pointer without testing it for
594  // NULL; it presumably assumes the pointer has already been set up by
595  // that point.
596  if (task_team && KMP_HIDDEN_HELPER_WORKER_THREAD(th_gtid) &&
597  !TCR_4(__kmp_hidden_helper_team_done)) {
598  // If there are still hidden helper tasks to be executed, the hidden helper
599  // thread does not enter a waiting state.
600  if (KMP_ATOMIC_LD_ACQ(&__kmp_unexecuted_hidden_helper_tasks) == 0) {
601  __kmp_hidden_helper_worker_thread_wait();
602  }
603  continue;
604  }
605 
606  // Don't suspend if KMP_BLOCKTIME is set to "infinite"
607  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
608  __kmp_pause_status != kmp_soft_paused)
609  continue;
610 
611  // Don't suspend if there is a likelihood of new tasks being spawned.
612  if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
613  continue;
614 
615 #if KMP_USE_MONITOR
616  // If we have waited a bit more, fall asleep
617  if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
618  continue;
619 #else
620  if (KMP_BLOCKING(hibernate_goal, poll_count++))
621  continue;
622 #endif
623  // Don't suspend if wait loop designated non-sleepable
624  // in template parameters
625  if (!Sleepable)
626  continue;
627 
628 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
629  if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
630  KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
631  flag->mwait(th_gtid);
632  } else {
633 #endif
634  KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
635 #if KMP_OS_UNIX
636  if (final_spin)
637  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
638 #endif
639  flag->suspend(th_gtid);
640 #if KMP_OS_UNIX
641  if (final_spin)
642  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
643 #endif
644 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
645  }
646 #endif
647 
648  if (TCR_4(__kmp_global.g.g_done)) {
649  if (__kmp_global.g.g_abort)
650  __kmp_abort_thread();
651  break;
652  } else if (__kmp_tasking_mode != tskm_immediate_exec &&
653  this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
654  this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
655  }
656  // TODO: If thread is done with work and times out, disband/free
657  }
658 
659 #if OMPT_SUPPORT
660  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
661  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
662 #if OMPT_OPTIONAL
663  if (final_spin) {
664  __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
665  ompt_exit_state = this_thr->th.ompt_thread_info.state;
666  }
667 #endif
668  if (ompt_exit_state == ompt_state_idle) {
669  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
670  }
671  }
672 #endif
673 #if KMP_STATS_ENABLED
674  // If we were put into idle state, pop that off the state stack
675  if (KMP_GET_THREAD_STATE() == IDLE) {
676  KMP_POP_PARTITIONED_TIMER();
677  KMP_SET_THREAD_STATE(thread_state);
678  this_thr->th.th_stats->resetIdleFlag();
679  }
680 #endif
681 
682 #if KMP_OS_UNIX
683  if (final_spin)
684  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
685 #endif
686  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
687  if (Cancellable) {
688  kmp_team_t *team = this_thr->th.th_team;
689  if (team && team->t.t_cancel_request == cancel_parallel) {
690  if (tasks_completed) {
691  // undo the previous decrement of unfinished_threads so that the
692  // thread can decrement at the join barrier with no problem
693  kmp_task_team_t *task_team = this_thr->th.th_task_team;
694  std::atomic<kmp_int32> *unfinished_threads =
695  &(task_team->tt.tt_unfinished_threads);
696  KMP_ATOMIC_INC(unfinished_threads);
697  }
698  return true;
699  }
700  }
701  return false;
702 }
703 
704 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
705 // Set up a monitor on the flag variable causing the calling thread to wait in
706 // a less active state until the flag variable is modified.
707 template <class C>
708 static inline void __kmp_mwait_template(int th_gtid, C *flag) {
709  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait);
710  kmp_info_t *th = __kmp_threads[th_gtid];
711 
712  KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,
713  flag->get()));
714 
715  // User-level mwait is available
716  KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled);
717 
718  __kmp_suspend_initialize_thread(th);
719  __kmp_lock_suspend_mx(th);
720 
721  volatile void *spin = flag->get();
722  void *cacheline = (void *)(kmp_uintptr_t(spin) & ~(CACHE_LINE - 1));
723 
724  if (!flag->done_check()) {
725  // Mark thread as no longer active
726  th->th.th_active = FALSE;
727  if (th->th.th_active_in_pool) {
728  th->th.th_active_in_pool = FALSE;
729  KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
730  KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
731  }
732  flag->set_sleeping();
733  KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid));
734 #if KMP_HAVE_UMWAIT
735  if (__kmp_umwait_enabled) {
736  __kmp_umonitor(cacheline);
737  }
738 #elif KMP_HAVE_MWAIT
739  if (__kmp_mwait_enabled) {
740  __kmp_mm_monitor(cacheline, 0, 0);
741  }
742 #endif
743  // To avoid a race, check flag between 'monitor' and 'mwait'. A write to
744  // the address could happen after the last time we checked and before
745  // monitoring started, in which case monitor can't detect the change.
746  if (flag->done_check())
747  flag->unset_sleeping();
748  else {
749  // if flag changes here, wake-up happens immediately
750  TCW_PTR(th->th.th_sleep_loc, (void *)flag);
751  th->th.th_sleep_loc_type = flag->get_type();
752  __kmp_unlock_suspend_mx(th);
753  KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
754 #if KMP_HAVE_UMWAIT
755  if (__kmp_umwait_enabled) {
756  __kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter
757  }
758 #elif KMP_HAVE_MWAIT
759  if (__kmp_mwait_enabled) {
760  __kmp_mm_mwait(0, __kmp_mwait_hints);
761  }
762 #endif
763  KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid));
764  __kmp_lock_suspend_mx(th);
765  // Clean up sleep info; doesn't matter how/why this thread stopped waiting
766  if (flag->is_sleeping())
767  flag->unset_sleeping();
768  TCW_PTR(th->th.th_sleep_loc, NULL);
769  th->th.th_sleep_loc_type = flag_unset;
770  }
771  // Mark thread as active again
772  th->th.th_active = TRUE;
773  if (TCR_4(th->th.th_in_pool)) {
774  KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
775  th->th.th_active_in_pool = TRUE;
776  }
777  } // Drop out to main wait loop to check flag, handle tasks, etc.
778  __kmp_unlock_suspend_mx(th);
779  KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid));
780 }
781 #endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
782 
783 /* Release any threads specified as waiting on the flag by releasing the flag
784  and resuming the waiting thread(s) if indicated by the sleep bit(s). A thread
785  that calls __kmp_wait_template must call this function to wake up any
786  potentially sleeping thread and prevent deadlocks! */
787 template <class C> static inline void __kmp_release_template(C *flag) {
788 #ifdef KMP_DEBUG
789  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
790 #endif
791  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
792  KMP_DEBUG_ASSERT(flag->get());
793  KMP_FSYNC_RELEASING(flag->get_void_p());
794 
795  flag->internal_release();
796 
797  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
798  flag->load()));
799 
800  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
801  // Only need to check sleep stuff if infinite block time not set.
802  // Are *any* threads waiting on flag sleeping?
803  if (flag->is_any_sleeping()) {
804  for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
805  // if sleeping waiter exists at i, sets current_waiter to i inside flag
806  kmp_info_t *waiter = flag->get_waiter(i);
807  if (waiter) {
808  int wait_gtid = waiter->th.th_info.ds.ds_gtid;
809  // Wake up thread if needed
810  KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
811  "flag(%p) set\n",
812  gtid, wait_gtid, flag->get()));
813  flag->resume(wait_gtid); // unsets flag's current_waiter when done
814  }
815  }
816  }
817  }
818 }
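// A minimal sketch of the wait/release pairing implemented above, loosely
// modeled on how the barrier code drives these templates (the variable names
// are illustrative, not the exact call sites in kmp_barrier.cpp):
//
//   // Worker parked on its fork-barrier "go" flag:
//   kmp_flag_64<> flag(&thr_bar->b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP);
//   flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
//
//   // Primary thread releasing that worker:
//   kmp_flag_64<> flag(&other_thr->th.th_bar[bt].bb.b_go, other_thr);
//   flag.release();  // bumps b_go and resumes the worker if it is asleep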
819 
820 template <bool Cancellable, bool Sleepable>
821 class kmp_flag_32 : public kmp_flag_atomic<kmp_uint32, flag32, Sleepable> {
822 public:
823  kmp_flag_32(std::atomic<kmp_uint32> *p)
824  : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p) {}
825  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
826  : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, thr) {}
827  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
828  : kmp_flag_atomic<kmp_uint32, flag32, Sleepable>(p, c) {}
829  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
830 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
831  void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
832 #endif
833  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
834  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
835  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
836  kmp_int32 is_constrained) {
837  return __kmp_execute_tasks_32(
838  this_thr, gtid, this, final_spin,
839  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
840  }
841  bool wait(kmp_info_t *this_thr,
842  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
843  if (final_spin)
844  return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>(
845  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
846  else
847  return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
848  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
849  }
850  void release() { __kmp_release_template(this); }
851  flag_type get_ptr_type() { return flag32; }
852 };
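// kmp_flag_32 is typically used for 32-bit counters rather than barrier "go"
// words; for example, draining a task team's unfinished-thread count looks
// roughly like this (illustrative sketch, not the exact kmp_tasking.cpp code):
//
//   kmp_flag_32<false, false> flag(
//       RCAST(std::atomic<kmp_uint32> *, &task_team->tt.tt_unfinished_threads),
//       0U);
//   flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));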
853 
854 template <bool Cancellable, bool Sleepable>
855 class kmp_flag_64 : public kmp_flag_native<kmp_uint64, flag64, Sleepable> {
856 public:
857  kmp_flag_64(volatile kmp_uint64 *p)
858  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p) {}
859  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
860  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, thr) {}
861  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
862  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c) {}
863  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c, std::atomic<bool> *loc)
864  : kmp_flag_native<kmp_uint64, flag64, Sleepable>(p, c, loc) {}
865  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
866 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
867  void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
868 #endif
869  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
870  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
871  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
872  kmp_int32 is_constrained) {
873  return __kmp_execute_tasks_64(
874  this_thr, gtid, this, final_spin,
875  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
876  }
877  bool wait(kmp_info_t *this_thr,
878  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
879  if (final_spin)
880  return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>(
881  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
882  else
883  return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
884  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
885  }
886  void release() { __kmp_release_template(this); }
887  flag_type get_ptr_type() { return flag64; }
888 };
889 
890 template <bool Cancellable, bool Sleepable>
891 class kmp_atomic_flag_64
892  : public kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable> {
893 public:
894  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p)
895  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p) {}
896  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_info_t *thr)
897  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, thr) {}
898  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c)
899  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c) {}
900  kmp_atomic_flag_64(std::atomic<kmp_uint64> *p, kmp_uint64 c,
901  std::atomic<bool> *loc)
902  : kmp_flag_atomic<kmp_uint64, atomic_flag64, Sleepable>(p, c, loc) {}
903  void suspend(int th_gtid) { __kmp_atomic_suspend_64(th_gtid, this); }
904  void mwait(int th_gtid) { __kmp_atomic_mwait_64(th_gtid, this); }
905  void resume(int th_gtid) { __kmp_atomic_resume_64(th_gtid, this); }
906  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
907  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
908  kmp_int32 is_constrained) {
909  return __kmp_atomic_execute_tasks_64(
910  this_thr, gtid, this, final_spin,
911  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
912  }
913  bool wait(kmp_info_t *this_thr,
914  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
915  if (final_spin)
916  return __kmp_wait_template<kmp_atomic_flag_64, TRUE, Cancellable,
917  Sleepable>(
918  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
919  else
920  return __kmp_wait_template<kmp_atomic_flag_64, FALSE, Cancellable,
921  Sleepable>(
922  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
923  }
924  void release() { __kmp_release_template(this); }
925  flag_type get_ptr_type() { return atomic_flag64; }
926 };
927 
928 // Hierarchical 64-bit on-core barrier instantiation
929 class kmp_flag_oncore : public kmp_flag_native<kmp_uint64, flag_oncore, false> {
930  kmp_uint32 offset;
931  bool flag_switch;
932  enum barrier_type bt;
933  kmp_info_t *this_thr;
934 #if USE_ITT_BUILD
935  void *itt_sync_obj;
936 #endif
937  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
938  return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
939  }
940 
941 public:
942  kmp_flag_oncore(volatile kmp_uint64 *p)
943  : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), flag_switch(false) {
944  }
945  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
946  : kmp_flag_native<kmp_uint64, flag_oncore, false>(p), offset(idx),
947  flag_switch(false),
948  bt(bs_last_barrier) USE_ITT_BUILD_ARG(itt_sync_obj(nullptr)) {}
949  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
950  enum barrier_type bar_t,
951  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
952  : kmp_flag_native<kmp_uint64, flag_oncore, false>(p, c), offset(idx),
953  flag_switch(false), bt(bar_t),
954  this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
955  virtual ~kmp_flag_oncore() override {}
956  void *operator new(size_t size) { return __kmp_allocate(size); }
957  void operator delete(void *p) { __kmp_free(p); }
958  bool done_check_val(kmp_uint64 old_loc) override {
959  return byteref(&old_loc, offset) == checker;
960  }
961  bool done_check() override { return done_check_val(*get()); }
962  bool notdone_check() override {
963  // Calculate flag_switch
964  if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
965  flag_switch = true;
966  if (byteref(get(), offset) != 1 && !flag_switch)
967  return true;
968  else if (flag_switch) {
969  this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
970  kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
971  (kmp_uint64)KMP_BARRIER_STATE_BUMP);
972  __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
973  }
974  return false;
975  }
976  void internal_release() {
977  // Other threads can write their own bytes simultaneously.
978  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
979  byteref(get(), offset) = 1;
980  } else {
981  kmp_uint64 mask = 0;
982  byteref(&mask, offset) = 1;
983  KMP_TEST_THEN_OR64(get(), mask);
984  }
985  }
986  void wait(kmp_info_t *this_thr, int final_spin) {
987  if (final_spin)
988  __kmp_wait_template<kmp_flag_oncore, TRUE>(
989  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
990  else
991  __kmp_wait_template<kmp_flag_oncore, FALSE>(
992  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
993  }
994  void release() { __kmp_release_template(this); }
995  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
996 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
997  void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); }
998 #endif
999  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
1000  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
1001  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
1002  kmp_int32 is_constrained) {
1003 #if OMPD_SUPPORT
1004  int ret = __kmp_execute_tasks_oncore(
1005  this_thr, gtid, this, final_spin,
1006  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1007  if (ompd_state & OMPD_ENABLE_BP)
1008  ompd_bp_task_end();
1009  return ret;
1010 #else
1011  return __kmp_execute_tasks_oncore(
1012  this_thr, gtid, this, final_spin,
1013  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
1014 #endif
1015  }
1016  enum barrier_type get_bt() { return bt; }
1017  flag_type get_ptr_type() { return flag_oncore; }
1018 };
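// Layout sketch for the on-core flag above: the 64-bit word acts as eight
// one-byte sub-flags, one per thread that shares the flag (offsets here are
// hypothetical):
//
//   loc:  [ b0 | b1 | b2 | b3 | b4 | b5 | b6 | b7 ]
//            ^ offset 0 released       ^ offset 5 still waiting
//
// internal_release() therefore either writes its own byte directly (infinite
// blocktime case) or ORs in a one-byte mask so that concurrent writers to the
// other bytes are not lost.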
1019 
1020 static inline void __kmp_null_resume_wrapper(kmp_info_t *thr) {
1021  int gtid = __kmp_gtid_from_thread(thr);
1022  void *flag = CCAST(void *, thr->th.th_sleep_loc);
1023  flag_type type = thr->th.th_sleep_loc_type;
1024  if (!flag)
1025  return;
1026  // Attempt to wake up a thread: examine its type and call appropriate template
1027  switch (type) {
1028  case flag32:
1029  __kmp_resume_32(gtid, RCAST(kmp_flag_32<> *, flag));
1030  break;
1031  case flag64:
1032  __kmp_resume_64(gtid, RCAST(kmp_flag_64<> *, flag));
1033  break;
1034  case atomic_flag64:
1035  __kmp_atomic_resume_64(gtid, RCAST(kmp_atomic_flag_64<> *, flag));
1036  break;
1037  case flag_oncore:
1038  __kmp_resume_oncore(gtid, RCAST(kmp_flag_oncore *, flag));
1039  break;
1040 #ifdef KMP_DEBUG
1041  case flag_unset:
1042  KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d is unset\n", type));
1043  break;
1044  default:
1045  KF_TRACE(100, ("__kmp_null_resume_wrapper: flag type %d does not match any "
1046  "known flag type\n",
1047  type));
1048 #endif
1049  }
1050 }
1051 
1056 #endif // KMP_WAIT_RELEASE_H