| /* |
| * kmp_tasking.cpp -- OpenMP 3.0 tasking support. |
| */ |
| |
| //===----------------------------------------------------------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "kmp.h" |
| #include "kmp_i18n.h" |
| #include "kmp_itt.h" |
| #include "kmp_stats.h" |
| #include "kmp_wait_release.h" |
| #include "kmp_taskdeps.h" |
| |
| #if OMPT_SUPPORT |
| #include "ompt-specific.h" |
| #endif |
| |
| #include "tsan_annotations.h" |
| |
| /* forward declaration */ |
| static void __kmp_enable_tasking(kmp_task_team_t *task_team, |
| kmp_info_t *this_thr); |
| static void __kmp_alloc_task_deque(kmp_info_t *thread, |
| kmp_thread_data_t *thread_data); |
| static int __kmp_realloc_task_threads_data(kmp_info_t *thread, |
| kmp_task_team_t *task_team); |
| static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask); |
| |
| #ifdef BUILD_TIED_TASK_STACK |
| |
| // __kmp_trace_task_stack: print the tied tasks from the task stack in order |
// from top to bottom
| // |
| // gtid: global thread identifier for thread containing stack |
| // thread_data: thread data for task team thread containing stack |
| // threshold: value above which the trace statement triggers |
| // location: string identifying call site of this function (for trace) |
| static void __kmp_trace_task_stack(kmp_int32 gtid, |
| kmp_thread_data_t *thread_data, |
| int threshold, char *location) { |
| kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks; |
| kmp_taskdata_t **stack_top = task_stack->ts_top; |
| kmp_int32 entries = task_stack->ts_entries; |
| kmp_taskdata_t *tied_task; |
| |
| KA_TRACE( |
| threshold, |
| ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, " |
| "first_block = %p, stack_top = %p \n", |
| location, gtid, entries, task_stack->ts_first_block, stack_top)); |
| |
| KMP_DEBUG_ASSERT(stack_top != NULL); |
| KMP_DEBUG_ASSERT(entries > 0); |
| |
| while (entries != 0) { |
| KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]); |
| // fix up ts_top if we need to pop from previous block |
    if ((entries & TASK_STACK_INDEX_MASK) == 0) {
| kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top); |
| |
| stack_block = stack_block->sb_prev; |
| stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE]; |
| } |
| |
| // finish bookkeeping |
| stack_top--; |
| entries--; |
| |
| tied_task = *stack_top; |
| |
| KMP_DEBUG_ASSERT(tied_task != NULL); |
| KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED); |
| |
| KA_TRACE(threshold, |
| ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, " |
| "stack_top=%p, tied_task=%p\n", |
| location, gtid, entries, stack_top, tied_task)); |
| } |
| KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]); |
| |
| KA_TRACE(threshold, |
| ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n", |
| location, gtid)); |
| } |
| |
| // __kmp_init_task_stack: initialize the task stack for the first time |
| // after a thread_data structure is created. |
| // It should not be necessary to do this again (assuming the stack works). |
| // |
| // gtid: global thread identifier of calling thread |
| // thread_data: thread data for task team thread containing stack |
| static void __kmp_init_task_stack(kmp_int32 gtid, |
| kmp_thread_data_t *thread_data) { |
| kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks; |
| kmp_stack_block_t *first_block; |
| |
| // set up the first block of the stack |
| first_block = &task_stack->ts_first_block; |
| task_stack->ts_top = (kmp_taskdata_t **)first_block; |
| memset((void *)first_block, '\0', |
| TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *)); |
| |
| // initialize the stack to be empty |
| task_stack->ts_entries = TASK_STACK_EMPTY; |
| first_block->sb_next = NULL; |
| first_block->sb_prev = NULL; |
| } |
| |
| // __kmp_free_task_stack: free the task stack when thread_data is destroyed. |
| // |
| // gtid: global thread identifier for calling thread |
| // thread_data: thread info for thread containing stack |
| static void __kmp_free_task_stack(kmp_int32 gtid, |
| kmp_thread_data_t *thread_data) { |
  kmp_info_t *thread = __kmp_threads[gtid]; // needed to free extra blocks
  kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks;
  kmp_stack_block_t *stack_block = &task_stack->ts_first_block;
| |
| KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY); |
| // free from the second block of the stack |
| while (stack_block != NULL) { |
| kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL; |
| |
| stack_block->sb_next = NULL; |
| stack_block->sb_prev = NULL; |
| if (stack_block != &task_stack->ts_first_block) { |
| __kmp_thread_free(thread, |
| stack_block); // free the block, if not the first |
| } |
| stack_block = next_block; |
| } |
| // initialize the stack to be empty |
| task_stack->ts_entries = 0; |
| task_stack->ts_top = NULL; |
| } |
| |
| // __kmp_push_task_stack: Push the tied task onto the task stack. |
| // Grow the stack if necessary by allocating another block. |
| // |
| // gtid: global thread identifier for calling thread |
| // thread: thread info for thread containing stack |
| // tied_task: the task to push on the stack |
| static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread, |
| kmp_taskdata_t *tied_task) { |
| // GEH - need to consider what to do if tt_threads_data not allocated yet |
| kmp_thread_data_t *thread_data = |
| &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)]; |
| kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks; |
| |
| if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) { |
| return; // Don't push anything on stack if team or team tasks are serialized |
| } |
| |
| KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED); |
| KMP_DEBUG_ASSERT(task_stack->ts_top != NULL); |
| |
| KA_TRACE(20, |
| ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n", |
| gtid, thread, tied_task)); |
| // Store entry |
| *(task_stack->ts_top) = tied_task; |
| |
| // Do bookkeeping for next push |
| task_stack->ts_top++; |
| task_stack->ts_entries++; |
| |
  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
| // Find beginning of this task block |
| kmp_stack_block_t *stack_block = |
| (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE); |
| |
| // Check if we already have a block |
| if (stack_block->sb_next != |
| NULL) { // reset ts_top to beginning of next block |
| task_stack->ts_top = &stack_block->sb_next->sb_block[0]; |
| } else { // Alloc new block and link it up |
| kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc( |
| thread, sizeof(kmp_stack_block_t)); |
| |
| task_stack->ts_top = &new_block->sb_block[0]; |
| stack_block->sb_next = new_block; |
| new_block->sb_prev = stack_block; |
| new_block->sb_next = NULL; |
| |
| KA_TRACE( |
| 30, |
| ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n", |
| gtid, tied_task, new_block)); |
| } |
| } |
| KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, |
| tied_task)); |
| } |
| |
| // __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return |
| // the task, just check to make sure it matches the ending task passed in. |
| // |
// gtid: global thread identifier for the calling thread
// thread: thread info structure containing stack
// ending_task: the task that is ending (should match the task popped from the
// stack)
| static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread, |
| kmp_taskdata_t *ending_task) { |
| // GEH - need to consider what to do if tt_threads_data not allocated yet |
  kmp_thread_data_t *thread_data =
      &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)];
| kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks; |
| kmp_taskdata_t *tied_task; |
| |
| if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) { |
| // Don't pop anything from stack if team or team tasks are serialized |
| return; |
| } |
| |
| KMP_DEBUG_ASSERT(task_stack->ts_top != NULL); |
| KMP_DEBUG_ASSERT(task_stack->ts_entries > 0); |
| |
| KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, |
| thread)); |
| |
| // fix up ts_top if we need to pop from previous block |
  if ((task_stack->ts_entries & TASK_STACK_INDEX_MASK) == 0) {
| kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top); |
| |
| stack_block = stack_block->sb_prev; |
| task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE]; |
| } |
| |
| // finish bookkeeping |
| task_stack->ts_top--; |
| task_stack->ts_entries--; |
| |
| tied_task = *(task_stack->ts_top); |
| |
| KMP_DEBUG_ASSERT(tied_task != NULL); |
| KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED); |
| KMP_DEBUG_ASSERT(tied_task == ending_task); // If we built the stack correctly |
| |
| KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, |
| tied_task)); |
| return; |
| } |
| #endif /* BUILD_TIED_TASK_STACK */ |
| |
| // returns 1 if new task is allowed to execute, 0 otherwise |
| // checks Task Scheduling constraint (if requested) and |
| // mutexinoutset dependencies if any |
| static bool __kmp_task_is_allowed(int gtid, const kmp_int32 is_constrained, |
| const kmp_taskdata_t *tasknew, |
| const kmp_taskdata_t *taskcurr) { |
| if (is_constrained && (tasknew->td_flags.tiedness == TASK_TIED)) { |
    // Check if the candidate obeys the Task Scheduling Constraints (TSC):
    // only a descendant of all deferred tied tasks can be scheduled; checking
    // the last one is enough, as it in turn is a descendant of all the others
| kmp_taskdata_t *current = taskcurr->td_last_tied; |
| KMP_DEBUG_ASSERT(current != NULL); |
| // check if the task is not suspended on barrier |
| if (current->td_flags.tasktype == TASK_EXPLICIT || |
| current->td_taskwait_thread > 0) { // <= 0 on barrier |
| kmp_int32 level = current->td_level; |
| kmp_taskdata_t *parent = tasknew->td_parent; |
| while (parent != current && parent->td_level > level) { |
| // check generation up to the level of the current task |
| parent = parent->td_parent; |
| KMP_DEBUG_ASSERT(parent != NULL); |
| } |
| if (parent != current) |
| return false; |
| } |
| } |
| // Check mutexinoutset dependencies, acquire locks |
| kmp_depnode_t *node = tasknew->td_depnode; |
| if (node && (node->dn.mtx_num_locks > 0)) { |
| for (int i = 0; i < node->dn.mtx_num_locks; ++i) { |
| KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL); |
| if (__kmp_test_lock(node->dn.mtx_locks[i], gtid)) |
| continue; |
| // could not get the lock, release previous locks |
| for (int j = i - 1; j >= 0; --j) |
| __kmp_release_lock(node->dn.mtx_locks[j], gtid); |
| return false; |
| } |
| // negative num_locks means all locks acquired successfully |
| node->dn.mtx_num_locks = -node->dn.mtx_num_locks; |
| } |
| return true; |
| } |
| |
| // __kmp_realloc_task_deque: |
| // Re-allocates a task deque for a particular thread, copies the content from |
| // the old deque and adjusts the necessary data structures relating to the |
| // deque. This operation must be done with the deque_lock being held |
| static void __kmp_realloc_task_deque(kmp_info_t *thread, |
| kmp_thread_data_t *thread_data) { |
| kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td); |
| kmp_int32 new_size = 2 * size; |
| |
| KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to " |
| "%d] for thread_data %p\n", |
| __kmp_gtid_from_thread(thread), size, new_size, thread_data)); |
| |
| kmp_taskdata_t **new_deque = |
| (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *)); |
| |
| int i, j; |
| for (i = thread_data->td.td_deque_head, j = 0; j < size; |
| i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++) |
| new_deque[j] = thread_data->td.td_deque[i]; |
| |
| __kmp_free(thread_data->td.td_deque); |
| |
| thread_data->td.td_deque_head = 0; |
| thread_data->td.td_deque_tail = size; |
| thread_data->td.td_deque = new_deque; |
| thread_data->td.td_deque_size = new_size; |
| } |
| |
| // __kmp_push_task: Add a task to the thread's deque |
| static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) { |
| kmp_info_t *thread = __kmp_threads[gtid]; |
| kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); |
| kmp_task_team_t *task_team = thread->th.th_task_team; |
| kmp_int32 tid = __kmp_tid_from_gtid(gtid); |
| kmp_thread_data_t *thread_data; |
| |
| KA_TRACE(20, |
| ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata)); |
| |
| if (taskdata->td_flags.tiedness == TASK_UNTIED) { |
| // untied task needs to increment counter so that the task structure is not |
| // freed prematurely |
| kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count); |
| KMP_DEBUG_USE_VAR(counter); |
| KA_TRACE( |
| 20, |
| ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n", |
| gtid, counter, taskdata)); |
| } |
| |
| // The first check avoids building task_team thread data if serialized |
| if (taskdata->td_flags.task_serial) { |
| KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning " |
| "TASK_NOT_PUSHED for task %p\n", |
| gtid, taskdata)); |
| return TASK_NOT_PUSHED; |
| } |
| |
| // Now that serialized tasks have returned, we can assume that we are not in |
| // immediate exec mode |
| KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec); |
| if (!KMP_TASKING_ENABLED(task_team)) { |
| __kmp_enable_tasking(task_team, thread); |
| } |
| KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE); |
| KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL); |
| |
| // Find tasking deque specific to encountering thread |
| thread_data = &task_team->tt.tt_threads_data[tid]; |
| |
| // No lock needed since only owner can allocate |
| if (thread_data->td.td_deque == NULL) { |
| __kmp_alloc_task_deque(thread, thread_data); |
| } |
| |
| int locked = 0; |
| // Check if deque is full |
| if (TCR_4(thread_data->td.td_deque_ntasks) >= |
| TASK_DEQUE_SIZE(thread_data->td)) { |
| if (__kmp_enable_task_throttling && |
| __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata, |
| thread->th.th_current_task)) { |
| KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning " |
| "TASK_NOT_PUSHED for task %p\n", |
| gtid, taskdata)); |
| return TASK_NOT_PUSHED; |
| } else { |
| __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock); |
| locked = 1; |
| // expand deque to push the task which is not allowed to execute |
| __kmp_realloc_task_deque(thread, thread_data); |
| } |
| } |
| // Lock the deque for the task push operation |
| if (!locked) { |
| __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock); |
| // Need to recheck as we can get a proxy task from thread outside of OpenMP |
| if (TCR_4(thread_data->td.td_deque_ntasks) >= |
| TASK_DEQUE_SIZE(thread_data->td)) { |
| if (__kmp_enable_task_throttling && |
| __kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata, |
| thread->th.th_current_task)) { |
| __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock); |
| KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; " |
| "returning TASK_NOT_PUSHED for task %p\n", |
| gtid, taskdata)); |
| return TASK_NOT_PUSHED; |
| } else { |
| // expand deque to push the task which is not allowed to execute |
| __kmp_realloc_task_deque(thread, thread_data); |
| } |
| } |
| } |
  // Must have room since no thread but the calling thread can add tasks
| KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) < |
| TASK_DEQUE_SIZE(thread_data->td)); |
| |
| thread_data->td.td_deque[thread_data->td.td_deque_tail] = |
| taskdata; // Push taskdata |
| // Wrap index. |
| thread_data->td.td_deque_tail = |
| (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td); |
| TCW_4(thread_data->td.td_deque_ntasks, |
| TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count |
| |
| KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: " |
| "task=%p ntasks=%d head=%u tail=%u\n", |
| gtid, taskdata, thread_data->td.td_deque_ntasks, |
| thread_data->td.td_deque_head, thread_data->td.td_deque_tail)); |
| |
| __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock); |
| |
| return TASK_SUCCESSFULLY_PUSHED; |
| } |
| |
| // __kmp_pop_current_task_from_thread: set up current task from called thread |
| // when team ends |
| // |
| // this_thr: thread structure to set current_task in. |
| void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) { |
| KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d " |
| "this_thread=%p, curtask=%p, " |
| "curtask_parent=%p\n", |
| 0, this_thr, this_thr->th.th_current_task, |
| this_thr->th.th_current_task->td_parent)); |
| |
| this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent; |
| |
| KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d " |
| "this_thread=%p, curtask=%p, " |
| "curtask_parent=%p\n", |
| 0, this_thr, this_thr->th.th_current_task, |
| this_thr->th.th_current_task->td_parent)); |
| } |
| |
| // __kmp_push_current_task_to_thread: set up current task in called thread for a |
| // new team |
| // |
| // this_thr: thread structure to set up |
| // team: team for implicit task data |
| // tid: thread within team to set up |
| void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team, |
| int tid) { |
  // The current task of the thread is the parent of the just-created implicit
  // tasks of the new team
| KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p " |
| "curtask=%p " |
| "parent_task=%p\n", |
| tid, this_thr, this_thr->th.th_current_task, |
| team->t.t_implicit_task_taskdata[tid].td_parent)); |
| |
| KMP_DEBUG_ASSERT(this_thr != NULL); |
| |
| if (tid == 0) { |
| if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) { |
| team->t.t_implicit_task_taskdata[0].td_parent = |
| this_thr->th.th_current_task; |
| this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0]; |
| } |
| } else { |
| team->t.t_implicit_task_taskdata[tid].td_parent = |
| team->t.t_implicit_task_taskdata[0].td_parent; |
| this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid]; |
| } |
| |
| KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p " |
| "curtask=%p " |
| "parent_task=%p\n", |
| tid, this_thr, this_thr->th.th_current_task, |
| team->t.t_implicit_task_taskdata[tid].td_parent)); |
| } |
| |
| // __kmp_task_start: bookkeeping for a task starting execution |
| // |
| // GTID: global thread id of calling thread |
| // task: task starting execution |
| // current_task: task suspending |
| static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task, |
| kmp_taskdata_t *current_task) { |
| kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); |
| kmp_info_t *thread = __kmp_threads[gtid]; |
| |
| KA_TRACE(10, |
| ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n", |
| gtid, taskdata, current_task)); |
| |
| KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT); |
| |
| // mark currently executing task as suspended |
| // TODO: GEH - make sure root team implicit task is initialized properly. |
| // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 ); |
| current_task->td_flags.executing = 0; |
| |
| // Add task to stack if tied |
| #ifdef BUILD_TIED_TASK_STACK |
| if (taskdata->td_flags.tiedness == TASK_TIED) { |
| __kmp_push_task_stack(gtid, thread, taskdata); |
| } |
| #endif /* BUILD_TIED_TASK_STACK */ |
| |
| // mark starting task as executing and as current task |
| thread->th.th_current_task = taskdata; |
| |
| KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 || |
| taskdata->td_flags.tiedness == TASK_UNTIED); |
| KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 || |
| taskdata->td_flags.tiedness == TASK_UNTIED); |
| taskdata->td_flags.started = 1; |
| taskdata->td_flags.executing = 1; |
| KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0); |
| KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0); |
| |
| // GEH TODO: shouldn't we pass some sort of location identifier here? |
| // APT: yes, we will pass location here. |
| // need to store current thread state (in a thread or taskdata structure) |
| // before setting work_state, otherwise wrong state is set after end of task |
| |
| KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata)); |
| |
| return; |
| } |
| |
| #if OMPT_SUPPORT |
| //------------------------------------------------------------------------------ |
| // __ompt_task_init: |
| // Initialize OMPT fields maintained by a task. This will only be called after |
| // ompt_start_tool, so we already know whether ompt is enabled or not. |
| |
| static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) { |
| // The calls to __ompt_task_init already have the ompt_enabled condition. |
| task->ompt_task_info.task_data.value = 0; |
| task->ompt_task_info.frame.exit_frame = ompt_data_none; |
| task->ompt_task_info.frame.enter_frame = ompt_data_none; |
  task->ompt_task_info.frame.exit_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
  task->ompt_task_info.frame.enter_frame_flags =
      ompt_frame_runtime | ompt_frame_framepointer;
| task->ompt_task_info.ndeps = 0; |
| task->ompt_task_info.deps = NULL; |
| } |
| |
| // __ompt_task_start: |
| // Build and trigger task-begin event |
| static inline void __ompt_task_start(kmp_task_t *task, |
| kmp_taskdata_t *current_task, |
| kmp_int32 gtid) { |
| kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); |
| ompt_task_status_t status = ompt_task_switch; |
| if (__kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded) { |
| status = ompt_task_yield; |
| __kmp_threads[gtid]->th.ompt_thread_info.ompt_task_yielded = 0; |
| } |
| /* let OMPT know that we're about to run this task */ |
| if (ompt_enabled.ompt_callback_task_schedule) { |
| ompt_callbacks.ompt_callback(ompt_callback_task_schedule)( |
| &(current_task->ompt_task_info.task_data), status, |
| &(taskdata->ompt_task_info.task_data)); |
| } |
| taskdata->ompt_task_info.scheduling_parent = current_task; |
| } |
| |
| // __ompt_task_finish: |
| // Build and trigger final task-schedule event |
| static inline void |
| __ompt_task_finish(kmp_task_t *task, kmp_taskdata_t *resumed_task, |
| ompt_task_status_t status = ompt_task_complete) { |
| kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); |
| if (__kmp_omp_cancellation && taskdata->td_taskgroup && |
| taskdata->td_taskgroup->cancel_request == cancel_taskgroup) { |
| status = ompt_task_cancel; |
| } |
| |
| /* let OMPT know that we're returning to the callee task */ |
| if (ompt_enabled.ompt_callback_task_schedule) { |
| ompt_callbacks.ompt_callback(ompt_callback_task_schedule)( |
| &(taskdata->ompt_task_info.task_data), status, |
| &((resumed_task ? resumed_task |
| : (taskdata->ompt_task_info.scheduling_parent |
| ? taskdata->ompt_task_info.scheduling_parent |
| : taskdata->td_parent)) |
| ->ompt_task_info.task_data)); |
| } |
| } |
| #endif |
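
// Hedged sketch of the tool side that consumes the task-schedule events built
// above (OMPT first-party tool interface; the tool-side function names are
// illustrative):
//
//   static void on_task_schedule(ompt_data_t *prior_task_data,
//                                ompt_task_status_t prior_task_status,
//                                ompt_data_t *next_task_data) {
//     /* e.g. count switches away from completed or cancelled tasks */
//   }
//   static int tool_initialize(ompt_function_lookup_t lookup,
//                              int initial_device_num,
//                              ompt_data_t *tool_data) {
//     ompt_set_callback_t set_cb =
//         (ompt_set_callback_t)lookup("ompt_set_callback");
//     set_cb(ompt_callback_task_schedule, (ompt_callback_t)on_task_schedule);
//     return 1; // nonzero keeps the tool active
//   }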
| |
| template <bool ompt> |
| static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid, |
| kmp_task_t *task, |
| void *frame_address, |
| void *return_address) { |
| kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); |
| kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task; |
| |
| KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p " |
| "current_task=%p\n", |
| gtid, loc_ref, taskdata, current_task)); |
| |
| if (taskdata->td_flags.tiedness == TASK_UNTIED) { |
| // untied task needs to increment counter so that the task structure is not |
| // freed prematurely |
| kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count); |
| KMP_DEBUG_USE_VAR(counter); |
| KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) " |
| "incremented for task %p\n", |
| gtid, counter, taskdata)); |
| } |
| |
| taskdata->td_flags.task_serial = |
| 1; // Execute this task immediately, not deferred. |
| __kmp_task_start(gtid, task, current_task); |
| |
| #if OMPT_SUPPORT |
| if (ompt) { |
| if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL) { |
| current_task->ompt_task_info.frame.enter_frame.ptr = |
| taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address; |
| current_task->ompt_task_info.frame.enter_frame_flags = |
          taskdata->ompt_task_info.frame.exit_frame_flags =
              ompt_frame_application | ompt_frame_framepointer;
| } |
| if (ompt_enabled.ompt_callback_task_create) { |
| ompt_task_info_t *parent_info = &(current_task->ompt_task_info); |
| ompt_callbacks.ompt_callback(ompt_callback_task_create)( |
| &(parent_info->task_data), &(parent_info->frame), |
| &(taskdata->ompt_task_info.task_data), |
| ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(taskdata), 0, |
| return_address); |
| } |
| __ompt_task_start(task, current_task, gtid); |
| } |
| #endif // OMPT_SUPPORT |
| |
| KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid, |
| loc_ref, taskdata)); |
| } |
| |
| #if OMPT_SUPPORT |
| OMPT_NOINLINE |
| static void __kmpc_omp_task_begin_if0_ompt(ident_t *loc_ref, kmp_int32 gtid, |
| kmp_task_t *task, |
| void *frame_address, |
| void *return_address) { |
| __kmpc_omp_task_begin_if0_template<true>(loc_ref, gtid, task, frame_address, |
| return_address); |
| } |
| #endif // OMPT_SUPPORT |
| |
| // __kmpc_omp_task_begin_if0: report that a given serialized task has started |
| // execution |
| // |
| // loc_ref: source location information; points to beginning of task block. |
| // gtid: global thread number. |
| // task: task thunk for the started task. |
| void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid, |
| kmp_task_t *task) { |
| #if OMPT_SUPPORT |
| if (UNLIKELY(ompt_enabled.enabled)) { |
| OMPT_STORE_RETURN_ADDRESS(gtid); |
| __kmpc_omp_task_begin_if0_ompt(loc_ref, gtid, task, |
| OMPT_GET_FRAME_ADDRESS(1), |
| OMPT_LOAD_RETURN_ADDRESS(gtid)); |
| return; |
| } |
| #endif |
| __kmpc_omp_task_begin_if0_template<false>(loc_ref, gtid, task, NULL, NULL); |
| } |
| |
| #ifdef TASK_UNUSED |
| // __kmpc_omp_task_begin: report that a given task has started execution |
| // NEVER GENERATED BY COMPILER, DEPRECATED!!! |
| void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) { |
| kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task; |
| |
| KA_TRACE( |
| 10, |
| ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n", |
| gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task)); |
| |
| __kmp_task_start(gtid, task, current_task); |
| |
| KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid, |
| loc_ref, KMP_TASK_TO_TASKDATA(task))); |
| return; |
| } |
| #endif // TASK_UNUSED |
| |
| // __kmp_free_task: free the current task space and the space for shareds |
| // |
| // gtid: Global thread ID of calling thread |
| // taskdata: task to free |
| // thread: thread data structure of caller |
| static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata, |
| kmp_info_t *thread) { |
| KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid, |
| taskdata)); |
| |
| // Check to make sure all flags and counters have the correct values |
| KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT); |
| KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0); |
| KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1); |
| KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0); |
| KMP_DEBUG_ASSERT(taskdata->td_allocated_child_tasks == 0 || |
| taskdata->td_flags.task_serial == 1); |
| KMP_DEBUG_ASSERT(taskdata->td_incomplete_child_tasks == 0); |
| |
| taskdata->td_flags.freed = 1; |
| ANNOTATE_HAPPENS_BEFORE(taskdata); |
| // deallocate the taskdata and shared variable blocks associated with this task |
| #if USE_FAST_MEMORY |
| __kmp_fast_free(thread, taskdata); |
| #else /* ! USE_FAST_MEMORY */ |
| __kmp_thread_free(thread, taskdata); |
| #endif |
| |
| KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata)); |
| } |
| |
| // __kmp_free_task_and_ancestors: free the current task and ancestors without |
| // children |
| // |
| // gtid: Global thread ID of calling thread |
| // taskdata: task to free |
| // thread: thread data structure of caller |
| static void __kmp_free_task_and_ancestors(kmp_int32 gtid, |
| kmp_taskdata_t *taskdata, |
| kmp_info_t *thread) { |
| // Proxy tasks must always be allowed to free their parents |
| // because they can be run in background even in serial mode. |
| kmp_int32 team_serial = |
| (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) && |
| !taskdata->td_flags.proxy; |
| KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT); |
| |
| kmp_int32 children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1; |
| KMP_DEBUG_ASSERT(children >= 0); |
| |
| // Now, go up the ancestor tree to see if any ancestors can now be freed. |
| while (children == 0) { |
| kmp_taskdata_t *parent_taskdata = taskdata->td_parent; |
| |
| KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete " |
| "and freeing itself\n", |
| gtid, taskdata)); |
| |
| // --- Deallocate my ancestor task --- |
| __kmp_free_task(gtid, taskdata, thread); |
| |
| taskdata = parent_taskdata; |
| |
| if (team_serial) |
| return; |
| // Stop checking ancestors at implicit task instead of walking up ancestor |
| // tree to avoid premature deallocation of ancestors. |
| if (taskdata->td_flags.tasktype == TASK_IMPLICIT) { |
| if (taskdata->td_dephash) { // do we need to cleanup dephash? |
| int children = KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks); |
| kmp_tasking_flags_t flags_old = taskdata->td_flags; |
| if (children == 0 && flags_old.complete == 1) { |
| kmp_tasking_flags_t flags_new = flags_old; |
| flags_new.complete = 0; |
| if (KMP_COMPARE_AND_STORE_ACQ32( |
| RCAST(kmp_int32 *, &taskdata->td_flags), |
| *RCAST(kmp_int32 *, &flags_old), |
| *RCAST(kmp_int32 *, &flags_new))) { |
| KA_TRACE(100, ("__kmp_free_task_and_ancestors: T#%d cleans " |
| "dephash of implicit task %p\n", |
| gtid, taskdata)); |
| // cleanup dephash of finished implicit task |
| __kmp_dephash_free_entries(thread, taskdata->td_dephash); |
| } |
| } |
| } |
| return; |
| } |
| // Predecrement simulated by "- 1" calculation |
| children = KMP_ATOMIC_DEC(&taskdata->td_allocated_child_tasks) - 1; |
| KMP_DEBUG_ASSERT(children >= 0); |
| } |
| |
| KA_TRACE( |
| 20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; " |
| "not freeing it yet\n", |
| gtid, taskdata, children)); |
| } |
| |
| // __kmp_task_finish: bookkeeping to do when a task finishes execution |
| // |
| // gtid: global thread ID for calling thread |
| // task: task to be finished |
| // resumed_task: task to be resumed. (may be NULL if task is serialized) |
| template <bool ompt> |
| static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task, |
| kmp_taskdata_t *resumed_task) { |
| kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); |
| kmp_info_t *thread = __kmp_threads[gtid]; |
| kmp_task_team_t *task_team = |
| thread->th.th_task_team; // might be NULL for serial teams... |
| kmp_int32 children = 0; |
| |
| KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming " |
| "task %p\n", |
| gtid, taskdata, resumed_task)); |
| |
| KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT); |
| |
| // Pop task from stack if tied |
| #ifdef BUILD_TIED_TASK_STACK |
| if (taskdata->td_flags.tiedness == TASK_TIED) { |
| __kmp_pop_task_stack(gtid, thread, taskdata); |
| } |
| #endif /* BUILD_TIED_TASK_STACK */ |
| |
| if (taskdata->td_flags.tiedness == TASK_UNTIED) { |
| // untied task needs to check the counter so that the task structure is not |
| // freed prematurely |
| kmp_int32 counter = KMP_ATOMIC_DEC(&taskdata->td_untied_count) - 1; |
| KA_TRACE( |
| 20, |
| ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n", |
| gtid, counter, taskdata)); |
| if (counter > 0) { |
| // untied task is not done, to be continued possibly by other thread, do |
| // not free it now |
| if (resumed_task == NULL) { |
| KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial); |
| resumed_task = taskdata->td_parent; // In a serialized task, the resumed |
| // task is the parent |
| } |
| thread->th.th_current_task = resumed_task; // restore current_task |
| resumed_task->td_flags.executing = 1; // resume previous task |
| KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, " |
| "resuming task %p\n", |
| gtid, taskdata, resumed_task)); |
| return; |
| } |
| } |
| #if OMPT_SUPPORT |
| if (ompt) |
| __ompt_task_finish(task, resumed_task); |
| #endif |
| |
| // Check mutexinoutset dependencies, release locks |
| kmp_depnode_t *node = taskdata->td_depnode; |
| if (node && (node->dn.mtx_num_locks < 0)) { |
| // negative num_locks means all locks were acquired |
| node->dn.mtx_num_locks = -node->dn.mtx_num_locks; |
| for (int i = node->dn.mtx_num_locks - 1; i >= 0; --i) { |
| KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL); |
| __kmp_release_lock(node->dn.mtx_locks[i], gtid); |
| } |
| } |
| |
| KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0); |
| bool detach = false; |
| if (taskdata->td_flags.detachable == TASK_DETACHABLE) { |
| if (taskdata->td_allow_completion_event.type == |
| KMP_EVENT_ALLOW_COMPLETION) { |
| // event hasn't been fulfilled yet. Try to detach task. |
| __kmp_acquire_tas_lock(&taskdata->td_allow_completion_event.lock, gtid); |
| if (taskdata->td_allow_completion_event.type == |
| KMP_EVENT_ALLOW_COMPLETION) { |
| taskdata->td_flags.proxy = TASK_PROXY; // proxify! |
| detach = true; |
| } |
| __kmp_release_tas_lock(&taskdata->td_allow_completion_event.lock, gtid); |
| } |
| } |
| KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1); |
| KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0); |
| |
| if (!detach) { |
| taskdata->td_flags.complete = 1; // mark the task as completed |
| |
| // Only need to keep track of count if team parallel and tasking not |
| // serialized |
| if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) { |
| // Predecrement simulated by "- 1" calculation |
| children = |
| KMP_ATOMIC_DEC(&taskdata->td_parent->td_incomplete_child_tasks) - 1; |
| KMP_DEBUG_ASSERT(children >= 0); |
| if (taskdata->td_taskgroup) |
| KMP_ATOMIC_DEC(&taskdata->td_taskgroup->count); |
| __kmp_release_deps(gtid, taskdata); |
| } else if (task_team && task_team->tt.tt_found_proxy_tasks) { |
| // if we found proxy tasks there could exist a dependency chain |
| // with the proxy task as origin |
| __kmp_release_deps(gtid, taskdata); |
| } |
| } |
| |
| // td_flags.executing must be marked as 0 after __kmp_release_deps has been |
  // called. Otherwise, if a task is executed immediately from the release_deps
| // code, the flag will be reset to 1 again by this same function |
| KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1); |
| taskdata->td_flags.executing = 0; // suspend the finishing task |
| |
| KA_TRACE( |
| 20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n", |
| gtid, taskdata, children)); |
| |
  /* If the task's destructor thunk flag has been set, we need to invoke the
     destructor thunk that has been generated by the compiler. The code is
     placed here, since at this point other tasks might have been released,
     hence overlapping the destructor invocations with some other work in the
     released tasks. The OpenMP spec is not specific on when the destructors
| are invoked, so we should be free to choose. */ |
| if (taskdata->td_flags.destructors_thunk) { |
| kmp_routine_entry_t destr_thunk = task->data1.destructors; |
| KMP_ASSERT(destr_thunk); |
| destr_thunk(gtid, task); |
| } |
| |
| // bookkeeping for resuming task: |
| // GEH - note tasking_ser => task_serial |
| KMP_DEBUG_ASSERT( |
| (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) == |
| taskdata->td_flags.task_serial); |
| if (taskdata->td_flags.task_serial) { |
| if (resumed_task == NULL) { |
| resumed_task = taskdata->td_parent; // In a serialized task, the resumed |
| // task is the parent |
| } |
| } else { |
| KMP_DEBUG_ASSERT(resumed_task != |
                     NULL); // verify that resumed task is passed as argument
| } |
| |
| // Free this task and then ancestor tasks if they have no children. |
| // Restore th_current_task first as suggested by John: |
| // johnmc: if an asynchronous inquiry peers into the runtime system |
| // it doesn't see the freed task as the current task. |
| thread->th.th_current_task = resumed_task; |
| if (!detach) |
| __kmp_free_task_and_ancestors(gtid, taskdata, thread); |
| |
| // TODO: GEH - make sure root team implicit task is initialized properly. |
| // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 ); |
| resumed_task->td_flags.executing = 1; // resume previous task |
| |
| KA_TRACE( |
| 10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n", |
| gtid, taskdata, resumed_task)); |
| |
| return; |
| } |
| |
| template <bool ompt> |
| static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref, |
| kmp_int32 gtid, |
| kmp_task_t *task) { |
| KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n", |
| gtid, loc_ref, KMP_TASK_TO_TASKDATA(task))); |
| // this routine will provide task to resume |
| __kmp_task_finish<ompt>(gtid, task, NULL); |
| |
| KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n", |
| gtid, loc_ref, KMP_TASK_TO_TASKDATA(task))); |
| |
| #if OMPT_SUPPORT |
| if (ompt) { |
| ompt_frame_t *ompt_frame; |
| __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); |
| ompt_frame->enter_frame = ompt_data_none; |
    ompt_frame->enter_frame_flags =
        ompt_frame_runtime | ompt_frame_framepointer;
| } |
| #endif |
| |
| return; |
| } |
| |
| #if OMPT_SUPPORT |
| OMPT_NOINLINE |
| void __kmpc_omp_task_complete_if0_ompt(ident_t *loc_ref, kmp_int32 gtid, |
| kmp_task_t *task) { |
| __kmpc_omp_task_complete_if0_template<true>(loc_ref, gtid, task); |
| } |
| #endif // OMPT_SUPPORT |
| |
| // __kmpc_omp_task_complete_if0: report that a task has completed execution |
| // |
| // loc_ref: source location information; points to end of task block. |
| // gtid: global thread number. |
| // task: task thunk for the completed task. |
| void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid, |
| kmp_task_t *task) { |
| #if OMPT_SUPPORT |
| if (UNLIKELY(ompt_enabled.enabled)) { |
| __kmpc_omp_task_complete_if0_ompt(loc_ref, gtid, task); |
| return; |
| } |
| #endif |
| __kmpc_omp_task_complete_if0_template<false>(loc_ref, gtid, task); |
| } |
| |
| #ifdef TASK_UNUSED |
| // __kmpc_omp_task_complete: report that a task has completed execution |
| // NEVER GENERATED BY COMPILER, DEPRECATED!!! |
| void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid, |
| kmp_task_t *task) { |
| KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid, |
| loc_ref, KMP_TASK_TO_TASKDATA(task))); |
| |
| __kmp_task_finish<false>(gtid, task, |
| NULL); // Not sure how to find task to resume |
| |
| KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid, |
| loc_ref, KMP_TASK_TO_TASKDATA(task))); |
| return; |
| } |
| #endif // TASK_UNUSED |
| |
| // __kmp_init_implicit_task: Initialize the appropriate fields in the implicit |
| // task for a given thread |
| // |
| // loc_ref: reference to source location of parallel region |
| // this_thr: thread data structure corresponding to implicit task |
| // team: team for this_thr |
| // tid: thread id of given thread within team |
| // set_curr_task: TRUE if need to push current task to thread |
| // NOTE: Routine does not set up the implicit task ICVS. This is assumed to |
| // have already been done elsewhere. |
| // TODO: Get better loc_ref. Value passed in may be NULL |
| void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, |
| kmp_team_t *team, int tid, int set_curr_task) { |
| kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid]; |
| |
| KF_TRACE( |
| 10, |
| ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n", |
| tid, team, task, set_curr_task ? "TRUE" : "FALSE")); |
| |
| task->td_task_id = KMP_GEN_TASK_ID(); |
| task->td_team = team; |
| // task->td_parent = NULL; // fix for CQ230101 (broken parent task info |
| // in debugger) |
| task->td_ident = loc_ref; |
| task->td_taskwait_ident = NULL; |
| task->td_taskwait_counter = 0; |
| task->td_taskwait_thread = 0; |
| |
| task->td_flags.tiedness = TASK_TIED; |
| task->td_flags.tasktype = TASK_IMPLICIT; |
| task->td_flags.proxy = TASK_FULL; |
| |
| // All implicit tasks are executed immediately, not deferred |
| task->td_flags.task_serial = 1; |
| task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec); |
| task->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0; |
| |
| task->td_flags.started = 1; |
| task->td_flags.executing = 1; |
| task->td_flags.complete = 0; |
| task->td_flags.freed = 0; |
| |
| task->td_depnode = NULL; |
| task->td_last_tied = task; |
| task->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED; |
| |
| if (set_curr_task) { // only do this init first time thread is created |
| KMP_ATOMIC_ST_REL(&task->td_incomplete_child_tasks, 0); |
| // Not used: don't need to deallocate implicit task |
| KMP_ATOMIC_ST_REL(&task->td_allocated_child_tasks, 0); |
| task->td_taskgroup = NULL; // An implicit task does not have taskgroup |
| task->td_dephash = NULL; |
| __kmp_push_current_task_to_thread(this_thr, team, tid); |
| } else { |
| KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0); |
| KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0); |
| } |
| |
| #if OMPT_SUPPORT |
| if (UNLIKELY(ompt_enabled.enabled)) |
| __ompt_task_init(task, tid); |
| #endif |
| |
| KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid, |
| team, task)); |
| } |
| |
// __kmp_finish_implicit_task: Release resources associated with implicit
// tasks at the end of parallel regions. Some resources are kept for reuse in
// the next parallel region.
| // |
| // thread: thread data structure corresponding to implicit task |
| void __kmp_finish_implicit_task(kmp_info_t *thread) { |
| kmp_taskdata_t *task = thread->th.th_current_task; |
| if (task->td_dephash) { |
| int children; |
| task->td_flags.complete = 1; |
| children = KMP_ATOMIC_LD_ACQ(&task->td_incomplete_child_tasks); |
| kmp_tasking_flags_t flags_old = task->td_flags; |
| if (children == 0 && flags_old.complete == 1) { |
| kmp_tasking_flags_t flags_new = flags_old; |
| flags_new.complete = 0; |
| if (KMP_COMPARE_AND_STORE_ACQ32(RCAST(kmp_int32 *, &task->td_flags), |
| *RCAST(kmp_int32 *, &flags_old), |
| *RCAST(kmp_int32 *, &flags_new))) { |
| KA_TRACE(100, ("__kmp_finish_implicit_task: T#%d cleans " |
| "dephash of implicit task %p\n", |
| thread->th.th_info.ds.ds_gtid, task)); |
| __kmp_dephash_free_entries(thread, task->td_dephash); |
| } |
| } |
| } |
| } |
| |
// __kmp_free_implicit_task: Release resources associated with implicit tasks
// when these are destroyed
| // |
| // thread: thread data structure corresponding to implicit task |
| void __kmp_free_implicit_task(kmp_info_t *thread) { |
| kmp_taskdata_t *task = thread->th.th_current_task; |
| if (task && task->td_dephash) { |
| __kmp_dephash_free(thread, task->td_dephash); |
| task->td_dephash = NULL; |
| } |
| } |
| |
// Round up a size to a multiple of val, where val is a power of two: Used to
// insert padding between structures co-allocated using a single malloc() call
| static size_t __kmp_round_up_to_val(size_t size, size_t val) { |
| if (size & (val - 1)) { |
| size &= ~(val - 1); |
| if (size <= KMP_SIZE_T_MAX - val) { |
| size += val; // Round up if there is no overflow. |
| } |
| } |
| return size; |
} // __kmp_round_up_to_val
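
// Worked example: with val == sizeof(void *) == 8, a size of 52 becomes
// (52 & ~7) + 8 == 48 + 8 == 56, while a size of 56 is already a multiple of 8
// and is returned unchanged.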
| |
| // __kmp_task_alloc: Allocate the taskdata and task data structures for a task |
| // |
| // loc_ref: source location information |
| // gtid: global thread number. |
| // flags: include tiedness & task type (explicit vs. implicit) of the ''new'' |
| // task encountered. Converted from kmp_int32 to kmp_tasking_flags_t in routine. |
| // sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including |
| // private vars accessed in task. |
| // sizeof_shareds: Size in bytes of array of pointers to shared vars accessed |
| // in task. |
| // task_entry: Pointer to task code entry point generated by compiler. |
| // returns: a pointer to the allocated kmp_task_t structure (task). |
| kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, |
| kmp_tasking_flags_t *flags, |
| size_t sizeof_kmp_task_t, size_t sizeof_shareds, |
| kmp_routine_entry_t task_entry) { |
| kmp_task_t *task; |
| kmp_taskdata_t *taskdata; |
| kmp_info_t *thread = __kmp_threads[gtid]; |
| kmp_team_t *team = thread->th.th_team; |
| kmp_taskdata_t *parent_task = thread->th.th_current_task; |
| size_t shareds_offset; |
| |
| if (!TCR_4(__kmp_init_middle)) |
| __kmp_middle_initialize(); |
| |
| KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) " |
| "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", |
| gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t, |
| sizeof_shareds, task_entry)); |
| |
| if (parent_task->td_flags.final) { |
| if (flags->merged_if0) { |
| } |
| flags->final = 1; |
| } |
| if (flags->tiedness == TASK_UNTIED && !team->t.t_serialized) { |
    // An untied task encountered causes the TSC algorithm to check the entire
    // deque of the victim thread. If no untied task has been encountered,
    // checking the head of the deque should be enough.
| KMP_CHECK_UPDATE(thread->th.th_task_team->tt.tt_untied_task_encountered, 1); |
| } |
| |
  // Detachable tasks are not proxy tasks yet but could be in the future.
  // Doing the tasking setup when that happens is too late.
| if (flags->proxy == TASK_PROXY || flags->detachable == TASK_DETACHABLE) { |
| if (flags->proxy == TASK_PROXY) { |
| flags->tiedness = TASK_UNTIED; |
| flags->merged_if0 = 1; |
| } |
| /* are we running in a sequential parallel or tskm_immediate_exec... we need |
| tasking support enabled */ |
| if ((thread->th.th_task_team) == NULL) { |
| /* This should only happen if the team is serialized |
| setup a task team and propagate it to the thread */ |
| KMP_DEBUG_ASSERT(team->t.t_serialized); |
| KA_TRACE(30, |
| ("T#%d creating task team in __kmp_task_alloc for proxy task\n", |
| gtid)); |
| __kmp_task_team_setup( |
| thread, team, |
| 1); // 1 indicates setup the current team regardless of nthreads |
| thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state]; |
| } |
| kmp_task_team_t *task_team = thread->th.th_task_team; |
| |
| /* tasking must be enabled now as the task might not be pushed */ |
| if (!KMP_TASKING_ENABLED(task_team)) { |
| KA_TRACE( |
| 30, |
| ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid)); |
| __kmp_enable_tasking(task_team, thread); |
| kmp_int32 tid = thread->th.th_info.ds.ds_tid; |
| kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid]; |
| // No lock needed since only owner can allocate |
| if (thread_data->td.td_deque == NULL) { |
| __kmp_alloc_task_deque(thread, thread_data); |
| } |
| } |
| |
| if (task_team->tt.tt_found_proxy_tasks == FALSE) |
| TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE); |
| } |
| |
| // Calculate shared structure offset including padding after kmp_task_t struct |
| // to align pointers in shared struct |
| shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t; |
| shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *)); |
| |
| // Allocate a kmp_taskdata_t block and a kmp_task_t block. |
| KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid, |
| shareds_offset)); |
| KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid, |
| sizeof_shareds)); |
| |
| // Avoid double allocation here by combining shareds with taskdata |
| #if USE_FAST_MEMORY |
| taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset + |
| sizeof_shareds); |
| #else /* ! USE_FAST_MEMORY */ |
| taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset + |
| sizeof_shareds); |
| #endif /* USE_FAST_MEMORY */ |
| ANNOTATE_HAPPENS_AFTER(taskdata); |
| |
| task = KMP_TASKDATA_TO_TASK(taskdata); |
| |
| // Make sure task & taskdata are aligned appropriately |
| #if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD |
| KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0); |
| KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0); |
| #else |
| KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0); |
| KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0); |
| #endif |
| if (sizeof_shareds > 0) { |
| // Avoid double allocation here by combining shareds with taskdata |
| task->shareds = &((char *)taskdata)[shareds_offset]; |
| // Make sure shareds struct is aligned to pointer size |
| KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) == |
| 0); |
| } else { |
| task->shareds = NULL; |
| } |
| task->routine = task_entry; |
| task->part_id = 0; // AC: Always start with 0 part id |
| |
| taskdata->td_task_id = KMP_GEN_TASK_ID(); |
| taskdata->td_team = team; |
| taskdata->td_alloc_thread = thread; |
| taskdata->td_parent = parent_task; |
| taskdata->td_level = parent_task->td_level + 1; // increment nesting level |
| KMP_ATOMIC_ST_RLX(&taskdata->td_untied_count, 0); |
| taskdata->td_ident = loc_ref; |
| taskdata->td_taskwait_ident = NULL; |
| taskdata->td_taskwait_counter = 0; |
| taskdata->td_taskwait_thread = 0; |
| KMP_DEBUG_ASSERT(taskdata->td_parent != NULL); |
| // avoid copying icvs for proxy tasks |
| if (flags->proxy == TASK_FULL) |
| copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs); |
| |
| taskdata->td_flags.tiedness = flags->tiedness; |
| taskdata->td_flags.final = flags->final; |
| taskdata->td_flags.merged_if0 = flags->merged_if0; |
| taskdata->td_flags.destructors_thunk = flags->destructors_thunk; |
| taskdata->td_flags.proxy = flags->proxy; |
| taskdata->td_flags.detachable = flags->detachable; |
| taskdata->td_task_team = thread->th.th_task_team; |
| taskdata->td_size_alloc = shareds_offset + sizeof_shareds; |
| taskdata->td_flags.tasktype = TASK_EXPLICIT; |
| |
| // GEH - TODO: fix this to copy parent task's value of tasking_ser flag |
| taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec); |
| |
| // GEH - TODO: fix this to copy parent task's value of team_serial flag |
| taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0; |
| |
| // GEH - Note we serialize the task if the team is serialized to make sure |
| // implicit parallel region tasks are not left until program termination to |
| // execute. Also, it helps locality to execute immediately. |
| |
| taskdata->td_flags.task_serial = |
| (parent_task->td_flags.final || taskdata->td_flags.team_serial || |
| taskdata->td_flags.tasking_ser); |
| |
| taskdata->td_flags.started = 0; |
| taskdata->td_flags.executing = 0; |
| taskdata->td_flags.complete = 0; |
| taskdata->td_flags.freed = 0; |
| |
| taskdata->td_flags.native = flags->native; |
| |
| KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0); |
| // start at one because counts current task and children |
| KMP_ATOMIC_ST_RLX(&taskdata->td_allocated_child_tasks, 1); |
| taskdata->td_taskgroup = |
| parent_task->td_taskgroup; // task inherits taskgroup from the parent task |
| taskdata->td_dephash = NULL; |
| taskdata->td_depnode = NULL; |
| if (flags->tiedness == TASK_UNTIED) |
| taskdata->td_last_tied = NULL; // will be set when the task is scheduled |
| else |
| taskdata->td_last_tied = taskdata; |
| taskdata->td_allow_completion_event.type = KMP_EVENT_UNINITIALIZED; |
| #if OMPT_SUPPORT |
| if (UNLIKELY(ompt_enabled.enabled)) |
| __ompt_task_init(taskdata, gtid); |
| #endif |
  // Only need to keep track of child task counts if team parallel and tasking
  // not serialized or if it is a proxy or detachable task
  if (flags->proxy == TASK_PROXY || flags->detachable == TASK_DETACHABLE ||
      !(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) {
| KMP_ATOMIC_INC(&parent_task->td_incomplete_child_tasks); |
| if (parent_task->td_taskgroup) |
| KMP_ATOMIC_INC(&parent_task->td_taskgroup->count); |
| // Only need to keep track of allocated child tasks for explicit tasks since |
| // implicit not deallocated |
| if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) { |
| KMP_ATOMIC_INC(&taskdata->td_parent->td_allocated_child_tasks); |
| } |
| } |
| |
| KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n", |
| gtid, taskdata, taskdata->td_parent)); |
| ANNOTATE_HAPPENS_BEFORE(task); |
| |
| return task; |
| } |
| |
| kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, |
| kmp_int32 flags, size_t sizeof_kmp_task_t, |
| size_t sizeof_shareds, |
| kmp_routine_entry_t task_entry) { |
| kmp_task_t *retval; |
| kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags; |
| |
| input_flags->native = FALSE; |
| // __kmp_task_alloc() sets up all other runtime flags |
| |
| KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s %s) " |
| "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", |
| gtid, loc_ref, input_flags->tiedness ? "tied " : "untied", |
| input_flags->proxy ? "proxy" : "", |
| input_flags->detachable ? "detachable" : "", sizeof_kmp_task_t, |
| sizeof_shareds, task_entry)); |
| |
| retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t, |
| sizeof_shareds, task_entry); |
| |
| KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval)); |
| |
| return retval; |
| } |
| |
| kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid, |
| kmp_int32 flags, |
| size_t sizeof_kmp_task_t, |
| size_t sizeof_shareds, |
| kmp_routine_entry_t task_entry, |
| kmp_int64 device_id) { |
| return __kmpc_omp_task_alloc(loc_ref, gtid, flags, sizeof_kmp_task_t, |
| sizeof_shareds, task_entry); |
| } |
| |
| /*! |
| @ingroup TASKING |
| @param loc_ref location of the original task directive |
| @param gtid Global Thread ID of encountering thread |
| @param new_task task thunk allocated by __kmpc_omp_task_alloc() for the ''new |
| task'' |
| @param naffins Number of affinity items |
| @param affin_list List of affinity items |
| @return Returns non-zero if registering affinity information was not successful. |
| Returns 0 if registration was successful |
| This entry registers the affinity information attached to a task with the task |
| thunk structure kmp_taskdata_t. |
| */ |
| kmp_int32 |
| __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid, |
| kmp_task_t *new_task, kmp_int32 naffins, |
| kmp_task_affinity_info_t *affin_list) { |
| return 0; |
| } |
| |
| // __kmp_invoke_task: invoke the specified task |
| // |
| // gtid: global thread ID of caller |
| // task: the task to invoke |
// current_task: the task to resume after task invocation
| static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task, |
| kmp_taskdata_t *current_task) { |
| kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); |
| kmp_info_t *thread; |
| int discard = 0 /* false */; |
| KA_TRACE( |
| 30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n", |
| gtid, taskdata, current_task)); |
| KMP_DEBUG_ASSERT(task); |
| if (taskdata->td_flags.proxy == TASK_PROXY && |
| taskdata->td_flags.complete == 1) { |
| // This is a proxy task that was already completed but it needs to run |
| // its bottom-half finish |
| KA_TRACE( |
| 30, |
| ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n", |
| gtid, taskdata)); |
| |
| __kmp_bottom_half_finish_proxy(gtid, task); |
| |
| KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for " |
| "proxy task %p, resuming task %p\n", |
| gtid, taskdata, current_task)); |
| |
| return; |
| } |
| |
| #if OMPT_SUPPORT |
| // For untied tasks, the first task executed only calls __kmpc_omp_task and |
| // does not execute code. |
| ompt_thread_info_t oldInfo; |
| if (UNLIKELY(ompt_enabled.enabled)) { |
| // Store the threads states and restore them after the task |
| thread = __kmp_threads[gtid]; |
| oldInfo = thread->th.ompt_thread_info; |
| thread->th.ompt_thread_info.wait_id = 0; |
| thread->th.ompt_thread_info.state = (thread->th.th_team_serialized) |
| ? ompt_state_work_serial |
| : ompt_state_work_parallel; |
| taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
| } |
| #endif |
| |
| // Proxy tasks are not handled by the runtime |
| if (taskdata->td_flags.proxy != TASK_PROXY) { |
| ANNOTATE_HAPPENS_AFTER(task); |
| __kmp_task_start(gtid, task, current_task); // OMPT only if not discarded |
| } |
| |
| // TODO: cancel tasks if the parallel region has also been cancelled |
| // TODO: check if this sequence can be hoisted above __kmp_task_start |
| // if cancellation has been enabled for this run ... |
| if (__kmp_omp_cancellation) { |
| thread = __kmp_threads[gtid]; |
| kmp_team_t *this_team = thread->th.th_team; |
| kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup; |
| if ((taskgroup && taskgroup->cancel_request) || |
| (this_team->t.t_cancel_request == cancel_parallel)) { |
| #if OMPT_SUPPORT && OMPT_OPTIONAL |
| ompt_data_t *task_data; |
| if (UNLIKELY(ompt_enabled.ompt_callback_cancel)) { |
| __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL); |
| ompt_callbacks.ompt_callback(ompt_callback_cancel)( |
| task_data, |
| ((taskgroup && taskgroup->cancel_request) ? ompt_cancel_taskgroup |
| : ompt_cancel_parallel) | |
| ompt_cancel_discarded_task, |
| NULL); |
| } |
| #endif |
| KMP_COUNT_BLOCK(TASK_cancelled); |
| // this task belongs to a task group and we need to cancel it |
| discard = 1 /* true */; |
| } |
| } |
| |
| // Invoke the task routine and pass in relevant data. |
| // Thunks generated by gcc take a different argument list. |
| if (!discard) { |
| if (taskdata->td_flags.tiedness == TASK_UNTIED) { |
| taskdata->td_last_tied = current_task->td_last_tied; |
| KMP_DEBUG_ASSERT(taskdata->td_last_tied); |
| } |
| #if KMP_STATS_ENABLED |
| KMP_COUNT_BLOCK(TASK_executed); |
| switch (KMP_GET_THREAD_STATE()) { |
| case FORK_JOIN_BARRIER: |
| KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar); |
| break; |
| case PLAIN_BARRIER: |
| KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar); |
| break; |
| case TASKYIELD: |
| KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield); |
| break; |
| case TASKWAIT: |
| KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait); |
| break; |
| case TASKGROUP: |
| KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup); |
| break; |
| default: |
| KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate); |
| break; |
| } |
| #endif // KMP_STATS_ENABLED |
| |
| // OMPT task begin |
| #if OMPT_SUPPORT |
| if (UNLIKELY(ompt_enabled.enabled)) |
| __ompt_task_start(task, current_task, gtid); |
| #endif |
| |
| #if USE_ITT_BUILD && USE_ITT_NOTIFY |
| kmp_uint64 cur_time; |
| kmp_int32 kmp_itt_count_task = |
| __kmp_forkjoin_frames_mode == 3 && !taskdata->td_flags.task_serial && |
| current_task->td_flags.tasktype == TASK_IMPLICIT; |
| if (kmp_itt_count_task) { |
| thread = __kmp_threads[gtid]; |
| // Time outer level explicit task on barrier for adjusting imbalance time |
| if (thread->th.th_bar_arrive_time) |
| cur_time = __itt_get_timestamp(); |
| else |
| kmp_itt_count_task = 0; // thread is not on a barrier - skip timing |
| } |
| #endif |
| |
| #ifdef KMP_GOMP_COMPAT |
| if (taskdata->td_flags.native) { |
| ((void (*)(void *))(*(task->routine)))(task->shareds); |
| } else |
| #endif /* KMP_GOMP_COMPAT */ |
| { |
| (*(task->routine))(gtid, task); |
| } |
| KMP_POP_PARTITIONED_TIMER(); |
| |
| #if USE_ITT_BUILD && USE_ITT_NOTIFY |
| if (kmp_itt_count_task) { |
| // Barrier imbalance - adjust arrive time with the task duration |
| thread->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time); |
| } |
| #endif |
| |
| } |
| |
| // Proxy tasks are not handled by the runtime |
| if (taskdata->td_flags.proxy != TASK_PROXY) { |
| ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent); |
| #if OMPT_SUPPORT |
| if (UNLIKELY(ompt_enabled.enabled)) { |
| thread->th.ompt_thread_info = oldInfo; |
| if (taskdata->td_flags.tiedness == TASK_TIED) { |
| taskdata->ompt_task_info.frame.exit_frame = ompt_data_none; |
| } |
| __kmp_task_finish<true>(gtid, task, current_task); |
| } else |
| #endif |
| __kmp_task_finish<false>(gtid, task, current_task); |
| } |
| |
| KA_TRACE( |
| 30, |
| ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n", |
| gtid, taskdata, current_task)); |
| return; |
| } |
| |
| // __kmpc_omp_task_parts: Schedule a thread-switchable task for execution |
| // |
| // loc_ref: location of original task pragma (ignored) |
| // gtid: Global Thread ID of encountering thread |
// new_task: task thunk allocated by __kmpc_omp_task_alloc() for the ''new task''
| // Returns: |
| // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to |
| // be resumed later. |
| // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be |
| // resumed later. |
| kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid, |
| kmp_task_t *new_task) { |
| kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); |
| |
| KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid, |
| loc_ref, new_taskdata)); |
| |
| #if OMPT_SUPPORT |
  kmp_taskdata_t *parent = NULL;
| if (UNLIKELY(ompt_enabled.enabled)) { |
| parent = new_taskdata->td_parent; |
| if (ompt_enabled.ompt_callback_task_create) { |
| ompt_data_t task_data = ompt_data_none; |
| ompt_callbacks.ompt_callback(ompt_callback_task_create)( |
| parent ? &(parent->ompt_task_info.task_data) : &task_data, |
| parent ? &(parent->ompt_task_info.frame) : NULL, |
| &(new_taskdata->ompt_task_info.task_data), ompt_task_explicit, 0, |
| OMPT_GET_RETURN_ADDRESS(0)); |
| } |
| } |
| #endif |
| |
| /* Should we execute the new task or queue it? For now, let's just always try |
| to queue it. If the queue fills up, then we'll execute it. */ |
| |
| if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer |
| { // Execute this task immediately |
| kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task; |
| new_taskdata->td_flags.task_serial = 1; |
| __kmp_invoke_task(gtid, new_task, current_task); |
| } |
| |
  KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning "
                "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n",
                gtid, loc_ref, new_taskdata));
| |
| ANNOTATE_HAPPENS_BEFORE(new_task); |
| #if OMPT_SUPPORT |
  if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
| parent->ompt_task_info.frame.enter_frame = ompt_data_none; |
| } |
| #endif |
| return TASK_CURRENT_NOT_QUEUED; |
| } |
| |
| // __kmp_omp_task: Schedule a non-thread-switchable task for execution |
| // |
| // gtid: Global Thread ID of encountering thread |
// new_task: non-thread-switchable task thunk allocated by
// __kmpc_omp_task_alloc()
| // serialize_immediate: if TRUE then if the task is executed immediately its |
| // execution will be serialized |
| // Returns: |
| // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to |
| // be resumed later. |
| // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be |
| // resumed later. |
| kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task, |
| bool serialize_immediate) { |
| kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); |
| |
| /* Should we execute the new task or queue it? For now, let's just always try |
| to queue it. If the queue fills up, then we'll execute it. */ |
| if (new_taskdata->td_flags.proxy == TASK_PROXY || |
| __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer |
| { // Execute this task immediately |
| kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task; |
| if (serialize_immediate) |
| new_taskdata->td_flags.task_serial = 1; |
| __kmp_invoke_task(gtid, new_task, current_task); |
| } |
| |
| ANNOTATE_HAPPENS_BEFORE(new_task); |
| return TASK_CURRENT_NOT_QUEUED; |
| } |
| |
| // __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a |
| // non-thread-switchable task from the parent thread only! |
| // |
| // loc_ref: location of original task pragma (ignored) |
| // gtid: Global Thread ID of encountering thread |
| // new_task: non-thread-switchable task thunk allocated by |
// __kmpc_omp_task_alloc()
| // Returns: |
| // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to |
| // be resumed later. |
| // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be |
| // resumed later. |
| kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid, |
| kmp_task_t *new_task) { |
| kmp_int32 res; |
| KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK); |
| |
| #if KMP_DEBUG || OMPT_SUPPORT |
| kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); |
| #endif |
| KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref, |
| new_taskdata)); |
| |
| #if OMPT_SUPPORT |
| kmp_taskdata_t *parent = NULL; |
| if (UNLIKELY(ompt_enabled.enabled)) { |
| if (!new_taskdata->td_flags.started) { |
| OMPT_STORE_RETURN_ADDRESS(gtid); |
| parent = new_taskdata->td_parent; |
| if (!parent->ompt_task_info.frame.enter_frame.ptr) { |
| parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
| } |
| if (ompt_enabled.ompt_callback_task_create) { |
| ompt_data_t task_data = ompt_data_none; |
| ompt_callbacks.ompt_callback(ompt_callback_task_create)( |
| parent ? &(parent->ompt_task_info.task_data) : &task_data, |
| parent ? &(parent->ompt_task_info.frame) : NULL, |
| &(new_taskdata->ompt_task_info.task_data), |
| ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0, |
| OMPT_LOAD_RETURN_ADDRESS(gtid)); |
| } |
| } else { |
| // We are scheduling the continuation of an UNTIED task. |
| // Scheduling back to the parent task. |
| __ompt_task_finish(new_task, |
| new_taskdata->ompt_task_info.scheduling_parent, |
| ompt_task_switch); |
| new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none; |
| } |
| } |
| #endif |
| |
| res = __kmp_omp_task(gtid, new_task, true); |
| |
| KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning " |
| "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", |
| gtid, loc_ref, new_taskdata)); |
| #if OMPT_SUPPORT |
| if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) { |
| parent->ompt_task_info.frame.enter_frame = ompt_data_none; |
| } |
| #endif |
| return res; |
| } |
| |
| // __kmp_omp_taskloop_task: Wrapper around __kmp_omp_task to schedule |
| // a taskloop task with the correct OMPT return address |
| // |
| // loc_ref: location of original task pragma (ignored) |
| // gtid: Global Thread ID of encountering thread |
| // new_task: non-thread-switchable task thunk allocated by |
// __kmpc_omp_task_alloc()
| // codeptr_ra: return address for OMPT callback |
| // Returns: |
| // TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to |
| // be resumed later. |
| // TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be |
| // resumed later. |
| kmp_int32 __kmp_omp_taskloop_task(ident_t *loc_ref, kmp_int32 gtid, |
| kmp_task_t *new_task, void *codeptr_ra) { |
| kmp_int32 res; |
| KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK); |
| |
| #if KMP_DEBUG || OMPT_SUPPORT |
| kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); |
| #endif |
  KA_TRACE(10, ("__kmp_omp_taskloop_task(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, new_taskdata));
| |
| #if OMPT_SUPPORT |
| kmp_taskdata_t *parent = NULL; |
| if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) { |
| parent = new_taskdata->td_parent; |
| if (!parent->ompt_task_info.frame.enter_frame.ptr) |
| parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); |
| if (ompt_enabled.ompt_callback_task_create) { |
| ompt_data_t task_data = ompt_data_none; |
| ompt_callbacks.ompt_callback(ompt_callback_task_create)( |
| parent ? &(parent->ompt_task_info.task_data) : &task_data, |
| parent ? &(parent->ompt_task_info.frame) : NULL, |
| &(new_taskdata->ompt_task_info.task_data), |
| ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 0, |
| codeptr_ra); |
| } |
| } |
| #endif |
| |
| res = __kmp_omp_task(gtid, new_task, true); |
| |
  KA_TRACE(10, ("__kmp_omp_taskloop_task(exit): T#%d returning "
| "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", |
| gtid, loc_ref, new_taskdata)); |
| #if OMPT_SUPPORT |
| if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) { |
| parent->ompt_task_info.frame.enter_frame = ompt_data_none; |
| } |
| #endif |
| return res; |
| } |
| |
| template <bool ompt> |
| static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, |
| void *frame_address, |
| void *return_address) { |
| kmp_taskdata_t *taskdata; |
| kmp_info_t *thread; |
| int thread_finished = FALSE; |
| KMP_SET_THREAD_STATE_BLOCK(TASKWAIT); |
| |
| KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref)); |
| |
| if (__kmp_tasking_mode != tskm_immediate_exec) { |
| thread = __kmp_threads[gtid]; |
| taskdata = thread->th.th_current_task; |
| |
| #if OMPT_SUPPORT && OMPT_OPTIONAL |
| ompt_data_t *my_task_data; |
| ompt_data_t *my_parallel_data; |
| |
| if (ompt) { |
| my_task_data = &(taskdata->ompt_task_info.task_data); |
| my_parallel_data = OMPT_CUR_TEAM_DATA(thread); |
| |
| taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address; |
| |
| if (ompt_enabled.ompt_callback_sync_region) { |
| ompt_callbacks.ompt_callback(ompt_callback_sync_region)( |
| ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data, |
| my_task_data, return_address); |
| } |
| |
| if (ompt_enabled.ompt_callback_sync_region_wait) { |
| ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( |
| ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data, |
| my_task_data, return_address); |
| } |
| } |
| #endif // OMPT_SUPPORT && OMPT_OPTIONAL |
| |
    // Debugger: The taskwait is active. Store the location and the thread
    // that encountered the taskwait.
| #if USE_ITT_BUILD |
| // Note: These values are used by ITT events as well. |
| #endif /* USE_ITT_BUILD */ |
| taskdata->td_taskwait_counter += 1; |
| taskdata->td_taskwait_ident = loc_ref; |
| taskdata->td_taskwait_thread = gtid + 1; |
| |
| #if USE_ITT_BUILD |
| void *itt_sync_obj = __kmp_itt_taskwait_object(gtid); |
| if (itt_sync_obj != NULL) |
| __kmp_itt_taskwait_starting(gtid, itt_sync_obj); |
| #endif /* USE_ITT_BUILD */ |
| |
| bool must_wait = |
| !taskdata->td_flags.team_serial && !taskdata->td_flags.final; |
| |
| must_wait = must_wait || (thread->th.th_task_team != NULL && |
| thread->th.th_task_team->tt.tt_found_proxy_tasks); |
| if (must_wait) { |
| kmp_flag_32 flag(RCAST(std::atomic<kmp_uint32> *, |
| &(taskdata->td_incomplete_child_tasks)), |
| 0U); |
| while (KMP_ATOMIC_LD_ACQ(&taskdata->td_incomplete_child_tasks) != 0) { |
| flag.execute_tasks(thread, gtid, FALSE, |
| &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), |
| __kmp_task_stealing_constraint); |
| } |
| } |
| #if USE_ITT_BUILD |
| if (itt_sync_obj != NULL) |
| __kmp_itt_taskwait_finished(gtid, itt_sync_obj); |
| #endif /* USE_ITT_BUILD */ |
| |
| // Debugger: The taskwait is completed. Location remains, but thread is |
| // negated. |
| taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; |
| |
| #if OMPT_SUPPORT && OMPT_OPTIONAL |
| if (ompt) { |
| if (ompt_enabled.ompt_callback_sync_region_wait) { |
| ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( |
| ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data, |
| my_task_data, return_address); |
| } |
| if (ompt_enabled.ompt_callback_sync_region) { |
| ompt_callbacks.ompt_callback(ompt_callback_sync_region)( |
| ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data, |
| my_task_data, return_address); |
| } |
| taskdata->ompt_task_info.frame.enter_frame = ompt_data_none; |
| } |
| #endif // OMPT_SUPPORT && OMPT_OPTIONAL |
| |
| ANNOTATE_HAPPENS_AFTER(taskdata); |
| } |
| |
| KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, " |
| "returning TASK_CURRENT_NOT_QUEUED\n", |
| gtid, taskdata)); |
| |
| return TASK_CURRENT_NOT_QUEUED; |
| } |
| |
| #if OMPT_SUPPORT && OMPT_OPTIONAL |
| OMPT_NOINLINE |
| static kmp_int32 __kmpc_omp_taskwait_ompt(ident_t *loc_ref, kmp_int32 gtid, |
| void *frame_address, |
| void *return_address) { |
| return __kmpc_omp_taskwait_template<true>(loc_ref, gtid, frame_address, |
| return_address); |
| } |
| #endif // OMPT_SUPPORT && OMPT_OPTIONAL |
| |
| // __kmpc_omp_taskwait: Wait until all tasks generated by the current task are |
| // complete |
| kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) { |
| #if OMPT_SUPPORT && OMPT_OPTIONAL |
| if (UNLIKELY(ompt_enabled.enabled)) { |
| OMPT_STORE_RETURN_ADDRESS(gtid); |
| return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0), |
| OMPT_LOAD_RETURN_ADDRESS(gtid)); |
| } |
| #endif |
| return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL); |
| } |
| |
| // __kmpc_omp_taskyield: switch to a different task |
| kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) { |
| kmp_taskdata_t *taskdata; |
| kmp_info_t *thread; |
| int thread_finished = FALSE; |
| |
| KMP_COUNT_BLOCK(OMP_TASKYIELD); |
| KMP_SET_THREAD_STATE_BLOCK(TASKYIELD); |
| |
| KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n", |
| gtid, loc_ref, end_part)); |
| |
| if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) { |
| thread = __kmp_threads[gtid]; |
| taskdata = thread->th.th_current_task; |
| // Should we model this as a task wait or not? |
    // Debugger: The taskwait is active. Store the location and the thread
    // that encountered the taskwait.
| #if USE_ITT_BUILD |
| // Note: These values are used by ITT events as well. |
| #endif /* USE_ITT_BUILD */ |
| taskdata->td_taskwait_counter += 1; |
| taskdata->td_taskwait_ident = loc_ref; |
| taskdata->td_taskwait_thread = gtid + 1; |
| |
| #if USE_ITT_BUILD |
| void *itt_sync_obj = __kmp_itt_taskwait_object(gtid); |
| if (itt_sync_obj != NULL) |
| __kmp_itt_taskwait_starting(gtid, itt_sync_obj); |
| #endif /* USE_ITT_BUILD */ |
| if (!taskdata->td_flags.team_serial) { |
| kmp_task_team_t *task_team = thread->th.th_task_team; |
| if (task_team != NULL) { |
| if (KMP_TASKING_ENABLED(task_team)) { |
| #if OMPT_SUPPORT |
| if (UNLIKELY(ompt_enabled.enabled)) |
| thread->th.ompt_thread_info.ompt_task_yielded = 1; |
| #endif |
| __kmp_execute_tasks_32( |
| thread, gtid, NULL, FALSE, |
| &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), |
| __kmp_task_stealing_constraint); |
| #if OMPT_SUPPORT |
| if (UNLIKELY(ompt_enabled.enabled)) |
| thread->th.ompt_thread_info.ompt_task_yielded = 0; |
| #endif |
| } |
| } |
| } |
| #if USE_ITT_BUILD |
| if (itt_sync_obj != NULL) |
| __kmp_itt_taskwait_finished(gtid, itt_sync_obj); |
| #endif /* USE_ITT_BUILD */ |
| |
| // Debugger: The taskwait is completed. Location remains, but thread is |
| // negated. |
| taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; |
| } |
| |
| KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, " |
| "returning TASK_CURRENT_NOT_QUEUED\n", |
| gtid, taskdata)); |
| |
| return TASK_CURRENT_NOT_QUEUED; |
| } |
| |
| // Task Reduction implementation |
| // |
// Note: the initial implementation did not account for the possibility of
// specifying omp_orig for the initializer of a UDR (user-defined reduction).
// The corrected implementation takes the omp_orig object into account.
// The compiler is free to use the old interface if omp_orig is not specified.
| |
| /*! |
| @ingroup BASIC_TYPES |
| @{ |
| */ |
| |
| /*! |
| Flags for special info per task reduction item. |
| */ |
| typedef struct kmp_taskred_flags { |
| /*! 1 - use lazy alloc/init (e.g. big objects, #tasks < #threads) */ |
| unsigned lazy_priv : 1; |
| unsigned reserved31 : 31; |
| } kmp_taskred_flags_t; |
| |
| /*! |
| Internal struct for reduction data item related info set up by compiler. |
| */ |
| typedef struct kmp_task_red_input { |
| void *reduce_shar; /**< shared between tasks item to reduce into */ |
| size_t reduce_size; /**< size of data item in bytes */ |
| // three compiler-generated routines (init, fini are optional): |
| void *reduce_init; /**< data initialization routine (single parameter) */ |
| void *reduce_fini; /**< data finalization routine */ |
| void *reduce_comb; /**< data combiner routine */ |
| kmp_taskred_flags_t flags; /**< flags for additional info from compiler */ |
| } kmp_task_red_input_t; |
| |
| /*! |
| Internal struct for reduction data item related info saved by the library. |
| */ |
| typedef struct kmp_taskred_data { |
| void *reduce_shar; /**< shared between tasks item to reduce into */ |
| size_t reduce_size; /**< size of data item */ |
| kmp_taskred_flags_t flags; /**< flags for additional info from compiler */ |
| void *reduce_priv; /**< array of thread specific items */ |
| void *reduce_pend; /**< end of private data for faster comparison op */ |
| // three compiler-generated routines (init, fini are optional): |
| void *reduce_comb; /**< data combiner routine */ |
| void *reduce_init; /**< data initialization routine (two parameters) */ |
| void *reduce_fini; /**< data finalization routine */ |
| void *reduce_orig; /**< original item (can be used in UDR initializer) */ |
| } kmp_taskred_data_t; |
| |
| /*! |
| Internal struct for reduction data item related info set up by compiler. |
| |
| New interface: added reduce_orig field to provide omp_orig for UDR initializer. |
| */ |
| typedef struct kmp_taskred_input { |
| void *reduce_shar; /**< shared between tasks item to reduce into */ |
| void *reduce_orig; /**< original reduction item used for initialization */ |
| size_t reduce_size; /**< size of data item */ |
| // three compiler-generated routines (init, fini are optional): |
| void *reduce_init; /**< data initialization routine (two parameters) */ |
| void *reduce_fini; /**< data finalization routine */ |
| void *reduce_comb; /**< data combiner routine */ |
| kmp_taskred_flags_t flags; /**< flags for additional info from compiler */ |
| } kmp_taskred_input_t; |
| /*! |
| @} |
| */ |
| |
| template <typename T> void __kmp_assign_orig(kmp_taskred_data_t &item, T &src); |
| template <> |
| void __kmp_assign_orig<kmp_task_red_input_t>(kmp_taskred_data_t &item, |
| kmp_task_red_input_t &src) { |
| item.reduce_orig = NULL; |
| } |
| template <> |
| void __kmp_assign_orig<kmp_taskred_input_t>(kmp_taskred_data_t &item, |
| kmp_taskred_input_t &src) { |
| if (src.reduce_orig != NULL) { |
| item.reduce_orig = src.reduce_orig; |
| } else { |
| item.reduce_orig = src.reduce_shar; |
| } // non-NULL reduce_orig means new interface used |
| } |
| |
| template <typename T> void __kmp_call_init(kmp_taskred_data_t &item, int j); |
| template <> |
| void __kmp_call_init<kmp_task_red_input_t>(kmp_taskred_data_t &item, |
| int offset) { |
| ((void (*)(void *))item.reduce_init)((char *)(item.reduce_priv) + offset); |
| } |
| template <> |
| void __kmp_call_init<kmp_taskred_input_t>(kmp_taskred_data_t &item, |
| int offset) { |
| ((void (*)(void *, void *))item.reduce_init)( |
| (char *)(item.reduce_priv) + offset, item.reduce_orig); |
| } |
| |
| template <typename T> |
| void *__kmp_task_reduction_init(int gtid, int num, T *data) { |
| kmp_info_t *thread = __kmp_threads[gtid]; |
| kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup; |
| kmp_int32 nth = thread->th.th_team_nproc; |
| kmp_taskred_data_t *arr; |
| |
| // check input data just in case |
| KMP_ASSERT(tg != NULL); |
| KMP_ASSERT(data != NULL); |
| KMP_ASSERT(num > 0); |
| if (nth == 1) { |
| KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n", |
| gtid, tg)); |
| return (void *)tg; |
| } |
| KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n", |
| gtid, tg, num)); |
| arr = (kmp_taskred_data_t *)__kmp_thread_malloc( |
| thread, num * sizeof(kmp_taskred_data_t)); |
| for (int i = 0; i < num; ++i) { |
| size_t size = data[i].reduce_size - 1; |
| // round the size up to cache line per thread-specific item |
| size += CACHE_LINE - size % CACHE_LINE; |
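    // e.g. with CACHE_LINE == 64 and reduce_size == 100: size starts at 99,
    // 99 % 64 == 35, so size becomes 99 + (64 - 35) == 128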
| KMP_ASSERT(data[i].reduce_comb != NULL); // combiner is mandatory |
| arr[i].reduce_shar = data[i].reduce_shar; |
| arr[i].reduce_size = size; |
| arr[i].flags = data[i].flags; |
| arr[i].reduce_comb = data[i].reduce_comb; |
| arr[i].reduce_init = data[i].reduce_init; |
| arr[i].reduce_fini = data[i].reduce_fini; |
| __kmp_assign_orig<T>(arr[i], data[i]); |
| if (!arr[i].flags.lazy_priv) { |
| // allocate cache-line aligned block and fill it with zeros |
| arr[i].reduce_priv = __kmp_allocate(nth * size); |
| arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size; |
| if (arr[i].reduce_init != NULL) { |
| // initialize all thread-specific items |
| for (int j = 0; j < nth; ++j) { |
| __kmp_call_init<T>(arr[i], j * size); |
| } |
| } |
| } else { |
| // only allocate space for pointers now, |
| // objects will be lazily allocated/initialized if/when requested |
| // note that __kmp_allocate zeroes the allocated memory |
| arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *)); |
| } |
| } |
| tg->reduce_data = (void *)arr; |
| tg->reduce_num_data = num; |
| return (void *)tg; |
| } |
| |
| /*! |
| @ingroup TASKING |
| @param gtid Global thread ID |
| @param num Number of data items to reduce |
| @param data Array of data for reduction |
| @return The taskgroup identifier |
| |
| Initialize task reduction for the taskgroup. |
| |
Note: this entry assumes the optional compiler-generated initializer routine
takes a single parameter: a pointer to the object to be initialized. That means
the reduction either does not use the omp_orig object, or omp_orig is accessible
without the help of the runtime library.
| */ |
| void *__kmpc_task_reduction_init(int gtid, int num, void *data) { |
| return __kmp_task_reduction_init(gtid, num, (kmp_task_red_input_t *)data); |
| } |
| |
| /*! |
| @ingroup TASKING |
| @param gtid Global thread ID |
| @param num Number of data items to reduce |
| @param data Array of data for reduction |
| @return The taskgroup identifier |
| |
| Initialize task reduction for the taskgroup. |
| |
Note: this entry assumes the optional compiler-generated initializer routine
takes two parameters: a pointer to the object to be initialized and a pointer
to omp_orig.
| */ |
| void *__kmpc_taskred_init(int gtid, int num, void *data) { |
| return __kmp_task_reduction_init(gtid, num, (kmp_taskred_input_t *)data); |
| } |
| |
| // Copy task reduction data (except for shared pointers). |
| template <typename T> |
| void __kmp_task_reduction_init_copy(kmp_info_t *thr, int num, T *data, |
| kmp_taskgroup_t *tg, void *reduce_data) { |
| kmp_taskred_data_t *arr; |
| KA_TRACE(20, ("__kmp_task_reduction_init_copy: Th %p, init taskgroup %p," |
| " from data %p\n", |
| thr, tg, reduce_data)); |
| arr = (kmp_taskred_data_t *)__kmp_thread_malloc( |
| thr, num * sizeof(kmp_taskred_data_t)); |
| // threads will share private copies, thunk routines, sizes, flags, etc.: |
| KMP_MEMCPY(arr, reduce_data, num * sizeof(kmp_taskred_data_t)); |
| for (int i = 0; i < num; ++i) { |
| arr[i].reduce_shar = data[i].reduce_shar; // init unique shared pointers |
| } |
| tg->reduce_data = (void *)arr; |
| tg->reduce_num_data = num; |
| } |
| |
| /*! |
| @ingroup TASKING |
| @param gtid Global thread ID |
| @param tskgrp The taskgroup ID (optional) |
| @param data Shared location of the item |
| @return The pointer to per-thread data |
| |
| Get thread-specific location of data item |
| */ |
| void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) { |
| kmp_info_t *thread = __kmp_threads[gtid]; |
| kmp_int32 nth = thread->th.th_team_nproc; |
| if (nth == 1) |
| return data; // nothing to do |
| |
| kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp; |
| if (tg == NULL) |
| tg = thread->th.th_current_task->td_taskgroup; |
| KMP_ASSERT(tg != NULL); |
| kmp_taskred_data_t *arr = (kmp_taskred_data_t *)(tg->reduce_data); |
| kmp_int32 num = tg->reduce_num_data; |
| kmp_int32 tid = thread->th.th_info.ds.ds_tid; |
| |
| KMP_ASSERT(data != NULL); |
| while (tg != NULL) { |
| for (int i = 0; i < num; ++i) { |
| if (!arr[i].flags.lazy_priv) { |
| if (data == arr[i].reduce_shar || |
| (data >= arr[i].reduce_priv && data < arr[i].reduce_pend)) |
| return (char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size; |
| } else { |
| // check shared location first |
| void **p_priv = (void **)(arr[i].reduce_priv); |
| if (data == arr[i].reduce_shar) |
| goto found; |
| // check if we get some thread specific location as parameter |
| for (int j = 0; j < nth; ++j) |
| if (data == p_priv[j]) |
| goto found; |
| continue; // not found, continue search |
| found: |
| if (p_priv[tid] == NULL) { |
| // allocate thread specific object lazily |
| p_priv[tid] = __kmp_allocate(arr[i].reduce_size); |
| if (arr[i].reduce_init != NULL) { |
| if (arr[i].reduce_orig != NULL) { // new interface |
| ((void (*)(void *, void *))arr[i].reduce_init)( |
| p_priv[tid], arr[i].reduce_orig); |
| } else { // old interface (single parameter) |
| ((void (*)(void *))arr[i].reduce_init)(p_priv[tid]); |
| } |
| } |
| } |
| return p_priv[tid]; |
| } |
| } |
    tg = tg->parent;
    if (tg == NULL)
      break; // no enclosing taskgroup owns this item; report the error below
    arr = (kmp_taskred_data_t *)(tg->reduce_data);
    num = tg->reduce_num_data;
| } |
| KMP_ASSERT2(0, "Unknown task reduction item"); |
| return NULL; // ERROR, this line never executed |
| } |
| |
| // Finalize task reduction. |
| // Called from __kmpc_end_taskgroup() |
| static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) { |
| kmp_int32 nth = th->th.th_team_nproc; |
| KMP_DEBUG_ASSERT(nth > 1); // should not be called if nth == 1 |
| kmp_taskred_data_t *arr = (kmp_taskred_data_t *)tg->reduce_data; |
| kmp_int32 num = tg->reduce_num_data; |
| for (int i = 0; i < num; ++i) { |
| void *sh_data = arr[i].reduce_shar; |
| void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini); |
| void (*f_comb)(void *, void *) = |
| (void (*)(void *, void *))(arr[i].reduce_comb); |
| if (!arr[i].flags.lazy_priv) { |
| void *pr_data = arr[i].reduce_priv; |
| size_t size = arr[i].reduce_size; |
| for (int j = 0; j < nth; ++j) { |
| void *priv_data = (char *)pr_data + j * size; |
| f_comb(sh_data, priv_data); // combine results |
| if (f_fini) |
| f_fini(priv_data); // finalize if needed |
| } |
| } else { |
| void **pr_data = (void **)(arr[i].reduce_priv); |
| for (int j = 0; j < nth; ++j) { |
| if (pr_data[j] != NULL) { |
| f_comb(sh_data, pr_data[j]); // combine results |
| if (f_fini) |
| f_fini(pr_data[j]); // finalize if needed |
| __kmp_free(pr_data[j]); |
| } |
| } |
| } |
| __kmp_free(arr[i].reduce_priv); |
| } |
| __kmp_thread_free(th, arr); |
| tg->reduce_data = NULL; |
| tg->reduce_num_data = 0; |
| } |
| |
// Clean up task reduction data for a parallel or worksharing construct;
// do not touch task-private data that other threads are still working with.
| // Called from __kmpc_end_taskgroup() |
| static void __kmp_task_reduction_clean(kmp_info_t *th, kmp_taskgroup_t *tg) { |
| __kmp_thread_free(th, tg->reduce_data); |
| tg->reduce_data = NULL; |
| tg->reduce_num_data = 0; |
| } |
| |
| template <typename T> |
| void *__kmp_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws, |
| int num, T *data) { |
| kmp_info_t *thr = __kmp_threads[gtid]; |
| kmp_int32 nth = thr->th.th_team_nproc; |
| __kmpc_taskgroup(loc, gtid); // form new taskgroup first |
| if (nth == 1) { |
| KA_TRACE(10, |
| ("__kmpc_reduction_modifier_init: T#%d, tg %p, exiting nth=1\n", |
| gtid, thr->th.th_current_task->td_taskgroup)); |
| return (void *)thr->th.th_current_task->td_taskgroup; |
| } |
| kmp_team_t *team = thr->th.th_team; |
| void *reduce_data; |
| kmp_taskgroup_t *tg; |
| reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws]); |
| if (reduce_data == NULL && |
| __kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data, |
| (void *)1)) { |
| // single thread enters this block to initialize common reduction data |
| KMP_DEBUG_ASSERT(reduce_data == NULL); |
| // first initialize own data, then make a copy other threads can use |
| tg = (kmp_taskgroup_t *)__kmp_task_reduction_init<T>(gtid, num, data); |
| reduce_data = __kmp_thread_malloc(thr, num * sizeof(kmp_taskred_data_t)); |
| KMP_MEMCPY(reduce_data, tg->reduce_data, num * sizeof(kmp_taskred_data_t)); |
| // fini counters should be 0 at this point |
| KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[0]) == 0); |
| KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&team->t.t_tg_fini_counter[1]) == 0); |
| KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], reduce_data); |
| } else { |
| while ( |
| (reduce_data = KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws])) == |
| (void *)1) { // wait for task reduction initialization |
| KMP_CPU_PAUSE(); |
| } |
| KMP_DEBUG_ASSERT(reduce_data > (void *)1); // should be valid pointer here |
| tg = thr->th.th_current_task->td_taskgroup; |
| __kmp_task_reduction_init_copy<T>(thr, num, data, tg, reduce_data); |
| } |
| return tg; |
| } |
| |
| /*! |
| @ingroup TASKING |
| @param loc Source location info |
| @param gtid Global thread ID |
| @param is_ws Is 1 if the reduction is for worksharing, 0 otherwise |
| @param num Number of data items to reduce |
| @param data Array of data for reduction |
| @return The taskgroup identifier |
| |
| Initialize task reduction for a parallel or worksharing. |
| |
Note: this entry assumes the optional compiler-generated initializer routine
takes a single parameter: a pointer to the object to be initialized. That means
the reduction either does not use the omp_orig object, or omp_orig is accessible
without the help of the runtime library.
| */ |
| void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid, int is_ws, |
| int num, void *data) { |
| return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num, |
| (kmp_task_red_input_t *)data); |
| } |
| |
| /*! |
| @ingroup TASKING |
| @param loc Source location info |
| @param gtid Global thread ID |
| @param is_ws Is 1 if the reduction is for worksharing, 0 otherwise |
| @param num Number of data items to reduce |
| @param data Array of data for reduction |
| @return The taskgroup identifier |
| |
| Initialize task reduction for a parallel or worksharing. |
| |
Note: this entry assumes the optional compiler-generated initializer routine
takes two parameters: a pointer to the object to be initialized and a pointer
to omp_orig.
| */ |
| void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws, int num, |
| void *data) { |
| return __kmp_task_reduction_modifier_init(loc, gtid, is_ws, num, |
| (kmp_taskred_input_t *)data); |
| } |
| |
| /*! |
| @ingroup TASKING |
| @param loc Source location info |
| @param gtid Global thread ID |
| @param is_ws Is 1 if the reduction is for worksharing, 0 otherwise |
| |
| Finalize task reduction for a parallel or worksharing. |
| */ |
| void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid, int is_ws) { |
| __kmpc_end_taskgroup(loc, gtid); |
| } |
| |
| // __kmpc_taskgroup: Start a new taskgroup |
| void __kmpc_taskgroup(ident_t *loc, int gtid) { |
| kmp_info_t *thread = __kmp_threads[gtid]; |
| kmp_taskdata_t *taskdata = thread->th.th_current_task; |
| kmp_taskgroup_t *tg_new = |
| (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t)); |
| KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new)); |
| KMP_ATOMIC_ST_RLX(&tg_new->count, 0); |
| KMP_ATOMIC_ST_RLX(&tg_new->cancel_request, cancel_noreq); |
| tg_new->parent = taskdata->td_taskgroup; |
| tg_new->reduce_data = NULL; |
| tg_new->reduce_num_data = 0; |
| taskdata->td_taskgroup = tg_new; |
| |
| #if OMPT_SUPPORT && OMPT_OPTIONAL |
| if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) { |
| void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
| if (!codeptr) |
| codeptr = OMPT_GET_RETURN_ADDRESS(0); |
| kmp_team_t *team = thread->th.th_team; |
| ompt_data_t my_task_data = taskdata->ompt_task_info.task_data; |
| // FIXME: I think this is wrong for lwt! |
| ompt_data_t my_parallel_data = team->t.ompt_team_info.parallel_data; |
| |
| ompt_callbacks.ompt_callback(ompt_callback_sync_region)( |
| ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data), |
| &(my_task_data), codeptr); |
| } |
| #endif |
| } |
| |
| // __kmpc_end_taskgroup: Wait until all tasks generated by the current task |
| // and its descendants are complete |
| void __kmpc_end_taskgroup(ident_t *loc, int gtid) { |
| kmp_info_t *thread = __kmp_threads[gtid]; |
| kmp_taskdata_t *taskdata = thread->th.th_current_task; |
| kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup; |
| int thread_finished = FALSE; |
| |
| #if OMPT_SUPPORT && OMPT_OPTIONAL |
| kmp_team_t *team; |
| ompt_data_t my_task_data; |
| ompt_data_t my_parallel_data; |
| void *codeptr; |
| if (UNLIKELY(ompt_enabled.enabled)) { |
| team = thread->th.th_team; |
| my_task_data = taskdata->ompt_task_info.task_data; |
| // FIXME: I think this is wrong for lwt! |
| my_parallel_data = team->t.ompt_team_info.parallel_data; |
| codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid); |
| if (!codeptr) |
| codeptr = OMPT_GET_RETURN_ADDRESS(0); |
| } |
| #endif |
| |
| KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc)); |
| KMP_DEBUG_ASSERT(taskgroup != NULL); |
| KMP_SET_THREAD_STATE_BLOCK(TASKGROUP); |
| |
| if (__kmp_tasking_mode != tskm_immediate_exec) { |
    // mark the task as waiting (this wait is not at a barrier)
| taskdata->td_taskwait_counter += 1; |
| taskdata->td_taskwait_ident = loc; |
| taskdata->td_taskwait_thread = gtid + 1; |
| #if USE_ITT_BUILD |
| // For ITT the taskgroup wait is similar to taskwait until we need to |
| // distinguish them |
| void *itt_sync_obj = __kmp_itt_taskwait_object(gtid); |
| if (itt_sync_obj != NULL) |
| __kmp_itt_taskwait_starting(gtid, itt_sync_obj); |
| #endif /* USE_ITT_BUILD */ |
| |
| #if OMPT_SUPPORT && OMPT_OPTIONAL |
| if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) { |
| ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( |
| ompt_sync_region_taskgroup, ompt_scope_begin, &(my_parallel_data), |
| &(my_task_data), codeptr); |
| } |
| #endif |
| |
| if (!taskdata->td_flags.team_serial || |
| (thread->th.th_task_team != NULL && |
| thread->th.th_task_team->tt.tt_found_proxy_tasks)) { |
| kmp_flag_32 flag(RCAST(std::atomic<kmp_uint32> *, &(taskgroup->count)), |
| 0U); |
| while (KMP_ATOMIC_LD_ACQ(&taskgroup->count) != 0) { |
| flag.execute_tasks(thread, gtid, FALSE, |
| &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), |
| __kmp_task_stealing_constraint); |
| } |
| } |
| taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; // end waiting |
| |
| #if OMPT_SUPPORT && OMPT_OPTIONAL |
| if (UNLIKELY(ompt_enabled.ompt_callback_sync_region_wait)) { |
| ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( |
| ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data), |
| &(my_task_data), codeptr); |
| } |
| #endif |
| |
| #if USE_ITT_BUILD |
| if (itt_sync_obj != NULL) |
| __kmp_itt_taskwait_finished(gtid, itt_sync_obj); |
| #endif /* USE_ITT_BUILD */ |
| } |
| KMP_DEBUG_ASSERT(taskgroup->count == 0); |
| |
| if (taskgroup->reduce_data != NULL) { // need to reduce? |
| int cnt; |
| void *reduce_data; |
| kmp_team_t *t = thread->th.th_team; |
| kmp_taskred_data_t *arr = (kmp_taskred_data_t *)taskgroup->reduce_data; |
    // check if the <priv> data of the first reduction variable is shared
    // for the team
| void *priv0 = arr[0].reduce_priv; |
| if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[0])) != NULL && |
| ((kmp_taskred_data_t *)reduce_data)[0].reduce_priv == priv0) { |
| // finishing task reduction on parallel |
| cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[0]); |
| if (cnt == thread->th.th_team_nproc - 1) { |
| // we are the last thread passing __kmpc_reduction_modifier_fini() |
| // finalize task reduction: |
| __kmp_task_reduction_fini(thread, taskgroup); |
| // cleanup fields in the team structure: |
| // TODO: is relaxed store enough here (whole barrier should follow)? |
| __kmp_thread_free(thread, reduce_data); |
| KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[0], NULL); |
| KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[0], 0); |
| } else { |
| // we are not the last thread passing __kmpc_reduction_modifier_fini(), |
| // so do not finalize reduction, just clean own copy of the data |
| __kmp_task_reduction_clean(thread, taskgroup); |
| } |
| } else if ((reduce_data = KMP_ATOMIC_LD_ACQ(&t->t.t_tg_reduce_data[1])) != |
| NULL && |
| ((kmp_taskred_data_t *)reduce_data)[0].reduce_priv == priv0) { |
| // finishing task reduction on worksharing |
| cnt = KMP_ATOMIC_INC(&t->t.t_tg_fini_counter[1]); |
| if (cnt == thread->th.th_team_nproc - 1) { |
| // we are the last thread passing __kmpc_reduction_modifier_fini() |
| __kmp_task_reduction_fini(thread, taskgroup); |
| // cleanup fields in team structure: |
| // TODO: is relaxed store enough here (whole barrier should follow)? |
| __kmp_thread_free(thread, reduce_data); |
| KMP_ATOMIC_ST_REL(&t->t.t_tg_reduce_data[1], NULL); |
| KMP_ATOMIC_ST_REL(&t->t.t_tg_fini_counter[1], 0); |
| } else { |
| // we are not the last thread passing __kmpc_reduction_modifier_fini(), |
| // so do not finalize reduction, just clean own copy of the data |
| __kmp_task_reduction_clean(thread, taskgroup); |
| } |
| } else { |
| // finishing task reduction on taskgroup |
| __kmp_task_reduction_fini(thread, taskgroup); |
| } |
| } |
| // Restore parent taskgroup for the current task |
| taskdata->td_taskgroup = taskgroup->parent; |
| __kmp_thread_free(thread, taskgroup); |
| |
| KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", |
| gtid, taskdata)); |
| ANNOTATE_HAPPENS_AFTER(taskdata); |
| |
| #if OMPT_SUPPORT && OMPT_OPTIONAL |
| if (UNLIKELY(ompt_enabled.ompt_callback_sync_region)) { |
| ompt_callbacks.ompt_callback(ompt_callback_sync_region)( |
| ompt_sync_region_taskgroup, ompt_scope_end, &(my_parallel_data), |
| &(my_task_data), codeptr); |
| } |
| #endif |
| } |
| |
| // __kmp_remove_my_task: remove a task from my own deque |
| static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid, |
| kmp_task_team_t *task_team, |
| kmp_int32 is_constrained) { |
| kmp_task_t *task; |
| kmp_taskdata_t *taskdata; |
| kmp_thread_data_t *thread_data; |
| kmp_uint32 tail; |
| |
| KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec); |
| KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data != |
| NULL); // Caller should check this condition |
| |
| thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)]; |
| |
| KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n", |
| gtid, thread_data->td.td_deque_ntasks, |
| thread_data->td.td_deque_head, thread_data->td.td_deque_tail)); |
| |
| if (TCR_4(thread_data->td.td_deque_ntasks) == 0) { |
| KA_TRACE(10, |
| ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: " |
| "ntasks=%d head=%u tail=%u\n", |
| gtid, thread_data->td.td_deque_ntasks, |
| thread_data->td.td_deque_head, thread_data->td.td_deque_tail)); |
| return NULL; |
| } |
| |
| __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock); |
| |
| if (TCR_4(thread_data->td.td_deque_ntasks) == 0) { |
| __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock); |
| KA_TRACE(10, |
| ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: " |
| "ntasks=%d head=%u tail=%u\n", |
| gtid, thread_data->td.td_deque_ntasks, |
| thread_data->td.td_deque_head, thread_data->td.td_deque_tail)); |
| return NULL; |
| } |
| |
| tail = (thread_data->td.td_deque_tail - 1) & |
| TASK_DEQUE_MASK(thread_data->td); // Wrap index. |
| taskdata = thread_data->td.td_deque[tail]; |
| |
|