// Copyright 2016 The Fuchsia Authors
//
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
#include "object/job_dispatcher.h"
#include <inttypes.h>
#include <lib/counters.h>
#include <platform.h>
#include <zircon/errors.h>
#include <zircon/rights.h>
#include <zircon/syscalls/policy.h>
#include <zircon/types.h>
#include <fbl/alloc_checker.h>
#include <fbl/array.h>
#include <fbl/auto_lock.h>
#include <fbl/inline_array.h>
#include <kernel/mutex.h>
#include <ktl/algorithm.h>
#include <object/process_dispatcher.h>
#include <object/root_job_observer.h>
#include <ktl/enforce.h>
KCOUNTER(dispatcher_job_create_count, "dispatcher.job.create")
KCOUNTER(dispatcher_job_destroy_count, "dispatcher.job.destroy")
// The starting max_height value of the root job.
static constexpr uint32_t kRootJobMaxHeight = 32;
static constexpr char kRootJobName[] = "root";
template <>
uint64_t JobDispatcher::ChildCountLocked<JobDispatcher>() const {
return jobs_.size();
}
template <>
uint64_t JobDispatcher::ChildCountLocked<ProcessDispatcher>() const {
return procs_.size();
}
// To come up with an ordering on our recursive locks we take advantage of the fact that
// max_height strictly decreases from parent to child. As we acquire locks from parent to child
// we can therefore build an increasing counter by inverting max_height. We add 1 to the counter
// so that the order value 0 stays reserved for the default order used when a lock is acquired
// without an explicit order.
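// As a worked example: with kRootJobMaxHeight == 32, the root job (max_height == 32) gets
// order 1, a direct child (max_height == 31) gets order 2, and a grandchild gets order 3, so
// walking parent->child always acquires locks with strictly increasing order values.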
uint32_t JobDispatcher::LockOrder() const { return kRootJobMaxHeight - max_height() + 1; }
// Calls the provided |zx_status_t func(fbl::RefPtr<DISPATCHER_TYPE>)|
// function on all live elements of |children|, which must be one of |jobs_|
// or |procs_|. Stops iterating early if |func| returns a value other than
// ZX_OK, storing that status in |*result|. |lock_| must be held when
// calling this method, and it will still be held while the callback is
// called.
//
// The returned |LiveRefsArray| needs to be destructed when |lock_| is not
// held anymore. The recommended pattern is:
//
// LiveRefsArray refs;
// {
// Guard<Mutex> guard{get_lock()};
// refs = ForEachChildInLocked(...);
// }
//
template <typename T, typename L, typename Fn>
JobDispatcher::LiveRefsArray<T> JobDispatcher::ForEachChildInLocked(L& children,
zx_status_t* result, Fn func) {
const uint64_t count = ChildCountLocked<typename L::ValueType>();
if (!count) {
*result = ZX_OK;
return LiveRefsArray<T>();
}
fbl::AllocChecker ac;
LiveRefsArray<T> refs(new (&ac) fbl::RefPtr<T>[count], count);
if (!ac.check()) {
*result = ZX_ERR_NO_MEMORY;
return LiveRefsArray<T>();
}
size_t ix = 0;
*result = TakeEachChildLocked(children, [&ix, &refs, &func](auto&& cref) {
zx_status_t result = func(cref);
// As part of our contract with TakeEachChildLocked we must not
// destroy the |cref| we were given, as it might be the last reference to
// the object. Therefore we keep the reference alive in the |refs| array
// and pass the responsibility of releasing them outside the lock to the
// caller.
refs[ix++] = ktl::move(cref);
return result;
});
return refs;
}
// Calls the provided |zx_status_t func(fbl::RefPtr<DISPATCHER_TYPE>)|
// function on all live elements of |children|, which must be one of |jobs_|
// or |procs_|. The callback must retain the RefPtr and not destroy it until
// after this method returns and the lock is dropped. Stops iterating early if
// |func| returns a value other than ZX_OK, returning that value from this
// method.
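// A typical usage sketch, mirroring Kill() below: the callback moves each RefPtr into storage
// that outlives the lock, so the final release happens only after |get_lock()| is dropped:
//
//   JobList jobs_to_kill;
//   zx_status_t result =
//       TakeEachChildLocked(jobs_, [&jobs_to_kill](fbl::RefPtr<JobDispatcher>&& job) {
//         jobs_to_kill.push_front(ktl::move(job));
//         return ZX_OK;
//       });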
template <typename T, typename Fn>
zx_status_t JobDispatcher::TakeEachChildLocked(T& children, Fn func) {
// Convert child raw pointers into RefPtrs. This is tricky and requires
// special logic on the RefPtr class to handle a ref count that can be
// zero.
//
// The main requirement is that |lock_| both controls child list lookup
// and ensures that a child's destructor cannot make progress while we
// hold it. In other words, when inspecting the |children| list we can be
// sure that a given child process or child job is either
//   - alive, with refcount > 0, or
//   - in its destruction path but blocked, with refcount == 0.
for (auto& craw : children) {
auto cref = ::fbl::MakeRefPtrUpgradeFromRaw(&craw, get_lock());
if (!cref)
continue;
// |cref| might be the last reference at this point. If so, dropping it
// here would run the object's destructor while |get_lock()| is held. To
// avoid that we pass ownership of the RefPtr to the callback, which is
// required to keep it alive until after the lock is released.
zx_status_t result = func(ktl::move(cref));
if (result != ZX_OK) {
return result;
}
}
return ZX_OK;
}
fbl::RefPtr<JobDispatcher> JobDispatcher::CreateRootJob() {
fbl::AllocChecker ac;
auto job = fbl::AdoptRef(new (&ac) JobDispatcher(0u, nullptr, JobPolicy::CreateRootPolicy()));
if (!ac.check()) {
panic("root-job: failed to allocate\n");
}
job->set_name(kRootJobName, sizeof(kRootJobName));
return job;
}
zx_status_t JobDispatcher::Create(uint32_t flags, const fbl::RefPtr<JobDispatcher>& parent,
KernelHandle<JobDispatcher>* handle, zx_rights_t* rights) {
if (parent != nullptr && parent->max_height() == 0) {
// The parent job cannot have children.
return ZX_ERR_OUT_OF_RANGE;
}
fbl::AllocChecker ac;
KernelHandle new_handle(
fbl::AdoptRef(new (&ac) JobDispatcher(flags, parent, parent->GetPolicy())));
if (!ac.check())
return ZX_ERR_NO_MEMORY;
if (!parent->AddChildJob(new_handle.dispatcher())) {
return ZX_ERR_BAD_STATE;
}
*rights = default_rights();
*handle = ktl::move(new_handle);
return ZX_OK;
}
JobDispatcher::JobDispatcher(uint32_t /*flags*/, fbl::RefPtr<JobDispatcher> parent,
JobPolicy policy)
: SoloDispatcher(ZX_JOB_NO_PROCESSES | ZX_JOB_NO_JOBS | ZX_JOB_NO_CHILDREN),
parent_(ktl::move(parent)),
max_height_(parent_ ? parent_->max_height() - 1 : kRootJobMaxHeight),
state_(State::READY),
return_code_(0),
kill_on_oom_(false),
policy_(policy),
exceptionate_(ZX_EXCEPTION_CHANNEL_TYPE_JOB),
debug_exceptionate_(ZX_EXCEPTION_CHANNEL_TYPE_JOB_DEBUGGER) {
kcounter_add(dispatcher_job_create_count, 1);
}
JobDispatcher::~JobDispatcher() {
kcounter_add(dispatcher_job_destroy_count, 1);
RemoveFromJobTreesUnlocked();
}
zx_koid_t JobDispatcher::get_related_koid() const { return parent_ ? parent_->get_koid() : 0u; }
bool JobDispatcher::AddChildProcess(const fbl::RefPtr<ProcessDispatcher>& process) {
canary_.Assert();
Guard<Mutex> guard{get_lock()};
if (state_ != State::READY)
return false;
procs_.push_back(process.get());
UpdateSignalsLocked();
return true;
}
bool JobDispatcher::AddChildJob(const fbl::RefPtr<JobDispatcher>& job) {
canary_.Assert();
Guard<Mutex> guard{get_lock()};
if (state_ != State::READY)
return false;
// Put the new job after our next-youngest child, or us if we have none.
//
// We try to make older jobs closer to the root (both hierarchically and
// temporally) show up earlier in enumeration.
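// For example, if child jobs J1 and then J2 (illustrative names) were added to this job, they
// are stored and enumerated in that order, so the older sibling J1 is visited before J2.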
JobDispatcher* neighbor = (jobs_.is_empty() ? this : &jobs_.back());
// This can only be called once; the job should not already be part
// of any job tree.
DEBUG_ASSERT(!fbl::InContainer<JobDispatcher::RawListTag>(*job));
DEBUG_ASSERT(neighbor != job.get());
jobs_.push_back(job.get());
UpdateSignalsLocked();
return true;
}
void JobDispatcher::RemoveChildProcess(ProcessDispatcher* process) {
canary_.Assert();
bool should_die = false;
{
Guard<Mutex> guard{get_lock()};
// The process dispatcher can call us in its destructor, Kill(),
// or RemoveThread().
if (!fbl::InContainer<ProcessDispatcher::RawJobListTag>(*process)) {
return;
}
procs_.erase(*process);
UpdateSignalsLocked();
should_die = IsReadyForDeadTransitionLocked();
// Aggregate runtime stats from the exiting process.
aggregated_runtime_stats_.Add(process->GetAggregatedRuntime());
}
if (should_die)
FinishDeadTransitionUnlocked();
}
void JobDispatcher::RemoveChildJob(JobDispatcher* job) {
canary_.Assert();
bool should_die = false;
{
Guard<Mutex> guard{get_lock()};
if (!fbl::InContainer<JobDispatcher::RawListTag>(*job)) {
return;
}
jobs_.erase(*job);
UpdateSignalsLocked();
should_die = IsReadyForDeadTransitionLocked();
}
if (should_die)
FinishDeadTransitionUnlocked();
}
JobDispatcher::State JobDispatcher::GetState() const {
Guard<Mutex> guard{get_lock()};
return state_;
}
void JobDispatcher::RemoveFromJobTreesUnlocked() {
canary_.Assert();
if (parent_)
parent_->RemoveChildJob(this);
}
bool JobDispatcher::IsReadyForDeadTransitionLocked() {
canary_.Assert();
return state_ == State::KILLING && jobs_.is_empty() && procs_.is_empty();
}
void JobDispatcher::FinishDeadTransitionUnlocked() {
canary_.Assert();
// Make sure we're killing from the bottom of the tree up or else parent
// jobs could die before their children.
//
// In particular, this means we have to finish dying before leaving the job
// trees, since the last child leaving the tree can trigger its parent to
// finish dying.
DEBUG_ASSERT(!parent_ || (parent_->GetState() != State::DEAD));
{
Guard<Mutex> guard{get_lock()};
state_ = State::DEAD;
exceptionate_.Shutdown();
debug_exceptionate_.Shutdown();
UpdateStateLocked(0u, ZX_JOB_TERMINATED);
}
RemoveFromJobTreesUnlocked();
}
void JobDispatcher::UpdateSignalsLocked() {
// Clear all of the child-related signals, then assert the ones that are
// currently active; the asserted signals take precedence over the cleared
// ones. Adding or removing jobs or processes can change which of these
// signals are active.
zx_signals_t clear = (ZX_JOB_NO_JOBS | ZX_JOB_NO_PROCESSES | ZX_JOB_NO_CHILDREN);
zx_signals_t set = 0u;
if (procs_.is_empty()) {
set |= ZX_JOB_NO_PROCESSES;
}
if (jobs_.is_empty()) {
set |= ZX_JOB_NO_JOBS;
}
if (jobs_.is_empty() && procs_.is_empty()) {
set |= ZX_JOB_NO_CHILDREN;
}
UpdateStateLocked(clear, set);
}
JobPolicy JobDispatcher::GetPolicy() const {
Guard<Mutex> guard{get_lock()};
return policy_;
}
bool JobDispatcher::KillJobWithKillOnOOM() {
// Get list of jobs with kill bit set.
OOMBitJobArray oom_jobs;
int count = 0;
CollectJobsWithOOMBit(&oom_jobs, &count);
if (count == 0) {
printf("OOM: no jobs with kill_on_oom found\n");
return false;
}
// Sort |oom_jobs| in descending order by max height.
ktl::stable_sort(oom_jobs.begin(), oom_jobs.begin() + count,
[](const fbl::RefPtr<JobDispatcher>& a, const fbl::RefPtr<JobDispatcher>& b) {
return a->max_height() > b->max_height();
});
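// For example (illustrative heights): candidates with max heights {28, 31, 30} sort to
// {31, 30, 28}, and the loop below attempts the max-height-28 job (the deepest one) first.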
// Try to kill from the deepest job (smallest max_height) upward, stopping once one is killed.
for (int i = count - 1; i >= 0; --i) {
auto& job = oom_jobs[i];
if (job->Kill(ZX_TASK_RETCODE_OOM_KILL)) {
char name[ZX_MAX_NAME_LEN];
job->get_name(name);
printf("OOM: killing %" PRIu64 " '%s'\n", job->get_koid(), name);
return true;
}
}
printf("OOM: no job found to kill\n");
return false;
}
void JobDispatcher::CollectJobsWithOOMBit(OOMBitJobArray* into, int* count) {
// As CollectJobsWithOOMBit will recurse we need to give a lock order to the guard.
Guard<Mutex> guard{AssertOrderedLock, get_lock(), LockOrder()};
if (kill_on_oom_) {
if (*count >= static_cast<int>(into->size())) {
printf("OOM: skipping some jobs, exceeded max count\n");
return;
}
auto cref = ::fbl::MakeRefPtrUpgradeFromRaw(this, get_lock());
if (!cref)
return;
(*into)[*count] = ktl::move(cref);
*count += 1;
}
for (auto& job : jobs_) {
job.CollectJobsWithOOMBit(into, count);
}
}
bool JobDispatcher::Kill(int64_t return_code) {
canary_.Assert();
JobList jobs_to_kill;
ProcessList procs_to_kill;
bool should_die = false;
{
Guard<Mutex> guard{get_lock()};
if (state_ != State::READY)
return false;
return_code_ = return_code;
state_ = State::KILLING;
__UNUSED zx_status_t result;
// Gather the refs for our children. We can use |TakeEachChildLocked| since we will be
// recording and keeping alive the RefPtrs in the callback in the *_to_kill lists.
result = TakeEachChildLocked(jobs_, [&jobs_to_kill](fbl::RefPtr<JobDispatcher>&& job) {
jobs_to_kill.push_front(ktl::move(job));
return ZX_OK;
});
DEBUG_ASSERT(result == ZX_OK);
result = TakeEachChildLocked(procs_, [&procs_to_kill](fbl::RefPtr<ProcessDispatcher>&& proc) {
procs_to_kill.push_front(ktl::move(proc));
return ZX_OK;
});
DEBUG_ASSERT(result == ZX_OK);
should_die = IsReadyForDeadTransitionLocked();
}
if (should_die)
FinishDeadTransitionUnlocked();
// Since we kill the child jobs first we have a depth-first massacre.
while (!jobs_to_kill.is_empty()) {
// TODO(cpu): This recursive call can overflow the stack.
jobs_to_kill.pop_front()->Kill(return_code);
}
while (!procs_to_kill.is_empty()) {
procs_to_kill.pop_front()->Kill(return_code);
}
return true;
}
void JobDispatcher::CriticalProcessKill(fbl::RefPtr<ProcessDispatcher> dead_process) {
char proc_name[ZX_MAX_NAME_LEN];
dead_process->get_name(proc_name);
char job_name[ZX_MAX_NAME_LEN];
get_name(job_name);
printf("critical-process: process '%s' (%" PRIu64 ") died, killing job '%s' (%" PRIu64 ")\n",
proc_name, dead_process->get_koid(), job_name, get_koid());
if (GetRootJobDispatcher().get() == this) {
RootJobObserver::CriticalProcessKill(ktl::move(dead_process));
}
Kill(ZX_TASK_RETCODE_CRITICAL_PROCESS_KILL);
}
bool JobDispatcher::CanSetPolicy() TA_REQ(get_lock()) {
// Can't set policy when there are active processes or jobs. This constraint ensures that a
// process's policy cannot change over its lifetime. Because a process's policy cannot change,
// the risk of TOCTOU bugs is reduced and we are free to apply policy at the ProcessDispatcher
// without having to walk up the tree to its containing job.
if (!procs_.is_empty() || !jobs_.is_empty()) {
return false;
}
return true;
}
zx_status_t JobDispatcher::SetBasicPolicy(uint32_t mode, const zx_policy_basic_v1_t* in_policy,
size_t policy_count) {
fbl::AllocChecker ac;
fbl::InlineArray<zx_policy_basic_v2_t, kPolicyBasicInlineCount> policy(&ac, policy_count);
if (!ac.check()) {
return ZX_ERR_NO_MEMORY;
}
for (size_t ix = 0; ix != policy.size(); ++ix) {
policy[ix].condition = in_policy[ix].condition;
policy[ix].action = in_policy[ix].policy;
policy[ix].flags = ZX_POL_OVERRIDE_DENY;
}
return SetBasicPolicy(mode, policy.get(), policy.size());
}
zx_status_t JobDispatcher::SetBasicPolicy(uint32_t mode, const zx_policy_basic_v2_t* in_policy,
size_t policy_count) {
Guard<Mutex> guard{get_lock()};
if (!CanSetPolicy()) {
return ZX_ERR_BAD_STATE;
}
return policy_.AddBasicPolicy(mode, in_policy, policy_count);
}
zx_status_t JobDispatcher::SetTimerSlackPolicy(const zx_policy_timer_slack& policy) {
Guard<Mutex> guard{get_lock()};
if (!CanSetPolicy()) {
return ZX_ERR_BAD_STATE;
}
// Is the policy valid?
if (policy.min_slack < 0) {
return ZX_ERR_INVALID_ARGS;
}
slack_mode new_mode;
switch (policy.default_mode) {
case ZX_TIMER_SLACK_CENTER:
new_mode = TIMER_SLACK_CENTER;
break;
case ZX_TIMER_SLACK_EARLY:
new_mode = TIMER_SLACK_EARLY;
break;
case ZX_TIMER_SLACK_LATE:
new_mode = TIMER_SLACK_LATE;
break;
default:
return ZX_ERR_INVALID_ARGS;
};
const TimerSlack old_slack = policy_.GetTimerSlack();
const zx_duration_t new_amount = ktl::max(old_slack.amount(), policy.min_slack);
const TimerSlack new_slack(new_amount, new_mode);
policy_.SetTimerSlack(new_slack);
return ZX_OK;
}
bool JobDispatcher::EnumerateChildren(JobEnumerator* je) {
canary_.Assert();
LiveRefsArray<JobDispatcher> jobs_refs;
LiveRefsArray<ProcessDispatcher> proc_refs;
zx_status_t result = ZX_OK;
{
Guard<Mutex> guard{get_lock()};
proc_refs = ForEachChildInLocked<ProcessDispatcher>(
procs_, &result, [&](const fbl::RefPtr<ProcessDispatcher>& proc) { return ZX_OK; });
if (result != ZX_OK) {
return false;
}
jobs_refs = ForEachChildInLocked<JobDispatcher>(
jobs_, &result, [&](const fbl::RefPtr<JobDispatcher>& job) { return ZX_OK; });
}
// With the processes and jobs collected into their respective LiveRefsArrays, we can now invoke
// the JobEnumerator callbacks on them. We perform this here, outside the lock, instead of
// directly in the ForEachChildInLocked callbacks so that the callbacks are permitted to perform
// user copies, or generally do actions that are not permitted whilst locks are held.
for (auto& process : proc_refs) {
if (process) {
if (!je->OnProcess(&*process)) {
return false;
}
}
}
for (auto& job : jobs_refs) {
if (job) {
if (!je->OnJob(&*job)) {
return false;
}
}
}
return result == ZX_OK;
}
bool JobDispatcher::EnumerateChildrenRecursive(JobEnumerator* je) {
canary_.Assert();
LiveRefsArray<JobDispatcher> jobs_refs;
LiveRefsArray<ProcessDispatcher> proc_refs;
zx_status_t result = ZX_OK;
{
// As EnumerateChildren will recurse we need to give a lock order to the guard.
Guard<Mutex> guard{AssertOrderedLock, get_lock(), LockOrder()};
proc_refs = ForEachChildInLocked<ProcessDispatcher>(
procs_, &result, [&](const fbl::RefPtr<ProcessDispatcher>& proc) {
return je->OnProcess(proc.get()) ? ZX_OK : ZX_ERR_STOP;
});
if (result != ZX_OK) {
return false;
}
jobs_refs = ForEachChildInLocked<JobDispatcher>(
jobs_, &result, [&](const fbl::RefPtr<JobDispatcher>& job) {
if (!je->OnJob(job.get())) {
return ZX_ERR_STOP;
}
// TODO(kulakowski): This recursive call can overflow the stack.
return job->EnumerateChildrenRecursive(je) ? ZX_OK : ZX_ERR_STOP;
});
}
return result == ZX_OK;
}
fbl::RefPtr<ProcessDispatcher> JobDispatcher::LookupProcessById(zx_koid_t koid) {
canary_.Assert();
LiveRefsArray<ProcessDispatcher> proc_refs;
fbl::RefPtr<ProcessDispatcher> found_proc;
{
Guard<Mutex> guard{get_lock()};
zx_status_t result;
proc_refs = ForEachChildInLocked<ProcessDispatcher>(procs_, &result,
[&](fbl::RefPtr<ProcessDispatcher> proc) {
if (proc->get_koid() == koid) {
found_proc = ktl::move(proc);
return ZX_ERR_STOP;
}
return ZX_OK;
});
}
return found_proc; // Null if not found.
}
fbl::RefPtr<JobDispatcher> JobDispatcher::LookupJobById(zx_koid_t koid) {
canary_.Assert();
LiveRefsArray<JobDispatcher> jobs_refs;
fbl::RefPtr<JobDispatcher> found_job;
{
Guard<Mutex> guard{get_lock()};
zx_status_t result;
jobs_refs =
ForEachChildInLocked<JobDispatcher>(jobs_, &result, [&](fbl::RefPtr<JobDispatcher> job) {
if (job->get_koid() == koid) {
found_job = ktl::move(job);
return ZX_ERR_STOP;
}
return ZX_OK;
});
}
return found_job; // Null if not found.
}
void JobDispatcher::get_name(char (&out_name)[ZX_MAX_NAME_LEN]) const {
canary_.Assert();
name_.get(ZX_MAX_NAME_LEN, out_name);
}
zx_status_t JobDispatcher::set_name(const char* name, size_t len) {
canary_.Assert();
return name_.set(name, len);
}
Exceptionate* JobDispatcher::exceptionate(Exceptionate::Type type) {
canary_.Assert();
return type == Exceptionate::Type::kDebug ? &debug_exceptionate_ : &exceptionate_;
}
void JobDispatcher::set_kill_on_oom(bool value) {
Guard<Mutex> guard{get_lock()};
kill_on_oom_ = value;
}
bool JobDispatcher::get_kill_on_oom() const {
Guard<Mutex> guard{AssertOrderedLock, get_lock(), LockOrder()};
return kill_on_oom_;
}
void JobDispatcher::GetInfo(zx_info_job_t* info) const {
canary_.Assert();
Guard<Mutex> guard{get_lock()};
info->return_code = return_code_;
info->exited = (state_ == State::DEAD) || (state_ == State::KILLING);
info->kill_on_oom = kill_on_oom_;
info->debugger_attached = debug_exceptionate_.HasValidChannel();
}
zx_status_t JobDispatcher::AccumulateRuntimeTo(zx_info_task_runtime_t* info) const {
canary_.Assert();
Guard<Mutex> guard{get_lock()};
aggregated_runtime_stats_.AccumulateRuntimeTo(info);
// Add in the runtime of the processes still linked into this job. Any process in |procs_| may
// already be in its destructor, waiting to acquire the lock so it can remove itself from this
// job, in which case its aggregated runtime is not yet part of this job's data.
//
// AccumulateRuntimeTo must therefore be safe to call even on a process that is partway through
// destruction.
for (const auto& proc : procs_) {
zx_status_t err = proc.AccumulateRuntimeTo(info);
if (err != ZX_OK) {
return err;
}
}
return ZX_OK;
}