[kernel][thread] Consistent lock acquisition order in thread creation

This resolves a lock ordering violation where the ThreadDispatcher acquires its
lock before calling into the ProcessDispatcher. It is a ordering violation as
the ProcessDispatcher already has a code path, in KillAllThreadsLocked, where it
calls into the ThreadDispatcher.

The change here is to invert the logic of ThreadDispatcher::Start such that the
ProcessDispatcher, and not the ThreadDispatcher, is responsible for driving the
operation and acquiring the first lock. This inversion works as both the
ProcessDispatcher and ThreadDispatcher will 'not fail' after performing their
initial checks. As such although the checks now happen in a different order the
state updates still happen under the same total conditions, and are done with
their respective locks held and so are still atomic from the point of view of
any observers.

Without this change building and running with enable_lock_dep=true results in
a consistent lock order violation during early user startup.

Test: Kerenl unit tests and e2e tests

Change-Id: Ida54022e00bd42fb060a44adad3739de29930f5c
diff --git a/zircon/kernel/object/include/object/process_dispatcher.h b/zircon/kernel/object/include/object/process_dispatcher.h
index d9d9c90..a7fdc67 100644
--- a/zircon/kernel/object/include/object/process_dispatcher.h
+++ b/zircon/kernel/object/include/object/process_dispatcher.h
@@ -328,11 +328,13 @@
                                       const arch_exception_context_t* context);
 
     // Thread lifecycle support.
-    //
-    // |suspended| indicates whether the parent process is currently suspended or not. If true,
-    // the child thread must increment its own suspend count by one and transition to suspend.
     friend class ThreadDispatcher;
-    zx_status_t AddThread(ThreadDispatcher* t, bool initial_thread, bool* suspended);
+    // Takes the given ThreadDispatcher and transitions it from the INITIALIZED state to a runnable
+    // state (RUNNING or SUSPENDED depending on whether this process is suspended) by calling
+    // ThreadDispatcher::MakeRunnable. The thread is then added to the thread_list_ for this process
+    // and we transition to running if this is the initial_thread.
+    zx_status_t AddInitializedThread(ThreadDispatcher* t, bool initial_thread,
+                                     const ThreadDispatcher::EntryState& entry);
     void RemoveThread(ThreadDispatcher* t);
 
     void SetStateLocked(State) TA_REQ(get_lock());
diff --git a/zircon/kernel/object/include/object/thread_dispatcher.h b/zircon/kernel/object/include/object/thread_dispatcher.h
index eb93bc9..81d7edc 100644
--- a/zircon/kernel/object/include/object/thread_dispatcher.h
+++ b/zircon/kernel/object/include/object/thread_dispatcher.h
@@ -96,7 +96,13 @@
     // Performs initialization on a newly constructed ThreadDispatcher
     // If this fails, then the object is invalid and should be deleted
     zx_status_t Initialize(const char* name, size_t len);
+    // Start this thread running inside the parent process with the provided entry state, only
+    // valid to be called on a thread in the INITIALIZED state that has not yet been started.
     zx_status_t Start(const EntryState& entry, bool initial_thread);
+    // Transitions a thread from the INITIALIZED state to either the RUNNING or SUSPENDED state.
+    // Is the caller's responsibility to ensure this thread is registered with the parent process,
+    // as such this is only expected to be called from the ProcessDispatcher.
+    zx_status_t MakeRunnable(const EntryState& entry, bool suspended);
     void Exit() __NO_RETURN;
     void Kill();
 
diff --git a/zircon/kernel/object/process_dispatcher.cpp b/zircon/kernel/object/process_dispatcher.cpp
index ee058d0..cc3fb41 100644
--- a/zircon/kernel/object/process_dispatcher.cpp
+++ b/zircon/kernel/object/process_dispatcher.cpp
@@ -318,9 +318,8 @@
     }
 }
 
-zx_status_t ProcessDispatcher::AddThread(ThreadDispatcher* t,
-                                         bool initial_thread,
-                                         bool* suspended) {
+zx_status_t ProcessDispatcher::AddInitializedThread(ThreadDispatcher* t, bool initial_thread,
+                                                    const ThreadDispatcher::EntryState& entry) {
     LTRACE_ENTRY_OBJ;
 
     Guard<fbl::Mutex> guard{get_lock()};
@@ -335,15 +334,21 @@
             return ZX_ERR_BAD_STATE;
     }
 
+    // Now that we know our state is okay we can attempt to start the thread running. This is okay
+    // since as long as the thread doesn't refuse to start running then we cannot fail from here
+    // and so we will update our thread_list_ and state before we drop the lock, making this
+    // whole process atomic to any observers.
+    zx_status_t result = t->MakeRunnable(entry, suspend_count_ > 0);
+    if (result != ZX_OK) {
+        return result;
+    }
+
     // add the thread to our list
     DEBUG_ASSERT(thread_list_.is_empty() == initial_thread);
     thread_list_.push_back(t);
 
     DEBUG_ASSERT(t->process() == this);
 
-    // If we're suspended, start this thread in suspended state as well.
-    *suspended = (suspend_count_ > 0);
-
     if (initial_thread)
         SetStateLocked(State::RUNNING);
 
diff --git a/zircon/kernel/object/thread_dispatcher.cpp b/zircon/kernel/object/thread_dispatcher.cpp
index 9c7a327..280bc0f 100644
--- a/zircon/kernel/object/thread_dispatcher.cpp
+++ b/zircon/kernel/object/thread_dispatcher.cpp
@@ -189,6 +189,12 @@
 
     is_initial_thread_ = initial_thread;
 
+    // add ourselves to the process, which may fail if the process is in a dead state.
+    // If the process is live then it will call our StartRunning routine.
+    return process_->AddInitializedThread(this, initial_thread, entry);
+}
+
+zx_status_t ThreadDispatcher::MakeRunnable(const EntryState& entry, bool suspended) {
     Guard<fbl::Mutex> guard{get_lock()};
 
     if (state_.lifecycle() != ThreadState::Lifecycle::INITIALIZED)
@@ -197,12 +203,6 @@
     // save the user space entry state
     user_entry_ = entry;
 
-    // add ourselves to the process, which may fail if the process is in a dead state
-    bool suspended;
-    auto ret = process_->AddThread(this, initial_thread, &suspended);
-    if (ret < 0)
-        return ret;
-
     // update our suspend count to account for our parent state
     if (suspended)
         suspend_count_++;