[crashanalyzer] Host crashanalyzer in svchost

Rather than having devmgr launch crashanalyzer directly, devmgr now
connects to crashanalyzer as a service hosted by svchost. This approach
will make it easier to host a crashpad-based crash analyzer in sysmgr in
the future.

Change-Id: I27afb9efb37593420c67daf4167f90627e657298
diff --git a/docs/kernel_cmdline.md b/docs/kernel_cmdline.md
index 641ec81..a823209 100644
--- a/docs/kernel_cmdline.md
+++ b/docs/kernel_cmdline.md
@@ -26,10 +26,10 @@
 ## crashsvc.analyzer=\<path\>
 
 If this option is set, the given analyzer will be used when crashsvc encounters
-an exception. If it is empty, the default (`/boot/bin/crashanalyzer`) will be
-used which logs exception information and a backtrace to the system log. The
-analyzer process is passed two startup handles: the process and thread that
-sustained the exception.
+an exception. If it is empty, the default crash analyzer will be used, which
+logs exception information and a backtrace to the system log. A configured
+analyzer process is passed two startup handles: the process and thread that
+sustained the exception.
 
 ## devmgr\.epoch=\<seconds\>
 
diff --git a/system/core/crashanalyzer/crashanalyzer.cpp b/system/core/crashanalyzer/crashanalyzer.cpp
index bc96fcf..0f473c7 100644
--- a/system/core/crashanalyzer/crashanalyzer.cpp
+++ b/system/core/crashanalyzer/crashanalyzer.cpp
@@ -10,6 +10,13 @@
 #include <string.h>
 #include <threads.h>
 
+#include <fuchsia/crash/c/fidl.h>
+#include <inspector/inspector.h>
+#include <lib/async/cpp/wait.h>
+#include <lib/crashanalyzer/crashanalyzer.h>
+#include <lib/fdio/util.h>
+#include <lib/fidl/cpp/message_buffer.h>
+#include <pretty/hexdump.h>
 #include <zircon/assert.h>
 #include <zircon/crashlogger.h>
 #include <zircon/process.h>
@@ -19,9 +26,6 @@
 #include <zircon/syscalls/exception.h>
 #include <zircon/syscalls/port.h>
 #include <zircon/threads.h>
-#include <lib/fdio/util.h>
-#include <inspector/inspector.h>
-#include <pretty/hexdump.h>
 
 static int verbosity_level = 0;
 
@@ -115,16 +119,16 @@
 
 // How much memory to dump, in bytes.
 // Space for this is allocated on the stack, so this can't be too large.
-constexpr size_t kMemoryDumpSize = 256;
+static constexpr size_t kMemoryDumpSize = 256;
 
 // Handle of the thread we're dumping.
 // This is used by both the main thread and the self-dumper thread.
 // However there is no need to lock it as the self-dumper thread only runs
 // when the main thread has crashed.
-zx_handle_t crashed_thread = ZX_HANDLE_INVALID;
+static zx_handle_t crashed_thread = ZX_HANDLE_INVALID;
 
 // The exception that |crashed_thread| got.
-uint32_t crashed_thread_excp_type;
+static uint32_t crashed_thread_excp_type;
 
 #if defined(__aarch64__)
 static bool write_general_regs(zx_handle_t thread, void* buf, size_t buf_size) {
@@ -214,7 +218,7 @@
     return info.koid;
 }
 
-void process_report(zx_handle_t process, zx_handle_t thread, bool use_libunwind) {
+static void process_report(zx_handle_t process, zx_handle_t thread, bool use_libunwind) {
     zx_koid_t pid = get_koid(process);
     zx_koid_t tid = get_koid(thread);
 
@@ -334,12 +338,73 @@
     zx_handle_close(process);
 }
 
-int main(int argc, char** argv) {
-    // Whether to use libunwind or not.
-    // If not then we use a simple algorithm that assumes ABI-specific
-    // frame pointers are present.
-    bool use_libunwind = true;
+static zx_status_t handle_message(zx_handle_t channel, fidl::MessageBuffer* buffer) {
+    fidl::Message message = buffer->CreateEmptyMessage();
+    zx_status_t status = message.Read(channel, 0);
+    if (status != ZX_OK)
+        return status;
+    if (!message.has_header())
+        return ZX_ERR_INVALID_ARGS;
+    switch (message.ordinal()) {
+    case fuchsia_crash_AnalyzerAnalyzeOrdinal: {
+        const char* error_msg = nullptr;
+        zx_status_t status = message.Decode(&fuchsia_crash_AnalyzerAnalyzeRequestTable, &error_msg);
+        if (status != ZX_OK) {
+            fprintf(stderr, "crashanalyzer: error: %s\n", error_msg);
+            return status;
+        }
+        auto* request = message.GetBytesAs<fuchsia_crash_AnalyzerAnalyzeRequest>();
 
+        // Whether to use libunwind or not.
+        // If not then we use a simple algorithm that assumes ABI-specific
+        // frame pointers are present.
+        bool use_libunwind = true;
+
+        fuchsia_crash_AnalyzerAnalyzeResponse response;
+        memset(&response, 0, sizeof(response));
+        response.hdr.txid = request->hdr.txid;
+        response.hdr.ordinal = request->hdr.ordinal;
+        status = zx_channel_write(channel, 0, &response, sizeof(response), nullptr, 0);
+
+        process_report(request->process, request->thread, use_libunwind);
+
+        return status;
+    }
+    default:
+        fprintf(stderr, "crashanalyzer: error: Unknown message ordinal: %d\n", message.ordinal());
+        return ZX_ERR_NOT_SUPPORTED;
+    }
+}
+
+static void handle_ready(async_t* async,
+                         async::Wait* wait,
+                         zx_status_t status,
+                         const zx_packet_signal_t* signal) {
+    if (status != ZX_OK)
+        goto done;
+
+    if (signal->observed & ZX_CHANNEL_READABLE) {
+        fidl::MessageBuffer buffer;
+        for (uint64_t i = 0; i < signal->count; i++) {
+            status = handle_message(wait->object(), &buffer);
+            if (status == ZX_ERR_SHOULD_WAIT)
+                break;
+            if (status != ZX_OK)
+                goto done;
+        }
+        status = wait->Begin(async);
+        if (status != ZX_OK)
+            goto done;
+        return;
+    }
+
+    ZX_DEBUG_ASSERT(signal->observed & ZX_CHANNEL_PEER_CLOSED);
+done:
+    zx_handle_close(wait->object());
+    delete wait;
+}
+
+static zx_status_t init(void** out_ctx) {
     inspector_set_verbosity(verbosity_level);
 
     // At debugging level 1 print our dso list (in case we crash in a way
@@ -352,18 +417,48 @@
         inspector_dso_free_list(dso_list);
     }
 
-    zx_handle_t process = zx_get_startup_handle(PA_HND(PA_USER0, 0));
-    if (process == ZX_HANDLE_INVALID) {
-        fprintf(stderr, "error: no process in PA_USER0, 0\n");
-        return 1;
+    *out_ctx = nullptr;
+    return ZX_OK;
+}
+
+static zx_status_t connect(void* ctx, async_t* async, const char* service_name,
+                           zx_handle_t request) {
+    if (!strcmp(service_name, "fuchsia.crash.Analyzer")) {
+        auto wait = new async::Wait(request,
+                                    ZX_CHANNEL_READABLE | ZX_CHANNEL_PEER_CLOSED,
+                                    handle_ready);
+        zx_status_t status = wait->Begin(async);
+
+        if (status != ZX_OK) {
+            delete wait;
+            zx_handle_close(request);
+            return status;
+        }
+
+        return ZX_OK;
     }
 
-    zx_handle_t thread = zx_get_startup_handle(PA_HND(PA_USER0, 1));
-    if (thread == ZX_HANDLE_INVALID) {
-        fprintf(stderr, "error: no thread in PA_USER0, 1\n");
-        return 1;
-    }
+    zx_handle_close(request);
+    return ZX_ERR_NOT_SUPPORTED;
+}
 
-    process_report(process, thread, use_libunwind);
-    return 0;
+static constexpr const char* crashanalyzer_services[] = {
+    "fuchsia.crash.Analyzer",
+    nullptr,
+};
+
+static constexpr zx_service_ops_t crashanalyzer_ops = {
+    .init = init,
+    .connect = connect,
+    .release = nullptr,
+};
+
+static constexpr zx_service_provider_t crashanalyzer_service_provider = {
+    .version = SERVICE_PROVIDER_VERSION,
+    .services = crashanalyzer_services,
+    .ops = &crashanalyzer_ops,
+};
+
+const zx_service_provider_t* crashanalyzer_get_service_provider() {
+    return &crashanalyzer_service_provider;
 }
diff --git a/system/core/crashanalyzer/include/lib/crashanalyzer/crashanalyzer.h b/system/core/crashanalyzer/include/lib/crashanalyzer/crashanalyzer.h
new file mode 100644
index 0000000..6fd1a07
--- /dev/null
+++ b/system/core/crashanalyzer/include/lib/crashanalyzer/crashanalyzer.h
@@ -0,0 +1,14 @@
+// Copyright 2018 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#pragma once
+
+#include <lib/svc/service.h>
+#include <zircon/compiler.h>
+
+__BEGIN_CDECLS
+
+const zx_service_provider_t* crashanalyzer_get_service_provider(void);
+
+__END_CDECLS
diff --git a/system/core/crashanalyzer/rules.mk b/system/core/crashanalyzer/rules.mk
index 60a9f3b..39825a5 100644
--- a/system/core/crashanalyzer/rules.mk
+++ b/system/core/crashanalyzer/rules.mk
@@ -6,20 +6,26 @@
 
 MODULE := $(LOCAL_DIR)
 
-MODULE_TYPE := userapp
-MODULE_GROUP := core
+MODULE_TYPE := userlib
 
 MODULE_SRCS += \
     $(LOCAL_DIR)/crashanalyzer.cpp
 
-MODULE_NAME := crashanalyzer
+MODULE_FIDL_LIBS := \
+    system/fidl/crash
+
+MODULE_HEADER_DEPS := \
+    system/ulib/svc \
 
 MODULE_STATIC_LIBS := \
     system/ulib/inspector \
-    system/ulib/zxcpp \
+    system/ulib/async \
+    system/ulib/async.cpp \
     system/ulib/fbl \
+    system/ulib/fidl \
     system/ulib/pretty \
-    system/ulib/runtime
+    system/ulib/runtime \
+    system/ulib/zxcpp
 
 MODULE_LIBS := \
     third_party/ulib/backtrace \
diff --git a/system/core/devmgr/devmgr.c b/system/core/devmgr/devmgr.c
index a7495d7..b982580 100644
--- a/system/core/devmgr/devmgr.c
+++ b/system/core/devmgr/devmgr.c
@@ -11,12 +11,14 @@
 #include <threads.h>
 #include <unistd.h>
 
+#include <fuchsia/crash/c/fidl.h>
 #include <launchpad/launchpad.h>
 #include <loader-service/loader-service.h>
 #include <zircon/boot/bootdata.h>
 #include <zircon/dlfcn.h>
 #include <zircon/process.h>
 #include <zircon/processargs.h>
+#include <zircon/status.h>
 #include <zircon/syscalls.h>
 #include <zircon/syscalls/exception.h>
 #include <zircon/syscalls/object.h>
@@ -215,17 +217,15 @@
             printf("devmgr: analyzer_starter: thread handle duplicate failed: %d\n", status);
             zx_handle_close(handles[0]);
             zx_handle_close(handles[1]);
+            // TODO: Shouldn't we resume the thread (handles[1]) before closing it?
             continue;
         }
 
-        const char* analyzer_command = getenv("crashsvc.analyzer");
-        static const char default_analyzer[] = "/boot/bin/crashanalyzer";
-        if (analyzer_command == NULL) {
-            analyzer_command = default_analyzer;
-        }
+        printf("devmgr: analyzer_starter: analyzing exception type 0x%x\n", exception_type);
 
-        for (;;) {
-            printf("devmgr: analyzer_starter: launching for exception type 0x%x\n", exception_type);
+        const char* analyzer_command = getenv("crashsvc.analyzer");
+        if (analyzer_command) {
+            // If we have an analyzer_command, attempt that first.
             const char* argv_crashanalyzer[] = {analyzer_command};
             uint32_t handle_types[] = {PA_HND(PA_USER0, 0), PA_HND(PA_USER0, 1)};
             // The FS_* flags that grant access should be reduced to a minimal
@@ -235,25 +235,67 @@
                                    NULL, -1, handles, handle_types, countof(handles),
                                    NULL, FS_SVC | FS_DATA);
             if (status == ZX_OK) {
-                break;
+                zx_handle_close(thread_handle);
+                continue;
             }
 
-            printf("devmgr: analyzer_starter: launch failed: %d\n", status);
-            if (strcmp(analyzer_command, default_analyzer) == 0) {
-                // The analyzer to be launched was already our fallback one,
-                // and it still failed. Terminate and bail.
-                status = zx_task_resume(thread_handle, ZX_RESUME_EXCEPTION | ZX_RESUME_TRY_NEXT);
-                if (status != ZX_OK) {
-                    printf("devmgr: analyzer_starter: zx_task_resume: %d\n", status);
-                }
-                break;
-            } else {
-                // The configured analyzer failed to launch, try the default
-                // crashanalyzer as a fallback.
-                analyzer_command = default_analyzer;
-            }
+            printf("devmgr: analyzer_starter: launch failed: %d (%s)\n",
+                   status, zx_status_get_string(status));
+
+            // Fall through to fuchsia.crash.Analyzer.
         }
 
+        zx_handle_t analyzer_request = ZX_HANDLE_INVALID;
+        zx_handle_t analyzer = ZX_HANDLE_INVALID;
+        status = zx_channel_create(0, &analyzer_request, &analyzer);
+        if (status != ZX_OK)
+            goto cleanup;
+        status = fdio_service_connect_at(svchost_outgoing, "public/fuchsia.crash.Analyzer", analyzer_request);
+        analyzer_request = ZX_HANDLE_INVALID;
+        if (status != ZX_OK)
+            goto cleanup;
+        fuchsia_crash_AnalyzerAnalyzeRequest request;
+        fuchsia_crash_AnalyzerAnalyzeResponse response;
+        memset(&request, 0, sizeof(request));
+        memset(&response, 0, sizeof(response));
+        request.hdr.ordinal = fuchsia_crash_AnalyzerAnalyzeOrdinal;
+        request.process = FIDL_HANDLE_PRESENT;
+        request.thread = FIDL_HANDLE_PRESENT;
+        {
+            zx_channel_call_args_t args = {
+                .wr_bytes = &request,
+                .wr_handles = handles,
+                .rd_bytes = &response,
+                .rd_handles = NULL,
+                .wr_num_bytes = sizeof(request),
+                .wr_num_handles = countof(handles),
+                .rd_num_bytes = sizeof(response),
+                .rd_num_handles = 0u,
+            };
+            uint32_t actual_bytes = 0u;
+            uint32_t actual_handles = 0u;
+            status = zx_channel_call(analyzer, 0, ZX_TIME_INFINITE, &args,
+                                    &actual_bytes, &actual_handles, NULL);
+        }
+        // zx_channel_call consumes the handles with these two result codes.
+        if (status == ZX_OK || status == ZX_ERR_CALL_FAILED)
+            memset(handles, 0, sizeof(handles));
+cleanup:
+        if (analyzer)
+            zx_handle_close(analyzer);
+        if (handles[0])
+            zx_handle_close(handles[0]);
+        if (handles[1])
+            zx_handle_close(handles[1]);
+        if (status != ZX_OK) {
+            printf("devmgr: analyzer_starter: failed to analyze crash: %d (%s)\n",
+                    status, zx_status_get_string(status));
+            status = zx_task_resume(thread_handle, ZX_RESUME_EXCEPTION | ZX_RESUME_TRY_NEXT);
+            if (status != ZX_OK) {
+                printf("devmgr: analyzer_starter: zx_task_resume: %d (%s)\n",
+                        status, zx_status_get_string(status));
+            }
+        }
         zx_handle_close(thread_handle);
     }
 }
diff --git a/system/core/devmgr/rules.mk b/system/core/devmgr/rules.mk
index ade47a4..ee643c4 100644
--- a/system/core/devmgr/rules.mk
+++ b/system/core/devmgr/rules.mk
@@ -26,6 +26,10 @@
 # can be linked dynamically.  But it doesn't support any means to look
 # up other shared libraries, so everything else must be linked statically.
 
+# We can avoid this dependency if crashsvc connects directly to the analyzer.
+MODULE_FIDL_LIBS := \
+    system/fidl/crash
+
 # ddk is needed only for ddk/device.h
 MODULE_HEADER_DEPS := \
     system/ulib/ddk
diff --git a/system/core/svchost/rules.mk b/system/core/svchost/rules.mk
index 55d51d6..bac1cdf 100644
--- a/system/core/svchost/rules.mk
+++ b/system/core/svchost/rules.mk
@@ -12,10 +12,15 @@
 MODULE_SRCS += \
     $(LOCAL_DIR)/svchost.cpp \
 
+MODULE_FIDL_LIBS := \
+    system/fidl/crash
+
 MODULE_STATIC_LIBS := \
+    system/ulib/inspector \
     system/ulib/svc \
     system/ulib/process-launcher \
     system/ulib/fs \
+    system/core/crashanalyzer \
     system/ulib/async \
     system/ulib/async.cpp \
     system/ulib/async-loop.cpp \
@@ -23,10 +28,13 @@
     system/ulib/trace \
     system/ulib/fbl \
     system/ulib/fidl \
+    system/ulib/pretty \
     system/ulib/zxcpp \
     system/ulib/zx
 
 MODULE_LIBS := \
+    third_party/ulib/backtrace \
+    third_party/ulib/ngunwind \
     system/ulib/async.default \
     system/ulib/launchpad \
     system/ulib/fdio \
diff --git a/system/core/svchost/svchost.cpp b/system/core/svchost/svchost.cpp
index d94aa2b..76b6667 100644
--- a/system/core/svchost/svchost.cpp
+++ b/system/core/svchost/svchost.cpp
@@ -2,7 +2,9 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
+#include <fbl/algorithm.h>
 #include <lib/async-loop/cpp/loop.h>
+#include <lib/crashanalyzer/crashanalyzer.h>
 #include <lib/fdio/util.h>
 #include <lib/process-launcher/launcher.h>
 #include <lib/svc/outgoing.h>
@@ -146,16 +148,18 @@
         return 1;
     }
 
-    zx_service_provider_instance_t launcher = {
-        .provider = launcher_get_service_provider(),
-        .ctx = nullptr,
+    zx_service_provider_instance_t service_providers[] = {
+        {.provider = launcher_get_service_provider(), .ctx = nullptr},
+        {.provider = crashanalyzer_get_service_provider(), .ctx = nullptr},
     };
 
-    status = provider_load(&launcher, async, outgoing.public_dir());
-    if (status != ZX_OK) {
-        fprintf(stderr, "svchost: error: Failed to load launcher service: %d (%s).\n",
-                status, zx_status_get_string(status));
-        return 1;
+    for (size_t i = 0; i < fbl::count_of(service_providers); ++i) {
+        status = provider_load(&service_providers[i], async, outgoing.public_dir());
+        if (status != ZX_OK) {
+            fprintf(stderr, "svchost: error: Failed to load service provider %zu: %d (%s).\n",
+                    i, status, zx_status_get_string(status));
+            return 1;
+        }
     }
 
     status = publish_tracelink(outgoing.public_dir());
@@ -168,6 +172,10 @@
     publish_deprecated_services(outgoing.public_dir());
 
     status = loop.Run();
-    provider_release(&launcher);
+
+    for (size_t i = 0; i < fbl::count_of(service_providers); ++i) {
+        provider_release(&service_providers[i]);
+    }
+
     return status;
 }
diff --git a/system/fidl/crash/crash.fidl b/system/fidl/crash/crash.fidl
index 21753ea..9db1eb9 100644
--- a/system/fidl/crash/crash.fidl
+++ b/system/fidl/crash/crash.fidl
@@ -13,5 +13,5 @@
     // The behavior of this method varies depending on the implementation, but
     // a typical implementation might print a crash dump to the system log or
     // upload a crash report to a server.
-    1: Analyze(handle<process> process, handle<thread> thread);
+    1: Analyze(handle<process> process, handle<thread> thread) -> ();
 };
diff --git a/system/ulib/fidl/include/lib/fidl/cpp/message.h b/system/ulib/fidl/include/lib/fidl/cpp/message.h
index dfe140e..5e4c095 100644
--- a/system/ulib/fidl/include/lib/fidl/cpp/message.h
+++ b/system/ulib/fidl/include/lib/fidl/cpp/message.h
@@ -75,6 +75,12 @@
         return BytePart(bytes_.data() + n, bytes_.capacity() - n, bytes_.actual() - n);
     }
 
+    // The message bytes interpreted as the given type.
+    template <typename T>
+    T* GetBytesAs() const {
+        return reinterpret_cast<T*>(bytes_.data());
+    }
+
     // The message payload that follows the header interpreted as the given type.
     //
     // Valid only if has_header().