Merge "bazel: allow overriding filegroup and genrule"
diff --git a/Android.bp b/Android.bp
index e104f17..1908394 100644
--- a/Android.bp
+++ b/Android.bp
@@ -3652,6 +3652,7 @@
     name: "perfetto_protos_perfetto_metrics_chrome_descriptor",
     srcs: [
         "protos/perfetto/metrics/android/batt_metric.proto",
+        "protos/perfetto/metrics/android/camera_metric.proto",
         "protos/perfetto/metrics/android/cpu_metric.proto",
         "protos/perfetto/metrics/android/display_metrics.proto",
         "protos/perfetto/metrics/android/dma_heap_metric.proto",
@@ -3673,6 +3674,7 @@
         "protos/perfetto/metrics/android/powrails_metric.proto",
         "protos/perfetto/metrics/android/process_metadata.proto",
         "protos/perfetto/metrics/android/profiler_smaps.proto",
+        "protos/perfetto/metrics/android/simpleperf.proto",
         "protos/perfetto/metrics/android/startup_metric.proto",
         "protos/perfetto/metrics/android/surfaceflinger.proto",
         "protos/perfetto/metrics/android/sysui_cuj_metrics.proto",
@@ -3705,6 +3707,7 @@
     name: "perfetto_protos_perfetto_metrics_descriptor",
     srcs: [
         "protos/perfetto/metrics/android/batt_metric.proto",
+        "protos/perfetto/metrics/android/camera_metric.proto",
         "protos/perfetto/metrics/android/cpu_metric.proto",
         "protos/perfetto/metrics/android/display_metrics.proto",
         "protos/perfetto/metrics/android/dma_heap_metric.proto",
@@ -3726,6 +3729,7 @@
         "protos/perfetto/metrics/android/powrails_metric.proto",
         "protos/perfetto/metrics/android/process_metadata.proto",
         "protos/perfetto/metrics/android/profiler_smaps.proto",
+        "protos/perfetto/metrics/android/simpleperf.proto",
         "protos/perfetto/metrics/android/startup_metric.proto",
         "protos/perfetto/metrics/android/surfaceflinger.proto",
         "protos/perfetto/metrics/android/sysui_cuj_metrics.proto",
@@ -6836,6 +6840,7 @@
 filegroup {
     name: "perfetto_src_base_base",
     srcs: [
+        "src/base/crash_keys.cc",
         "src/base/ctrl_c_handler.cc",
         "src/base/event_fd.cc",
         "src/base/file_utils.cc",
@@ -7986,6 +7991,7 @@
     name: "perfetto_src_trace_processor_metrics_gen_merged_sql_metrics",
     srcs: [
         "src/trace_processor/metrics/android/android_batt.sql",
+        "src/trace_processor/metrics/android/android_camera.sql",
         "src/trace_processor/metrics/android/android_cpu.sql",
         "src/trace_processor/metrics/android/android_cpu_agg.sql",
         "src/trace_processor/metrics/android/android_cpu_raw_metrics_per_core.sql",
@@ -8006,6 +8012,7 @@
         "src/trace_processor/metrics/android/android_package_list.sql",
         "src/trace_processor/metrics/android/android_powrails.sql",
         "src/trace_processor/metrics/android/android_proxy_power.sql",
+        "src/trace_processor/metrics/android/android_simpleperf.sql",
         "src/trace_processor/metrics/android/android_startup.sql",
         "src/trace_processor/metrics/android/android_startup_launches.sql",
         "src/trace_processor/metrics/android/android_surfaceflinger.sql",
diff --git a/BUILD b/BUILD
index 4a9b088..a68cc52 100644
--- a/BUILD
+++ b/BUILD
@@ -336,6 +336,7 @@
     srcs = [
         "include/perfetto/ext/base/circular_queue.h",
         "include/perfetto/ext/base/container_annotations.h",
+        "include/perfetto/ext/base/crash_keys.h",
         "include/perfetto/ext/base/ctrl_c_handler.h",
         "include/perfetto/ext/base/endian.h",
         "include/perfetto/ext/base/event_fd.h",
@@ -586,6 +587,7 @@
         "include/perfetto/tracing/tracing_policy.h",
         "include/perfetto/tracing/track.h",
         "include/perfetto/tracing/track_event.h",
+        "include/perfetto/tracing/track_event_args.h",
         "include/perfetto/tracing/track_event_category_registry.h",
         "include/perfetto/tracing/track_event_interned_data_index.h",
         "include/perfetto/tracing/track_event_legacy.h",
@@ -636,10 +638,12 @@
 perfetto_cc_library(
     name = "src_base_base",
     srcs = [
+        "src/base/crash_keys.cc",
         "src/base/ctrl_c_handler.cc",
         "src/base/event_fd.cc",
         "src/base/file_utils.cc",
         "src/base/getopt_compat.cc",
+        "src/base/log_ring_buffer.h",
         "src/base/logging.cc",
         "src/base/metatrace.cc",
         "src/base/paged_memory.cc",
@@ -1013,6 +1017,7 @@
     name = "src_trace_processor_metrics_gen_merged_sql_metrics",
     srcs = [
         "src/trace_processor/metrics/android/android_batt.sql",
+        "src/trace_processor/metrics/android/android_camera.sql",
         "src/trace_processor/metrics/android/android_cpu.sql",
         "src/trace_processor/metrics/android/android_cpu_agg.sql",
         "src/trace_processor/metrics/android/android_cpu_raw_metrics_per_core.sql",
@@ -1033,6 +1038,7 @@
         "src/trace_processor/metrics/android/android_package_list.sql",
         "src/trace_processor/metrics/android/android_powrails.sql",
         "src/trace_processor/metrics/android/android_proxy_power.sql",
+        "src/trace_processor/metrics/android/android_simpleperf.sql",
         "src/trace_processor/metrics/android/android_startup.sql",
         "src/trace_processor/metrics/android/android_startup_launches.sql",
         "src/trace_processor/metrics/android/android_surfaceflinger.sql",
@@ -2547,6 +2553,7 @@
     name = "protos_perfetto_metrics_android_protos",
     srcs = [
         "protos/perfetto/metrics/android/batt_metric.proto",
+        "protos/perfetto/metrics/android/camera_metric.proto",
         "protos/perfetto/metrics/android/cpu_metric.proto",
         "protos/perfetto/metrics/android/display_metrics.proto",
         "protos/perfetto/metrics/android/dma_heap_metric.proto",
@@ -2568,6 +2575,7 @@
         "protos/perfetto/metrics/android/powrails_metric.proto",
         "protos/perfetto/metrics/android/process_metadata.proto",
         "protos/perfetto/metrics/android/profiler_smaps.proto",
+        "protos/perfetto/metrics/android/simpleperf.proto",
         "protos/perfetto/metrics/android/startup_metric.proto",
         "protos/perfetto/metrics/android/surfaceflinger.proto",
         "protos/perfetto/metrics/android/sysui_cuj_metrics.proto",
diff --git a/CHANGELOG b/CHANGELOG
index e63367e..e71d51c 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,24 +1,50 @@
 Unreleased:
   Tracing service and probes:
+    * Removed advertisement of the built-in data sources "android.heapprofd",
+      "android.java_hprof", "linux.perf" on Linux and other platforms. Built-in
+      data sources are only used to lazy-start daemons on Android OS.
+  Trace Processor:
+    * Changed protobuf decoders to use less stack and fall back on heap sooner
+      (before: 16KB of stack per decoder, after: 1.6KB). It caused problems with
+      some embedders which use smaller per-thread stacks.
+  UI:
+    *
+  SDK:
+    *
+
+
+v20.1 - 2021-10-05:
+  Tracing service and probes:
+    * Fixed standalone Windows build. Updated the llvm-win toolchain.
+
+
+v20.0 - 2021-10-03:
+  Tracing service and probes:
     * Removed DCHECK that would cause crashes when a debug build of the service
       is used with a producer built with -DNDEBUG.
     * Changed the service-side field-level filtering configuration protobuf
       field number, because the feature had a bug. This is effectively
       equivalent to deprecating the feature and reintroducing it under a
       different name.
+    * Added support for boot tracing on Android. Early kernel tracing (prior to
+      the point when /data is mounted) is not yet supported. For instructions
+      see /docs/case-studies/android-boot-tracing.md .
   Trace Processor:
     * Added reqiurement of separating queries by semi-colon (;) followed by
       new-line when specifying a query file with -q to trace processor shell.
     * Added "ancestor_slice_by_stack" and "descendant_slice_by_stack" table
       functions to walk up and down the slice stacks.
     * Overhauled windowed sorting to be based on packet ordering and
-      lifecycle events inside the trace instead of time-based ordering. 
-    * Deprecated |SortingMode::kForceFlushPeriodWindowedSort| due to
-      windowed sorting chagnes. Embedders should switch to
-      |SortingMode::kDefaultHeuristics|; this option will be removed
-      in v21.
+      lifecycle events inside the trace instead of time-based ordering.
+    * Removed |SortingMode::kForceFlushPeriodWindowedSort| due to changes to the
+      sorting algorithm, which is now based on Flush events rather than time.
+      Embedders should switch to |SortingMode::kDefaultHeuristics|. Other
+      SortingMode enum values are no-ops and will be removed in future versions.
   UI:
-    *
+    * Added initial flamegraph support for traced_perf callstack samples.
+    * Added initial Pivot table for aggregation of userspace slices. The feature
+      is disabled by default and requires manual enabling via
+      https://ui.perfetto.dev/#!/flags .
   SDK:
     * Changed DCHECK and DLOGs to be always disabled in SDK builds, regardless
       of NDEBUG.
diff --git a/PRESUBMIT.py b/PRESUBMIT.py
index 9f5fa6f..6be8335 100644
--- a/PRESUBMIT.py
+++ b/PRESUBMIT.py
@@ -157,6 +157,12 @@
        'std::stod throws exceptions prefer base::StringToDouble()'),
       (r'\bstd::stold\b',
        'std::stold throws exceptions prefer base::StringToDouble()'),
+      (r'\bstrncpy\b',
+       'strncpy does not null-terminate if src > dst. Use base::StringCopy'),
+      (r'[(=]\s*snprintf\(',
+       'snprintf can return > dst_size. Use base::SprintfTrunc'),
+      (r'//.*\bDNS\b',
+       '// DNS (Do Not Ship) found. Did you mean to remove some testing code?'),
       (r'\bPERFETTO_EINTR\(close\(',
        'close(2) must not be retried on EINTR on Linux and other OSes '
        'that we run on, as the fd will be closed.'),
@@ -170,6 +176,8 @@
   errors = []
   for f in input_api.AffectedSourceFiles(file_filter):
     for line_number, line in f.ChangedContents():
+      if input_api.re.search(r'^\s*//', line):
+        continue  # Skip comments
       for regex, message in bad_cpp:
         if input_api.re.search(regex, line):
           errors.append(
diff --git a/docs/analysis/trace-processor.md b/docs/analysis/trace-processor.md
index a7df0c8..361e51a 100644
--- a/docs/analysis/trace-processor.md
+++ b/docs/analysis/trace-processor.md
@@ -184,7 +184,7 @@
 ```sql
 SELECT upid
 FROM counter
-JOIN process_counter_track ON process_counter_track.id = slice.track_id
+JOIN process_counter_track ON process_counter_track.id = counter.track_id
 WHERE process_counter_track.name = 'mem.swap' AND value > 1000
 ```
 
@@ -330,6 +330,24 @@
 reasons, span join does not attempt to detect and error out in this situation;
 instead, incorrect rows will silently be produced.
 
+Left and outer span joins are also supported; both function analogously to
+the left and outer joins from SQL.
+```sql
+-- Left table partitioned + right table unpartitioned.
+CREATE VIRTUAL TABLE left_join
+USING SPAN_LEFT_JOIN(table_a PARTITIONED a, table_b);
+
+-- Both tables unpartitioned.
+CREATE VIRTUAL TABLE outer_join
+USING SPAN_OUTER_JOIN(table_x, table_y);
+```
+
+NOTE: there is a subtlety if the partitioned table is empty and is
+either a) part of an outer join or b) on the right side of a left join.
+In this case, *no* slices will be emitted even if the other table is
+non-empty. This approach was decided as being the most natural
+after considering how span joins are used in practice.
+
 ### Ancestor slice
 ancestor_slice is a custom operator table that takes a
 [slice table's id column](/docs/analysis/sql-tables.autogen#slice) and computes
diff --git a/docs/case-studies/android-boot-tracing.md b/docs/case-studies/android-boot-tracing.md
new file mode 100644
index 0000000..3abc76a
--- /dev/null
+++ b/docs/case-studies/android-boot-tracing.md
@@ -0,0 +1,79 @@
+# Recording traces on Android boot
+
+Since Android 13 (T), perfetto can be configured to automatically start
+recording traces on boot. This can be useful to profile the boot process.
+
+## Steps
+
+* Create a file with the desired [trace configuration](/docs/concepts/config.md)
+  in Text format (not binary). Example (more in [/test/configs/](/test/configs/)):
+  ```
+  # One buffer allocated within the central tracing binary for the entire trace,
+  # shared by the two data sources below.
+  buffers {
+    size_kb: 32768
+    fill_policy: DISCARD
+  }
+
+  # Ftrace data from the kernel, mainly the process scheduling events.
+  data_sources {
+    config {
+      name: "linux.ftrace"
+      target_buffer: 0
+      ftrace_config {
+        ftrace_events: "sched_switch"
+        ftrace_events: "sched_waking"
+        ftrace_events: "sched_wakeup_new"
+
+        ftrace_events: "task_newtask"
+        ftrace_events: "task_rename"
+
+        ftrace_events: "sched_process_exec"
+        ftrace_events: "sched_process_exit"
+        ftrace_events: "sched_process_fork"
+        ftrace_events: "sched_process_free"
+        ftrace_events: "sched_process_hang"
+        ftrace_events: "sched_process_wait"
+      }
+    }
+  }
+
+  # Resolve process commandlines and parent/child relationships, to better
+  # interpret the ftrace events, which are in terms of pids.
+  data_sources {
+    config {
+      name: "linux.process_stats"
+      target_buffer: 0
+    }
+  }
+
+  # 10s trace, but can be stopped prematurely via `adb shell pkill -u perfetto`.
+  duration_ms: 10000
+  ```
+* Put the file on the device at `/data/misc/perfetto-configs/boottrace.pbtxt`:
+  ```
+  adb push <yourfile> /data/misc/perfetto-configs/boottrace.pbtxt
+  ```
+* Enable the `perfetto_trace_on_boot` service:
+  ```
+  adb shell setprop persist.debug.perfetto.boottrace 1
+  ```
+  The property is reset on boot. In order to trace the next boot, the command
+  must be reissued.
+* Reboot the device.
+* The output trace will be written at
+  `/data/misc/perfetto-traces/boottrace.perfetto-trace`. The file will be
+  removed before a new trace is started.
+  ```
+  adb pull /data/misc/perfetto-traces/boottrace.perfetto-trace
+  ```
+  **N.B.:** The file will appear after the recording has stopped (be sure to set
+  `duration_ms` to a reasonable value in the config) or after the first
+  `flush_period_ms`.
+* `boottrace.perfetto-trace` can now be opened in
+  [ui.perfetto.dev](https://ui.perfetto.dev/)
+
+## Implementation details
+* The trace will start only after persistent properties are loaded, which
+  happens after /data has been mounted.
+* The command to start the trace is implemented as a oneshot init service.
diff --git a/docs/contributing/sdk-releasing.md b/docs/contributing/sdk-releasing.md
index 9679382..ec69e5e 100644
--- a/docs/contributing/sdk-releasing.md
+++ b/docs/contributing/sdk-releasing.md
@@ -126,6 +126,31 @@
    - [docs/instrumentation/tracing-sdk.md](/docs/instrumentation/tracing-sdk.md)
    - [examples/sdk/README.md](/examples/sdk/README.md)
 
-6. Send an email with the CHANGELOG to perfetto-dev@ (internal) and perfetto-dev@googlegroups.com.
+6. Send an email with the CHANGELOG to perfetto-dev@ (internal) and to the
+   [public perfetto-dev](https://groups.google.com/forum/#!forum/perfetto-dev).
 
-Phew, you're done!
+## Creating a GitHub release with prebuilts
+
+7. Within a few minutes the LUCI scheduler will trigger builds of prebuilt
+   binaries on https://luci-scheduler.appspot.com/jobs/perfetto . Wait for all
+   the bots to have completed successfully and be back into the WAITING state.
+
+8. Run `tools/package-prebuilts-for-github-release vX.Y`. It will pull the
+   prebuilts under `/tmp/perfetto-prebuilts-vX.Y`.
+  - There must be 9 zips in total: linux-{arm,arm64,amd64},
+    android-{arm,arm64,x86,x64}, mac-amd64, win-amd64.
+  - If one or more are missing it means that one of the LUCI bots failed,
+    check the logs (follow the "Task URL: " link) from the invocation log.
+  - If this happens you'll need to respin a vX.(Y+1) release with the fix
+    (look at the history of v20.1, where a Windows failure required a respin).
+
+9. Open https://github.com/google/perfetto/releases/new and
+  - Select "Choose Tag" -> vX.Y
+  - "Release title" -> "Perfetto vX.Y"
+  - "Describe release" -> Copy the CHANGELOG, wrapping it in triple backticks.
+  - "Attach binaries" -> Attach the nine .zip files from the previous step.
+
+10. Run `tools/roll-prebuilts vX.Y`. It will update the SHA256 into the various
+   scripts under `tools/`. Upload a CL with the changes.
+
+11. Phew, you're done!
diff --git a/docs/data-sources/atrace.md b/docs/data-sources/atrace.md
index 51e50f1..a54bd1a 100644
--- a/docs/data-sources/atrace.md
+++ b/docs/data-sources/atrace.md
@@ -42,7 +42,7 @@
 write the marker into `/sys/kernel/debug/tracing/trace_marker` (which is the
 most expensive part).
 
-Our team is are looking into a migration path for Android, in light of the newly
+Our team is looking into a migration path for Android, in light of the newly
 introduced [Tracing SDK](/docs/instrumentation/tracing-sdk.md). At the moment
 the advice is to keep using the existing ATrace API on Android.
 
diff --git a/docs/instrumentation/tracing-sdk.md b/docs/instrumentation/tracing-sdk.md
index 1e58bb8..6c4382c 100644
--- a/docs/instrumentation/tracing-sdk.md
+++ b/docs/instrumentation/tracing-sdk.md
@@ -30,7 +30,7 @@
 To start using the Client API, first check out the latest SDK release:
 
 ```bash
-git clone https://android.googlesource.com/platform/external/perfetto -b v17.0
+git clone https://android.googlesource.com/platform/external/perfetto -b v20.1
 ```
 
 The SDK consists of two files, `sdk/perfetto.h` and `sdk/perfetto.cc`. These are
diff --git a/docs/toc.md b/docs/toc.md
index 565ee8d..6ddd809 100644
--- a/docs/toc.md
+++ b/docs/toc.md
@@ -8,6 +8,7 @@
   * [Heap profiling](quickstart/heap-profiling.md)
 
 * [Case studies](#)
+  * [Android boot tracing](case-studies/android-boot-tracing.md)
   * [Debugging memory usage](case-studies/memory.md)
 
 * [Data sources](#)
diff --git a/examples/sdk/README.md b/examples/sdk/README.md
index 01f3eb1..0d68434 100644
--- a/examples/sdk/README.md
+++ b/examples/sdk/README.md
@@ -15,7 +15,7 @@
 First, check out the latest Perfetto release:
 
 ```bash
-git clone https://android.googlesource.com/platform/external/perfetto -b v17.0
+git clone https://android.googlesource.com/platform/external/perfetto -b v20.1
 ```
 
 Then, build using CMake:
diff --git a/gn/gen_perfetto_version_header.gni b/gn/gen_perfetto_version_header.gni
index 17c879f..59cd681 100644
--- a/gn/gen_perfetto_version_header.gni
+++ b/gn/gen_perfetto_version_header.gni
@@ -21,15 +21,20 @@
 
 import("perfetto.gni")
 
+_ver_script = "${perfetto_root_path}tools/write_version_header.py"
+_has_git = false
+if (perfetto_enable_git_rev_version_header) {
+  _has_git = "1" == exec_script(_ver_script, [ "--check_git" ], "trim string")
+}
+
 template("gen_perfetto_version_header") {
   action(target_name) {
-    script = "${perfetto_root_path}tools/write_version_header.py"
+    script = _ver_script
     changelog = "${perfetto_root_path}CHANGELOG"
     inputs = [ changelog ]
     outputs = []
     args = []
-    if (perfetto_build_standalone && !is_perfetto_build_generator &&
-        perfetto_enable_git_rev_version_header) {
+    if (_has_git) {
       inputs += [ "${perfetto_root_path}.git/HEAD" ]
     }
 
@@ -49,7 +54,7 @@
       ]
       outputs += [ invoker.ts_out ]
     }
-    if (!perfetto_enable_git_rev_version_header) {
+    if (!_has_git) {
       args += [ "--no_git" ]
     }
   }
diff --git a/gn/perfetto.gni b/gn/perfetto.gni
index c6817aa..3128050 100644
--- a/gn/perfetto.gni
+++ b/gn/perfetto.gni
@@ -175,11 +175,13 @@
         is_perfetto_build_generator
   }
 
-  # Enables base::Watchdog. Is supported only on Linux-based platforms.
+  # Enables base::Watchdog. Is supported only on Linux-based platforms in
+  # standalone GN builds (NOT in bazel/blaze).
   # gn/BUILD.gn further restricts this to OS_LINUX || OS_ANDROID when generating
   # the perfetto_build_flags.h header.
   enable_perfetto_watchdog =
-      perfetto_build_with_android || perfetto_build_standalone
+      perfetto_build_with_android ||
+      (perfetto_build_standalone && !is_perfetto_build_generator)
 
   # Misc host executable under tools/.
   enable_perfetto_tools =
@@ -235,7 +237,9 @@
 }
 
 declare_args() {
-  perfetto_enable_git_rev_version_header = enable_perfetto_version_gen
+  perfetto_enable_git_rev_version_header =
+      enable_perfetto_version_gen && perfetto_build_standalone &&
+      !is_perfetto_build_generator
 
   # The traced_probes daemon is very Linux-specific, as it depends on ftrace and
   # various /proc interfaces. There is no point making its code platform-neutral
diff --git a/include/perfetto/base/build_configs/bazel/perfetto_build_flags.h b/include/perfetto/base/build_configs/bazel/perfetto_build_flags.h
index e08a103..dba06b3 100644
--- a/include/perfetto/base/build_configs/bazel/perfetto_build_flags.h
+++ b/include/perfetto/base/build_configs/bazel/perfetto_build_flags.h
@@ -26,7 +26,7 @@
 #define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_STANDALONE_BUILD() (1)
 #define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_START_DAEMONS() (1)
 #define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_IPC() (1)
-#define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_WATCHDOG() (PERFETTO_BUILDFLAG_DEFINE_PERFETTO_OS_ANDROID() || PERFETTO_BUILDFLAG_DEFINE_PERFETTO_OS_LINUX())
+#define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_WATCHDOG() (0)
 #define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_COMPONENT_BUILD() (0)
 #define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_FORCE_DLOG_ON() (0)
 #define PERFETTO_BUILDFLAG_DEFINE_PERFETTO_FORCE_DLOG_OFF() (0)
diff --git a/include/perfetto/base/compiler.h b/include/perfetto/base/compiler.h
index b060810..b30dda6 100644
--- a/include/perfetto/base/compiler.h
+++ b/include/perfetto/base/compiler.h
@@ -147,6 +147,13 @@
 #define PERFETTO_NO_DESTROY
 #endif
 
+// Macro for telling -Wimplicit-fallthrough that a fallthrough is intentional.
+#if defined(__clang__)
+#define PERFETTO_FALLTHROUGH [[clang::fallthrough]]
+#else
+#define PERFETTO_FALLTHROUGH
+#endif
+
 namespace perfetto {
 namespace base {
 
diff --git a/include/perfetto/base/logging.h b/include/perfetto/base/logging.h
index 6e128be..eaa5710 100644
--- a/include/perfetto/base/logging.h
+++ b/include/perfetto/base/logging.h
@@ -63,6 +63,24 @@
 #include <android/log.h>
 #endif
 
+// Enable the "Print the most recent PERFETTO_LOG(s) before crashing" feature
+// on Android in-tree builds and on standalone builds (mainly for testing).
+// This is deliberately not PERFETTO_OS_ANDROID because we don't want this
+// feature when perfetto is embedded in other Android projects (e.g. SDK).
+// TODO(b/203795298): TFLite is using the client library in blaze builds and is
+// targeting API 19. For now disable the feature based on API level.
+#if defined(PERFETTO_ANDROID_ASYNC_SAFE_LOG)
+#define PERFETTO_ENABLE_LOG_RING_BUFFER() 0
+#elif PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
+#define PERFETTO_ENABLE_LOG_RING_BUFFER() 1
+#elif PERFETTO_BUILDFLAG(PERFETTO_STANDALONE_BUILD) && \
+    (!PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) ||       \
+     (defined(__ANDROID_API__) && __ANDROID_API__ >= 21))
+#define PERFETTO_ENABLE_LOG_RING_BUFFER() 1
+#else
+#define PERFETTO_ENABLE_LOG_RING_BUFFER() 0
+#endif
+
 namespace perfetto {
 namespace base {
 
@@ -104,6 +122,23 @@
                                 const char* fmt,
                                 ...) PERFETTO_PRINTF_FORMAT(4, 5);
 
+// This is defined in debug_crash_stack_trace.cc, but that is only linked in
+// standalone && debug builds, see enable_perfetto_stderr_crash_dump in
+// perfetto.gni.
+PERFETTO_EXPORT void EnableStacktraceOnCrashForDebug();
+
+#if PERFETTO_ENABLE_LOG_RING_BUFFER()
+// Gets a snapshot of the logs from the internal log ring buffer and:
+// - On Android in-tree builds: Passes that to android_set_abort_message().
+//   That will attach the logs to the crash report.
+// - On standalone builds (all other OSes) prints that on stderr.
+// This function must be called only once, right before inducing a crash (this
+// is because android_set_abort_message() can only be called once).
+PERFETTO_EXPORT void MaybeSerializeLastLogsForCrashReporting();
+#else
+inline void MaybeSerializeLastLogsForCrashReporting() {}
+#endif
+
 #if defined(PERFETTO_ANDROID_ASYNC_SAFE_LOG)
 #define PERFETTO_XLOG(level, fmt, ...)                                        \
   do {                                                                        \
@@ -120,16 +155,18 @@
 #endif
 
 #if defined(_MSC_VER)
-#define PERFETTO_IMMEDIATE_CRASH() \
-  do {                             \
-    __debugbreak();                \
-    __assume(0);                   \
+#define PERFETTO_IMMEDIATE_CRASH()                               \
+  do {                                                           \
+    ::perfetto::base::MaybeSerializeLastLogsForCrashReporting(); \
+    __debugbreak();                                              \
+    __assume(0);                                                 \
   } while (0)
 #else
-#define PERFETTO_IMMEDIATE_CRASH() \
-  do {                             \
-    __builtin_trap();              \
-    __builtin_unreachable();       \
+#define PERFETTO_IMMEDIATE_CRASH()                               \
+  do {                                                           \
+    ::perfetto::base::MaybeSerializeLastLogsForCrashReporting(); \
+    __builtin_trap();                                            \
+    __builtin_unreachable();                                     \
   } while (0)
 #endif
 
diff --git a/include/perfetto/base/proc_utils.h b/include/perfetto/base/proc_utils.h
index 8818ec0..cb8e13a 100644
--- a/include/perfetto/base/proc_utils.h
+++ b/include/perfetto/base/proc_utils.h
@@ -22,8 +22,10 @@
 #include "perfetto/base/build_config.h"
 
 #if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
-#include <Windows.h>
-#include <processthreadsapi.h>
+extern "C" {
+// Prototype extracted from the Windows SDK to avoid including windows.h.
+__declspec(dllimport) unsigned long __stdcall GetCurrentProcessId();
+}
 #elif PERFETTO_BUILDFLAG(PERFETTO_OS_FUCHSIA)
 #include <zircon/process.h>
 #include <zircon/types.h>
diff --git a/include/perfetto/base/thread_utils.h b/include/perfetto/base/thread_utils.h
index 48a6508..6af9a57 100644
--- a/include/perfetto/base/thread_utils.h
+++ b/include/perfetto/base/thread_utils.h
@@ -22,8 +22,10 @@
 #include "perfetto/base/build_config.h"
 
 #if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
-#include <Windows.h>
-#include <processthreadsapi.h>
+extern "C" {
+// Prototype extracted from the Windows SDK to avoid including windows.h.
+__declspec(dllimport) unsigned long __stdcall GetCurrentThreadId();
+}
 #elif PERFETTO_BUILDFLAG(PERFETTO_OS_FUCHSIA)
 #include <zircon/process.h>
 #include <zircon/types.h>
diff --git a/include/perfetto/ext/base/BUILD.gn b/include/perfetto/ext/base/BUILD.gn
index ec8cca8..e86ca99 100644
--- a/include/perfetto/ext/base/BUILD.gn
+++ b/include/perfetto/ext/base/BUILD.gn
@@ -18,6 +18,7 @@
   sources = [
     "circular_queue.h",
     "container_annotations.h",
+    "crash_keys.h",
     "ctrl_c_handler.h",
     "endian.h",
     "event_fd.h",
diff --git a/include/perfetto/ext/base/crash_keys.h b/include/perfetto/ext/base/crash_keys.h
new file mode 100644
index 0000000..1d550f6
--- /dev/null
+++ b/include/perfetto/ext/base/crash_keys.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_PERFETTO_EXT_BASE_CRASH_KEYS_H_
+#define INCLUDE_PERFETTO_EXT_BASE_CRASH_KEYS_H_
+
+#include <algorithm>
+#include <atomic>
+
+#include <stdint.h>
+#include <string.h>
+
+#include "perfetto/base/compiler.h"
+#include "perfetto/ext/base/string_view.h"
+
+// Crash keys are very simple global variables with static-storage that
+// are reported on crash time for managed crashes (CHECK/FATAL/Watchdog).
+// - Translation units can define a CrashKey and register it at some point
+//   during initialization.
+// - CrashKey instances must be long-lived. They should really be just global
+//   static variables in the anonymous namespace.
+// Example:
+// subsystem_1.cc
+//   CrashKey g_client_id("ipc_client_id");
+//   ...
+//   OnIpcReceived(client_id) {
+//      g_client_id.Set(client_id);
+//      ... // Process the IPC
+//      g_client_id.Clear();
+//   }
+//   Or equivalently:
+//   OnIpcReceived(client_id) {
+//      auto scoped_key = g_client_id.SetScoped(client_id);
+//      ... // Process the IPC
+//   }
+//
+// If a crash happens while processing the IPC, the crash report will
+// have a line "ipc_client_id: 42".
+//
+// Thread safety considerations:
+// CrashKeys can be registered and set/cleared from any thread.
+// There is no compelling use-case to have full acquire/release consistency when
+// setting a key. This means that if a thread crashes immediately after a
+// crash key has been set on another thread, the value printed on the crash
+// report could be incomplete. The code guarantees defined behavior and does
+// not rely on null-terminated string (in the worst case 32 bytes of random
+// garbage will be printed out).
+
+// The tests live in logging_unittest.cc.
+
+namespace perfetto {
+namespace base {
+
+constexpr size_t kCrashKeyMaxStrSize = 32;
+
+// CrashKey instances must be long lived
+class CrashKey {
+ public:
+  class ScopedClear {  // Guard returned by SetScoped(); clears the key on exit.
+   public:
+    explicit ScopedClear(CrashKey* k) : key_(k) {}
+    ~ScopedClear() {
+      if (key_)  // Skipped when this guard was moved-from (key_ == nullptr).
+        key_->Clear();
+    }
+    ScopedClear(const ScopedClear&) = delete;
+    ScopedClear& operator=(const ScopedClear&) = delete;
+    ScopedClear& operator=(ScopedClear&&) = delete;
+    ScopedClear(ScopedClear&& other) : key_(other.key_) {
+      other.key_ = nullptr;  // Only the move target will Clear() the key.
+    }
+
+   private:
+    CrashKey* key_;  // Never deleted here; nullptr once moved-from.
+  };
+
+  // constexpr so that a CrashKey can be defined as a global in an anonymous
+  // namespace without requiring a global (dynamic) constructor.
+  // |name| is stored as a raw pointer, so it must be a long-lived string.
+  constexpr explicit CrashKey(const char* name)
+      : registered_{}, type_(Type::kUnset), name_(name), str_value_{} {}
+  CrashKey(const CrashKey&) = delete;
+  CrashKey& operator=(const CrashKey&) = delete;
+  CrashKey(CrashKey&&) = delete;
+  CrashKey& operator=(CrashKey&&) = delete;
+
+  enum class Type : uint8_t { kUnset = 0, kInt, kStr };
+
+  void Clear() {
+    int_value_ = 0;  // Shares storage with str_value_ (anonymous union below).
+    type_ = Type::kUnset;
+  }
+
+  void Set(int64_t value) {
+    int_value_ = value;
+    type_ = Type::kInt;
+    if (PERFETTO_UNLIKELY(!registered_.load(std::memory_order_relaxed)))
+      Register();  // Not registered yet; Register() is defined out-of-line.
+  }
+
+  void Set(StringView sv) {
+    size_t len = std::min(sv.size(), sizeof(str_value_) - 1);
+    memcpy(str_value_, sv.data(), len);  // Truncates to the buffer size - 1.
+    str_value_[len] = '\0';  // Always NUL-terminated, even when truncated.
+    type_ = Type::kStr;
+    if (PERFETTO_UNLIKELY(!registered_.load(std::memory_order_relaxed)))
+      Register();  // Not registered yet; Register() is defined out-of-line.
+  }
+
+  ScopedClear SetScoped(int64_t value) PERFETTO_WARN_UNUSED_RESULT {
+    Set(value);
+    return ScopedClear(this);  // Key is cleared when the guard is destroyed.
+  }
+
+  ScopedClear SetScoped(StringView sv) PERFETTO_WARN_UNUSED_RESULT {
+    Set(sv);
+    return ScopedClear(this);  // Key is cleared when the guard is destroyed.
+  }
+
+  int64_t int_value() const { return int_value_; }  // Does not check type_.
+  size_t ToString(char* dst, size_t len);  // Defined out-of-line.
+
+ private:
+  void Register();
+
+  std::atomic<bool> registered_;  // Read in Set() to decide on Register().
+  Type type_;  // Which union member below (if any) currently holds a value.
+  const char* const name_;  // Not copied; see the constructor comment.
+  union {
+    char str_value_[kCrashKeyMaxStrSize];
+    int64_t int_value_;
+  };
+};
+
+// Fills |dst| with a string containing one line for each crash key
+// (excluding the unset ones).
+// Returns number of chars written, without counting the NUL terminator.
+// This is used in logging.cc when emitting the crash report abort message.
+size_t SerializeCrashKeys(char* dst, size_t len);
+
+void UnregisterAllCrashKeysForTesting();
+
+}  // namespace base
+}  // namespace perfetto
+
+#endif  // INCLUDE_PERFETTO_EXT_BASE_CRASH_KEYS_H_
diff --git a/include/perfetto/ext/base/string_utils.h b/include/perfetto/ext/base/string_utils.h
index 5b687eb..2508aec 100644
--- a/include/perfetto/ext/base/string_utils.h
+++ b/include/perfetto/ext/base/string_utils.h
@@ -17,7 +17,9 @@
 #ifndef INCLUDE_PERFETTO_EXT_BASE_STRING_UTILS_H_
 #define INCLUDE_PERFETTO_EXT_BASE_STRING_UTILS_H_
 
+#include <stdarg.h>
 #include <stdlib.h>
+#include <string.h>
 
 #include <cinttypes>
 #include <string>
@@ -125,6 +127,83 @@
 std::string TrimLeading(const std::string& str);
 std::string Base64Encode(const void* raw, size_t size);
 
+// A BSD-style strlcpy without the return value.
+// Copies at most |dst_size|-1 characters. Unlike strncpy, it always \0
+// terminates |dst|, as long as |dst_size| is not 0.
+// Unlike strncpy and like strlcpy it does not zero-pad the rest of |dst|.
+// Returns nothing. The BSD strlcpy returns the size of |src|, which might
+// be > |dst_size|. Anecdotal experience suggests people assume the return value
+// is the number of bytes written in |dst|. That assumption can lead to
+// dangerous bugs.
+// In order to avoid being subtly uncompliant with strlcpy AND avoid misuse,
+// the choice here is to return nothing.
+inline void StringCopy(char* dst, const char* src, size_t dst_size) {
+  for (size_t i = 0; i < dst_size; ++i) {
+    if ((dst[i] = src[i]) == '\0') {
+      return;  // We hit and copied the null terminator.
+    }
+  }
+
+  // We were left off at dst_size. We over copied 1 byte. Null terminate.
+  if (PERFETTO_LIKELY(dst_size > 0))
+    dst[dst_size - 1] = 0;
+}
+
+// Like snprintf() but returns the number of chars *actually* written (without
+// counting the null terminator) NOT "the number of chars which would have been
+// written to the final string if enough space had been available".
+// This should be used in almost all cases when the caller uses the return value
+// of snprintf(). If the return value is not used, there is no benefit in using
+// this wrapper, as this just calls snprintf() and mangles the return value.
+// It always null-terminates |dst| (even in case of errors), unless
+// |dst_size| == 0.
+// Examples:
+//   SprintfTrunc(x, 4, "123whatever"): returns 3 and writes "123\0".
+//   SprintfTrunc(x, 4, "123"): returns 3 and writes "123\0".
+//   SprintfTrunc(x, 3, "123"): returns 2 and writes "12\0".
+//   SprintfTrunc(x, 2, "123"): returns 1 and writes "1\0".
+//   SprintfTrunc(x, 1, "123"): returns 0 and writes "\0".
+//   SprintfTrunc(x, 0, "123"): returns 0 and writes nothing.
+// NOTE: This means that the caller has no way to tell when truncation happens
+//   vs the edge case of *just* fitting in the buffer.
+size_t SprintfTrunc(char* dst, size_t dst_size, const char* fmt, ...)
+    PERFETTO_PRINTF_FORMAT(3, 4);
+
+// A helper class to facilitate construction and usage of write-once stack
+// strings.
+// Example usage:
+//   StackString<32> x("format %d %s", 42, string_arg);
+//   TakeString(x.c_str() | x.string_view() | x.ToStdString());
+// Rather than char x[32] + sprintf.
+// Advantages:
+// - Avoids useless zero-fills caused by people doing `char buf[32] {}` (mainly
+//   by fearing unknown snprintf failure modes).
+// - Makes the code more robust in case of snprintf truncations (len() and
+//  string_view() will return the truncated length, unlike snprintf).
+template <size_t N>
+class StackString {
+ public:
+  explicit PERFETTO_PRINTF_FORMAT(/* 1=this */ 2, 3)
+      StackString(const char* fmt, ...) {
+    buf_[0] = '\0';
+    va_list args;
+    va_start(args, fmt);
+    int res = vsnprintf(buf_, sizeof(buf_), fmt, args);
+    va_end(args);
+    buf_[sizeof(buf_) - 1] = '\0';
+    len_ = res < 0 ? 0 : std::min(static_cast<size_t>(res), sizeof(buf_) - 1);
+  }
+
+  StringView string_view() const { return StringView(buf_, len_); }
+  std::string ToStdString() const { return std::string(buf_, len_); }
+  const char* c_str() const { return buf_; }
+  size_t len() const { return len_; }
+
+ private:
+  char buf_[N];
+  size_t len_ = 0;  // Does not include the \0.
+};
+
 }  // namespace base
 }  // namespace perfetto
 
diff --git a/include/perfetto/ext/base/string_writer.h b/include/perfetto/ext/base/string_writer.h
index 7005e3b..587b2f9 100644
--- a/include/perfetto/ext/base/string_writer.h
+++ b/include/perfetto/ext/base/string_writer.h
@@ -25,6 +25,7 @@
 #include <limits>
 
 #include "perfetto/base/logging.h"
+#include "perfetto/ext/base/string_utils.h"
 #include "perfetto/ext/base/string_view.h"
 
 namespace perfetto {
@@ -97,8 +98,8 @@
   void AppendHexInt(IntType value) {
     // TODO(lalitm): trying to optimize this is premature given we almost never
     // print hex ints. Reevaluate this in the future if we do print them more.
-    size_t res = static_cast<size_t>(
-        snprintf(buffer_ + pos_, size_ - pos_, "%" PRIx64, value));
+    size_t res =
+        base::SprintfTrunc(buffer_ + pos_, size_ - pos_, "%" PRIx64, value);
     PERFETTO_DCHECK(pos_ + res <= size_);
     pos_ += res;
   }
@@ -107,8 +108,7 @@
   void AppendDouble(double value) {
     // TODO(lalitm): trying to optimize this is premature given we almost never
     // print doubles. Reevaluate this in the future if we do print them more.
-    size_t res = static_cast<size_t>(
-        snprintf(buffer_ + pos_, size_ - pos_, "%lf", value));
+    size_t res = base::SprintfTrunc(buffer_ + pos_, size_ - pos_, "%lf", value);
     PERFETTO_DCHECK(pos_ + res <= size_);
     pos_ += res;
   }
@@ -129,7 +129,7 @@
   char* CreateStringCopy() {
     char* dup = reinterpret_cast<char*>(malloc(pos_ + 1));
     if (dup) {
-      strncpy(dup, buffer_, pos_);
+      memcpy(dup, buffer_, pos_);
       dup[pos_] = '\0';
     }
     return dup;
diff --git a/include/perfetto/ext/base/thread_utils.h b/include/perfetto/ext/base/thread_utils.h
index 2e9c4e5..4a1f0b6 100644
--- a/include/perfetto/ext/base/thread_utils.h
+++ b/include/perfetto/ext/base/thread_utils.h
@@ -20,6 +20,7 @@
 #include <string>
 
 #include "perfetto/base/build_config.h"
+#include "perfetto/ext/base/string_utils.h"
 
 #if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) ||   \
     PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID) || \
@@ -46,8 +47,7 @@
 // string.
 inline bool MaybeSetThreadName(const std::string& name) {
   char buf[16] = {};
-  size_t sz = std::min(name.size(), static_cast<size_t>(15));
-  strncpy(buf, name.c_str(), sz);
+  StringCopy(buf, name.c_str(), sizeof(buf));
 
 #if PERFETTO_BUILDFLAG(PERFETTO_OS_APPLE)
   return pthread_setname_np(buf) == 0;
diff --git a/include/perfetto/ext/base/watchdog_posix.h b/include/perfetto/ext/base/watchdog_posix.h
index 3c673c3..27877c7 100644
--- a/include/perfetto/ext/base/watchdog_posix.h
+++ b/include/perfetto/ext/base/watchdog_posix.h
@@ -17,13 +17,13 @@
 #ifndef INCLUDE_PERFETTO_EXT_BASE_WATCHDOG_POSIX_H_
 #define INCLUDE_PERFETTO_EXT_BASE_WATCHDOG_POSIX_H_
 
-#include "perfetto/ext/base/optional.h"
-#include "perfetto/ext/base/thread_checker.h"
+#include "perfetto/base/time.h"
+#include "perfetto/ext/base/scoped_file.h"
 
 #include <atomic>
-#include <condition_variable>
 #include <mutex>
 #include <thread>
+#include <vector>
 
 namespace perfetto {
 namespace base {
@@ -41,6 +41,20 @@
 // crashed.
 class Watchdog {
  public:
+  struct TimerData {
+    TimeMillis deadline{};  // Absolute deadline, CLOCK_MONOTONIC.
+    int thread_id = 0;      // The tid we'll send a SIGABRT to on expiry.
+
+    TimerData() = default;
+    TimerData(TimeMillis d, int t) : deadline(d), thread_id(t) {}
+    bool operator<(const TimerData& x) const {
+      return std::tie(deadline, thread_id) < std::tie(x.deadline, x.thread_id);
+    }
+    bool operator==(const TimerData& x) const {
+      return std::tie(deadline, thread_id) == std::tie(x.deadline, x.thread_id);
+    }
+  };
+
   // Handle to the timer set to crash the program. If the handle is dropped,
   // the timer is removed so the program does not crash.
   class Timer {
@@ -51,11 +65,14 @@
    private:
     friend class Watchdog;
 
-    explicit Timer(uint32_t ms);
+    explicit Timer(Watchdog*, uint32_t ms);
     Timer(const Timer&) = delete;
     Timer& operator=(const Timer&) = delete;
 
-    Optional<timer_t> timerid_;
+    // In production this is always Watchdog::GetInstance(), which is long
+    // lived. However unittests use a non-global instance.
+    Watchdog* watchdog_ = nullptr;
+    TimerData timer_data_;
   };
   virtual ~Watchdog();
 
@@ -81,10 +98,6 @@
   // Note: |window_ms| has to be a multiple of |polling_interval_ms_|.
   void SetCpuLimit(uint32_t percentage, uint32_t window_ms);
 
- protected:
-  // Protected for testing.
-  Watchdog(uint32_t polling_interval_ms);
-
  private:
   // Represents a ring buffer in which integer values can be stored.
   class WindowedInterval {
@@ -126,15 +139,23 @@
     std::unique_ptr<uint64_t[]> buffer_;
   };
 
-  explicit Watchdog(const Watchdog&) = delete;
+  Watchdog(const Watchdog&) = delete;
   Watchdog& operator=(const Watchdog&) = delete;
+  Watchdog(Watchdog&&) = delete;
+  Watchdog& operator=(Watchdog&&) = delete;
 
   // Main method for the watchdog thread.
   void ThreadMain();
 
   // Check each type of resource every |polling_interval_ms_| miillis.
-  void CheckMemory(uint64_t rss_bytes);
-  void CheckCpu(uint64_t cpu_time);
+  // Returns true if the threshold is exceeded and the process should be killed.
+  bool CheckMemory_Locked(uint64_t rss_bytes);
+  bool CheckCpu_Locked(uint64_t cpu_time);
+
+  void AddFatalTimer(TimerData);
+  void RemoveFatalTimer(TimerData);
+  void RearmTimerFd_Locked();
+  void SerializeLogsAndKillThread(int tid);
 
   // Computes the time interval spanned by a given ring buffer with respect
   // to |polling_interval_ms_|.
@@ -143,7 +164,7 @@
   const uint32_t polling_interval_ms_;
   std::atomic<bool> enabled_{false};
   std::thread thread_;
-  std::condition_variable exit_signal_;
+  ScopedPlatformHandle timer_fd_;
 
   // --- Begin lock-protected members ---
 
@@ -155,7 +176,20 @@
   uint32_t cpu_limit_percentage_ = 0;
   WindowedInterval cpu_window_time_ticks_;
 
+  // Outstanding timers created via CreateFatalTimer() and not yet destroyed.
+  // The vector is not sorted. In most cases there are only 1-2 timers, we can
+  // afford O(N) operations.
+  // All the timers in the list share the same |timer_fd_|, which is kept
+  // armed on the min(timers_) through RearmTimerFd_Locked().
+  std::vector<TimerData> timers_;
+
   // --- End lock-protected members ---
+
+ protected:
+  // Protected for testing.
+  explicit Watchdog(uint32_t polling_interval_ms);
+
+  bool disable_kill_failsafe_for_testing_ = false;
 };
 
 }  // namespace base
diff --git a/include/perfetto/protozero/proto_decoder.h b/include/perfetto/protozero/proto_decoder.h
index d23af33..ce5bb56 100644
--- a/include/perfetto/protozero/proto_decoder.h
+++ b/include/perfetto/protozero/proto_decoder.h
@@ -277,12 +277,21 @@
 // [ field 0 (invalid) ] [ fields 1 .. N ] [ repeated fields ]
 //                                        ^                  ^
 //                                        num_fields_        size_
+// Note that if a message has high field numbers, upon creation |size_| can be
+// < |num_fields_| (until a heap expansion is hit while inserting).
 class PERFETTO_EXPORT TypedProtoDecoderBase : public ProtoDecoder {
  public:
   // If the field |id| is known at compile time, prefer the templated
   // specialization at<kFieldNumber>().
   const Field& Get(uint32_t id) const {
-    return PERFETTO_LIKELY(id < num_fields_) ? fields_[id] : fields_[0];
+    if (PERFETTO_LIKELY(id < num_fields_ && id < size_))
+      return fields_[id];
+    // If id >= num_fields_, the field id is invalid (was not known in the
+    // .proto) and we return the 0th field, which is always !valid().
+    // If id >= size_ and < num_fields_, the id is valid but the field has not
+    // been seen while decoding (hence the stack storage has not been expanded)
+    // so we return the 0th invalid field.
+    return fields_[0];
   }
 
   // Returns an object that allows to iterate over all instances of a repeated
@@ -290,8 +299,27 @@
   //   for (auto it = decoder.GetRepeated<int32_t>(N); it; ++it) { ... }
   template <typename T>
   RepeatedFieldIterator<T> GetRepeated(uint32_t field_id) const {
-    return RepeatedFieldIterator<T>(field_id, &fields_[num_fields_],
-                                    &fields_[size_], &fields_[field_id]);
+    const Field* repeated_begin;
+    // The storage for repeated fields starts after the slot for the highest
+    // field id (refer to the diagram in the class-level comment). However, if
+    // a message has more than INITIAL_STACK_CAPACITY fields there will be no
+    // slots available for the repeated fields (if ExpandHeapStorage() was not
+    // called). Imagine a message that has highest field id = 102 and that is
+    // still using the stack:
+    // [ F0 ] [ F1 ] ... [ F100 ] [ F101 ] [ F102 ] [ repeated fields ]
+    //                                            ^ num_fields_
+    //                          ^ size_ (== capacity_)
+    if (PERFETTO_LIKELY(num_fields_ < size_)) {
+      repeated_begin = &fields_[num_fields_];
+    } else {
+      // This is the case of not having any storage space for repeated fields.
+      // This makes it so begin == end, so the iterator will just skip to |last|.
+      repeated_begin = &fields_[size_];
+    }
+    const Field* repeated_end = &fields_[size_];
+    const Field* last = &Get(field_id);
+    return RepeatedFieldIterator<T>(field_id, repeated_begin, repeated_end,
+                                    last);
   }
 
   // Returns an objects that allows to iterate over all entries of a packed
@@ -315,10 +343,9 @@
     if (field.valid()) {
       return PackedRepeatedFieldIterator<wire_type, cpp_type>(
           field.data(), field.size(), parse_error_location);
-    } else {
-      return PackedRepeatedFieldIterator<wire_type, cpp_type>(
-          nullptr, 0, parse_error_location);
     }
+    return PackedRepeatedFieldIterator<wire_type, cpp_type>(
+        nullptr, 0, parse_error_location);
   }
 
  protected:
@@ -330,7 +357,7 @@
       : ProtoDecoder(buffer, length),
         fields_(storage),
         num_fields_(num_fields),
-        size_(num_fields),
+        size_(std::min(num_fields, capacity)),
         capacity_(capacity) {
     // The reason why Field needs to be trivially de/constructible is to avoid
     // implicit initializers on all the ~1000 entries. We need it to initialize
@@ -340,7 +367,7 @@
                       std::is_trivially_destructible<Field>::value &&
                       std::is_trivial<Field>::value,
                   "Field must be a trivial aggregate type");
-    memset(fields_, 0, sizeof(Field) * num_fields_);
+    memset(fields_, 0, sizeof(Field) * capacity_);
   }
 
   void ParseAllFields();
@@ -358,14 +385,23 @@
   // case of a large number of repeated fields.
   Field* fields_;
 
-  // Number of fields without accounting repeated storage. This is equal to
-  // MAX_FIELD_ID + 1 (to account for the invalid 0th field).
-  // This value is always <= size_ (and hence <= capacity);
+  // Number of known fields, without accounting repeated storage. This is equal
+  // to MAX_FIELD_ID + 1 (to account for the invalid 0th field). It never
+  // changes after construction.
+  // This is unrelated to |size_| and |capacity_|. If the highest field id of
+  // a proto message is 131, |num_fields_| will be = 132 but, on initialization,
+  // |size_| = |capacity_| = 100 (INITIAL_STACK_CAPACITY).
+  // One cannot generally assume that |fields_| has enough storage to
+  // dereference every field. That is only true:
+  // - For field ids < INITIAL_STACK_CAPACITY.
+  // - After the first call to ExpandHeapStorage().
   uint32_t num_fields_;
 
-  // Number of active |fields_| entries. This is initially equal to the highest
-  // number of fields for the message (num_fields_ == MAX_FIELD_ID + 1) and can
-  // grow up to |capacity_| in the case of repeated fields.
+  // Number of active |fields_| entries. This is initially equal to
+  // min(num_fields_, INITIAL_STACK_CAPACITY) and after ExpandHeapStorage() it
+  // becomes == |num_fields_|. If the message has non-packed repeated fields, it
+  // can grow further, up to |capacity_|.
+  // |size_| is always <= |capacity_|. But |num_fields_| can be > |size_|.
   uint32_t size_;
 
   // Initially equal to kFieldsCapacity of the TypedProtoDecoder
@@ -375,6 +411,11 @@
   uint32_t capacity_;
 };
 
+// This constant is a tradeoff between having a larger stack frame and being
+// able to decode field IDs up to N (or N - num_fields repeated fields) without
+// falling back on the heap.
+#define PROTOZERO_DECODER_INITIAL_STACK_CAPACITY 100
+
 // Template class instantiated by the auto-generated decoder classes declared in
 // xxx.pbzero.h files.
 template <int MAX_FIELD_ID, bool HAS_NONPACKED_REPEATED_FIELDS>
@@ -383,17 +424,25 @@
   TypedProtoDecoder(const uint8_t* buffer, size_t length)
       : TypedProtoDecoderBase(on_stack_storage_,
                               /*num_fields=*/MAX_FIELD_ID + 1,
-                              kCapacity,
+                              PROTOZERO_DECODER_INITIAL_STACK_CAPACITY,
                               buffer,
                               length) {
-    static_assert(MAX_FIELD_ID <= kMaxDecoderFieldId, "Field ordinal too high");
     TypedProtoDecoderBase::ParseAllFields();
   }
 
   template <uint32_t FIELD_ID>
   const Field& at() const {
     static_assert(FIELD_ID <= MAX_FIELD_ID, "FIELD_ID > MAX_FIELD_ID");
-    return fields_[FIELD_ID];
+    // If the field id is < the on-stack capacity, it's safe to always
+    // dereference |fields_|, whether it's still using the stack or it fell
+    // back on the heap. Because both terms of the if () are known at compile
+    // time, the compiler elides the branch for ids < INITIAL_STACK_CAPACITY.
+    if (FIELD_ID < PROTOZERO_DECODER_INITIAL_STACK_CAPACITY) {
+      return fields_[FIELD_ID];
+    } else {
+      // Otherwise use the slowpath Get() which will do a runtime check.
+      return Get(FIELD_ID);
+    }
   }
 
   TypedProtoDecoder(TypedProtoDecoder&& other) noexcept
@@ -408,20 +457,7 @@
   }
 
  private:
-  // In the case of non-repeated fields, this constant defines the highest field
-  // id we are able to decode. This is to limit the on-stack storage.
-  // In the case of repeated fields, this constant defines the max number of
-  // repeated fields that we'll be able to store before falling back on the
-  // heap. Keep this value in sync with the one in protozero_generator.cc.
-  static constexpr size_t kMaxDecoderFieldId = 999;
-
-  // If we the message has no repeated fields we need at most N Field entries
-  // in the on-stack storage, where N is the highest field id.
-  // Otherwise we need some room to store repeated fields.
-  static constexpr size_t kCapacity =
-      1 + (HAS_NONPACKED_REPEATED_FIELDS ? kMaxDecoderFieldId : MAX_FIELD_ID);
-
-  Field on_stack_storage_[kCapacity];
+  Field on_stack_storage_[PROTOZERO_DECODER_INITIAL_STACK_CAPACITY];
 };
 
 }  // namespace protozero
diff --git a/include/perfetto/trace_processor/basic_types.h b/include/perfetto/trace_processor/basic_types.h
index 5b459d0..d40714e 100644
--- a/include/perfetto/trace_processor/basic_types.h
+++ b/include/perfetto/trace_processor/basic_types.h
@@ -31,10 +31,6 @@
 namespace perfetto {
 namespace trace_processor {
 
-// Various places in trace processor assume a max number of CPUs to keep code
-// simpler (e.g. use arrays instead of vectors).
-constexpr size_t kMaxCpus = 128;
-
 // All metrics protos are in this directory. When loading metric extensions, the
 // protos are mounted onto a virtual path inside this directory.
 constexpr char kMetricProtoRoot[] = "protos/perfetto/metrics/";
diff --git a/include/perfetto/tracing/BUILD.gn b/include/perfetto/tracing/BUILD.gn
index b7a91d0..f73dafd 100644
--- a/include/perfetto/tracing/BUILD.gn
+++ b/include/perfetto/tracing/BUILD.gn
@@ -61,6 +61,7 @@
     "tracing_policy.h",
     "track.h",
     "track_event.h",
+    "track_event_args.h",
     "track_event_category_registry.h",
     "track_event_interned_data_index.h",
     "track_event_legacy.h",
diff --git a/include/perfetto/tracing/data_source.h b/include/perfetto/tracing/data_source.h
index 6b2a81a..ddad2be 100644
--- a/include/perfetto/tracing/data_source.h
+++ b/include/perfetto/tracing/data_source.h
@@ -387,6 +387,7 @@
         instance_state = static_state_.TryGetCached(instances, i);
         if (!instance_state || !instance_state->trace_lambda_enabled)
           continue;
+        tls_inst.muxer_id_for_testing = instance_state->muxer_id_for_testing;
         tls_inst.backend_id = instance_state->backend_id;
         tls_inst.backend_connection_id = instance_state->backend_connection_id;
         tls_inst.buffer_id = instance_state->buffer_id;
diff --git a/include/perfetto/tracing/event_context.h b/include/perfetto/tracing/event_context.h
index e0c1a16..9628a56 100644
--- a/include/perfetto/tracing/event_context.h
+++ b/include/perfetto/tracing/event_context.h
@@ -23,6 +23,12 @@
 #include "protos/perfetto/trace/trace_packet.pbzero.h"
 
 namespace perfetto {
+namespace protos {
+namespace pbzero {
+class DebugAnnotation;
+}  // namespace pbzero
+}  // namespace protos
+
 namespace internal {
 class TrackEventInternal;
 }
@@ -31,9 +37,10 @@
 //
 //   TRACE_EVENT_BEGIN("category", "Title",
 //                     [](perfetto::EventContext ctx) {
-//                       auto* dbg = ctx.event()->add_debug_annotations();
-//                       dbg->set_name("name");
-//                       dbg->set_int_value(1234);
+//                       auto* log = ctx.event()->set_log_message();
+//                       log->set_body_iid(1234);
+//
+//                       ctx.AddDebugAnnotation("name", 1234);
 //                     });
 //
 class PERFETTO_EXPORT EventContext {
@@ -75,6 +82,18 @@
     return TracedProto<MessageType>(message, this);
   }
 
+  // Add a new `debug_annotation` proto message and populate it from |value|
+  // using perfetto::TracedValue API. Users should generally prefer passing
+  // values directly to TRACE_EVENT (i.e. TRACE_EVENT(..., "arg", value, ...);)
+  // but in rare cases (e.g. when an argument should be written conditionally)
+  // EventContext::AddDebugAnnotation provides an explicit equivalent.
+  template <typename T>
+  void AddDebugAnnotation(const char* name, T&& value) {
+    auto annotation = AddDebugAnnotation(name);
+    WriteIntoTracedValue(internal::CreateTracedValueFromProto(annotation),
+                         std::forward<T>(value));
+  }
+
  private:
   template <typename, size_t, typename, typename>
   friend class TrackEventInternedDataIndex;
@@ -86,6 +105,8 @@
   EventContext(TracePacketHandle, internal::TrackEventIncrementalState*);
   EventContext(const EventContext&) = delete;
 
+  protos::pbzero::DebugAnnotation* AddDebugAnnotation(const char* name);
+
   TracePacketHandle trace_packet_;
   protos::pbzero::TrackEvent* event_;
   internal::TrackEventIncrementalState* incremental_state_;
diff --git a/include/perfetto/tracing/internal/data_source_internal.h b/include/perfetto/tracing/internal/data_source_internal.h
index cb052c6..f628f41 100644
--- a/include/perfetto/tracing/internal/data_source_internal.h
+++ b/include/perfetto/tracing/internal/data_source_internal.h
@@ -61,6 +61,10 @@
   // doing extra pointr arithmetic.
   bool trace_lambda_enabled = false;
 
+  // The overall TracingMuxerImpl instance id, which gets incremented by
+  // ResetForTesting.
+  uint32_t muxer_id_for_testing = 0;
+
   // The central buffer id that all TraceWriter(s) created by this data source
   // must target.
   BufferId buffer_id = 0;
@@ -150,6 +154,7 @@
   void Reset() {
     trace_writer.reset();
     incremental_state.reset();
+    muxer_id_for_testing = 0;
     backend_id = 0;
     backend_connection_id = 0;
     buffer_id = 0;
@@ -161,6 +166,7 @@
   std::unique_ptr<TraceWriterBase> trace_writer;
   IncrementalStatePointer incremental_state = {nullptr, [](void*) {}};
   uint32_t incremental_state_generation;
+  uint32_t muxer_id_for_testing;
   TracingBackendId backend_id;
   uint32_t backend_connection_id;
   BufferId buffer_id;
diff --git a/include/perfetto/tracing/internal/track_event_internal.h b/include/perfetto/tracing/internal/track_event_internal.h
index afd874c..037dc09 100644
--- a/include/perfetto/tracing/internal/track_event_internal.h
+++ b/include/perfetto/tracing/internal/track_event_internal.h
@@ -150,6 +150,8 @@
 
   static void ResetIncrementalState(TraceWriterBase*, TraceTimestamp);
 
+  // TODO(altimin): Remove this method once Chrome uses
+  // EventContext::AddDebugAnnotation directly.
   template <typename T>
   static void AddDebugAnnotation(perfetto::EventContext* event_ctx,
                                  const char* name,
diff --git a/include/perfetto/tracing/internal/write_track_event_args.h b/include/perfetto/tracing/internal/write_track_event_args.h
index ab215ae..f6e3b7f 100644
--- a/include/perfetto/tracing/internal/write_track_event_args.h
+++ b/include/perfetto/tracing/internal/write_track_event_args.h
@@ -20,6 +20,7 @@
 #include "perfetto/base/compiler.h"
 #include "perfetto/tracing/event_context.h"
 #include "perfetto/tracing/traced_proto.h"
+#include "perfetto/tracing/track_event_args.h"
 
 namespace perfetto {
 namespace internal {
@@ -54,11 +55,28 @@
   return IsValidTraceLambdaImpl<T>(nullptr);
 }
 
+template <typename T>
+static constexpr bool IsValidTraceLambdaTakingReferenceImpl(
+    typename std::enable_if<static_cast<bool>(
+        sizeof(std::declval<T>()(std::declval<EventContext&>()), 0))>::type* =
+        nullptr) {
+  return true;
+}
+
+template <typename T>
+static constexpr bool IsValidTraceLambdaTakingReferenceImpl(...) {
+  return false;
+}
+
+template <typename T>
+static constexpr bool IsValidTraceLambdaTakingReference() {
+  return IsValidTraceLambdaTakingReferenceImpl<T>(nullptr);
+}
+
 }  // namespace
 
-// Write a lambda.
-// TODO(altimin): At the moment lambda takes EventContext, which is
-// non-copyable, so only one lambda is supported and it has to be the last
+// Write an old-style lambda taking an EventContext by value. Because it
+// consumes the EventContext via std::move, it can only be the last
 // argument.
 template <typename ArgumentFunction,
           typename ArgFunctionCheck = typename std::enable_if<
@@ -76,6 +94,28 @@
                                                 ArgValue&& arg_value,
                                                 Args&&... args);
 
+template <typename FieldMetadataType, typename ArgValue, typename... Args>
+PERFETTO_ALWAYS_INLINE void WriteTrackEventArgs(
+    EventContext event_ctx,
+    protozero::proto_utils::internal::FieldMetadataHelper<FieldMetadataType>
+        field_name,
+    ArgValue&& arg_value,
+    Args&&... args);
+
+template <typename ArgumentFunction,
+          typename... Args,
+          typename ArgFunctionCheck = typename std::enable_if<
+              IsValidTraceLambdaTakingReference<ArgumentFunction>()>::type>
+PERFETTO_ALWAYS_INLINE void WriteTrackEventArgs(EventContext event_ctx,
+                                                ArgumentFunction arg_function,
+                                                Args&&... args) {
+  // |arg_function| takes the EventContext by reference, so std::move isn't
+  // needed and |event_ctx| can still be forwarded to the remaining arguments.
+  arg_function(event_ctx);
+
+  WriteTrackEventArgs(std::move(event_ctx), std::forward<Args>(args)...);
+}
+
 // Write one typed message and recursively write the rest of the arguments.
 template <typename FieldMetadataType, typename ArgValue, typename... Args>
 PERFETTO_ALWAYS_INLINE void WriteTrackEventArgs(
@@ -105,8 +145,7 @@
                                                 const char* arg_name,
                                                 ArgValue&& arg_value,
                                                 Args&&... args) {
-  TrackEventInternal::AddDebugAnnotation(&event_ctx, arg_name,
-                                         std::forward<ArgValue>(arg_value));
+  event_ctx.AddDebugAnnotation(arg_name, std::forward<ArgValue>(arg_value));
   WriteTrackEventArgs(std::move(event_ctx), std::forward<Args>(args)...);
 }
 
diff --git a/include/perfetto/tracing/tracing.h b/include/perfetto/tracing/tracing.h
index 8a688f2..9aae9e8 100644
--- a/include/perfetto/tracing/tracing.h
+++ b/include/perfetto/tracing/tracing.h
@@ -177,6 +177,11 @@
   static std::unique_ptr<TracingSession> NewTrace(
       BackendType = kUnspecifiedBackend);
 
+  // Uninitialize Perfetto. Only exposed for testing scenarios where it can be
+  // guaranteed that no tracing sessions or other operations are happening when
+  // this call is made.
+  static void ResetForTesting();
+
  private:
   static void InitializeInternal(const TracingInitArgs&);
 
diff --git a/include/perfetto/tracing/track.h b/include/perfetto/tracing/track.h
index 8c22aad..0c3e968 100644
--- a/include/perfetto/tracing/track.h
+++ b/include/perfetto/tracing/track.h
@@ -36,6 +36,8 @@
 namespace internal {
 class TrackRegistry;
 }
+class Flow;
+class TerminatingFlow;
 
 // Track events are recorded on a timeline track, which maintains the relative
 // time ordering of all events on that track. Each thread has its own default
@@ -133,6 +135,8 @@
 
  private:
   friend class internal::TrackRegistry;
+  friend class Flow;
+  friend class TerminatingFlow;
   static uint64_t process_uuid;
 };
 
@@ -298,6 +302,7 @@
   ~TrackRegistry();
 
   static void InitializeInstance();
+  static void ResetForTesting();
   static TrackRegistry* Get() { return instance_; }
 
   void EraseTrack(Track);
diff --git a/include/perfetto/tracing/track_event_args.h b/include/perfetto/tracing/track_event_args.h
new file mode 100644
index 0000000..21bda02
--- /dev/null
+++ b/include/perfetto/tracing/track_event_args.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef INCLUDE_PERFETTO_TRACING_TRACK_EVENT_ARGS_H_
+#define INCLUDE_PERFETTO_TRACING_TRACK_EVENT_ARGS_H_
+
+#include "perfetto/tracing/event_context.h"
+#include "perfetto/tracing/track.h"
+
+#include <functional>
+
+namespace perfetto {
+
+// A helper to add |flow_id| as a non-terminating flow id to TRACE_EVENT
+// inline: TRACE_EVENT(..., perfetto::Flow::ProcessScoped(42));
+class Flow {
+ public:
+  // |flow_id| which is local within a given process (e.g. atomic counter xor'ed
+  // with feature-specific value). This value is xor'ed with Perfetto's internal
+  // process track id to attempt to ensure that it's globally-unique.
+  static PERFETTO_ALWAYS_INLINE inline std::function<void(EventContext&)>
+  ProcessScoped(uint64_t flow_id) {
+    return Global(flow_id ^ Track::process_uuid);
+  }
+
+  // Same as above, but construct an id from a pointer.
+  // NOTE: After the object is destroyed, the value of |ptr| can be reused for a
+  // different object (in particular if the object is allocated on a stack).
+  // Please ensure that you emit a trace event with the flow id of
+  // perfetto::TerminatingFlow::FromPointer(this) from the destructor of the
+  // object to avoid accidental conflicts.
+  static PERFETTO_ALWAYS_INLINE inline std::function<void(EventContext&)>
+  FromPointer(void* ptr) {
+    return ProcessScoped(reinterpret_cast<uintptr_t>(ptr));
+  }
+
+  // Add the |flow_id|. The caller is responsible for ensuring that it's
+  // globally-unique (e.g. by generating a random value). This should be used
+  // only for flow events which cross the process boundary (e.g. IPCs).
+  static PERFETTO_ALWAYS_INLINE inline std::function<void(EventContext&)>
+  Global(uint64_t flow_id) {
+    return [flow_id](perfetto::EventContext& ctx) {
+      ctx.event()->add_flow_ids(flow_id);
+    };
+  }
+
+  // TODO(altimin): Remove once the single usage in Chromium is converted.
+  explicit constexpr Flow(uint64_t flow_id) : flow_id_(flow_id) {}
+
+  void operator()(EventContext& ctx) { ctx.event()->add_flow_ids(flow_id_); }
+
+ private:
+  uint64_t flow_id_;
+};
+
+// A helper to add a given |flow_id| as a terminating flow to TRACE_EVENT
+// inline.
+class TerminatingFlow {
+ public:
+  // See `Flow::ProcessScoped(uint64_t)`.
+  static PERFETTO_ALWAYS_INLINE inline std::function<void(EventContext&)>
+  ProcessScoped(uint64_t flow_id) {
+    return Global(flow_id ^ Track::process_uuid);
+  }
+
+  // See `Flow::FromPointer(void*)`.
+  static PERFETTO_ALWAYS_INLINE inline std::function<void(EventContext&)>
+  FromPointer(void* ptr) {
+    return ProcessScoped(reinterpret_cast<uintptr_t>(ptr));
+  }
+
+  // See `Flow::Global(uint64_t)`.
+  static PERFETTO_ALWAYS_INLINE inline std::function<void(EventContext&)>
+  Global(uint64_t flow_id) {
+    return [flow_id](perfetto::EventContext& ctx) {
+      ctx.event()->add_terminating_flow_ids(flow_id);
+    };
+  }
+};
+
+}  // namespace perfetto
+
+#endif  // INCLUDE_PERFETTO_TRACING_TRACK_EVENT_ARGS_H_
diff --git a/perfetto.rc b/perfetto.rc
index 55ff78f..a279736 100644
--- a/perfetto.rc
+++ b/perfetto.rc
@@ -90,3 +90,34 @@
 
 on property:persist.mm_events.enabled=false
     stop mm_events
+
+#############################################################################
+#  perfetto_trace_on_boot - Starts a perfetto trace on boot
+#############################################################################
+#
+# There are two separate actions (a trigger action and a start action) to make
+# sure that perfetto_trace_on_boot is started only once on boot (otherwise,
+# whenever persist.debug.perfetto.boottrace=1 is set, perfetto_trace_on_boot
+# would start immediately).
+#
+# persist.debug.perfetto.boottrace=1 can be manually set after boot (to record
+# a trace on the next reboot) and we don't want to immediately start a trace
+# when setting the debug property. So we turn "ro.persistent_properties.ready"
+# into a trigger, and then check whether we should start tracing when the
+# trigger fires.
+on perfetto_maybe_trace_on_boot && property:persist.debug.perfetto.boottrace=1 && property:persist.traced.enable=1
+    setprop persist.debug.perfetto.boottrace ""
+    rm /data/misc/perfetto-traces/boottrace.perfetto-trace
+    # Set by traced after listen()ing on the consumer socket. Without this,
+    # perfetto could try to connect to traced before traced is ready to listen.
+    wait_for_prop sys.trace.traced_started 1
+    start perfetto_trace_on_boot
+
+on property:ro.persistent_properties.ready=true
+    trigger perfetto_maybe_trace_on_boot
+
+service perfetto_trace_on_boot /system/bin/perfetto -c /data/misc/perfetto-configs/boottrace.pbtxt --txt -o /data/misc/perfetto-traces/boottrace.perfetto-trace
+    disabled
+    oneshot
+    user shell
+    group nobody
diff --git a/protos/perfetto/metrics/android/BUILD.gn b/protos/perfetto/metrics/android/BUILD.gn
index aa97665..9b0c5c0 100644
--- a/protos/perfetto/metrics/android/BUILD.gn
+++ b/protos/perfetto/metrics/android/BUILD.gn
@@ -21,6 +21,7 @@
   ]
   sources = [
     "batt_metric.proto",
+    "camera_metric.proto",
     "cpu_metric.proto",
     "display_metrics.proto",
     "dma_heap_metric.proto",
@@ -42,6 +43,7 @@
     "powrails_metric.proto",
     "process_metadata.proto",
     "profiler_smaps.proto",
+    "simpleperf.proto",
     "startup_metric.proto",
     "surfaceflinger.proto",
     "sysui_cuj_metrics.proto",
diff --git a/protos/perfetto/metrics/android/camera_metric.proto b/protos/perfetto/metrics/android/camera_metric.proto
new file mode 100644
index 0000000..932cd7e
--- /dev/null
+++ b/protos/perfetto/metrics/android/camera_metric.proto
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto2";
+
+package perfetto.protos;
+
+message AndroidCameraMetric {
+  message Counter {
+    optional double min = 1;
+    optional double max = 2;
+    optional double avg = 3;
+  }
+
+  // Counter for the sum of DMA and RSS across GCA, cameraserver
+  // and HAL. This provides a single number for the memory
+  // pressure using the camera is putting on the rest of the
+  // system.
+  //
+  // Note: this number assumes that all DMA pressure is coming
+  // from the camera as this is usually a pretty good
+  // approximation. Being more accurate here would increase the
+  // complexity of the metric significantly.
+  //
+  // Note: if there are multiple GCA/cameraserver/HAL processes
+  // in the trace, this metric will simply take the latest
+  // one in the trace and ignore the others.
+  optional Counter gc_rss_and_dma = 1;
+}
diff --git a/protos/perfetto/metrics/android/simpleperf.proto b/protos/perfetto/metrics/android/simpleperf.proto
new file mode 100644
index 0000000..7fcebe4
--- /dev/null
+++ b/protos/perfetto/metrics/android/simpleperf.proto
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+syntax = "proto2";
+
+package perfetto.protos;
+
+// Metric that stores information related to atrace events generated by
+// simpleperf tool
+message AndroidSimpleperfMetric {
+  optional double urgent_ratio = 1;
+}
diff --git a/protos/perfetto/metrics/metrics.proto b/protos/perfetto/metrics/metrics.proto
index 5f2c801..36b131b 100644
--- a/protos/perfetto/metrics/metrics.proto
+++ b/protos/perfetto/metrics/metrics.proto
@@ -20,6 +20,7 @@
 
 import "protos/perfetto/metrics/android/batt_metric.proto";
 import "protos/perfetto/metrics/android/cpu_metric.proto";
+import "protos/perfetto/metrics/android/camera_metric.proto";
 import "protos/perfetto/metrics/android/display_metrics.proto";
 import "protos/perfetto/metrics/android/dma_heap_metric.proto";
 import "protos/perfetto/metrics/android/fastrpc_metric.proto";
@@ -39,6 +40,7 @@
 import "protos/perfetto/metrics/android/package_list.proto";
 import "protos/perfetto/metrics/android/powrails_metric.proto";
 import "protos/perfetto/metrics/android/profiler_smaps.proto";
+import "protos/perfetto/metrics/android/simpleperf.proto";
 import "protos/perfetto/metrics/android/startup_metric.proto";
 import "protos/perfetto/metrics/android/surfaceflinger.proto";
 import "protos/perfetto/metrics/android/sysui_cuj_metrics.proto";
@@ -91,7 +93,7 @@
 
 // Root message for all Perfetto-based metrics.
 //
-// Next id: 37
+// Next id: 39
 message TraceMetrics {
   reserved 4, 10, 13, 14, 16, 19;
 
@@ -184,6 +186,12 @@
   // Multiuser - metrics for switching users.
   optional AndroidMultiuserMetric android_multiuser = 36;
 
+  // Metrics related to simpleperf tool
+  optional AndroidSimpleperfMetric android_simpleperf = 37;
+
+  // Metrics for the Camera team.
+  optional AndroidCameraMetric android_camera = 38;
+
   // Demo extensions.
   extensions 450 to 499;
 
diff --git a/protos/perfetto/metrics/perfetto_merged_metrics.proto b/protos/perfetto/metrics/perfetto_merged_metrics.proto
index 15a119f..ff89c7b 100644
--- a/protos/perfetto/metrics/perfetto_merged_metrics.proto
+++ b/protos/perfetto/metrics/perfetto_merged_metrics.proto
@@ -56,6 +56,33 @@
 
 // End of protos/perfetto/metrics/android/batt_metric.proto
 
+// Begin of protos/perfetto/metrics/android/camera_metric.proto
+
+message AndroidCameraMetric {
+  message Counter {
+    optional double min = 1;
+    optional double max = 2;
+    optional double avg = 3;
+  }
+
+  // Counter for the sum of DMA and RSS across GCA, cameraserver
+  // and HAL. This provides a single number for the memory
+  // pressure using the camera is putting on the rest of the
+  // system.
+  //
+  // Note: this number assumes that all DMA pressure is coming
+  // from the camera as this is usually a pretty good
+  // approximation. Being more accurate here would increase the
+  // complexity of the metric significantly.
+  //
+  // Note: if there are multiple GCA/cameraserver/HAL processes
+  // in the trace, this metric will simply take the latest
+  // one in the trace and ignore the others.
+  optional Counter gc_rss_and_dma = 1;
+}
+
+// End of protos/perfetto/metrics/android/camera_metric.proto
+
 // Begin of protos/perfetto/metrics/android/cpu_metric.proto
 
 message AndroidCpuMetric {
@@ -791,6 +818,16 @@
 
 // End of protos/perfetto/metrics/android/profiler_smaps.proto
 
+// Begin of protos/perfetto/metrics/android/simpleperf.proto
+
+// Metric that stores information related to atrace events generated by
+// simpleperf tool
+message AndroidSimpleperfMetric {
+  optional double urgent_ratio = 1;
+}
+
+// End of protos/perfetto/metrics/android/simpleperf.proto
+
 // Begin of protos/perfetto/metrics/android/startup_metric.proto
 
 // Android app startup metrics.
@@ -1197,7 +1234,7 @@
 
 // Root message for all Perfetto-based metrics.
 //
-// Next id: 37
+// Next id: 39
 message TraceMetrics {
   reserved 4, 10, 13, 14, 16, 19;
 
@@ -1290,6 +1327,12 @@
   // Multiuser - metrics for switching users.
   optional AndroidMultiuserMetric android_multiuser = 36;
 
+  // Metrics related to simpleperf tool
+  optional AndroidSimpleperfMetric android_simpleperf = 37;
+
+  // Metrics for the Camera team.
+  optional AndroidCameraMetric android_camera = 38;
+
   // Demo extensions.
   extensions 450 to 499;
 
diff --git a/protos/third_party/chromium/chrome_track_event.proto b/protos/third_party/chromium/chrome_track_event.proto
index 1d832ff..850765b 100644
--- a/protos/third_party/chromium/chrome_track_event.proto
+++ b/protos/third_party/chromium/chrome_track_event.proto
@@ -307,9 +307,90 @@
   optional bool is_render_frame_proxy_live = 5;
 }
 
+message AndroidView {
+  optional int32 id = 1;
+  optional int32 parent_id = 2;
+  optional bool is_shown = 3;
+  optional bool is_dirty = 4;
+  optional string class_name = 5;
+  optional string resource_name = 6;
+}
+
+message AndroidActivity {
+  optional string name = 1;
+  repeated AndroidView view = 2;
+}
+
+message AndroidViewDump {
+  repeated AndroidActivity activity = 1;
+}
+
+message ParkableStringCompressInBackground {
+  // The size of the string that is being compressed, in bytes.
+  optional int32 size_bytes = 1;
+}
+
+message ParkableStringUnpark {
+  // The size of the string that is being unparked, in bytes.
+  optional int32 size_bytes = 1;
+
+  // The elapsed time since the string was written to disk (in seconds), or -1
+  // if it hadn't yet been written to disk.
+  optional int32 time_since_last_disk_write_sec = 2;
+}
+
+message ChromeSamplingProfilerSampleCollected {
+  optional int32 frame_count = 1;
+
+  enum WriteStatus {
+    WRITE_STATUS_NONE = 0;
+    WRITE_STATUS_BUFFERING_SAMPLE = 1;
+    WRITE_STATUS_WRITING_BUFFERED = 2;
+    WRITE_STATUS_WRITING_TO_TRACE = 3;
+  }
+  optional WriteStatus write_status = 2;
+}
+
+// Reports the latency caused by each breakdown in the
+// SendBeginMainFrameToCommit stage of the PipelineReporter.
+message SendBeginMainFrameToCommitBreakdown {
+  // Handle input events breakdown
+  optional uint64 handle_input_events_us = 1;
+
+  // Animate breakdown
+  optional uint64 animate_us = 2;
+
+  // Style update breakdown
+  optional uint64 style_update_us = 3;
+
+  // Layout update breakdown
+  optional uint64 layout_update_us = 4;
+
+  // Prepaint breakdown
+  optional uint64 prepaint_us = 5;
+
+  // Compositing inputs breakdown
+  optional uint64 compositing_inputs_us = 6;
+
+  // Compositing assignments breakdown
+  optional uint64 compositing_assignments_us = 7;
+
+  // Paint breakdown
+  optional uint64 paint_us = 8;
+
+  // Composite commit breakdown
+  optional uint64 composite_commit_us = 9;
+
+  // Update layers breakdown
+  optional uint64 update_layers_us = 10;
+
+  // BeginMainSentToStarted breakdown
+  optional uint64 begin_main_sent_to_started_us = 11;
+}
+
 message ChromeTrackEvent {
   // Extension range for Chrome: 1000-1999
-  // Next ID: 1022
+  // Next ID: 1027
   extend TrackEvent {
     optional ChromeAppState chrome_app_state = 1000;
 
@@ -356,5 +437,16 @@
     optional RenderViewHost render_view_host = 1020;
 
     optional RenderFrameProxyHost render_frame_proxy_host = 1021;
+
+    optional AndroidViewDump android_view_dump = 1022;
+
+    optional ParkableStringCompressInBackground
+        parkable_string_compress_in_background = 1023;
+    optional ParkableStringUnpark parkable_string_unpark = 1024;
+
+    optional ChromeSamplingProfilerSampleCollected
+        chrome_sampling_profiler_sample_completed = 1025;
+    optional SendBeginMainFrameToCommitBreakdown
+        send_begin_mainframe_to_commit_breakdown = 1026;
   }
 }
diff --git a/src/android_internal/atrace_hal.cc b/src/android_internal/atrace_hal.cc
index 486a44a..4a19d83 100644
--- a/src/android_internal/atrace_hal.cc
+++ b/src/android_internal/atrace_hal.cc
@@ -19,6 +19,8 @@
 #include <android/hardware/atrace/1.0/IAtraceDevice.h>
 #include <iostream>
 
+#include <string.h>
+
 namespace perfetto {
 namespace android_internal {
 
@@ -53,11 +55,9 @@
     for (int i = 0; i < *size_of_arr; ++i) {
       const TracingCategory& cat = r[i];
       TracingVendorCategory& result = categories[i];
-      strncpy(result.name, cat.name.c_str(), sizeof(result.name));
-      strncpy(result.description, cat.description.c_str(),
+      strlcpy(result.name, cat.name.c_str(), sizeof(result.name));
+      strlcpy(result.description, cat.description.c_str(),
               sizeof(result.description));
-      result.name[sizeof(result.name) - 1] = '\0';
-      result.description[sizeof(result.description) - 1] = '\0';
     }
   };
 
diff --git a/src/android_internal/power_stats.cc b/src/android_internal/power_stats.cc
index 34da342..f7badf6 100644
--- a/src/android_internal/power_stats.cc
+++ b/src/android_internal/power_stats.cc
@@ -164,12 +164,10 @@
         descriptor.index = rail_info.index;
         descriptor.sampling_rate = rail_info.samplingRate;
 
-        strncpy(descriptor.rail_name, rail_info.railName.c_str(),
+        strlcpy(descriptor.rail_name, rail_info.railName.c_str(),
                 sizeof(descriptor.rail_name));
-        strncpy(descriptor.subsys_name, rail_info.subsysName.c_str(),
+        strlcpy(descriptor.subsys_name, rail_info.subsysName.c_str(),
                 sizeof(descriptor.subsys_name));
-        descriptor.rail_name[sizeof(descriptor.rail_name) - 1] = '\0';
-        descriptor.subsys_name[sizeof(descriptor.subsys_name) - 1] = '\0';
       }
     }
   };
@@ -263,10 +261,8 @@
     auto& cur = descriptor[(*size_of_arr)++];
     cur.index = result.id;
     cur.sampling_rate = 0;
-    strncpy(cur.rail_name, result.name.c_str(), sizeof(cur.rail_name));
-    strncpy(cur.subsys_name, result.subsystem.c_str(), sizeof(cur.subsys_name));
-    cur.rail_name[sizeof(cur.rail_name) - 1] = '\0';
-    cur.subsys_name[sizeof(cur.subsys_name) - 1] = '\0';
+    strlcpy(cur.rail_name, result.name.c_str(), sizeof(cur.rail_name));
+    strlcpy(cur.subsys_name, result.subsystem.c_str(), sizeof(cur.subsys_name));
   }
   return true;
 }
@@ -333,10 +329,8 @@
     auto& cur = consumers[(*size_of_arr)++];
     cur.energy_consumer_id = result.id;
     cur.ordinal = result.ordinal;
-    strncpy(cur.type, aidl::toString(result.type).c_str(), sizeof(cur.type));
-    cur.type[sizeof(cur.type) - 1] = '\0';
-    strncpy(cur.name, result.name.c_str(), sizeof(cur.name));
-    cur.name[sizeof(cur.name) - 1] = '\0';
+    strlcpy(cur.type, aidl::toString(result.type).c_str(), sizeof(cur.type));
+    strlcpy(cur.name, result.name.c_str(), sizeof(cur.name));
   }
   return true;
 }
diff --git a/src/base/BUILD.gn b/src/base/BUILD.gn
index 2434460..5d5d645 100644
--- a/src/base/BUILD.gn
+++ b/src/base/BUILD.gn
@@ -31,10 +31,12 @@
     "../../include/perfetto/ext/base",
   ]
   sources = [
+    "crash_keys.cc",
     "ctrl_c_handler.cc",
     "event_fd.cc",
     "file_utils.cc",
     "getopt_compat.cc",
+    "log_ring_buffer.h",
     "logging.cc",
     "metatrace.cc",
     "paged_memory.cc",
diff --git a/src/base/crash_keys.cc b/src/base/crash_keys.cc
new file mode 100644
index 0000000..5c975df
--- /dev/null
+++ b/src/base/crash_keys.cc
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "perfetto/ext/base/crash_keys.h"
+
+#include <string.h>
+
+#include <atomic>
+#include <cinttypes>
+
+#include "perfetto/ext/base/string_utils.h"
+
+namespace perfetto {
+namespace base {
+
+namespace {
+
+constexpr size_t kMaxKeys = 32;
+
+std::atomic<CrashKey*> g_keys[kMaxKeys]{};
+std::atomic<uint32_t> g_num_keys{};
+}  // namespace
+
+void CrashKey::Register() {
+  // It doesn't matter if we fail below. If there are no slots left, don't
+  // keep trying re-registering on every Set(), the outcome won't change.
+
+  // If two threads raced on the Register(), avoid registering the key twice.
+  if (registered_.exchange(true))
+    return;
+
+  uint32_t slot = g_num_keys.fetch_add(1);
+  if (slot >= kMaxKeys) {
+    PERFETTO_LOG("Too many crash keys registered");
+    return;
+  }
+  g_keys[slot].store(this);
+}
+
+// Returns the number of chars written, without counting the \0.
+size_t CrashKey::ToString(char* dst, size_t len) {
+  if (len > 0)
+    *dst = '\0';
+  switch (type_) {
+    case Type::kUnset:
+      break;
+    case Type::kInt:
+      return SprintfTrunc(dst, len, "%s: %" PRId64 "\n", name_, int_value_);
+    case Type::kStr:
+      // Don't assume |str_value_| is properly null-terminated.
+      return SprintfTrunc(dst, len, "%s: %.*s\n", name_,
+                          int(sizeof(str_value_)), str_value_);
+  }
+  return 0;
+}
+
+void UnregisterAllCrashKeysForTesting() {
+  g_num_keys.store(0);
+  for (auto& key : g_keys)
+    key.store(nullptr);
+}
+
+size_t SerializeCrashKeys(char* dst, size_t len) {
+  size_t written = 0;
+  // Register() bumps g_num_keys even when no slot is left: clamp to kMaxKeys.
+  uint32_t num_keys = g_num_keys.load();
+  if (len > 0)
+    *dst = '\0';
+  for (uint32_t i = 0; i < num_keys && i < kMaxKeys && written < len; i++) {
+    CrashKey* key = g_keys[i].load();
+    if (key)  // Null if we hit this between the add and the store.
+      written += key->ToString(dst + written, len - written);
+  }
+  PERFETTO_DCHECK(written <= len);
+  PERFETTO_DCHECK(len == 0 || dst[written] == '\0');
+  return written;
+}
+
+}  // namespace base
+}  // namespace perfetto
diff --git a/src/base/debug_crash_stack_trace.cc b/src/base/debug_crash_stack_trace.cc
index 2a55d2b..4a16f29 100644
--- a/src/base/debug_crash_stack_trace.cc
+++ b/src/base/debug_crash_stack_trace.cc
@@ -168,7 +168,7 @@
                                  const char* function) -> int {
       SymbolInfo* psym = reinterpret_cast<SymbolInfo*>(data);
       if (function)
-        strncpy(psym->sym_name, function, sizeof(psym->sym_name));
+        snprintf(psym->sym_name, sizeof(psym->sym_name), "%s", function);
       if (filename) {
         snprintf(psym->file_name, sizeof(psym->file_name), "%s:%d", filename,
                  lineno);
@@ -180,7 +180,7 @@
     Dl_info dl_info = {};
     int res = dladdr(reinterpret_cast<void*>(frames[i]), &dl_info);
     if (res && dl_info.dli_sname)
-      strncpy(sym.sym_name, dl_info.dli_sname, sizeof(sym.sym_name));
+      snprintf(sym.sym_name, sizeof(sym.sym_name), "%s", dl_info.dli_sname);
 #endif
 
     Print("\n#");
@@ -193,7 +193,7 @@
       char* demangled =
           abi::__cxa_demangle(sym.sym_name, g_demangled_name, &len, &ignored);
       if (demangled) {
-        strncpy(sym.sym_name, demangled, sizeof(sym.sym_name));
+        snprintf(sym.sym_name, sizeof(sym.sym_name), "%s", demangled);
         // In the exceptional case of demangling something > kDemangledNameLen,
         // __cxa_demangle will realloc(). In that case the malloc()-ed pointer
         // might be moved.
@@ -237,11 +237,9 @@
 }  // namespace
 
 namespace perfetto {
-// __attribute__((constructor)) causes a static initializer that automagically
-// early runs this function before the main().
-void PERFETTO_EXPORT __attribute__((constructor))
-EnableStacktraceOnCrashForDebug();
+namespace base {
 
+// The prototype for this function is in logging.h.
 void EnableStacktraceOnCrashForDebug() {
   if (g_sighandler_registered)
     return;
@@ -264,6 +262,7 @@
   // (ii) the output of death test is not visible.
   pthread_atfork(nullptr, nullptr, &RestoreSignalHandlers);
 }
+}  // namespace base
 }  // namespace perfetto
 
 #pragma GCC diagnostic pop
diff --git a/src/base/log_ring_buffer.h b/src/base/log_ring_buffer.h
new file mode 100644
index 0000000..43a06ab
--- /dev/null
+++ b/src/base/log_ring_buffer.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_BASE_LOG_RING_BUFFER_H_
+#define SRC_BASE_LOG_RING_BUFFER_H_
+
+#include <stddef.h>
+#include <stdio.h>
+
+#include <array>
+#include <atomic>
+
+#include "perfetto/ext/base/string_view.h"
+#include "perfetto/ext/base/thread_annotations.h"
+
+namespace perfetto {
+namespace base {
+
+// Defined out of line because a static constexpr requires static storage if
+// ODR-used, not worth adding a .cc file just for tests.
+constexpr size_t kLogRingBufEntries = 8;
+constexpr size_t kLogRingBufMsgLen = 256;
+
+// A static non-allocating ring-buffer to hold the most recent log events.
+// This class is really an implementation detail of logging.cc. The only reason
+// why it is fully defined in a dedicated header is for allowing unittesting,
+// without leaking extra headers into logging.h (which is a high-fanout header).
+// This is used to report the last logs in a crash report when a CHECK/FATAL
+// is encountered.
+// This class has just an Append() method to insert events into the buffer and
+// a Read() to read the events in FIFO order. Read() is non-destructive.
+//
+// Thread safety considerations:
+// - The Append() method can be called concurrently by several threads, unless
+//   there are > kLogRingBufEntries concurrent threads. Even if that happens,
+//   some events will contain a mix of strings, but the behavior of
+//   further Append() and Read() is still defined.
+// - The Read() method is not thread safe but it's fine in practice. Even if
+//   it's called concurrently with other Append(), it only causes some partial
+//   events to be emitted in output.
+// In both cases, we never rely purely on \0, all operations are size-bound.
+//
+// See logging_unittest.cc for tests.
+class LogRingBuffer {
+ public:
+  LogRingBuffer() = default;
+  LogRingBuffer(const LogRingBuffer&) = delete;
+  LogRingBuffer& operator=(const LogRingBuffer&) = delete;
+  LogRingBuffer(LogRingBuffer&&) = delete;
+  LogRingBuffer& operator=(LogRingBuffer&&) = delete;
+
+  // This takes three arguments because it fits its only caller (logging.cc).
+  // The args are just concatenated together (plus one space before the msg).
+  void Append(StringView tstamp, StringView source, StringView log_msg) {
+    // Reserve atomically a slot in the ring buffer, so any concurrent Append()
+    // won't overlap (unless too many concurrent Append() happen together).
+    // There is no strict synchronization here, |event_slot_| is atomic only for
+    // the sake of avoiding colliding on the same slot but does NOT guarantee
+    // full consistency and integrity of the log messages written in each slot.
+    // A release-store (or acq+rel) won't be enough for full consistency. Two
+    // threads that race on Append() and take the N+1 and N+2 slots could finish
+    // the write in reverse order. So Read() would need to synchronize with
+    // something else (either a per-slot atomic flag or with a second atomic
+    // counter which is incremented after the snprintf). Both options increase
+    // the cost of Append() with no huge benefits (90% of the perfetto services
+    // where we use it is single thread, and the log ring buffer is disabled
+    // on non-standalone builds like the SDK).
+    uint32_t slot = event_slot_.fetch_add(1, std::memory_order_relaxed);
+    slot = slot % kLogRingBufEntries;
+
+    char* const msg = events_[slot];
+    PERFETTO_ANNOTATE_BENIGN_RACE_SIZED(msg, kLogRingBufMsgLen,
+                                        "see comments in log_ring_buffer.h")
+    snprintf(msg, kLogRingBufMsgLen, "%.*s%.*s %.*s",
+             static_cast<int>(tstamp.size()), tstamp.data(),
+             static_cast<int>(source.size()), source.data(),
+             static_cast<int>(log_msg.size()), log_msg.data());
+  }
+
+  // Reads back the buffer in FIFO order, up to |len - 1| characters at most
+  // (the -1 is because a NUL terminator is always appended, unless |len| == 0).
+  // The string written in |dst| is guaranteed to be NUL-terminated, even if
+  // |len| < buffer contents length.
+  // Returns the number of bytes written in output, excluding the \0 terminator.
+  size_t Read(char* dst, size_t len) {
+    if (len == 0)
+      return 0;
+    // This is a relaxed-load because we don't need to fully synchronize on the
+    // writing path for the reasons described in the fetch_add() above.
+    const uint32_t event_slot = event_slot_.load(std::memory_order_relaxed);
+    size_t dst_written = 0;
+    for (uint32_t pos = 0; pos < kLogRingBufEntries; ++pos) {
+      const uint32_t slot = (event_slot + pos) % kLogRingBufEntries;
+      const char* src = events_[slot];
+      if (*src == '\0')
+        continue;  // Empty slot. Skip.
+      char* const wptr = dst + dst_written;
+      // |src| might not be null terminated. This can happen if some
+      // thread-race happened. Limit the copy length.
+      const size_t limit = std::min(len - dst_written, kLogRingBufMsgLen);
+      for (size_t i = 0; i < limit; ++i) {
+        const char c = src[i];
+        ++dst_written;
+        if (c == '\0' || i == limit - 1) {
+          wptr[i] = '\n';
+          break;
+        }
+        // Skip non-printable ASCII characters to avoid confusing crash reports.
+        // Note that this deliberately mangles \n. Log messages should not have
+        // a \n in the middle and are NOT \n terminated. The trailing \n between
+        // each line is appended by the if () branch above.
+        const bool is_printable = c >= ' ' && c <= '~';
+        wptr[i] = is_printable ? c : '?';
+      }
+    }
+    // Ensure that the output string is null-terminated.
+    PERFETTO_DCHECK(dst_written <= len);
+    if (dst_written == len) {
+      // In case of truncation we replace the last char with \0. But the return
+      // value is the number of chars without \0, hence the --.
+      dst[--dst_written] = '\0';
+    } else {
+      dst[dst_written] = '\0';
+    }
+    return dst_written;
+  }
+
+ private:
+  using EventBuf = char[kLogRingBufMsgLen];
+  EventBuf events_[kLogRingBufEntries]{};
+
+  static_assert((kLogRingBufEntries & (kLogRingBufEntries - 1)) == 0,
+                "kLogRingBufEntries must be a power of two");
+
+  // A monotonically increasing counter incremented on each event written.
+  // It determines which of the kLogRingBufEntries indexes in |events_| should
+  // be used next.
+  // It grows >> kLogRingBufEntries, it's supposed to be always used
+  // mod(kLogRingBufEntries). The static_assert above ensures that
+  // kLogRingBufEntries is a power of two so wraps are aligned.
+  std::atomic<uint32_t> event_slot_{};
+};
+
+}  // namespace base
+}  // namespace perfetto
+
+#endif  // SRC_BASE_LOG_RING_BUFFER_H_
diff --git a/src/base/logging.cc b/src/base/logging.cc
index cc47b9b..7f54605 100644
--- a/src/base/logging.cc
+++ b/src/base/logging.cc
@@ -26,7 +26,16 @@
 #include <atomic>
 #include <memory>
 
+#include "perfetto/base/build_config.h"
 #include "perfetto/base/time.h"
+#include "perfetto/ext/base/crash_keys.h"
+#include "perfetto/ext/base/string_utils.h"
+#include "perfetto/ext/base/string_view.h"
+#include "src/base/log_ring_buffer.h"
+
+#if PERFETTO_ENABLE_LOG_RING_BUFFER() && PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
+#include <android/set_abort_message.h>
+#endif
 
 namespace perfetto {
 namespace base {
@@ -41,6 +50,28 @@
 
 std::atomic<LogMessageCallback> g_log_callback{};
 
+#if PERFETTO_BUILDFLAG(PERFETTO_STDERR_CRASH_DUMP)
+// __attribute__((constructor)) causes a static initializer that automagically
+// early runs this function before the main().
+void PERFETTO_EXPORT __attribute__((constructor)) InitDebugCrashReporter() {
+  // This function is defined in debug_crash_stack_trace.cc.
+  // The dynamic initializer is in logging.cc because logging.cc is included
+  // in virtually any target that depends on base. Having it in
+  // debug_crash_stack_trace.cc would require figuring out -Wl,whole-archive
+  // which is not worth it.
+  EnableStacktraceOnCrashForDebug();
+}
+#endif
+
+#if PERFETTO_ENABLE_LOG_RING_BUFFER()
+LogRingBuffer g_log_ring_buffer{};
+
+// This is global to avoid allocating memory or growing too much the stack
+// in MaybeSerializeLastLogsForCrashReporting(), which is called from
+// arbitrary code paths hitting PERFETTO_CHECK()/FATAL().
+char g_crash_buf[kLogRingBufEntries * kLogRingBufMsgLen];
+#endif
+
 }  // namespace
 
 void SetLogMessageCallback(LogMessageCallback callback) {
@@ -55,6 +86,7 @@
   char stack_buf[512];
   std::unique_ptr<char[]> large_buf;
   char* log_msg = &stack_buf[0];
+  size_t log_msg_len = 0;
 
   // By default use a stack allocated buffer because most log messages are quite
   // short. In rare cases they can be larger (e.g. --help). In those cases we
@@ -69,14 +101,18 @@
     // it. The code below will attach the filename and line, which is still
     // useful.
     if (res < 0) {
-      strncpy(log_msg, "[printf format error]", max_len);
+      snprintf(log_msg, max_len, "%s", "[printf format error]");
       break;
     }
 
     // if res == max_len, vsnprintf saturated the input buffer. Retry with a
     // larger buffer in that case (within reasonable limits).
-    if (res < static_cast<int>(max_len) || max_len >= 128 * 1024)
+    if (res < static_cast<int>(max_len) || max_len >= 128 * 1024) {
+      // In case of truncation vsnprintf returns the len that "would have been
+      // written if the string was longer", not the actual chars written.
+      log_msg_len = std::min(static_cast<size_t>(res), max_len - 1);
       break;
+    }
     max_len *= 4;
     large_buf.reset(new char[max_len]);
     log_msg = &large_buf[0];
@@ -114,44 +150,85 @@
 
   // Formats file.cc:line as a space-padded fixed width string. If the file name
   // |fname| is too long, truncate it on the left-hand side.
-  char line_str[10];
-  size_t line_len =
-      static_cast<size_t>(snprintf(line_str, sizeof(line_str), "%d", line));
+  StackString<10> line_str("%d", line);
 
   // 24 will be the width of the file.cc:line column in the log event.
-  char file_and_line[24];
+  static constexpr size_t kMaxNameAndLine = 24;
   size_t fname_len = strlen(fname);
-  size_t fname_max = sizeof(file_and_line) - line_len - 2;  // 2 = ':' + '\0'.
+  size_t fname_max = kMaxNameAndLine - line_str.len() - 2;  // 2 = ':' + '\0'.
   size_t fname_offset = fname_len <= fname_max ? 0 : fname_len - fname_max;
-  int len = snprintf(file_and_line, sizeof(file_and_line), "%s:%s",
-                     fname + fname_offset, line_str);
-  memset(&file_and_line[len], ' ', sizeof(file_and_line) - size_t(len));
-  file_and_line[sizeof(file_and_line) - 1] = '\0';
+  StackString<kMaxNameAndLine> file_and_line(
+      "%*s:%s", static_cast<int>(fname_max), &fname[fname_offset],
+      line_str.c_str());
 
 #if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
   // Logcat has already timestamping, don't re-emit it.
   __android_log_print(ANDROID_LOG_DEBUG + level, "perfetto", "%s %s",
-                      file_and_line, log_msg);
+                      file_and_line.c_str(), log_msg);
 #endif
 
   // When printing on stderr, print also the timestamp. We don't really care
   // about the actual time. We just need some reference clock that can be used
   // to correlated events across differrent processses (e.g. traced and
   // traced_probes). The wall time % 1000 is good enough.
-  char timestamp[32];
   uint32_t t_ms = static_cast<uint32_t>(GetWallTimeMs().count());
   uint32_t t_sec = t_ms / 1000;
   t_ms -= t_sec * 1000;
   t_sec = t_sec % 1000;
-  snprintf(timestamp, sizeof(timestamp), "[%03u.%03u] ", t_sec, t_ms);
+  StackString<32> timestamp("[%03u.%03u] ", t_sec, t_ms);
 
   if (use_colors) {
-    fprintf(stderr, "%s%s%s%s %s%s%s\n", kLightGray, timestamp, file_and_line,
-            kReset, color, log_msg, kReset);
+    fprintf(stderr, "%s%s%s%s %s%s%s\n", kLightGray, timestamp.c_str(),
+            file_and_line.c_str(), kReset, color, log_msg, kReset);
   } else {
-    fprintf(stderr, "%s%s %s\n", timestamp, file_and_line, log_msg);
+    fprintf(stderr, "%s%s %s\n", timestamp.c_str(), file_and_line.c_str(),
+            log_msg);
   }
+
+#if PERFETTO_ENABLE_LOG_RING_BUFFER()
+  // Append the message to the ring buffer for crash reporting postmortems.
+  StringView timestamp_sv = timestamp.string_view();
+  StringView file_and_line_sv = file_and_line.string_view();
+  StringView log_msg_sv(log_msg, static_cast<size_t>(log_msg_len));
+  g_log_ring_buffer.Append(timestamp_sv, file_and_line_sv, log_msg_sv);
+#else
+  ignore_result(log_msg_len);
+#endif
 }
 
+#if PERFETTO_ENABLE_LOG_RING_BUFFER()
+void MaybeSerializeLastLogsForCrashReporting() {
+  // Keep this function minimal. It runs right before crashing (CHECK/FATAL or
+  // the watchdog thread), often when the system is thrashing.
+
+  // This is racy because two threads could hit a CHECK/FATAL at the same time.
+  // But if that happens we have bigger problems, not worth designing around it.
+  // The behaviour is still defined in the race case (the string attached to
+  // the crash report will contain a mixture of log strings).
+  size_t wr = 0;
+  wr += SerializeCrashKeys(&g_crash_buf[wr], sizeof(g_crash_buf) - wr);
+  wr += g_log_ring_buffer.Read(&g_crash_buf[wr], sizeof(g_crash_buf) - wr);
+
+  // Read() null-terminates the string properly. This is just to avoid UB when
+  // two threads race on each other: T1 writes a shorter string, T2 overwrites
+  // the \0 while writing a longer string, and T1 continues here before T2 has
+  // written the longer string's \0 -> boom.
+  g_crash_buf[sizeof(g_crash_buf) - 1] = '\0';
+
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
+  // android_set_abort_message() will cause debuggerd to report the message
+  // in the tombstone and in the crash log in logcat.
+  // NOTE: android_set_abort_message() can be called only once. This should
+  // be called only when we are sure we are about to crash.
+  android_set_abort_message(g_crash_buf);
+#else
+  // Print out the message on stderr on Linux/Mac/Win.
+  fputs("\n-----BEGIN PERFETTO PRE-CRASH LOG-----\n", stderr);
+  fputs(g_crash_buf, stderr);
+  fputs("\n-----END PERFETTO PRE-CRASH LOG-----\n", stderr);
+#endif
+}
+#endif  // PERFETTO_ENABLE_LOG_RING_BUFFER
+
 }  // namespace base
 }  // namespace perfetto
diff --git a/src/base/logging_unittest.cc b/src/base/logging_unittest.cc
index 5349290..ee042bd 100644
--- a/src/base/logging_unittest.cc
+++ b/src/base/logging_unittest.cc
@@ -18,6 +18,14 @@
 
 #include <stdint.h>
 
+#include <condition_variable>
+#include <mutex>
+#include <thread>
+#include <vector>
+
+#include "perfetto/ext/base/crash_keys.h"
+#include "perfetto/ext/base/string_utils.h"
+#include "src/base/log_ring_buffer.h"
 #include "test/gtest_and_gmock.h"
 
 namespace perfetto {
@@ -45,6 +53,205 @@
   ASSERT_STREQ(g_last_line, "");
 }
 
+TEST(LogRingBufferTest, SimpleCases) {
+  char buf[4096];
+  memset(buf, 'x', sizeof(buf));  // Deliberately not 0-initialized.
+
+  LogRingBuffer lrb;
+  EXPECT_EQ(0u, lrb.Read(buf, sizeof(buf)));
+  EXPECT_STREQ(buf, "");
+
+  // Append one entry and read back.
+  lrb.Append("tstamp1,", "src1.cc", "message1");
+  EXPECT_EQ(25u, lrb.Read(buf, sizeof(buf)));
+  EXPECT_STREQ(buf, "tstamp1,src1.cc message1\n");
+
+  lrb.Append("tstamp2,", "src2.cc", "message2");
+  EXPECT_EQ(50u, lrb.Read(buf, sizeof(buf)));
+  EXPECT_STREQ(buf, "tstamp1,src1.cc message1\ntstamp2,src2.cc message2\n");
+}
+
+TEST(LogRingBufferTest, Truncation) {
+  // Append a long entry that overflows the event slot.
+  std::string long_msg;
+  long_msg.resize(kLogRingBufMsgLen * 2);
+  for (size_t i = 0; i < long_msg.size(); i++)
+    long_msg[i] = static_cast<char>('a' + (i % 27));
+  LogRingBuffer lrb;
+  lrb.Append("A", "B", StringView(long_msg));
+
+  // Check that it gets truncated with no side effects.
+  char buf[4096];
+  memset(buf, 'x', sizeof(buf));  // Deliberately not 0-initialized.
+  auto expected = "AB " + long_msg.substr(0, kLogRingBufMsgLen - 4) + "\n";
+  EXPECT_EQ(expected.size(), lrb.Read(buf, sizeof(buf)));
+  EXPECT_EQ(buf, expected);
+
+  // Append a short message and check everything still works.
+  lrb.Append("X", "Y", "foo");
+  EXPECT_EQ(expected.size() + 7, lrb.Read(buf, sizeof(buf)));
+  EXPECT_EQ(buf, expected + "XY foo\n");
+}
+
+TEST(LogRingBufferTest, Wrapping) {
+  LogRingBuffer lrb;
+
+  std::vector<std::string> expected_logs;
+  for (uint32_t i = 0; i < 128; i++) {
+    std::string id = std::to_string(i);
+    std::string tstamp = "tstamp" + id + ",";
+    std::string src = "src";
+    std::string msg;
+    msg.resize(1 + (i % 16));
+    for (size_t c = 0; c < msg.size(); c++)
+      msg[c] = static_cast<char>('a' + c);
+    lrb.Append(StringView(tstamp), StringView(src), StringView(msg));
+    auto expected_log =
+        (tstamp + src + " " + msg).substr(0, kLogRingBufMsgLen) + "\n";
+    expected_logs.emplace_back(expected_log);
+  }
+
+  std::string expected;
+  for (size_t i = expected_logs.size() - kLogRingBufEntries;
+       i < expected_logs.size(); i++) {
+    expected += expected_logs[i];
+  }
+
+  char buf[kLogRingBufMsgLen * kLogRingBufEntries];
+  memset(buf, 'x', sizeof(buf));  // Deliberately not 0-initialized.
+  lrb.Read(buf, sizeof(buf));
+  EXPECT_EQ(buf, expected);
+
+  // Do a partial readback which will cause output truncation.
+  lrb.Read(buf, 127);
+  EXPECT_EQ(buf, expected.substr(0, 127 - 1));  // - 1 for the NUL terminator.
+}
+
+// Writes concurrently into the ring buffer and checks that all the events are
+// seen in some order.
+TEST(LogRingBufferTest, MultiThreadedWrites) {
+  LogRingBuffer lrb;
+
+  std::vector<std::thread> threads;
+  const size_t kNumThreads = 8;
+
+  std::mutex mutex;
+  std::condition_variable cond;
+  bool sync_start = false;
+
+  auto thread_main = [&](size_t thread_idx) {
+    std::unique_lock<std::mutex> lock(mutex);
+    cond.wait(lock, [&] { return sync_start; });
+    lock.unlock();  // Don't serialize the Append() calls below on |mutex|.
+    std::string tstamp = "ts" + std::to_string(thread_idx) + ",";
+    std::string src = "src";
+    std::string msg(thread_idx + 1, '.');  // A variable number of dots.
+    lrb.Append(StringView(tstamp), StringView(src), StringView(msg));
+  };
+
+  std::vector<std::string> expected_events;
+  for (size_t i = 0; i < kNumThreads; i++) {
+    threads.emplace_back(thread_main, i);
+    std::string id = std::to_string(i);
+    expected_events.emplace_back("ts" + id + ",src " + std::string(i + 1, '.'));
+  }
+
+  // Unlock all the threads as close as possible to maximize races.
+  {
+    std::unique_lock<std::mutex> lock(mutex);
+    sync_start = true;
+    cond.notify_all();
+  }
+
+  for (auto& thread : threads)
+    thread.join();
+
+  char buf[kLogRingBufEntries * 40];
+  memset(buf, 'x', sizeof(buf));  // Deliberately not 0-initialized.
+  lrb.Read(buf, sizeof(buf));
+
+  std::vector<std::string> actual_events = SplitString(buf, "\n");
+  EXPECT_THAT(actual_events,
+              testing::UnorderedElementsAreArray(expected_events));
+}
+
+TEST(CrashKeysTest, SetClearAndLongKeys) {
+  UnregisterAllCrashKeysForTesting();
+
+  char buf[1024];
+  memset(buf, 'x', sizeof(buf));
+  EXPECT_EQ(0u, SerializeCrashKeys(buf, sizeof(buf)));
+  EXPECT_STREQ(buf, "");
+
+  CrashKey k1("key1");
+  CrashKey k2("key2");
+  CrashKey k3("key3");
+  CrashKey k4("key4");
+
+  k1.Set(0);
+  k1.Clear();
+
+  k2.Set(42);
+
+  k3.Set("xx");
+  k3.Clear();
+
+  k4.Set("value");
+
+  EXPECT_EQ(21u, SerializeCrashKeys(buf, sizeof(buf)));
+  EXPECT_STREQ(buf, "key2: 42\nkey4: value\n");
+
+  EXPECT_EQ(0u, SerializeCrashKeys(buf, 0));
+
+  EXPECT_EQ(0u, SerializeCrashKeys(buf, 1));
+  EXPECT_STREQ(buf, "");
+
+  // Test truncated output.
+  EXPECT_EQ(5u, SerializeCrashKeys(buf, 5 + 1));
+  EXPECT_STREQ(buf, "key2:");
+
+  k2.Clear();
+
+  std::string long_str(1024, 'x');
+  k4.Set(StringView(long_str));
+
+  EXPECT_EQ(6 + kCrashKeyMaxStrSize, SerializeCrashKeys(buf, sizeof(buf)));
+  std::string expected =
+      "key4: " + long_str.substr(0, kCrashKeyMaxStrSize - 1) + "\n";
+  EXPECT_EQ(buf, expected);
+
+  UnregisterAllCrashKeysForTesting();
+}
+
+TEST(CrashKeysTest, ScopedSet) {
+  UnregisterAllCrashKeysForTesting();
+
+  char buf[1024];
+  memset(buf, 'x', sizeof(buf));
+
+  CrashKey k1("key1");
+  CrashKey k2("key2");
+
+  auto scoped_key = k1.SetScoped(42);
+  EXPECT_GT(SerializeCrashKeys(buf, sizeof(buf)), 0u);
+  EXPECT_STREQ(buf, "key1: 42\n");
+
+  {
+    auto scoped_key2 = k2.SetScoped("foo");
+    EXPECT_GT(SerializeCrashKeys(buf, sizeof(buf)), 0u);
+    EXPECT_STREQ(buf, "key1: 42\nkey2: foo\n");
+  }
+
+  EXPECT_GT(SerializeCrashKeys(buf, sizeof(buf)), 0u);
+  EXPECT_STREQ(buf, "key1: 42\n");
+
+  k1.Clear();
+  EXPECT_EQ(0u, SerializeCrashKeys(buf, sizeof(buf)));
+  EXPECT_STREQ(buf, "");
+
+  UnregisterAllCrashKeysForTesting();
+}
+
 }  // namespace
 }  // namespace base
 }  // namespace perfetto
diff --git a/src/base/string_utils.cc b/src/base/string_utils.cc
index 9d5ae24..af663b8 100644
--- a/src/base/string_utils.cc
+++ b/src/base/string_utils.cc
@@ -17,7 +17,9 @@
 #include "perfetto/ext/base/string_utils.h"
 
 #include <locale.h>
+#include <stdarg.h>
 #include <string.h>
+
 #include <algorithm>
 
 #if PERFETTO_BUILDFLAG(PERFETTO_OS_APPLE)
@@ -26,6 +28,7 @@
 
 #include <cinttypes>
 
+#include "perfetto/base/compiler.h"
 #include "perfetto/base/logging.h"
 
 namespace perfetto {
@@ -166,9 +169,8 @@
   size_t max_size = 11;  // Max uint32 is 0xFFFFFFFF + 1 for null byte.
   std::string buf;
   buf.resize(max_size);
-  auto final_size = snprintf(&buf[0], max_size, "0x%02x", number);
-  PERFETTO_DCHECK(final_size >= 0);
-  buf.resize(static_cast<size_t>(final_size));  // Cuts off the final null byte.
+  size_t final_len = SprintfTrunc(&buf[0], max_size, "0x%02x", number);
+  buf.resize(static_cast<size_t>(final_len));  // Cuts off the final null byte.
   return buf;
 }
 
@@ -180,9 +182,8 @@
   size_t max_size = 17;  // Max uint64 is FFFFFFFFFFFFFFFF + 1 for null byte.
   std::string buf;
   buf.resize(max_size);
-  auto final_size = snprintf(&buf[0], max_size, "%" PRIx64 "", number);
-  PERFETTO_DCHECK(final_size >= 0);
-  buf.resize(static_cast<size_t>(final_size));  // Cuts off the final null byte.
+  size_t final_len = SprintfTrunc(&buf[0], max_size, "%" PRIx64 "", number);
+  buf.resize(static_cast<size_t>(final_len));  // Cuts off the final null byte.
   return buf;
 }
 
@@ -252,5 +253,33 @@
   return out;
 }
 
+// Like snprintf(), but returns the number of chars actually stored in |dst|,
+// excluding the NUL terminator, also when the output had to be truncated.
+// |dst| is always NUL-terminated, unless |dst_size| == 0, in which case it is
+// left untouched and 0 is returned.
+size_t SprintfTrunc(char* dst, size_t dst_size, const char* fmt, ...) {
+  if (PERFETTO_UNLIKELY(dst_size == 0))
+    return 0;
+
+  va_list args;
+  va_start(args, fmt);
+  int src_size = vsnprintf(dst, dst_size, fmt, args);
+  va_end(args);
+
+  // vsnprintf() returns < 0 on errors: report an empty string in that case.
+  if (PERFETTO_UNLIKELY(src_size <= 0)) {
+    dst[0] = '\0';
+    return 0;
+  }
+
+  // On truncation vsnprintf() returns the length the untruncated output would
+  // have had. Clamp it to the chars that fit in |dst| next to the final NUL.
+  const size_t res = std::min(static_cast<size_t>(src_size), dst_size - 1);
+
+  PERFETTO_DCHECK(res < dst_size);
+  PERFETTO_DCHECK(dst[res] == '\0');
+  return res;
+}
+
 }  // namespace base
 }  // namespace perfetto
diff --git a/src/base/string_utils_unittest.cc b/src/base/string_utils_unittest.cc
index 2c7108d..0787802 100644
--- a/src/base/string_utils_unittest.cc
+++ b/src/base/string_utils_unittest.cc
@@ -24,6 +24,13 @@
 namespace base {
 namespace {
 
+template <size_t N>
+struct UninitializedBuf {
+  UninitializedBuf() { memset(data, '?', sizeof(data)); }
+  operator char*() { return data; }
+  char data[N];
+};
+
 using testing::ElementsAre;
 
 TEST(StringUtilsTest, Lowercase) {
@@ -319,6 +326,126 @@
   EXPECT_EQ(Base64Encode(buffer.data(), buffer.size()), "+/A+B/w=");
 }
 
+TEST(StringUtilsTest, StringCopy) {
+  // Nothing should be written when |dst_size| = 0.
+  {
+    char dst[2] = {42, 43};
+    StringCopy(dst, "12345", 0);
+    EXPECT_EQ(42, dst[0]);
+    EXPECT_EQ(43, dst[1]);
+  }
+
+  // Nominal case, len(src) < sizeof(dst).
+  {
+    UninitializedBuf<10> dst;
+    StringCopy(dst, "1234567", sizeof(dst));
+    EXPECT_STREQ(dst, "1234567");
+  }
+
+  // Edge case where we perfectly fit including the \0.
+  {
+    UninitializedBuf<8> dst;
+    StringCopy(dst, "1234567", sizeof(dst));
+    EXPECT_STREQ(dst, "1234567");
+  }
+
+  // Edge case where |dst| is smaller by one char.
+  {
+    UninitializedBuf<8> dst;
+    StringCopy(dst, "12345678", sizeof(dst));
+    EXPECT_STREQ(dst, "1234567");
+  }
+
+  // Case when |dst| is smaller than |src|.
+  {
+    UninitializedBuf<3> dst;
+    StringCopy(dst, "12345678", sizeof(dst));
+    EXPECT_STREQ(dst, "12");
+  }
+}
+
+TEST(StringUtilsTest, SprintfTrunc) {
+  {
+    UninitializedBuf<3> dst;
+    ASSERT_EQ(0u, SprintfTrunc(dst, sizeof(dst), "%s", ""));
+    EXPECT_STREQ(dst, "");
+  }
+
+  {
+    char dst[3]{'O', 'K', '\0'};
+    ASSERT_EQ(0u, SprintfTrunc(dst, 0, "whatever"));
+    EXPECT_STREQ(dst, "OK");  // dst_size == 0 shouldn't touch the buffer.
+  }
+
+  {
+    UninitializedBuf<1> dst;
+    ASSERT_EQ(0u, SprintfTrunc(dst, sizeof(dst), "whatever"));
+    EXPECT_STREQ(dst, "");
+  }
+
+  {
+    UninitializedBuf<3> dst;
+    ASSERT_EQ(1u, SprintfTrunc(dst, sizeof(dst), "1"));
+    EXPECT_STREQ(dst, "1");
+  }
+
+  {
+    UninitializedBuf<3> dst;
+    ASSERT_EQ(2u, SprintfTrunc(dst, sizeof(dst), "12"));
+    EXPECT_STREQ(dst, "12");
+  }
+
+  {
+    UninitializedBuf<3> dst;
+    ASSERT_EQ(2u, SprintfTrunc(dst, sizeof(dst), "123"));
+    EXPECT_STREQ(dst, "12");
+  }
+
+  {
+    UninitializedBuf<3> dst;
+    ASSERT_EQ(2u, SprintfTrunc(dst, sizeof(dst), "1234"));
+    EXPECT_STREQ(dst, "12");
+  }
+
+  {
+    UninitializedBuf<11> dst;
+    ASSERT_EQ(10u, SprintfTrunc(dst, sizeof(dst), "a %d b %s", 42, "foo"));
+    EXPECT_STREQ(dst, "a 42 b foo");
+  }
+}
+
+TEST(StringUtilsTest, StackString) {
+  {
+    StackString<1> s("123");
+    EXPECT_EQ(0u, s.len());
+    EXPECT_STREQ("", s.c_str());
+  }
+
+  {
+    StackString<4> s("123");
+    EXPECT_EQ(3u, s.len());
+    EXPECT_STREQ("123", s.c_str());
+    EXPECT_EQ(s.ToStdString(), std::string(s.c_str()));
+    EXPECT_EQ(s.string_view().ToStdString(), s.ToStdString());
+  }
+
+  {
+    StackString<3> s("123");
+    EXPECT_EQ(2u, s.len());
+    EXPECT_STREQ("12", s.c_str());
+    EXPECT_EQ(s.ToStdString(), std::string(s.c_str()));
+    EXPECT_EQ(s.string_view().ToStdString(), s.ToStdString());
+  }
+
+  {
+    StackString<11> s("foo %d %s", 42, "bar!!!OVERFLOW");
+    EXPECT_EQ(10u, s.len());
+    EXPECT_STREQ("foo 42 bar", s.c_str());
+    EXPECT_EQ(s.ToStdString(), std::string(s.c_str()));
+    EXPECT_EQ(s.string_view().ToStdString(), s.ToStdString());
+  }
+}
+
 }  // namespace
 }  // namespace base
 }  // namespace perfetto
diff --git a/src/base/unix_socket_unittest.cc b/src/base/unix_socket_unittest.cc
index 9ef69c9..b16f3ff 100644
--- a/src/base/unix_socket_unittest.cc
+++ b/src/base/unix_socket_unittest.cc
@@ -318,10 +318,10 @@
     tx_task_runner.RunUntilCheckpoint("cli_connected");
 
     auto all_sent = tx_task_runner.CreateCheckpoint("all_sent");
-    char buf[1024 * 32] = {};
+    std::string buf(1024 * 32, '\0');
     tx_task_runner.PostTask([&cli, &buf, all_sent] {
-      for (size_t i = 0; i < kTotalBytes / sizeof(buf); i++)
-        cli->Send(buf, sizeof(buf));
+      for (size_t i = 0; i < kTotalBytes / buf.size(); i++)
+        cli->Send(buf.data(), buf.size());
       all_sent();
     });
     tx_task_runner.RunUntilCheckpoint("all_sent", kTimeoutMs);
@@ -834,11 +834,11 @@
 
   // Send something larger than send + recv kernel buffers combined to make
   // sendmsg block.
-  char send_buf[8192];
+  std::string send_buf(8192, '\0');
   // Make MSAN happy.
-  for (size_t i = 0; i < sizeof(send_buf); ++i)
+  for (size_t i = 0; i < send_buf.size(); ++i)
     send_buf[i] = static_cast<char>(i % 256);
-  char recv_buf[sizeof(send_buf)];
+  std::string recv_buf(send_buf.size(), '\0');
 
   // Need to install signal handler to cause the interrupt to happen.
   // man 3 pthread_kill:
@@ -855,15 +855,15 @@
 
   auto blocked_thread = pthread_self();
   std::thread th([blocked_thread, &recv_sock, &recv_buf] {
-    ssize_t rd = PERFETTO_EINTR(read(recv_sock.fd(), recv_buf, 1));
+    ssize_t rd = PERFETTO_EINTR(read(recv_sock.fd(), &recv_buf[0], 1));
     ASSERT_EQ(rd, 1);
     // We are now sure the other thread is in sendmsg, interrupt send.
     ASSERT_EQ(pthread_kill(blocked_thread, SIGWINCH), 0);
     // Drain the socket to allow SendMsgAllPosix to succeed.
     size_t offset = 1;
-    while (offset < sizeof(recv_buf)) {
+    while (offset < recv_buf.size()) {
       rd = PERFETTO_EINTR(
-          read(recv_sock.fd(), recv_buf + offset, sizeof(recv_buf) - offset));
+          read(recv_sock.fd(), &recv_buf[offset], recv_buf.size() - offset));
       ASSERT_GE(rd, 0);
       offset += static_cast<size_t>(rd);
     }
@@ -873,23 +873,23 @@
   // more complicated code-paths of SendMsgAllPosix.
   struct msghdr hdr = {};
   struct iovec iov[4];
-  static_assert(sizeof(send_buf) % base::ArraySize(iov) == 0,
-                "Cannot split buffer into even pieces.");
-  constexpr size_t kChunkSize = sizeof(send_buf) / base::ArraySize(iov);
+  ASSERT_EQ(send_buf.size() % base::ArraySize(iov), 0u)
+      << "Cannot split buffer into even pieces.";
+  const size_t kChunkSize = send_buf.size() / base::ArraySize(iov);
   for (size_t i = 0; i < base::ArraySize(iov); ++i) {
-    iov[i].iov_base = send_buf + i * kChunkSize;
+    iov[i].iov_base = &send_buf[i * kChunkSize];
     iov[i].iov_len = kChunkSize;
   }
   hdr.msg_iov = iov;
   hdr.msg_iovlen = base::ArraySize(iov);
 
   ASSERT_EQ(send_sock.SendMsgAllPosix(&hdr),
-            static_cast<ssize_t>(sizeof(send_buf)));
+            static_cast<ssize_t>(send_buf.size()));
   send_sock.Shutdown();
   th.join();
   // Make sure the re-entry logic was actually triggered.
   ASSERT_EQ(hdr.msg_iov, nullptr);
-  ASSERT_EQ(memcmp(send_buf, recv_buf, sizeof(send_buf)), 0);
+  ASSERT_EQ(memcmp(&send_buf[0], &recv_buf[0], send_buf.size()), 0);
 }
 #endif  // !OS_WIN
 
diff --git a/src/base/watchdog_posix.cc b/src/base/watchdog_posix.cc
index bd540a1..e765858 100644
--- a/src/base/watchdog_posix.cc
+++ b/src/base/watchdog_posix.cc
@@ -19,9 +19,15 @@
 #if PERFETTO_BUILDFLAG(PERFETTO_WATCHDOG)
 
 #include <fcntl.h>
+#include <poll.h>
 #include <signal.h>
 #include <stdint.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <sys/timerfd.h>
+#include <unistd.h>
 
+#include <algorithm>
 #include <cinttypes>
 #include <fstream>
 #include <thread>
@@ -29,6 +35,7 @@
 #include "perfetto/base/build_config.h"
 #include "perfetto/base/logging.h"
 #include "perfetto/base/thread_utils.h"
+#include "perfetto/base/time.h"
 #include "perfetto/ext/base/file_utils.h"
 #include "perfetto/ext/base/scoped_file.h"
 #include "perfetto/ext/base/utils.h"
@@ -50,7 +57,6 @@
     total += array[i];
   }
   return static_cast<double>(total / size);
-
 }
 
 }  //  namespace
@@ -91,7 +97,16 @@
   }
   PERFETTO_DCHECK(enabled_);
   enabled_ = false;
-  exit_signal_.notify_one();
+
+  // Rearm the timer to 1ns from now. This will cause the watchdog thread to
+  // wakeup from the poll() and see |enabled_| == false.
+  // This code path is used only in tests. In production code the watchdog is
+  // a singleton and is never destroyed.
+  struct itimerspec ts {};
+  ts.it_value.tv_sec = 0;
+  ts.it_value.tv_nsec = 1;
+  timerfd_settime(*timer_fd_, /*flags=*/0, &ts, nullptr);
+
   thread_.join();
 }
 
@@ -100,11 +115,48 @@
   return watchdog;
 }
 
+// Can be called from any thread.
 Watchdog::Timer Watchdog::CreateFatalTimer(uint32_t ms) {
   if (!enabled_.load(std::memory_order_relaxed))
-    return Watchdog::Timer(0);
+    return Watchdog::Timer(this, 0);
 
-  return Watchdog::Timer(ms);
+  return Watchdog::Timer(this, ms);
+}
+
+// Can be called from any thread.
+void Watchdog::AddFatalTimer(TimerData timer) {
+  std::lock_guard<std::mutex> guard(mutex_);
+  timers_.emplace_back(std::move(timer));
+  RearmTimerFd_Locked();
+}
+
+// Can be called from any thread.
+void Watchdog::RemoveFatalTimer(TimerData timer) {
+  std::lock_guard<std::mutex> guard(mutex_);
+  for (auto it = timers_.begin(); it != timers_.end(); it++) {
+    if (*it == timer) {
+      timers_.erase(it);
+      break;  // Remove only one. Doesn't matter which one.
+    }
+  }
+  RearmTimerFd_Locked();
+}
+
+void Watchdog::RearmTimerFd_Locked() {
+  if (!enabled_)
+    return;  // Start() creates |timer_fd_| right before setting |enabled_|.
+  auto it = std::min_element(timers_.begin(), timers_.end());
+
+  // We use one timerfd to handle all the outstanding |timers_|. Keep it armed
+  // to the task expiring soonest.
+  struct itimerspec ts {};
+  if (it != timers_.end()) {
+    ts.it_value = ToPosixTimespec(it->deadline);
+  }
+  // If |timers_| is empty (it == end()) |ts.it_value| will remain
+  // zero-initialized and that will disarm the timer in the call below.
+  int res = timerfd_settime(*timer_fd_, TFD_TIMER_ABSTIME, &ts, nullptr);
+  PERFETTO_DCHECK(res == 0);
 }
 
 void Watchdog::Start() {
@@ -117,7 +169,15 @@
 #if PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) || \
     PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
     // Kick the thread to start running but only on Android or Linux.
+    timer_fd_.reset(
+        timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC | TFD_NONBLOCK));
+    if (!timer_fd_) {
+      PERFETTO_PLOG(
+          "timerfd_create failed, the Perfetto watchdog is not available");
+      return;
+    }
     enabled_ = true;
+    RearmTimerFd_Locked();  // Deal with timers created before Start().
     thread_ = std::thread(&Watchdog::ThreadMain, this);
 #endif
   }
@@ -153,49 +213,124 @@
     return;
   }
 
-  std::unique_lock<std::mutex> guard(mutex_);
+  PERFETTO_DCHECK(timer_fd_);
+
+  constexpr uint8_t kFdCount = 1;
+  struct pollfd fds[kFdCount]{};
+  fds[0].fd = *timer_fd_;
+  fds[0].events = POLLIN;
+
   for (;;) {
-    exit_signal_.wait_for(guard,
-                          std::chrono::milliseconds(polling_interval_ms_));
+    // We use the poll() timeout to drive the periodic ticks for the cpu/memory
+    // checks. The only other case when the poll() unblocks is when we crash
+    // (or have to quit via enabled_ == false, but that happens only in tests).
+    auto ret = poll(fds, kFdCount, static_cast<int>(polling_interval_ms_));
     if (!enabled_)
       return;
-
-    lseek(stat_fd.get(), 0, SEEK_SET);
-
-    ProcStat stat;
-    if (!ReadProcStat(stat_fd.get(), &stat)) {
-      return;
+    if (ret < 0) {
+      if (errno == ENOMEM || errno == EINTR) {
+        // Should happen extremely rarely.
+        std::this_thread::sleep_for(std::chrono::milliseconds(100));
+        continue;
+      }
+      PERFETTO_FATAL("watchdog poll() failed");
     }
 
+    // If we get here either:
+    // 1. poll() timed out, in which case we should process cpu/mem guardrails.
+    // 2. A timer expired, in which case we shall crash.
+
+    uint64_t expired = 0;  // Must be exactly 8 bytes.
+    auto res = PERFETTO_EINTR(read(*timer_fd_, &expired, sizeof(expired)));
+    PERFETTO_DCHECK((res < 0 && (errno == EAGAIN)) ||
+                    (res == sizeof(expired) && expired > 0));
+    const auto now = GetWallTimeMs();
+
+    // Check if any of the timers expired.
+    int tid_to_kill = 0;
+    std::unique_lock<std::mutex> guard(mutex_);
+    for (const auto& timer : timers_) {
+      if (now >= timer.deadline) {
+        tid_to_kill = timer.thread_id;
+        break;
+      }
+    }
+    guard.unlock();
+
+    if (tid_to_kill)
+      SerializeLogsAndKillThread(tid_to_kill);
+
+    // Check CPU and memory guardrails (if enabled).
+    lseek(stat_fd.get(), 0, SEEK_SET);
+    ProcStat stat;
+    if (!ReadProcStat(stat_fd.get(), &stat))
+      continue;
     uint64_t cpu_time = stat.utime + stat.stime;
     uint64_t rss_bytes =
         static_cast<uint64_t>(stat.rss_pages) * base::GetSysPageSize();
 
-    CheckMemory(rss_bytes);
-    CheckCpu(cpu_time);
+    guard.lock();
+    bool threshold_exceeded = false;
+    threshold_exceeded |= CheckMemory_Locked(rss_bytes);
+    threshold_exceeded |= CheckCpu_Locked(cpu_time);
+    guard.unlock();
+
+    if (threshold_exceeded)
+      SerializeLogsAndKillThread(getpid());
   }
 }
 
-void Watchdog::CheckMemory(uint64_t rss_bytes) {
-  if (memory_limit_bytes_ == 0)
+void Watchdog::SerializeLogsAndKillThread(int tid) {
+  // We are about to die. Serialize the logs into the crash buffer so the
+  // debuggerd crash handler picks them up and attaches them to the bugreport.
+  // In the case of a PERFETTO_CHECK/PERFETTO_FATAL this is done in logging.h.
+  // The watchdog doesn't hit that codepath, so we must do it ourselves here.
+  MaybeSerializeLastLogsForCrashReporting();
+
+  // Send a SIGABRT to the thread that armed the timer. This is to see the
+  // callstack of the thread that is stuck in a long task rather than the
+  // watchdog thread.
+  if (syscall(__NR_tgkill, getpid(), tid, SIGABRT) < 0) {
+    // At this point the process must die. If for any reason the tgkill doesn't
+    // work (e.g. the thread has disappeared), force a crash from here.
+    abort();
+  }
+
+  if (disable_kill_failsafe_for_testing_)
     return;
 
+  // The tgkill() above will take some milliseconds to cause a crash, as it
+  // involves the kernel to queue the SIGABRT on the target thread (often the
+  // main thread, which is != watchdog thread) and do a scheduling round.
+  // If something goes wrong though (the target thread has signals masked or
+  // is stuck in an uninterruptible+wakekill syscall) force quit from this
+  // thread.
+  std::this_thread::sleep_for(std::chrono::seconds(10));
+  abort();
+}
+
+bool Watchdog::CheckMemory_Locked(uint64_t rss_bytes) {
+  if (memory_limit_bytes_ == 0)
+    return false;
+
   // Add the current stat value to the ring buffer and check that the mean
   // remains under our threshold.
   if (memory_window_bytes_.Push(rss_bytes)) {
-    if (memory_window_bytes_.Mean() > static_cast<double>(memory_limit_bytes_)) {
+    if (memory_window_bytes_.Mean() >
+        static_cast<double>(memory_limit_bytes_)) {
       PERFETTO_ELOG(
           "Memory watchdog trigger. Memory window of %f bytes is above the "
           "%" PRIu64 " bytes limit.",
           memory_window_bytes_.Mean(), memory_limit_bytes_);
-      kill(getpid(), SIGABRT);
+      return true;
     }
   }
+  return false;
 }
 
-void Watchdog::CheckCpu(uint64_t cpu_time) {
+bool Watchdog::CheckCpu_Locked(uint64_t cpu_time) {
   if (cpu_limit_percentage_ == 0)
-    return;
+    return false;
 
   // Add the cpu time to the ring buffer.
   if (cpu_window_time_ticks_.Push(cpu_time)) {
@@ -213,9 +348,10 @@
       PERFETTO_ELOG("CPU watchdog trigger. %f%% CPU use is above the %" PRIu32
                     "%% CPU limit.",
                     percentage, cpu_limit_percentage_);
-      kill(getpid(), SIGABRT);
+      return true;
     }
   }
+  return false;
 }
 
 uint32_t Watchdog::WindowTimeForRingBuffer(const WindowedInterval& window) {
@@ -249,36 +385,25 @@
   buffer_.reset(new_size == 0 ? nullptr : new uint64_t[new_size]());
 }
 
-Watchdog::Timer::Timer(uint32_t ms) {
+Watchdog::Timer::Timer(Watchdog* watchdog, uint32_t ms) : watchdog_(watchdog) {
   if (!ms)
     return;  // No-op timer created when the watchdog is disabled.
-
-  struct sigevent sev = {};
-  timer_t timerid;
-  sev.sigev_notify = SIGEV_THREAD_ID;
-#if defined(__GLIBC__)
-  sev._sigev_un._tid = base::GetThreadId();
-#else
-  sev.sigev_notify_thread_id = base::GetThreadId();
-#endif
-  sev.sigev_signo = SIGABRT;
-  PERFETTO_CHECK(timer_create(CLOCK_MONOTONIC, &sev, &timerid) != -1);
-  timerid_ = base::make_optional(timerid);
-  struct itimerspec its = {};
-  its.it_value.tv_sec = ms / 1000;
-  its.it_value.tv_nsec = 1000000L * (ms % 1000);
-  PERFETTO_CHECK(timer_settime(timerid_.value(), 0, &its, nullptr) != -1);
+  timer_data_.deadline = GetWallTimeMs() + std::chrono::milliseconds(ms);
+  timer_data_.thread_id = GetThreadId();
+  PERFETTO_DCHECK(watchdog_);
+  watchdog_->AddFatalTimer(timer_data_);
 }
 
 Watchdog::Timer::~Timer() {
-  if (timerid_) {
-    timer_delete(timerid_.value());
-  }
+  if (timer_data_.deadline.count())
+    watchdog_->RemoveFatalTimer(timer_data_);
 }
 
 Watchdog::Timer::Timer(Timer&& other) noexcept {
-  timerid_ = std::move(other.timerid_);
-  other.timerid_ = base::nullopt;
+  watchdog_ = std::move(other.watchdog_);
+  other.watchdog_ = nullptr;
+  timer_data_ = std::move(other.timer_data_);
+  other.timer_data_ = TimerData();
 }
 
 }  // namespace base
diff --git a/src/base/watchdog_unittest.cc b/src/base/watchdog_unittest.cc
index 81d0ac6..90c1e3f 100644
--- a/src/base/watchdog_unittest.cc
+++ b/src/base/watchdog_unittest.cc
@@ -39,7 +39,9 @@
 class TestWatchdog : public Watchdog {
  public:
   explicit TestWatchdog(uint32_t polling_interval_ms)
-      : Watchdog(polling_interval_ms) {}
+      : Watchdog(polling_interval_ms) {
+    disable_kill_failsafe_for_testing_ = true;
+  }
   ~TestWatchdog() override {}
 };
 
@@ -157,6 +159,7 @@
   };
 
   std::vector<std::thread> threads;
+
   for (size_t i = 0; i < 8; i++)
     threads.emplace_back(thread_fn, i);
 
diff --git a/src/ipc/host_impl.cc b/src/ipc/host_impl.cc
index 9467ca9..da1ce4c 100644
--- a/src/ipc/host_impl.cc
+++ b/src/ipc/host_impl.cc
@@ -21,6 +21,7 @@
 #include <utility>
 
 #include "perfetto/base/task_runner.h"
+#include "perfetto/ext/base/crash_keys.h"
 #include "perfetto/ext/base/utils.h"
 #include "perfetto/ext/ipc/service.h"
 #include "perfetto/ext/ipc/service_descriptor.h"
@@ -37,6 +38,8 @@
 constexpr base::SockFamily kHostSockFamily =
     kUseTCPSocket ? base::SockFamily::kInet : base::SockFamily::kUnix;
 
+base::CrashKey g_crash_key_uid("ipc_uid");
+
 uid_t GetPosixPeerUid(base::UnixSocket* sock) {
 #if PERFETTO_BUILDFLAG(PERFETTO_OS_WIN)
   base::ignore_result(sock);
@@ -123,6 +126,9 @@
   ClientConnection* client = it->second;
   BufferedFrameDeserializer& frame_deserializer = client->frame_deserializer;
 
+  auto peer_uid = GetPosixPeerUid(client->sock.get());
+  auto scoped_key = g_crash_key_uid.SetScoped(static_cast<int64_t>(peer_uid));
+
   size_t rsize;
   do {
     auto buf = frame_deserializer.BeginReceive();
@@ -217,8 +223,9 @@
     });
   }
 
-  service->client_info_ =
-      ClientInfo(client->id, GetPosixPeerUid(client->sock.get()));
+  auto peer_uid = GetPosixPeerUid(client->sock.get());
+  auto scoped_key = g_crash_key_uid.SetScoped(static_cast<int64_t>(peer_uid));
+  service->client_info_ = ClientInfo(client->id, peer_uid);
   service->received_fd_ = &client->received_fd;
   method.invoker(service, *decoded_req_args, std::move(deferred_reply));
   service->received_fd_ = nullptr;
@@ -251,6 +258,9 @@
 
 // static
 void HostImpl::SendFrame(ClientConnection* client, const Frame& frame, int fd) {
+  auto peer_uid = GetPosixPeerUid(client->sock.get());
+  auto scoped_key = g_crash_key_uid.SetScoped(static_cast<int64_t>(peer_uid));
+
   std::string buf = BufferedFrameDeserializer::Serialize(frame);
 
   // When a new Client connects in OnNewClientConnection we set a timeout on
diff --git a/src/kallsyms/kernel_symbol_map.cc b/src/kallsyms/kernel_symbol_map.cc
index 0e4c374..ae8fff7 100644
--- a/src/kallsyms/kernel_symbol_map.cc
+++ b/src/kallsyms/kernel_symbol_map.cc
@@ -32,6 +32,7 @@
 #include <cinttypes>
 #include <functional>
 #include <map>
+#include <unordered_map>
 #include <utility>
 
 namespace perfetto {
@@ -243,8 +244,9 @@
     TokenId id = 0;
   };
 
-  // Note if changing the container: the code below relies on stable iterators.
-  using TokenMap = std::map<std::string, TokenInfo>;
+  // Note if changing the container: the code below doesn't rely on stable
+  // iterators, but relies on stable pointers.
+  using TokenMap = std::unordered_map<std::string, TokenInfo>;
   using TokenMapPtr = TokenMap::value_type*;
   TokenMap tokens;
 
diff --git a/src/kallsyms/kernel_symbol_map_benchmark.cc b/src/kallsyms/kernel_symbol_map_benchmark.cc
index 7c3e9a9..704fa82 100644
--- a/src/kallsyms/kernel_symbol_map_benchmark.cc
+++ b/src/kallsyms/kernel_symbol_map_benchmark.cc
@@ -19,6 +19,7 @@
 #include <benchmark/benchmark.h>
 
 #include "perfetto/base/logging.h"
+#include "perfetto/ext/base/file_utils.h"
 #include "perfetto/ext/base/utils.h"
 #include "src/base/test/utils.h"
 #include "src/kallsyms/kernel_symbol_map.h"
@@ -89,7 +90,7 @@
 
 }  // namespace
 
-static void BM_KallSyms(benchmark::State& state) {
+static void BM_KallSymsFind(benchmark::State& state) {
   perfetto::KernelSymbolMap::kTokenIndexSampling =
       static_cast<size_t>(state.range(0));
   perfetto::KernelSymbolMap::kSymIndexSampling =
@@ -113,4 +114,31 @@
   state.counters["mem"] = static_cast<double>(kallsyms.size_bytes());
 }
 
-BENCHMARK(BM_KallSyms)->Apply(BenchmarkArgs);
+BENCHMARK(BM_KallSymsFind)->Apply(BenchmarkArgs);
+
+static void BM_KallSymsLoad(benchmark::State& state) {
+  perfetto::KernelSymbolMap::kTokenIndexSampling =
+      static_cast<size_t>(state.range(0));
+  perfetto::KernelSymbolMap::kSymIndexSampling =
+      static_cast<size_t>(state.range(1));
+
+  // Don't run the benchmark on the CI as it requires pushing all test data,
+  // which significantly slows down the CI.
+  const bool skip = IsBenchmarkFunctionalOnly();
+
+  const std::string kallsyms_path = perfetto::base::GetTestDataPath("test/data/kallsyms.txt");
+  if (!skip) {
+    std::string tmp;
+    // Read the whole file once, so that it's cached.
+    PERFETTO_CHECK(perfetto::base::ReadFile(kallsyms_path, &tmp));
+  }
+
+  for (auto _ : state) {
+    perfetto::KernelSymbolMap kallsyms;
+    if (!skip) {
+      kallsyms.Parse(kallsyms_path);
+    }
+  }
+}
+
+BENCHMARK(BM_KallSymsLoad)->Apply(BenchmarkArgs);
diff --git a/src/profiling/common/proc_utils.cc b/src/profiling/common/proc_utils.cc
index fab2261..3ec1e55 100644
--- a/src/profiling/common/proc_utils.cc
+++ b/src/profiling/common/proc_utils.cc
@@ -23,24 +23,13 @@
 
 #include "perfetto/ext/base/file_utils.h"
 #include "perfetto/ext/base/optional.h"
+#include "perfetto/ext/base/string_utils.h"
 #include "perfetto/profiling/normalize.h"
 
 namespace perfetto {
 namespace profiling {
 namespace {
 
-bool GetProcFile(pid_t pid, const char* file, char* filename_buf, size_t size) {
-  ssize_t written = snprintf(filename_buf, size, "/proc/%d/%s", pid, file);
-  if (written < 0 || static_cast<size_t>(written) >= size) {
-    if (written < 0)
-      PERFETTO_ELOG("Failed to concatenate cmdline file.");
-    else
-      PERFETTO_ELOG("Overflow when concatenating cmdline file.");
-    return false;
-  }
-  return true;
-}
-
 base::Optional<uint32_t> ParseProcStatusSize(const std::string& status,
                                              const std::string& key) {
   auto entry_idx = status.find(key);
@@ -130,8 +119,7 @@
       return;
 
     char filename_buf[128];
-    if (!GetProcFile(pid, "cmdline", filename_buf, sizeof(filename_buf)))
-      return;
+    snprintf(filename_buf, sizeof(filename_buf), "/proc/%d/%s", pid, "cmdline");
     struct stat statbuf;
     // Check if we have permission to the process.
     if (stat(filename_buf, &statbuf) == 0)
diff --git a/src/profiling/memory/client.cc b/src/profiling/memory/client.cc
index cd06150..79e1a77 100644
--- a/src/profiling/memory/client.cc
+++ b/src/profiling/memory/client.cc
@@ -42,6 +42,7 @@
 #include "perfetto/base/time.h"
 #include "perfetto/ext/base/file_utils.h"
 #include "perfetto/ext/base/scoped_file.h"
+#include "perfetto/ext/base/string_utils.h"
 #include "perfetto/ext/base/unix_socket.h"
 #include "perfetto/ext/base/utils.h"
 #include "src/profiling/memory/sampler.h"
@@ -463,8 +464,7 @@
 
   HeapName hnr;
   hnr.heap_id = heap_id;
-  strncpy(&hnr.heap_name[0], heap_name, sizeof(hnr.heap_name));
-  hnr.heap_name[sizeof(hnr.heap_name) - 1] = '\0';
+  base::StringCopy(&hnr.heap_name[0], heap_name, sizeof(hnr.heap_name));
   hnr.sample_interval = interval;
 
   WireMessage msg = {};
diff --git a/src/profiling/memory/client_api.cc b/src/profiling/memory/client_api.cc
index c1b96a1..6ec2a61 100644
--- a/src/profiling/memory/client_api.cc
+++ b/src/profiling/memory/client_api.cc
@@ -34,6 +34,7 @@
 #include "perfetto/base/build_config.h"
 #include "perfetto/base/logging.h"
 #include "perfetto/ext/base/no_destructor.h"
+#include "perfetto/ext/base/string_utils.h"
 #include "perfetto/ext/base/unix_socket.h"
 #include "perfetto/ext/base/utils.h"
 
@@ -306,7 +307,7 @@
     perfetto::profiling::StartHeapprofdIfStatic();
 
   AHeapInfo& info = GetHeap(next_id);
-  strncpy(info.heap_name, heap_name, sizeof(info.heap_name));
+  perfetto::base::StringCopy(info.heap_name, heap_name, sizeof(info.heap_name));
   return &info;
 }
 
diff --git a/src/profiling/memory/client_api_factory_standalone.cc b/src/profiling/memory/client_api_factory_standalone.cc
index da2a083..799c44b 100644
--- a/src/profiling/memory/client_api_factory_standalone.cc
+++ b/src/profiling/memory/client_api_factory_standalone.cc
@@ -16,6 +16,7 @@
 
 #include "src/profiling/memory/client_api_factory.h"
 
+#include "perfetto/base/logging.h"
 #include "perfetto/ext/base/scoped_file.h"
 #include "perfetto/ext/base/unix_socket.h"
 #include "perfetto/ext/base/unix_task_runner.h"
@@ -45,8 +46,6 @@
 //   service. This happens in CreateClient.
 
 namespace perfetto {
-void EnableStacktraceOnCrashForDebug();
-
 namespace profiling {
 namespace {
 
@@ -111,7 +110,7 @@
 
   // On debug builds, we want to turn on crash reporting for heapprofd.
 #if PERFETTO_BUILDFLAG(PERFETTO_STDERR_CRASH_DUMP)
-  EnableStacktraceOnCrashForDebug();
+  base::EnableStacktraceOnCrashForDebug();
 #endif
 
   cli_sock.ReleaseFd();
diff --git a/src/profiling/memory/heapprofd_producer.cc b/src/profiling/memory/heapprofd_producer.cc
index fdfccfa..ab506de 100644
--- a/src/profiling/memory/heapprofd_producer.cc
+++ b/src/profiling/memory/heapprofd_producer.cc
@@ -192,9 +192,8 @@
                     HEAPPROFD_HEAP_NAME_SZ - 1);
       continue;
     }
-    strncpy(&cli_config->heaps[n].name[0], heap.c_str(),
-            sizeof(cli_config->heaps[0].name));
-    cli_config->heaps[n].name[sizeof(cli_config->heaps[0].name) - 1] = '\0';
+    base::StringCopy(&cli_config->heaps[n].name[0], heap.c_str(),
+                     sizeof(cli_config->heaps[n].name));
     cli_config->heaps[n].interval = interval;
     n++;
   }
diff --git a/src/profiling/perf/proc_descriptors.cc b/src/profiling/perf/proc_descriptors.cc
index b03d5d0..066cf18 100644
--- a/src/profiling/perf/proc_descriptors.cc
+++ b/src/profiling/perf/proc_descriptors.cc
@@ -21,6 +21,8 @@
 #include <sys/stat.h>
 #include <unistd.h>
 
+#include "perfetto/ext/base/string_utils.h"
+
 namespace perfetto {
 
 ProcDescriptorDelegate::~ProcDescriptorDelegate() {}
@@ -36,20 +38,19 @@
 }
 
 void DirectDescriptorGetter::GetDescriptorsForPid(pid_t pid) {
-  char dir_buf[128] = {};
-  snprintf(dir_buf, sizeof(dir_buf), "/proc/%d", pid);
-  auto dir_fd =
-      base::ScopedFile(open(dir_buf, O_DIRECTORY | O_RDONLY | O_CLOEXEC));
+  base::StackString<128> dir_buf("/proc/%d", pid);
+  auto dir_fd = base::ScopedFile(
+      open(dir_buf.c_str(), O_DIRECTORY | O_RDONLY | O_CLOEXEC));
   if (!dir_fd) {
     if (errno != ENOENT)  // not surprising if the process has quit
-      PERFETTO_PLOG("Failed to open [%s]", dir_buf);
+      PERFETTO_PLOG("Failed to open [%s]", dir_buf.c_str());
 
     return;
   }
 
   struct stat stat_buf;
   if (fstat(dir_fd.get(), &stat_buf) == -1) {
-    PERFETTO_PLOG("Failed to stat [%s]", dir_buf);
+    PERFETTO_PLOG("Failed to stat [%s]", dir_buf.c_str());
     return;
   }
 
@@ -57,7 +58,7 @@
       base::ScopedFile{openat(dir_fd.get(), "maps", O_RDONLY | O_CLOEXEC)};
   if (!maps_fd) {
     if (errno != ENOENT)  // not surprising if the process has quit
-      PERFETTO_PLOG("Failed to open %s/maps", dir_buf);
+      PERFETTO_PLOG("Failed to open %s/maps", dir_buf.c_str());
 
     return;
   }
@@ -66,7 +67,7 @@
       base::ScopedFile{openat(dir_fd.get(), "mem", O_RDONLY | O_CLOEXEC)};
   if (!mem_fd) {
     if (errno != ENOENT)  // not surprising if the process has quit
-      PERFETTO_PLOG("Failed to open %s/mem", dir_buf);
+      PERFETTO_PLOG("Failed to open %s/mem", dir_buf.c_str());
 
     return;
   }
diff --git a/src/protozero/proto_decoder.cc b/src/protozero/proto_decoder.cc
index 97ac429..c13be9a 100644
--- a/src/protozero/proto_decoder.cc
+++ b/src/protozero/proto_decoder.cc
@@ -189,17 +189,26 @@
     res = ParseOneField(cur, end_);
     PERFETTO_DCHECK(res.parse_res != ParseFieldResult::kOk || res.next != cur);
     cur = res.next;
-    if (PERFETTO_UNLIKELY(res.parse_res == ParseFieldResult::kSkip)) {
+    if (PERFETTO_UNLIKELY(res.parse_res == ParseFieldResult::kSkip))
       continue;
-    } else if (PERFETTO_UNLIKELY(res.parse_res == ParseFieldResult::kAbort)) {
+    if (PERFETTO_UNLIKELY(res.parse_res == ParseFieldResult::kAbort))
       break;
-    }
+
     PERFETTO_DCHECK(res.parse_res == ParseFieldResult::kOk);
     PERFETTO_DCHECK(res.field.valid());
     auto field_id = res.field.id();
     if (PERFETTO_UNLIKELY(field_id >= num_fields_))
       continue;
 
+    // There are two reasons why we might want to expand the heap capacity:
+    // 1. We are writing a non-repeated field, which has an id >
+    //    INITIAL_STACK_CAPACITY. In this case ExpandHeapStorage() guarantees
+    //    that at least (num_fields_ + 1) slots are allocated.
+    // 2. We are writing a repeated field but ran out of capacity.
+    if (PERFETTO_UNLIKELY(field_id >= size_ || size_ >= capacity_))
+      ExpandHeapStorage();
+
+    PERFETTO_DCHECK(field_id < size_);
     Field* fld = &fields_[field_id];
     if (PERFETTO_LIKELY(!fld->valid())) {
       // This is the first time we see this field.
@@ -214,12 +223,7 @@
       //    supposed to return the last value of X, not the first one.
       // This is so that the RepeatedFieldIterator will iterate in the right
       // order, see comments on RepeatedFieldIterator.
-      if (PERFETTO_UNLIKELY(size_ >= capacity_)) {
-        ExpandHeapStorage();
-        // ExpandHeapStorage moves fields_ so we need to update the ptr to fld:
-        fld = &fields_[field_id];
-        PERFETTO_DCHECK(size_ < capacity_);
-      }
+      PERFETTO_DCHECK(size_ < capacity_);
       fields_[size_++] = *fld;
       *fld = std::move(res.field);
     }
@@ -228,17 +232,31 @@
 }
 
 void TypedProtoDecoderBase::ExpandHeapStorage() {
-  uint32_t new_capacity = capacity_ * 2;
-  PERFETTO_CHECK(new_capacity > size_);
+  // When we expand the heap we must ensure that we have, at the very least,
+  // capacity for all known fields plus at least one repeated field. We go +32
+  // to avoid trivial re-allocations when dealing with repeated fields of a
+  // message that has > INITIAL_STACK_CAPACITY fields.
+  const uint32_t min_capacity = num_fields_ + 32;  // Any number >= +1 will do.
+  const uint32_t new_capacity = std::max(capacity_ * 2, min_capacity);
+  PERFETTO_CHECK(new_capacity > size_ && new_capacity > num_fields_);
   std::unique_ptr<Field[]> new_storage(new Field[new_capacity]);
 
   static_assert(std::is_trivially_copyable<Field>::value,
                 "Field must be trivially copyable");
+
+  // Zero-initialize the slots for known field IDs, as they can be randomly
+  // accessed. There is no need to initialize the repeated-field slots,
+  // because they are written linearly with no gaps and are always
+  // initialized before incrementing |size_|.
+  const uint32_t new_size = std::max(size_, num_fields_);
+  memset(&new_storage[size_], 0, sizeof(Field) * (new_size - size_));
+
   memcpy(&new_storage[0], fields_, sizeof(Field) * size_);
 
   heap_storage_ = std::move(new_storage);
   fields_ = &heap_storage_[0];
   capacity_ = new_capacity;
+  size_ = new_size;
 }
 
 }  // namespace protozero
diff --git a/src/protozero/proto_decoder_unittest.cc b/src/protozero/proto_decoder_unittest.cc
index 5f40a89..0991f88 100644
--- a/src/protozero/proto_decoder_unittest.cc
+++ b/src/protozero/proto_decoder_unittest.cc
@@ -72,9 +72,10 @@
   wptr = WriteVarInt(MakeTagLengthDelimited(2), wptr);
   wptr = WriteVarInt(kPayloadSize, wptr);
   message->AppendRawProtoBytes(raw, static_cast<size_t>(wptr - raw));
-  const uint8_t padding[1024 * 128]{};
-  for (size_t i = 0; i < kPayloadSize / sizeof(padding); i++)
-    message->AppendRawProtoBytes(padding, sizeof(padding));
+  const size_t kPaddingSize = 1024 * 128;
+  std::unique_ptr<uint8_t[]> padding(new uint8_t[kPaddingSize]());
+  for (size_t i = 0; i < kPayloadSize / kPaddingSize; i++)
+    message->AppendRawProtoBytes(padding.get(), kPaddingSize);
 
   // Append another valid field.
   message->AppendVarInt(/*field_id=*/3, 13);
diff --git a/src/trace_processor/dynamic/experimental_flamegraph_generator.cc b/src/trace_processor/dynamic/experimental_flamegraph_generator.cc
index 960e0d7..559e3ca 100644
--- a/src/trace_processor/dynamic/experimental_flamegraph_generator.cc
+++ b/src/trace_processor/dynamic/experimental_flamegraph_generator.cc
@@ -16,6 +16,9 @@
 
 #include "src/trace_processor/dynamic/experimental_flamegraph_generator.h"
 
+#include <unordered_set>
+
+#include "perfetto/ext/base/string_splitter.h"
 #include "perfetto/ext/base/string_utils.h"
 
 #include "src/trace_processor/importers/proto/heap_graph_tracker.h"
@@ -68,6 +71,10 @@
     return c.col_idx == static_cast<uint32_t>(T::ColumnIndex::upid) &&
            c.op == FilterOp::kEq;
   };
+  auto upid_group_fn = [](const Constraint& c) {
+    return c.col_idx == static_cast<uint32_t>(T::ColumnIndex::upid_group) &&
+           c.op == FilterOp::kEq;
+  };
   auto profile_type_fn = [](const Constraint& c) {
     return c.col_idx == static_cast<uint32_t>(T::ColumnIndex::profile_type) &&
            c.op == FilterOp::kEq;
@@ -79,6 +86,7 @@
 
   auto ts_it = std::find_if(cs.begin(), cs.end(), ts_fn);
   auto upid_it = std::find_if(cs.begin(), cs.end(), upid_fn);
+  auto upid_group_it = std::find_if(cs.begin(), cs.end(), upid_group_fn);
   auto profile_type_it = std::find_if(cs.begin(), cs.end(), profile_type_fn);
   auto focus_str_it = std::find_if(cs.begin(), cs.end(), focus_str_fn);
 
@@ -86,7 +94,7 @@
   // allow the constraint set to be chosen when we have an equality constraint
   // on upid and a constraint on ts.
   PERFETTO_CHECK(ts_it != cs.end());
-  PERFETTO_CHECK(upid_it != cs.end());
+  PERFETTO_CHECK(upid_it != cs.end() || upid_group_it != cs.end());
   PERFETTO_CHECK(profile_type_it != cs.end());
 
   std::string profile_name(profile_type_it->value.AsString());
@@ -110,11 +118,19 @@
     }
   }
 
-  auto upid = static_cast<UniquePid>(upid_it->value.AsLong());
+  base::Optional<UniquePid> upid;
+  base::Optional<std::string> upid_group;
+  if (upid_it != cs.end()) {
+    upid = static_cast<UniquePid>(upid_it->value.AsLong());
+  } else {
+    upid_group = upid_group_it->value.AsString();
+  }
+
   std::string focus_str =
       focus_str_it != cs.end() ? focus_str_it->value.AsString() : "";
   return ExperimentalFlamegraphGenerator::InputValues{
-      profile_type, ts, std::move(time_constraints), upid, focus_str};
+      profile_type, ts,         std::move(time_constraints),
+      upid,         upid_group, focus_str};
 }
 
 class Matcher {
@@ -287,6 +303,13 @@
   };
   bool has_upid_cs = std::find_if(cs.begin(), cs.end(), upid_fn) != cs.end();
 
+  auto upid_group_fn = [](const QueryConstraints::Constraint& c) {
+    return c.column == static_cast<int>(T::ColumnIndex::upid_group) &&
+           c.op == SQLITE_INDEX_CONSTRAINT_EQ;
+  };
+  bool has_upid_group_cs =
+      std::find_if(cs.begin(), cs.end(), upid_group_fn) != cs.end();
+
   auto profile_type_fn = [](const QueryConstraints::Constraint& c) {
     return c.column == static_cast<int>(T::ColumnIndex::profile_type) &&
            c.op == SQLITE_INDEX_CONSTRAINT_EQ;
@@ -294,7 +317,7 @@
   bool has_profile_type_cs =
       std::find_if(cs.begin(), cs.end(), profile_type_fn) != cs.end();
 
-  return has_ts_cs && has_upid_cs && has_profile_type_cs
+  return has_ts_cs && (has_upid_cs || has_upid_group_cs) && has_profile_type_cs
              ? util::OkStatus()
              : util::ErrStatus("Failed to find required constraints");
 }
@@ -308,13 +331,14 @@
   std::unique_ptr<tables::ExperimentalFlamegraphNodesTable> table;
   if (values.profile_type == ProfileType::kGraph) {
     auto* tracker = HeapGraphTracker::GetOrCreate(context_);
-    table = tracker->BuildFlamegraph(values.ts, values.upid);
+    table = tracker->BuildFlamegraph(values.ts, *values.upid);
   } else if (values.profile_type == ProfileType::kNative) {
     table = BuildNativeHeapProfileFlamegraph(context_->storage.get(),
-                                             values.upid, values.ts);
+                                             *values.upid, values.ts);
   } else if (values.profile_type == ProfileType::kPerf) {
     table = BuildNativeCallStackSamplingFlamegraph(
-        context_->storage.get(), values.upid, values.time_constraints);
+        context_->storage.get(), values.upid, values.upid_group,
+        values.time_constraints);
   }
   if (!values.focus_str.empty()) {
     table =
diff --git a/src/trace_processor/dynamic/experimental_flamegraph_generator.h b/src/trace_processor/dynamic/experimental_flamegraph_generator.h
index 78afe81..652193c 100644
--- a/src/trace_processor/dynamic/experimental_flamegraph_generator.h
+++ b/src/trace_processor/dynamic/experimental_flamegraph_generator.h
@@ -36,7 +36,8 @@
     ProfileType profile_type;
     int64_t ts;
     std::vector<TimeConstraints> time_constraints;
-    UniquePid upid;
+    base::Optional<UniquePid> upid;
+    base::Optional<std::string> upid_group;
     std::string focus_str;
   };
 
diff --git a/src/trace_processor/export_json.cc b/src/trace_processor/export_json.cc
index 01f6617..3bfc0fb 100644
--- a/src/trace_processor/export_json.cc
+++ b/src/trace_processor/export_json.cc
@@ -564,6 +564,8 @@
           return base::Uint64ToHexString(variadic.pointer_value);
         case Variadic::kBool:
           return variadic.bool_value;
+        case Variadic::kNull:
+          return base::Uint64ToHexString(0);
         case Variadic::kJson:
           Json::CharReaderBuilder b;
           auto reader = std::unique_ptr<Json::CharReader>(b.newCharReader());
@@ -1414,17 +1416,17 @@
               storage_->symbol_table().name()[*opt_symbol_set_id]);
         }
 
-        char frame_entry[1024];
-        snprintf(frame_entry, sizeof(frame_entry), "%s - %s [%s]\n",
-                 (symbol_name.empty()
-                      ? base::Uint64ToHexString(
-                            static_cast<uint64_t>(frames.rel_pc()[frame_row]))
-                            .c_str()
-                      : symbol_name.c_str()),
-                 GetNonNullString(storage_, mappings.name()[mapping_row]),
-                 GetNonNullString(storage_, mappings.build_id()[mapping_row]));
+        base::StackString<1024> frame_entry(
+            "%s - %s [%s]\n",
+            (symbol_name.empty()
+                 ? base::Uint64ToHexString(
+                       static_cast<uint64_t>(frames.rel_pc()[frame_row]))
+                       .c_str()
+                 : symbol_name.c_str()),
+            GetNonNullString(storage_, mappings.name()[mapping_row]),
+            GetNonNullString(storage_, mappings.build_id()[mapping_row]));
 
-        callstack.emplace_back(frame_entry);
+        callstack.emplace_back(frame_entry.ToStdString());
 
         opt_callsite_id = callsites.parent_id()[callsite_row];
       }
diff --git a/src/trace_processor/importers/common/global_args_tracker.h b/src/trace_processor/importers/common/global_args_tracker.h
index 94b960e..f13eeb8 100644
--- a/src/trace_processor/importers/common/global_args_tracker.h
+++ b/src/trace_processor/importers/common/global_args_tracker.h
@@ -73,6 +73,9 @@
         case Variadic::Type::kJson:
           hash.Update(arg.value.json_value.raw_id());
           break;
+        case Variadic::Type::kNull:
+          hash.Update(0);
+          break;
       }
       return hash.digest();
     }
@@ -150,6 +153,8 @@
         case Variadic::Type::kJson:
           row.string_value = arg.value.json_value;
           break;
+        case Variadic::Type::kNull:
+          break;
       }
       row.value_type = context_->storage->GetIdForVariadicType(arg.value.type);
       arg_table->Insert(row);
diff --git a/src/trace_processor/importers/ftrace/ftrace_parser.cc b/src/trace_processor/importers/ftrace/ftrace_parser.cc
index 425a0ed..c25859e 100644
--- a/src/trace_processor/importers/ftrace/ftrace_parser.cc
+++ b/src/trace_processor/importers/ftrace/ftrace_parser.cc
@@ -56,6 +56,7 @@
 #include "protos/perfetto/trace/ftrace/systrace.pbzero.h"
 #include "protos/perfetto/trace/ftrace/task.pbzero.h"
 #include "protos/perfetto/trace/ftrace/thermal.pbzero.h"
+#include "protos/perfetto/trace/ftrace/vmscan.pbzero.h"
 #include "protos/perfetto/trace/ftrace/workqueue.pbzero.h"
 #include "protos/perfetto/trace/interned_data/interned_data.pbzero.h"
 
@@ -125,6 +126,14 @@
       workqueue_id_(context_->storage->InternString("workqueue")),
       irq_id_(context_->storage->InternString("irq")),
       ret_arg_id_(context_->storage->InternString("ret")),
+      direct_reclaim_nr_reclaimed_id_(
+          context->storage->InternString("direct_reclaim_nr_reclaimed")),
+      direct_reclaim_order_id_(
+          context->storage->InternString("direct_reclaim_order")),
+      direct_reclaim_may_writepage_id_(
+          context->storage->InternString("direct_reclaim_may_writepage")),
+      direct_reclaim_gfp_flags_id_(
+          context->storage->InternString("direct_reclaim_gfp_flags")),
       vec_arg_id_(context->storage->InternString("vec")),
       gpu_mem_total_name_id_(context->storage->InternString("GPU Memory")),
       gpu_mem_total_unit_id_(context->storage->InternString(
@@ -498,6 +507,14 @@
         ParseScmCallEnd(ts, pid, data);
         break;
       }
+      case FtraceEvent::kMmVmscanDirectReclaimBeginFieldNumber: {
+        ParseDirectReclaimBegin(ts, pid, data);
+        break;
+      }
+      case FtraceEvent::kMmVmscanDirectReclaimEndFieldNumber: {
+        ParseDirectReclaimEnd(ts, pid, data);
+        break;
+      }
       case FtraceEvent::kWorkqueueExecuteStartFieldNumber: {
         ParseWorkqueueExecuteStart(ts, pid, data, seq_state);
         break;
@@ -877,14 +894,14 @@
   StringId change_name_id = ion_change_unknown_id_;
 
   if (ion.has_heap_name()) {
-    char counter_name[255];
     base::StringView heap_name = ion.heap_name();
-    snprintf(counter_name, sizeof(counter_name), "mem.ion.%.*s",
-             int(heap_name.size()), heap_name.data());
-    global_name_id = context_->storage->InternString(counter_name);
-    snprintf(counter_name, sizeof(counter_name), "mem.ion_change.%.*s",
-             int(heap_name.size()), heap_name.data());
-    change_name_id = context_->storage->InternString(counter_name);
+    base::StackString<255> ion_name("mem.ion.%.*s", int(heap_name.size()),
+                                    heap_name.data());
+    global_name_id = context_->storage->InternString(ion_name.string_view());
+
+    base::StackString<255> change_name("mem.ion_change.%.*s",
+                                       int(heap_name.size()), heap_name.data());
+    change_name_id = context_->storage->InternString(change_name.string_view());
   }
 
   // Push the global counter.
@@ -1236,11 +1253,10 @@
                              base::StringView clock_name,
                              base::StringView subtitle,
                              uint64_t rate) {
-  char counter_name[255];
-  snprintf(counter_name, sizeof(counter_name), "%.*s %.*s",
-           int(clock_name.size()), clock_name.data(), int(subtitle.size()),
-           subtitle.data());
-  StringId name = context_->storage->InternString(counter_name);
+  base::StackString<255> counter_name("%.*s %.*s", int(clock_name.size()),
+                                      clock_name.data(), int(subtitle.size()),
+                                      subtitle.data());
+  StringId name = context_->storage->InternString(counter_name.c_str());
   TrackId track = context_->track_tracker->InternGlobalCounterTrack(name);
   context_->event_tracker->PushCounter(timestamp, static_cast<double>(rate),
                                        track);
@@ -1268,6 +1284,50 @@
   context_->slice_tracker->End(timestamp, track_id);
 }
 
+void FtraceParser::ParseDirectReclaimBegin(int64_t timestamp,
+                                           uint32_t pid,
+                                           ConstBytes blob) {
+  UniqueTid utid = context_->process_tracker->GetOrCreateThread(pid);
+  TrackId track_id = context_->track_tracker->InternThreadTrack(utid);
+  protos::pbzero::MmVmscanDirectReclaimBeginFtraceEvent::Decoder
+      direct_reclaim_begin(blob.data, blob.size);
+
+  StringId name_id =
+      context_->storage->InternString("mm_vmscan_direct_reclaim");
+
+  auto args_inserter = [this, &direct_reclaim_begin](
+                           ArgsTracker::BoundInserter* inserter) {
+    inserter->AddArg(direct_reclaim_order_id_,
+                     Variadic::Integer(direct_reclaim_begin.order()));
+    inserter->AddArg(direct_reclaim_may_writepage_id_,
+                     Variadic::Integer(direct_reclaim_begin.may_writepage()));
+    inserter->AddArg(
+        direct_reclaim_gfp_flags_id_,
+        Variadic::UnsignedInteger(direct_reclaim_begin.gfp_flags()));
+  };
+  context_->slice_tracker->Begin(timestamp, track_id, kNullStringId, name_id,
+                                 args_inserter);
+}
+
+// Ends the slice on |pid|'s thread track (the track ParseDirectReclaimBegin
+// opens slices on), attaching the event's nr_reclaimed value as an argument.
+void FtraceParser::ParseDirectReclaimEnd(int64_t timestamp,
+                                         uint32_t pid,
+                                         ConstBytes blob) {
+  UniqueTid utid = context_->process_tracker->GetOrCreateThread(pid);
+  TrackId track_id = context_->track_tracker->InternThreadTrack(utid);
+  protos::pbzero::MmVmscanDirectReclaimEndFtraceEvent::Decoder
+      direct_reclaim_end(blob.data, blob.size);
+  auto args_inserter =
+      [this, &direct_reclaim_end](ArgsTracker::BoundInserter* inserter) {
+        inserter->AddArg(
+            direct_reclaim_nr_reclaimed_id_,
+            Variadic::UnsignedInteger(direct_reclaim_end.nr_reclaimed()));
+      };
+  context_->slice_tracker->End(timestamp, track_id, kNullStringId,
+                               kNullStringId, args_inserter);
+}
+
 void FtraceParser::ParseWorkqueueExecuteStart(
     int64_t timestamp,
     uint32_t pid,
@@ -1285,10 +1345,8 @@
     name_id = context_->storage->InternString(
         base::StringView(reinterpret_cast<const char*>(str.data), str.size));
   } else {
-    char slice_name[255];
-    snprintf(slice_name, base::ArraySize(slice_name), "%#" PRIx64,
-             evt.function());
-    name_id = context_->storage->InternString(base::StringView(slice_name));
+    base::StackString<255> slice_name("%#" PRIx64, evt.function());
+    name_id = context_->storage->InternString(slice_name.string_view());
   }
 
   UniqueTid utid = context_->process_tracker->GetOrCreateThread(pid);
@@ -1310,14 +1368,15 @@
                                         int64_t timestamp,
                                         protozero::ConstBytes blob) {
   protos::pbzero::IrqHandlerEntryFtraceEvent::Decoder evt(blob.data, blob.size);
-  char track_name[255];
-  snprintf(track_name, sizeof(track_name), "Irq Cpu %d", cpu);
-  StringId track_name_id = context_->storage->InternString(track_name);
-  char slice_name[255];
+  base::StackString<255> track_name("Irq Cpu %d", cpu);
+  StringId track_name_id =
+      context_->storage->InternString(track_name.string_view());
+
   base::StringView irq_name = evt.name();
-  snprintf(slice_name, sizeof(slice_name), "IRQ (%.*s)", int(irq_name.size()),
-           irq_name.data());
-  StringId slice_name_id = context_->storage->InternString(slice_name);
+  base::StackString<255> slice_name("IRQ (%.*s)", int(irq_name.size()),
+                                    irq_name.data());
+  StringId slice_name_id =
+      context_->storage->InternString(slice_name.string_view());
   TrackId track = context_->track_tracker->InternCpuTrack(track_name_id, cpu);
   context_->slice_tracker->Begin(timestamp, track, irq_id_, slice_name_id);
 }
@@ -1326,14 +1385,13 @@
                                        int64_t timestamp,
                                        protozero::ConstBytes blob) {
   protos::pbzero::IrqHandlerExitFtraceEvent::Decoder evt(blob.data, blob.size);
-  char track_name[255];
-  snprintf(track_name, sizeof(track_name), "Irq Cpu %d", cpu);
-  StringId track_name_id = context_->storage->InternString(track_name);
+  base::StackString<255> track_name("Irq Cpu %d", cpu);
+  StringId track_name_id =
+      context_->storage->InternString(track_name.string_view());
   TrackId track = context_->track_tracker->InternCpuTrack(track_name_id, cpu);
-  char status[255];
-  snprintf(status, sizeof(status), "%s",
-           evt.ret() == 1 ? "handled" : "unhandled");
-  StringId status_id = context_->storage->InternString(status);
+
+  base::StackString<255> status("%s", evt.ret() == 1 ? "handled" : "unhandled");
+  StringId status_id = context_->storage->InternString(status.string_view());
   auto args_inserter = [this,
                         &status_id](ArgsTracker::BoundInserter* inserter) {
     inserter->AddArg(ret_arg_id_, Variadic::String(status_id));
@@ -1345,9 +1403,9 @@
                                      int64_t timestamp,
                                      protozero::ConstBytes blob) {
   protos::pbzero::SoftirqEntryFtraceEvent::Decoder evt(blob.data, blob.size);
-  char track_name[255];
-  snprintf(track_name, sizeof(track_name), "SoftIrq Cpu %d", cpu);
-  StringId track_name_id = context_->storage->InternString(track_name);
+  base::StackString<255> track_name("SoftIrq Cpu %d", cpu);
+  StringId track_name_id =
+      context_->storage->InternString(track_name.string_view());
   auto num_actions = sizeof(kActionNames) / sizeof(*kActionNames);
   if (evt.vec() >= num_actions) {
     PERFETTO_DFATAL("No action name at index %d for softirq event.", evt.vec());
@@ -1363,9 +1421,9 @@
                                     int64_t timestamp,
                                     protozero::ConstBytes blob) {
   protos::pbzero::SoftirqExitFtraceEvent::Decoder evt(blob.data, blob.size);
-  char track_name[255];
-  snprintf(track_name, sizeof(track_name), "SoftIrq Cpu %d", cpu);
-  StringId track_name_id = context_->storage->InternString(track_name);
+  base::StackString<255> track_name("SoftIrq Cpu %d", cpu);
+  StringId track_name_id =
+      context_->storage->InternString(track_name.string_view());
   TrackId track = context_->track_tracker->InternCpuTrack(track_name_id, cpu);
   auto vec = evt.vec();
   auto args_inserter = [this, vec](ArgsTracker::BoundInserter* inserter) {
@@ -1419,11 +1477,10 @@
                                            protozero::ConstBytes blob) {
   protos::pbzero::ThermalTemperatureFtraceEvent::Decoder evt(blob.data,
                                                              blob.size);
-  char counter_name[255];
   base::StringView thermal_zone = evt.thermal_zone();
-  snprintf(counter_name, sizeof(counter_name), "%.*s Temperature",
-           int(thermal_zone.size()), thermal_zone.data());
-  StringId name = context_->storage->InternString(counter_name);
+  base::StackString<255> counter_name(
+      "%.*s Temperature", int(thermal_zone.size()), thermal_zone.data());
+  StringId name = context_->storage->InternString(counter_name.string_view());
   TrackId track = context_->track_tracker->InternGlobalCounterTrack(name);
   context_->event_tracker->PushCounter(timestamp, evt.temp(), track);
 }
@@ -1431,11 +1488,10 @@
 void FtraceParser::ParseCdevUpdate(int64_t timestamp,
                                    protozero::ConstBytes blob) {
   protos::pbzero::CdevUpdateFtraceEvent::Decoder evt(blob.data, blob.size);
-  char counter_name[255];
   base::StringView type = evt.type();
-  snprintf(counter_name, sizeof(counter_name), "%.*s Cooling Device",
-           int(type.size()), type.data());
-  StringId name = context_->storage->InternString(counter_name);
+  base::StackString<255> counter_name("%.*s Cooling Device", int(type.size()),
+                                      type.data());
+  StringId name = context_->storage->InternString(counter_name.string_view());
   TrackId track = context_->track_tracker->InternGlobalCounterTrack(name);
   context_->event_tracker->PushCounter(
       timestamp, static_cast<double>(evt.target()), track);
@@ -1476,18 +1532,16 @@
   if (0 <= evt.cid() && evt.cid() < static_cast<int32_t>(kFastRpcCounterSize)) {
     name = fast_rpc_delta_names_[static_cast<size_t>(evt.cid())];
   } else {
-    char str[64];
-    snprintf(str, sizeof(str), "mem.fastrpc[%" PRId32 "]", evt.cid());
-    name = context_->storage->InternString(str);
+    base::StackString<64> str("mem.fastrpc[%" PRId32 "]", evt.cid());
+    name = context_->storage->InternString(str.string_view());
   }
 
   StringId total_name;
   if (0 <= evt.cid() && evt.cid() < static_cast<int32_t>(kFastRpcCounterSize)) {
     total_name = fast_rpc_total_names_[static_cast<size_t>(evt.cid())];
   } else {
-    char str[64];
-    snprintf(str, sizeof(str), "mem.fastrpc[%" PRId32 "]", evt.cid());
-    total_name = context_->storage->InternString(str);
+    base::StackString<64> str("mem.fastrpc[%" PRId32 "]", evt.cid());
+    total_name = context_->storage->InternString(str.string_view());
   }
 
   // Push the global counter.
diff --git a/src/trace_processor/importers/ftrace/ftrace_parser.h b/src/trace_processor/importers/ftrace/ftrace_parser.h
index 79e1c65..5b5f64b 100644
--- a/src/trace_processor/importers/ftrace/ftrace_parser.h
+++ b/src/trace_processor/importers/ftrace/ftrace_parser.h
@@ -120,6 +120,12 @@
                          uint32_t pid,
                          protozero::ConstBytes);
   void ParseScmCallEnd(int64_t timestamp, uint32_t pid, protozero::ConstBytes);
+  void ParseDirectReclaimBegin(int64_t timestamp,
+                               uint32_t pid,
+                               protozero::ConstBytes);
+  void ParseDirectReclaimEnd(int64_t timestamp,
+                             uint32_t pid,
+                             protozero::ConstBytes);
   void ParseWorkqueueExecuteStart(int64_t timestamp,
                                   uint32_t pid,
                                   protozero::ConstBytes,
@@ -174,6 +180,10 @@
   const StringId workqueue_id_;
   const StringId irq_id_;
   const StringId ret_arg_id_;
+  const StringId direct_reclaim_nr_reclaimed_id_;
+  const StringId direct_reclaim_order_id_;
+  const StringId direct_reclaim_may_writepage_id_;
+  const StringId direct_reclaim_gfp_flags_id_;
   const StringId vec_arg_id_;
   const StringId gpu_mem_total_name_id_;
   const StringId gpu_mem_total_unit_id_;
diff --git a/src/trace_processor/importers/ftrace/ftrace_tokenizer.cc b/src/trace_processor/importers/ftrace/ftrace_tokenizer.cc
index 7253e8a..989a558 100644
--- a/src/trace_processor/importers/ftrace/ftrace_tokenizer.cc
+++ b/src/trace_processor/importers/ftrace/ftrace_tokenizer.cc
@@ -65,11 +65,6 @@
   }
 
   uint32_t cpu = decoder.cpu();
-  if (PERFETTO_UNLIKELY(cpu > kMaxCpus)) {
-    PERFETTO_ELOG("CPU larger than kMaxCpus (%u > %zu)", cpu, kMaxCpus);
-    return base::OkStatus();
-  }
-
   ClockTracker::ClockId clock_id;
   switch (decoder.ftrace_clock()) {
     case FtraceClock::FTRACE_CLOCK_UNSPECIFIED:
diff --git a/src/trace_processor/importers/ftrace/sched_event_tracker.cc b/src/trace_processor/importers/ftrace/sched_event_tracker.cc
index 247c08e..2fc7fc3 100644
--- a/src/trace_processor/importers/ftrace/sched_event_tracker.cc
+++ b/src/trace_processor/importers/ftrace/sched_event_tracker.cc
@@ -58,6 +58,10 @@
         context->storage->InternString(waking_descriptor->fields[i].name);
   }
   sched_waking_id_ = context->storage->InternString(waking_descriptor->name);
+
+  // Pre-allocate space for 128 CPUs, which should be enough for most hosts.
+  // It's OK if this number is too small, the vector will be grown on-demand.
+  pending_sched_per_cpu_.reserve(128);
 }
 
 SchedEventTracker::~SchedEventTracker() = default;
@@ -81,7 +85,6 @@
     return;
   }
   context_->event_tracker->UpdateMaxTimestamp(ts);
-  PERFETTO_DCHECK(cpu < kMaxCpus);
 
   StringId next_comm_id = context_->storage->InternString(next_comm);
   UniqueTid next_utid = context_->process_tracker->UpdateThreadName(
@@ -89,7 +92,7 @@
 
   // First use this data to close the previous slice.
   bool prev_pid_match_prev_next_pid = false;
-  auto* pending_sched = &pending_sched_per_cpu_[cpu];
+  auto* pending_sched = PendingSchedByCPU(cpu);
   uint32_t pending_slice_idx = pending_sched->pending_slice_storage_idx;
   if (pending_slice_idx < std::numeric_limits<uint32_t>::max()) {
     prev_pid_match_prev_next_pid = prev_pid == pending_sched->last_pid;
@@ -135,12 +138,11 @@
     return;
   }
   context_->event_tracker->UpdateMaxTimestamp(ts);
-  PERFETTO_DCHECK(cpu < kMaxCpus);
 
   UniqueTid next_utid = context_->process_tracker->UpdateThreadName(
       next_pid, next_comm_id, ThreadNamePriority::kFtrace);
 
-  auto* pending_sched = &pending_sched_per_cpu_[cpu];
+  auto* pending_sched = PendingSchedByCPU(cpu);
 
   // If we're processing the first compact event for this cpu, don't start a
   // slice since we're missing the "prev_*" fields. The successive events will
@@ -275,14 +277,13 @@
     return;
   }
   context_->event_tracker->UpdateMaxTimestamp(ts);
-  PERFETTO_DCHECK(cpu < kMaxCpus);
 
   // We infer the task that emitted the event (i.e. common_pid) from the
   // scheduling slices. Drop the event if we haven't seen any sched_switch
   // events for this cpu yet.
   // Note that if sched_switch wasn't enabled, we will have to skip all
   // compact waking events.
-  auto* pending_sched = &pending_sched_per_cpu_[cpu];
+  auto* pending_sched = PendingSchedByCPU(cpu);
   if (pending_sched->last_utid == std::numeric_limits<UniqueTid>::max()) {
     context_->storage->IncrementStats(stats::compact_sched_waking_skipped);
     return;
@@ -339,7 +340,11 @@
     slices->mutable_end_state()->Set(row, id);
   }
 
-  pending_sched_per_cpu_ = {};
+  // Re-initialize the pending_sched_per_cpu_ vector with default values, we do
+  // this instead of calling .clear() to avoid having to frequently resize the
+  // vector.
+  std::fill(pending_sched_per_cpu_.begin(), pending_sched_per_cpu_.end(),
+            PendingSchedInfo{});
 }
 
 }  // namespace trace_processor
diff --git a/src/trace_processor/importers/ftrace/sched_event_tracker.h b/src/trace_processor/importers/ftrace/sched_event_tracker.h
index a4e8602..eae4bac 100644
--- a/src/trace_processor/importers/ftrace/sched_event_tracker.h
+++ b/src/trace_processor/importers/ftrace/sched_event_tracker.h
@@ -110,8 +110,21 @@
 
   void ClosePendingSlice(uint32_t slice_idx, int64_t ts, int64_t prev_state);
 
-  // Infromation retained from the preceding sched_switch seen on a given cpu.
-  std::array<PendingSchedInfo, kMaxCpus> pending_sched_per_cpu_{};
+  // Information retained from the preceding sched_switch seen on a given cpu.
+  // Indexed by cpu number; grown on demand by PendingSchedByCPU().
+  std::vector<PendingSchedInfo> pending_sched_per_cpu_;
+
+  // Get the sched info for the given CPU, resizing the vector if necessary.
+  // The pointer is invalidated by any later call that grows the vector.
+  PendingSchedInfo* PendingSchedByCPU(uint32_t cpu) {
+    // Widen to size_t before adding 1: in uint32_t arithmetic cpu == UINT32_MAX
+    // wraps to 0, which would empty the vector and index it out of bounds.
+    const size_t index = cpu;
+    if (PERFETTO_UNLIKELY(index >= pending_sched_per_cpu_.size())) {
+      pending_sched_per_cpu_.resize(index + 1);
+    }
+    return &pending_sched_per_cpu_[index];
+  }
 
   static constexpr uint8_t kSchedSwitchMaxFieldId = 7;
   std::array<StringId, kSchedSwitchMaxFieldId + 1> sched_switch_field_ids_;
diff --git a/src/trace_processor/importers/memory_tracker/memory_allocator_node_id.cc b/src/trace_processor/importers/memory_tracker/memory_allocator_node_id.cc
index a52c7fa..6fe4d4a 100644
--- a/src/trace_processor/importers/memory_tracker/memory_allocator_node_id.cc
+++ b/src/trace_processor/importers/memory_tracker/memory_allocator_node_id.cc
@@ -21,6 +21,7 @@
 #include <cinttypes>
 
 #include "perfetto/base/logging.h"
+#include "perfetto/ext/base/string_utils.h"
 
 namespace perfetto {
 namespace trace_processor {
@@ -33,9 +34,10 @@
-  size_t max_size = 19;  // Max uint64 is 0xFFFFFFFFFFFFFFFF + 1 for null byte.
+  // id_ is printed in decimal: the max uint64 (18446744073709551615) has 20
+  // digits, + 1 for null byte.
+  size_t max_size = 21;
   std::string buf;
   buf.resize(max_size);
-  auto final_size = snprintf(&buf[0], max_size, "%" PRIu64, id_);
-  PERFETTO_DCHECK(final_size >= 0);
-  buf.resize(static_cast<size_t>(final_size));  // Cuts off the final null byte.
+  size_t final_size = base::SprintfTrunc(&buf[0], max_size, "%" PRIu64, id_);
+  buf.resize(final_size);  // Cuts off the final null byte.
   return buf;
 }
 
diff --git a/src/trace_processor/importers/ninja/ninja_log_parser.cc b/src/trace_processor/importers/ninja/ninja_log_parser.cc
index af668f1..1e90419 100644
--- a/src/trace_processor/importers/ninja/ninja_log_parser.cc
+++ b/src/trace_processor/importers/ninja/ninja_log_parser.cc
@@ -167,9 +167,8 @@
     } else {
       // All workers are busy, allocate a new one.
       uint32_t worker_id = ++last_worker_id;
-      char name[32];
-      snprintf(name, sizeof(name), "Worker %zu", workers.size() + 1);
-      StringId name_id = ctx_->storage->InternString(name);
+      base::StackString<32> name("Worker %zu", workers.size() + 1);
+      StringId name_id = ctx_->storage->InternString(name.string_view());
       auto utid = ctx_->process_tracker->UpdateThread(worker_id, job.build_id);
       ctx_->process_tracker->UpdateThreadNameByUtid(utid, name_id,
                                                     ThreadNamePriority::kOther);
diff --git a/src/trace_processor/importers/proto/android_probes_parser.cc b/src/trace_processor/importers/proto/android_probes_parser.cc
index c001cb5..882191d 100644
--- a/src/trace_processor/importers/proto/android_probes_parser.cc
+++ b/src/trace_processor/importers/proto/android_probes_parser.cc
@@ -17,6 +17,7 @@
 #include "src/trace_processor/importers/proto/android_probes_parser.h"
 
 #include "perfetto/base/logging.h"
+#include "perfetto/ext/base/string_utils.h"
 #include "perfetto/ext/traced/sys_stats_counters.h"
 #include "src/trace_processor/importers/common/args_tracker.h"
 #include "src/trace_processor/importers/common/clock_tracker.h"
@@ -140,24 +141,27 @@
   char* arg_str = &arg_msg[0];
   *arg_str = '\0';
   auto arg_avail = [&arg_msg, &arg_str]() {
-    return sizeof(arg_msg) - static_cast<size_t>(arg_str - arg_msg);
+    size_t used = static_cast<size_t>(arg_str - arg_msg);
+    PERFETTO_CHECK(used <= sizeof(arg_msg));
+    return sizeof(arg_msg) - used;
   };
   for (auto it = evt.args(); it; ++it) {
     protos::pbzero::AndroidLogPacket::LogEvent::Arg::Decoder arg(*it);
     if (!arg.has_name())
       continue;
-    arg_str +=
-        snprintf(arg_str, arg_avail(),
-                 " %.*s=", static_cast<int>(arg.name().size), arg.name().data);
+    arg_str += base::SprintfTrunc(arg_str, arg_avail(),
+                                  " %.*s=", static_cast<int>(arg.name().size),
+                                  arg.name().data);
     if (arg.has_string_value()) {
-      arg_str += snprintf(arg_str, arg_avail(), "\"%.*s\"",
-                          static_cast<int>(arg.string_value().size),
-                          arg.string_value().data);
+      arg_str += base::SprintfTrunc(arg_str, arg_avail(), "\"%.*s\"",
+                                    static_cast<int>(arg.string_value().size),
+                                    arg.string_value().data);
     } else if (arg.has_int_value()) {
-      arg_str += snprintf(arg_str, arg_avail(), "%" PRId64, arg.int_value());
+      arg_str +=
+          base::SprintfTrunc(arg_str, arg_avail(), "%" PRId64, arg.int_value());
     } else if (arg.has_float_value()) {
-      arg_str += snprintf(arg_str, arg_avail(), "%f",
-                          static_cast<double>(arg.float_value()));
+      arg_str += base::SprintfTrunc(arg_str, arg_avail(), "%f",
+                                    static_cast<double>(arg.float_value()));
     }
   }
 
diff --git a/src/trace_processor/importers/proto/flamegraph_construction_algorithms.cc b/src/trace_processor/importers/proto/flamegraph_construction_algorithms.cc
index 5882e12..1579f4a 100644
--- a/src/trace_processor/importers/proto/flamegraph_construction_algorithms.cc
+++ b/src/trace_processor/importers/proto/flamegraph_construction_algorithms.cc
@@ -16,6 +16,12 @@
 
 #include "flamegraph_construction_algorithms.h"
 
+#include <set>
+#include <unordered_set>
+
+#include "perfetto/ext/base/string_splitter.h"
+#include "perfetto/ext/base/string_utils.h"
+
 namespace perfetto {
 namespace trace_processor {
 
@@ -85,7 +91,8 @@
 
 static FlamegraphTableAndMergedCallsites BuildFlamegraphTableTreeStructure(
     TraceStorage* storage,
-    UniquePid upid,
+    base::Optional<UniquePid> upid,
+    base::Optional<std::string> upid_group,
     base::Optional<int64_t> timestamp,
     StringId profile_type) {
   const tables::StackProfileCallsiteTable& callsites_tbl =
@@ -141,7 +148,12 @@
         if (timestamp) {
           row.ts = *timestamp;
         }
-        row.upid = upid;
+        if (upid) {
+          row.upid = *upid;
+        }
+        if (upid_group) {
+          row.upid_group = storage->InternString(base::StringView(*upid_group));
+        }
         row.profile_type = profile_type;
         row.name = merged_callsite.frame_name;
         row.map_name = merged_callsite.mapping_name;
@@ -322,8 +334,8 @@
   }
   StringId profile_type = storage->InternString("native");
   FlamegraphTableAndMergedCallsites table_and_callsites =
-      BuildFlamegraphTableTreeStructure(
-          storage, upid, base::make_optional(timestamp), profile_type);
+      BuildFlamegraphTableTreeStructure(storage, upid, base::nullopt, timestamp,
+                                        profile_type);
   return BuildFlamegraphTableHeapSizeAndCount(
       std::move(table_and_callsites.tbl),
       table_and_callsites.callsite_to_merged_callsite, filtered);
@@ -332,17 +344,37 @@
 std::unique_ptr<tables::ExperimentalFlamegraphNodesTable>
 BuildNativeCallStackSamplingFlamegraph(
     TraceStorage* storage,
-    UniquePid upid,
+    base::Optional<UniquePid> upid,
+    base::Optional<std::string> upid_group,
     const std::vector<TimeConstraints>& time_constraints) {
-  // 1.Create set of all utids mapped to the given upid
+  // 1.Extract required upids from input (empty if neither argument is set).
+  std::unordered_set<UniquePid> upids;
+  if (upid) {
+    upids.insert(*upid);
+  } else if (upid_group) {
+    for (base::StringSplitter sp(*upid_group, ','); sp.Next();) {
+      base::Optional<uint32_t> maybe = base::CStringToUInt32(sp.cur_token());
+      if (maybe) {
+        upids.insert(*maybe);
+      }
+    }
+  }
+
+  // 2.Create set of all utids mapped to the given set of upids
   std::set<tables::ThreadTable::Id> utids;
-  RowMap threads_in_pid_rm = storage->thread_table().FilterToRowMap(
-      {storage->thread_table().upid().eq(upid)});
+  RowMap threads_in_pid_rm;
+  for (uint32_t i = 0; i < storage->thread_table().row_count(); ++i) {
+    base::Optional<uint32_t> row_upid = storage->thread_table().upid()[i];
+    if (row_upid && upids.count(*row_upid) > 0) {
+      threads_in_pid_rm.Insert(i);
+    }
+  }
+
   for (auto it = threads_in_pid_rm.IterateRows(); it; it.Next()) {
     utids.insert(storage->thread_table().id()[it.row()]);
   }
 
-  // 2.Get all row indices in perf_sample that correspond to the requested utids
+  // 3.Get all row indices in perf_sample that correspond to the requested utids
   std::vector<uint32_t> cs_rows;
   for (uint32_t i = 0; i < storage->perf_sample_table().row_count(); ++i) {
     if (utids.find(static_cast<tables::ThreadTable::Id>(
@@ -351,15 +383,16 @@
     }
   }
 
-  // 3.Filter rows that correspond to the selected utids
+  // 4.Filter rows that correspond to the selected utids
   RowMap filtered_rm = RowMap(std::move(cs_rows));
   Table filtered = storage->perf_sample_table().Apply(std::move(filtered_rm));
 
-  // 4.Filter rows by time constraints
+  // 5.Filter rows by time constraints
   for (const auto& tc : time_constraints) {
     if (!(tc.op == FilterOp::kGt || tc.op == FilterOp::kLt ||
           tc.op == FilterOp::kGe || tc.op == FilterOp::kLe)) {
-      PERFETTO_FATAL("Filter operation %d not permitted for perf.", tc.op);
+      PERFETTO_FATAL("Filter operation %d not permitted for perf.",
+                     static_cast<int>(tc.op));
     }
     Constraint cs = Constraint{
         static_cast<uint32_t>(tables::PerfSampleTable::ColumnIndex::ts), tc.op,
@@ -372,8 +405,8 @@
   }
   StringId profile_type = storage->InternString("perf");
   FlamegraphTableAndMergedCallsites table_and_callsites =
-      BuildFlamegraphTableTreeStructure(storage, upid, base::nullopt,
-                                        profile_type);
+      BuildFlamegraphTableTreeStructure(storage, upid, upid_group,
+                                        base::nullopt, profile_type);
   return BuildFlamegraphTableCallstackSizeAndCount(
       std::move(table_and_callsites.tbl),
       table_and_callsites.callsite_to_merged_callsite, filtered);
diff --git a/src/trace_processor/importers/proto/flamegraph_construction_algorithms.h b/src/trace_processor/importers/proto/flamegraph_construction_algorithms.h
index f5cf8b1..b175565 100644
--- a/src/trace_processor/importers/proto/flamegraph_construction_algorithms.h
+++ b/src/trace_processor/importers/proto/flamegraph_construction_algorithms.h
@@ -17,8 +17,6 @@
 #ifndef SRC_TRACE_PROCESSOR_IMPORTERS_PROTO_FLAMEGRAPH_CONSTRUCTION_ALGORITHMS_H_
 #define SRC_TRACE_PROCESSOR_IMPORTERS_PROTO_FLAMEGRAPH_CONSTRUCTION_ALGORITHMS_H_
 
-#include <set>
-
 #include "src/trace_processor/storage/trace_storage.h"
 
 namespace perfetto {
@@ -38,7 +36,8 @@
 std::unique_ptr<tables::ExperimentalFlamegraphNodesTable>
 BuildNativeCallStackSamplingFlamegraph(
     TraceStorage* storage,
-    UniquePid upid,
+    base::Optional<UniquePid> upid,
+    base::Optional<std::string> upid_group,
     const std::vector<TimeConstraints>& time_constraints);
 }  // namespace trace_processor
 }  // namespace perfetto
diff --git a/src/trace_processor/importers/proto/gpu_event_parser.cc b/src/trace_processor/importers/proto/gpu_event_parser.cc
index 8211b6c..414a820 100644
--- a/src/trace_processor/importers/proto/gpu_event_parser.cc
+++ b/src/trace_processor/importers/proto/gpu_event_parser.cc
@@ -238,9 +238,8 @@
     if (stage_id < gpu_render_stage_ids_.size()) {
       stage_name = gpu_render_stage_ids_[static_cast<size_t>(stage_id)].first;
     } else {
-      char buffer[64];
-      snprintf(buffer, sizeof(buffer), "render stage(%" PRIu64 ")", stage_id);
-      stage_name = context_->storage->InternString(buffer);
+      base::StackString<64> name("render stage(%" PRIu64 ")", stage_id);
+      stage_name = context_->storage->InternString(name.string_view());
     }
   }
   return stage_name;
diff --git a/src/trace_processor/importers/proto/heap_graph_tracker_unittest.cc b/src/trace_processor/importers/proto/heap_graph_tracker_unittest.cc
index 26b0ca5..d1293fe 100644
--- a/src/trace_processor/importers/proto/heap_graph_tracker_unittest.cc
+++ b/src/trace_processor/importers/proto/heap_graph_tracker_unittest.cc
@@ -28,18 +28,18 @@
 using ::testing::UnorderedElementsAre;
 
 TEST(HeapGraphTrackerTest, PackageFromLocationApp) {
-  TraceStorage storage;
+  std::unique_ptr<TraceStorage> storage(new TraceStorage());
   EXPECT_EQ(
-      PackageFromLocation(&storage,
+      PackageFromLocation(storage.get(),
                           "/data/app/~~ASDFGH1234QWerT==/"
                           "com.twitter.android-MNBVCX7890SDTst6==/test.apk"),
       "com.twitter.android");
   EXPECT_EQ(PackageFromLocation(
-                &storage,
+                storage.get(),
                 "/data/app/com.google.android.webview-6XfQhnaSkFwGK0sYL9is0G==/"
                 "base.apk"),
             "com.google.android.webview");
-  EXPECT_EQ(PackageFromLocation(&storage,
+  EXPECT_EQ(PackageFromLocation(storage.get(),
                                 "/data/app/"
                                 "com.google.android.apps.wellbeing-"
                                 "qfQCaB4uJ7P0OPpZQqOu0Q==/oat/arm64/base.odex"),
diff --git a/src/trace_processor/importers/proto/metadata_tracker.cc b/src/trace_processor/importers/proto/metadata_tracker.cc
index a0f9c3d..a71bfb2 100644
--- a/src/trace_processor/importers/proto/metadata_tracker.cc
+++ b/src/trace_processor/importers/proto/metadata_tracker.cc
@@ -110,6 +110,7 @@
     case Variadic::Type::kPointer:
     case Variadic::Type::kUint:
     case Variadic::Type::kReal:
+    case Variadic::Type::kNull:
       PERFETTO_FATAL("Unsupported value type");
   }
 }
diff --git a/src/trace_processor/importers/proto/perf_sample_tracker.cc b/src/trace_processor/importers/proto/perf_sample_tracker.cc
index 13e9b19..042f33c 100644
--- a/src/trace_processor/importers/proto/perf_sample_tracker.cc
+++ b/src/trace_processor/importers/proto/perf_sample_tracker.cc
@@ -19,6 +19,7 @@
 
 #include <cinttypes>
 
+#include "perfetto/ext/base/string_utils.h"
 #include "src/trace_processor/importers/common/track_tracker.h"
 #include "src/trace_processor/storage/trace_storage.h"
 #include "src/trace_processor/types/trace_processor_context.h"
@@ -72,11 +73,10 @@
     PerfEvents::RawEvent::Decoder raw(timebase.raw_event());
     // This doesn't follow any pre-existing naming scheme, but aims to be a
     // short-enough default that is distinguishable.
-    char buf[128] = {};
-    snprintf(buf, sizeof(buf),
-             "raw.0x%" PRIx32 ".0x%" PRIx64 ".0x%" PRIx64 ".0x%" PRIx64,
-             raw.type(), raw.config(), raw.config1(), raw.config2());
-    return context->storage->InternString(buf);
+    base::StackString<128> name(
+        "raw.0x%" PRIx32 ".0x%" PRIx64 ".0x%" PRIx64 ".0x%" PRIx64, raw.type(),
+        raw.config(), raw.config1(), raw.config2());
+    return context->storage->InternString(name.string_view());
   }
 
   PERFETTO_DLOG("Could not name the perf timebase counter");
diff --git a/src/trace_processor/importers/proto/system_probes_parser.cc b/src/trace_processor/importers/proto/system_probes_parser.cc
index 214c0fa..c4fe3f5 100644
--- a/src/trace_processor/importers/proto/system_probes_parser.cc
+++ b/src/trace_processor/importers/proto/system_probes_parser.cc
@@ -124,10 +124,10 @@
     auto key = static_cast<base::StringView>(vm.key());
     // Append " Frequency" to align names with FtraceParser::ParseClockSetRate
     base::StringView devfreq_subtitle("Frequency");
-    char counter_name[255];
-    snprintf(counter_name, sizeof(counter_name), "%.*s %.*s", int(key.size()),
-             key.data(), int(devfreq_subtitle.size()), devfreq_subtitle.data());
-    StringId name = context_->storage->InternString(counter_name);
+    base::StackString<255> counter_name(
+        "%.*s %.*s", int(key.size()), key.data(), int(devfreq_subtitle.size()),
+        devfreq_subtitle.data());
+    StringId name = context_->storage->InternString(counter_name.string_view());
     TrackId track = context_->track_tracker->InternGlobalCounterTrack(name);
     context_->event_tracker->PushCounter(ts, static_cast<double>(vm.value()),
                                          track);
diff --git a/src/trace_processor/importers/proto/track_event_parser.cc b/src/trace_processor/importers/proto/track_event_parser.cc
index 6be84c5..5b14db0 100644
--- a/src/trace_processor/importers/proto/track_event_parser.cc
+++ b/src/trace_processor/importers/proto/track_event_parser.cc
@@ -116,6 +116,11 @@
                                     base::StringView(key.key), &storage_,
                                     &inserter_);
   }
+  void AddNull(const Key& key) final {
+    inserter_.AddArg(storage_.InternString(base::StringView(key.flat_key)),
+                     storage_.InternString(base::StringView(key.key)),
+                     Variadic::Null());
+  }
 
   size_t GetArrayEntryIndex(const std::string& array_key) final {
     return inserter_.GetNextArrayEntryIndex(
diff --git a/src/trace_processor/metrics/BUILD.gn b/src/trace_processor/metrics/BUILD.gn
index 8311cde..72eb2e4 100644
--- a/src/trace_processor/metrics/BUILD.gn
+++ b/src/trace_processor/metrics/BUILD.gn
@@ -19,6 +19,7 @@
   "trace_metadata.sql",
   "trace_stats.sql",
   "android/android_batt.sql",
+  "android/android_camera.sql",
   "android/android_cpu.sql",
   "android/android_surfaceflinger.sql",
   "android/android_cpu_agg.sql",
@@ -38,6 +39,7 @@
   "android/android_lmk.sql",
   "android/android_powrails.sql",
   "android/android_proxy_power.sql",
+  "android/android_simpleperf.sql",
   "android/android_startup_launches.sql",
   "android/android_startup.sql",
   "android/android_package_list.sql",
diff --git a/src/trace_processor/metrics/android/android_camera.sql b/src/trace_processor/metrics/android/android_camera.sql
new file mode 100644
index 0000000..f8ba2af
--- /dev/null
+++ b/src/trace_processor/metrics/android/android_camera.sql
@@ -0,0 +1,117 @@
+--
+-- Copyright 2019 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+-- This gives us access to the RSS breakdowns.
+SELECT RUN_METRIC('android/process_mem.sql');
+
+-- Compute DMA spans.
+SELECT RUN_METRIC('android/global_counter_span_view.sql',
+  'table_name', 'dma',
+   'counter_name', 'mem.dma_heap');
+
+-- RSS of GCA.
+DROP VIEW IF EXISTS rss_gca;
+CREATE VIEW rss_gca AS
+SELECT ts, dur, rss_val AS gca_rss_val
+FROM rss_and_swap_span
+JOIN (
+  SELECT max(start_ts), upid
+  FROM process
+  WHERE name LIKE '%GoogleCamera%'
+  LIMIT 1
+) AS gca USING (upid);
+
+-- RSS of camera HAL.
+DROP VIEW IF EXISTS rss_camera_hal;
+CREATE VIEW rss_camera_hal AS
+SELECT ts, dur, rss_val AS hal_rss_val
+FROM rss_and_swap_span
+JOIN (
+  SELECT max(start_ts), upid
+  FROM process
+  WHERE name LIKE '%camera.provider%'
+  LIMIT 1
+) AS hal USING (upid);
+
+-- RSS of cameraserver.
+DROP VIEW IF EXISTS rss_cameraserver;
+CREATE VIEW rss_cameraserver AS
+SELECT ts, dur, rss_val AS cameraserver_rss_val
+FROM rss_and_swap_span
+JOIN (
+  SELECT max(start_ts), upid
+  FROM process
+  WHERE name = 'cameraserver'
+  LIMIT 1
+) AS cameraserver USING (upid);
+
+-- RSS of GCA + HAL.
+DROP TABLE IF EXISTS rss_gca_hal;
+CREATE VIRTUAL TABLE rss_gca_hal
+USING SPAN_OUTER_JOIN(rss_gca, rss_camera_hal);
+
+-- RSS of GCA + HAL + cameraserver.
+DROP TABLE IF EXISTS rss_all_camera;
+CREATE VIRTUAL TABLE rss_all_camera
+USING SPAN_OUTER_JOIN(rss_gca_hal, rss_cameraserver);
+
+-- RSS of GCA + HAL + cameraserver + DMA.
+DROP TABLE IF EXISTS rss_and_dma_all_camera_join;
+CREATE VIRTUAL TABLE rss_and_dma_all_camera_join
+USING SPAN_OUTER_JOIN(dma_span, rss_all_camera);
+
+DROP VIEW IF EXISTS rss_and_dma_all_camera_span;
+CREATE VIEW rss_and_dma_all_camera_span AS
+SELECT
+  ts,
+  dur,
+  CAST(
+    IFNULL(gca_rss_val, 0) +
+    IFNULL(hal_rss_val, 0) +
+    IFNULL(cameraserver_rss_val, 0) +
+    IFNULL(dma_val, 0) AS int) AS rss_and_dma_val
+FROM rss_and_dma_all_camera_join;
+
+-- We divide by 1e3, which makes each summand a real, when calculating avg_value
+-- to avoid issues such as the one in b/203613535.
+DROP VIEW IF EXISTS rss_and_dma_all_camera_stats;
+CREATE VIEW rss_and_dma_all_camera_stats AS
+SELECT
+  MIN(rss_and_dma_val) AS min_value,
+  MAX(rss_and_dma_val) AS max_value,
+  SUM(rss_and_dma_val * dur / 1e3) / SUM(dur / 1e3) AS avg_value
+FROM rss_and_dma_all_camera_span;
+
+DROP VIEW IF EXISTS android_camera_event;
+CREATE VIEW android_camera_event AS
+SELECT
+  'counter' AS track_type,
+  'Camera Memory' AS track_name,
+  ts,
+  rss_and_dma_val AS value
+FROM rss_and_dma_all_camera_span;
+
+DROP VIEW IF EXISTS android_camera_output;
+CREATE VIEW android_camera_output AS
+SELECT
+  AndroidCameraMetric(
+    'gc_rss_and_dma', AndroidCameraMetric_Counter(
+      'min', CAST(min_value AS real),
+      'max', CAST(max_value AS real),
+      'avg', CAST(avg_value AS real)
+    )
+  )
+FROM rss_and_dma_all_camera_stats;
diff --git a/src/trace_processor/metrics/android/android_simpleperf.sql b/src/trace_processor/metrics/android/android_simpleperf.sql
new file mode 100644
index 0000000..fa58575
--- /dev/null
+++ b/src/trace_processor/metrics/android/android_simpleperf.sql
@@ -0,0 +1,40 @@
+--
+-- Copyright 2021 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+
+
+-- Find all counters whose name matches the pattern 'slc/qurg2_(wr|rd):lvl=0x(0|1|3|7)%' (approximated with LIKE wildcards).
+DROP VIEW IF EXISTS all_qurg2;
+CREATE VIEW all_qurg2 AS
+SELECT
+  ts,
+  track_id,
+  name,
+  value
+FROM counters
+WHERE name LIKE 'slc/qurg2\___:lvl=0x_%' ESCAPE '\';
+
+-- Keep only the counters whose name matches 'slc/qurg2_(wr|rd):lvl=0x(1|3|7)%', i.e. drop the lvl=0x0 ones.
+DROP VIEW IF EXISTS non_zero_qurg2;
+CREATE VIEW non_zero_qurg2 AS
+SELECT
+  *
+FROM all_qurg2
+WHERE name NOT LIKE 'slc/qurg2\___:lvl=0x0%' ESCAPE '\';
+
+DROP VIEW IF EXISTS android_simpleperf_output;
+CREATE VIEW android_simpleperf_output AS
+SELECT AndroidSimpleperfMetric(
+  'urgent_ratio', (SELECT sum(value) FROM non_zero_qurg2) / (SELECT sum(value) FROM all_qurg2)
+);
diff --git a/src/trace_processor/metrics/android/android_sysui_cuj_jank_query.sql b/src/trace_processor/metrics/android/android_sysui_cuj_jank_query.sql
index 0ed0465..1a4e691 100644
--- a/src/trace_processor/metrics/android/android_sysui_cuj_jank_query.sql
+++ b/src/trace_processor/metrics/android/android_sysui_cuj_jank_query.sql
@@ -33,7 +33,7 @@
 
 DROP VIEW IF EXISTS {{table_name_prefix}}_per_cuj_output_data;
 CREATE VIEW {{table_name_prefix}}_per_cuj_output_data AS
-SELECT SUM(dur) as dur_sum
+SELECT SUM(dur) as dur_sum, MAX(dur) as dur_max
 FROM {{table_name_prefix}}_cuj_join_table;
 
 DROP VIEW IF EXISTS {{table_name_prefix}}_per_frame_output_data;
@@ -43,7 +43,8 @@
 f.vsync,
 f.dur_frame,
 f.app_missed,
-SUM(jt.dur) as dur_sum
+SUM(jt.dur) as dur_sum,
+MAX(jt.dur) as dur_max
 FROM android_sysui_cuj_missed_frames f
 JOIN {{table_name_prefix}}_frame_join_table jt USING (frame_number)
 GROUP BY f.frame_number, f.vsync, f.dur_frame, f.app_missed;
diff --git a/src/trace_processor/metrics/android/process_mem.sql b/src/trace_processor/metrics/android/process_mem.sql
index af2d726..908f11d 100644
--- a/src/trace_processor/metrics/android/process_mem.sql
+++ b/src/trace_processor/metrics/android/process_mem.sql
@@ -68,16 +68,20 @@
 DROP VIEW IF EXISTS rss_and_swap_span;
 CREATE VIEW rss_and_swap_span AS
 SELECT
-ts, dur, upid,
-CAST(IFNULL(file_rss_val, 0) AS INT) file_rss_val,
-CAST(IFNULL(anon_rss_val, 0) AS INT) anon_rss_val,
-CAST(IFNULL(shmem_rss_val, 0) AS INT) shmem_rss_val,
-CAST(IFNULL(swap_val, 0) AS INT) swap_val,
-CAST(
-  IFNULL(anon_rss_val, 0)
-  + IFNULL(swap_val, 0)
-  + IFNULL(file_rss_val, 0)
-  + IFNULL(shmem_rss_val, 0) AS int) AS rss_and_swap_val
+  ts, dur, upid,
+  CAST(IFNULL(file_rss_val, 0) AS INT) file_rss_val,
+  CAST(IFNULL(anon_rss_val, 0) AS INT) anon_rss_val,
+  CAST(IFNULL(shmem_rss_val, 0) AS INT) shmem_rss_val,
+  CAST(IFNULL(swap_val, 0) AS INT) swap_val,
+  CAST(
+    IFNULL(anon_rss_val, 0) +
+    IFNULL(file_rss_val, 0) +
+    IFNULL(shmem_rss_val, 0) AS int) AS rss_val,
+  CAST(
+    IFNULL(anon_rss_val, 0)
+    + IFNULL(swap_val, 0)
+    + IFNULL(file_rss_val, 0)
+    + IFNULL(shmem_rss_val, 0) AS int) AS rss_and_swap_val
 FROM rss_and_swap_join;
 
 -- If we have dalvik events enabled (for ART trace points) we can construct the java heap timeline.
diff --git a/src/trace_processor/metrics/metrics.cc b/src/trace_processor/metrics/metrics.cc
index 6c09c58..f0c706b 100644
--- a/src/trace_processor/metrics/metrics.cc
+++ b/src/trace_processor/metrics/metrics.cc
@@ -685,8 +685,12 @@
   }
 
   for (const auto& query : base::SplitString(sql, ";\n")) {
+    const auto& trimmed = base::TrimLeading(query);
+    if (trimmed.empty())
+      continue;
+
     std::string buffer;
-    int ret = TemplateReplace(query, substitutions, &buffer);
+    int ret = TemplateReplace(trimmed, substitutions, &buffer);
     if (ret) {
       char* error = sqlite3_mprintf(
           "RUN_METRIC: Error when performing substitutions: %s", query.c_str());
@@ -777,12 +781,13 @@
       return base::ErrStatus("Unknown metric %s", name.c_str());
 
     const auto& sql_metric = *metric_it;
-    auto queries = base::SplitString(sql_metric.sql, ";\n");
-    for (const auto& query : queries) {
-      PERFETTO_DLOG("Executing query: %s", query.c_str());
-      auto prep_it = tp->ExecuteQuery(query);
-      prep_it.Next();
-      RETURN_IF_ERROR(prep_it.Status());
+    for (const auto& outer : base::SplitString(sql_metric.sql, ";\n")) {
+      for (const auto& query : base::SplitString(outer, ";\r\n")) {
+        PERFETTO_DLOG("Executing query: %s", query.c_str());
+        auto prep_it = tp->ExecuteQuery(query);
+        prep_it.Next();
+        RETURN_IF_ERROR(prep_it.Status());
+      }
     }
 
     auto output_query =
diff --git a/src/trace_processor/python/perfetto/trace_processor/api.py b/src/trace_processor/python/perfetto/trace_processor/api.py
index 8a31b81..1bf499f 100644
--- a/src/trace_processor/python/perfetto/trace_processor/api.py
+++ b/src/trace_processor/python/perfetto/trace_processor/api.py
@@ -112,27 +112,26 @@
         import numpy as np
         import pandas as pd
 
-        df = pd.DataFrame(columns=self.__column_names)
-
         # Populate the dataframe with the query results
+        rows = []
         for i in range(0, self.__count):
           row = []
           base_cell_index = i * self.__column_count
-          for num, column_name in enumerate(self.__column_names):
+          for num in range(len(self.__column_names)):
             col_type = self.__cells[base_cell_index + num]
             if col_type == TraceProcessor.QUERY_CELL_INVALID_FIELD_ID:
               raise TraceProcessorException('Invalid cell type')
-            elif col_type != TraceProcessor.QUERY_CELL_NULL_FIELD_ID:
+
+            if col_type == TraceProcessor.QUERY_CELL_NULL_FIELD_ID:
+              row.append(None)
+            else:
               col_index = self.__data_lists_index[col_type]
               self.__data_lists_index[col_type] += 1
               row.append(self.__data_lists[col_type][col_index])
-            else:
-              row.append(None)
+          rows.append(row)
 
-          df.loc[-1] = row
-          df.index = df.index + 1
-        ordered_df = df.reset_index(drop=True)
-        return ordered_df
+        df = pd.DataFrame(rows, columns=self.__column_names)
+        return df.where(df.notnull(), None).reset_index(drop=True)
 
       except ModuleNotFoundError:
         raise TraceProcessorException(
diff --git a/src/trace_processor/python/perfetto/trace_processor/metrics.descriptor b/src/trace_processor/python/perfetto/trace_processor/metrics.descriptor
index c909308..6bb7dbf 100644
--- a/src/trace_processor/python/perfetto/trace_processor/metrics.descriptor
+++ b/src/trace_processor/python/perfetto/trace_processor/metrics.descriptor
Binary files differ
diff --git a/src/trace_processor/python/perfetto/trace_processor/metrics.descriptor.sha1 b/src/trace_processor/python/perfetto/trace_processor/metrics.descriptor.sha1
index 8f474c4..b00e60a 100644
--- a/src/trace_processor/python/perfetto/trace_processor/metrics.descriptor.sha1
+++ b/src/trace_processor/python/perfetto/trace_processor/metrics.descriptor.sha1
@@ -2,5 +2,5 @@
 // SHA1(tools/gen_binary_descriptors)
 // 9fc6d77de57ec76a80b76aa282f4c7cf5ce55eec
 // SHA1(protos/perfetto/metrics/metrics.proto)
-// d861d7b0dd079940872852cbb4a4dfc3d67ac672
+// 8130cb1a62e55e42bb97a682ca0503161460b4ee
   
\ No newline at end of file
diff --git a/src/trace_processor/python/setup.py b/src/trace_processor/python/setup.py
index c826824..3c1598e 100644
--- a/src/trace_processor/python/setup.py
+++ b/src/trace_processor/python/setup.py
@@ -5,13 +5,13 @@
     packages=['perfetto', 'perfetto.trace_processor'],
     package_data={'perfetto.trace_processor': ['*.descriptor']},
     include_package_data=True,
-    version='0.2.9',
+    version='0.3.0',
     license='apache-2.0',
     description='Python API for Perfetto\'s Trace Processor',
     author='Perfetto',
     author_email='perfetto-pypi@google.com',
     url='https://perfetto.dev/',
-    download_url='https://github.com/google/perfetto/archive/v6.0.tar.gz',
+    download_url='https://github.com/google/perfetto/archive/refs/tags/v20.1.tar.gz',
     keywords=['trace processor', 'tracing', 'perfetto'],
     install_requires=[
         'protobuf',
diff --git a/src/trace_processor/rpc/httpd.cc b/src/trace_processor/rpc/httpd.cc
index b46bd87..654c566 100644
--- a/src/trace_processor/rpc/httpd.cc
+++ b/src/trace_processor/rpc/httpd.cc
@@ -351,8 +351,8 @@
 
   if (req.uri == "/rpc") {
     // Start the chunked reply.
-    strncpy(transfer_encoding_hdr, "Transfer-Encoding: chunked",
-            sizeof(transfer_encoding_hdr));
+    base::StringCopy(transfer_encoding_hdr, "Transfer-Encoding: chunked",
+                     sizeof(transfer_encoding_hdr));
     base::UnixSocket* cli_sock = client->sock.get();
     HttpReply(cli_sock, "200 OK", headers, nullptr, kOmitContentLength);
 
@@ -405,8 +405,8 @@
     std::vector<uint8_t> response;
 
     // Start the chunked reply.
-    strncpy(transfer_encoding_hdr, "Transfer-Encoding: chunked",
-            sizeof(transfer_encoding_hdr));
+    base::StringCopy(transfer_encoding_hdr, "Transfer-Encoding: chunked",
+                     sizeof(transfer_encoding_hdr));
     base::UnixSocket* cli_sock = client->sock.get();
     HttpReply(cli_sock, "200 OK", headers, nullptr, kOmitContentLength);
 
diff --git a/src/trace_processor/sqlite/BUILD.gn b/src/trace_processor/sqlite/BUILD.gn
index 71b6918..e19fbbf 100644
--- a/src/trace_processor/sqlite/BUILD.gn
+++ b/src/trace_processor/sqlite/BUILD.gn
@@ -51,6 +51,7 @@
       "../importers/common",
       "../storage",
       "../types",
+      "../util",
     ]
   }
 
diff --git a/src/trace_processor/sqlite/query_constraints.cc b/src/trace_processor/sqlite/query_constraints.cc
index 9ff242f..942fe47 100644
--- a/src/trace_processor/sqlite/query_constraints.cc
+++ b/src/trace_processor/sqlite/query_constraints.cc
@@ -21,6 +21,7 @@
 #include <string>
 
 #include "perfetto/ext/base/string_splitter.h"
+#include "perfetto/ext/base/string_utils.h"
 
 namespace perfetto {
 namespace trace_processor {
@@ -100,9 +101,7 @@
   // The last char is a "," so overwriting with the null terminator on purpose.
   SqliteString result(
       static_cast<char*>(sqlite3_malloc(static_cast<int>(str_result.size()))));
-  strncpy(result.get(), str_result.c_str(), str_result.size());
-  (*result)[str_result.size() - 1] = '\0';
-
+  base::StringCopy(result.get(), str_result.c_str(), str_result.size());
   return result;
 }
 
diff --git a/src/trace_processor/sqlite/span_join_operator_table.cc b/src/trace_processor/sqlite/span_join_operator_table.cc
index 13fea5e..37a41ba 100644
--- a/src/trace_processor/sqlite/span_join_operator_table.cc
+++ b/src/trace_processor/sqlite/span_join_operator_table.cc
@@ -29,6 +29,7 @@
 #include "perfetto/ext/base/string_view.h"
 #include "src/trace_processor/sqlite/sqlite_utils.h"
 #include "src/trace_processor/tp_metatrace.h"
+#include "src/trace_processor/util/status_macros.h"
 
 namespace perfetto {
 namespace trace_processor {
@@ -144,10 +145,6 @@
     partitioning_ = t1_desc.IsPartitioned()
                         ? PartitioningType::kSamePartitioning
                         : PartitioningType::kNoPartitioning;
-    if (partitioning_ == PartitioningType::kNoPartitioning && IsOuterJoin()) {
-      return util::ErrStatus(
-          "SPAN_JOIN: Outer join not supported for no partition tables");
-    }
   } else if (t1_desc.IsPartitioned() && t2_desc.IsPartitioned()) {
     return util::ErrStatus(
         "SPAN_JOIN: mismatching partitions between the two tables; "
@@ -155,10 +152,6 @@
         t1_desc.partition_col.c_str(), t1_desc.name.c_str(),
         t2_desc.partition_col.c_str(), t2_desc.name.c_str());
   } else {
-    if (IsOuterJoin()) {
-      return util::ErrStatus(
-          "SPAN_JOIN: Outer join not supported for mixed partitioned tables");
-    }
     partitioning_ = PartitioningType::kMixedPartitioning;
   }
 
@@ -421,16 +414,24 @@
                                           FilterHistory) {
   PERFETTO_TP_TRACE("SPAN_JOIN_XFILTER");
 
-  util::Status status =
-      t1_.Initialize(qc, argv, Query::InitialEofBehavior::kTreatAsEof);
+  bool t1_partitioned_mixed =
+      t1_.definition()->IsPartitioned() &&
+      table_->partitioning_ == PartitioningType::kMixedPartitioning;
+  auto t1_eof = table_->IsOuterJoin() && !t1_partitioned_mixed
+                    ? Query::InitialEofBehavior::kTreatAsMissingPartitionShadow
+                    : Query::InitialEofBehavior::kTreatAsEof;
+  util::Status status = t1_.Initialize(qc, argv, t1_eof);
   if (!status.ok())
     return SQLITE_ERROR;
 
-  status = t2_.Initialize(
-      qc, argv,
-      table_->IsLeftJoin()
+  bool t2_partitioned_mixed =
+      t2_.definition()->IsPartitioned() &&
+      table_->partitioning_ == PartitioningType::kMixedPartitioning;
+  auto t2_eof =
+      (table_->IsLeftJoin() || table_->IsOuterJoin()) && !t2_partitioned_mixed
           ? Query::InitialEofBehavior::kTreatAsMissingPartitionShadow
-          : Query::InitialEofBehavior::kTreatAsEof);
+          : Query::InitialEofBehavior::kTreatAsEof;
+  status = t2_.Initialize(qc, argv, t2_eof);
   if (!status.ok())
     return SQLITE_ERROR;
 
@@ -636,9 +637,7 @@
 }
 
 util::Status SpanJoinOperatorTable::Query::Next() {
-  util::Status status = NextSliceState();
-  if (!status.ok())
-    return status;
+  RETURN_IF_ERROR(NextSliceState());
   return FindNextValidSlice();
 }
 
@@ -671,9 +670,7 @@
   // This has proved to be a lot cleaner to implement than trying to choose
   // when to emit and not emit shadows directly.
   while (!IsEof() && !IsValidSlice()) {
-    util::Status status = NextSliceState();
-    if (!status.ok())
-      return status;
+    RETURN_IF_ERROR(NextSliceState());
   }
   return util::OkStatus();
 }
@@ -682,9 +679,7 @@
   switch (state_) {
     case State::kReal: {
       // Forward the cursor to figure out where the next slice should be.
-      util::Status status = CursorNext();
-      if (!status.ok())
-        return status;
+      RETURN_IF_ERROR(CursorNext());
 
       // Depending on the next slice, we can do two things here:
       // 1. If the next slice is on the same partition, we can just emit a
@@ -763,9 +758,7 @@
   if (res != SQLITE_OK)
     return util::ErrStatus("%s", sqlite3_errmsg(db_));
 
-  util::Status status = CursorNext();
-  if (!status.ok())
-    return status;
+  RETURN_IF_ERROR(CursorNext());
 
   // Setup the first slice as a missing partition shadow from the lowest
   // partition until the first slice partition. We will handle finding the real
@@ -798,13 +791,17 @@
       res = sqlite3_step(stmt);
       row_type = sqlite3_column_type(stmt, partition_idx);
     } while (res == SQLITE_ROW && row_type == SQLITE_NULL);
+
+    if (res == SQLITE_ROW && row_type != SQLITE_INTEGER) {
+      return util::ErrStatus("SPAN_JOIN: partition is not an int");
+    }
   } else {
     res = sqlite3_step(stmt);
   }
   cursor_eof_ = res != SQLITE_ROW;
   return res == SQLITE_ROW || res == SQLITE_DONE
              ? util::OkStatus()
-             : util::ErrStatus("%s", sqlite3_errmsg(db_));
+             : util::ErrStatus("SPAN_JOIN: %s", sqlite3_errmsg(db_));
 }
 
 std::string SpanJoinOperatorTable::Query::CreateSqlQuery(
diff --git a/src/trace_processor/sqlite/span_join_operator_table.h b/src/trace_processor/sqlite/span_join_operator_table.h
index ebfb867..6fba96d 100644
--- a/src/trace_processor/sqlite/span_join_operator_table.h
+++ b/src/trace_processor/sqlite/span_join_operator_table.h
@@ -39,7 +39,7 @@
 // Implements the SPAN JOIN operation between two tables on a particular column.
 //
 // Span:
-// A span is a row with a timestamp and a duration. It can is used to model
+// A span is a row with a timestamp and a duration. It is used to model
 // operations which run for a particular *span* of time.
 //
 // We draw spans like so (time on the x-axis):
diff --git a/src/trace_processor/sqlite/sqlite_raw_table.cc b/src/trace_processor/sqlite/sqlite_raw_table.cc
index 9594887..ec4c263 100644
--- a/src/trace_processor/sqlite/sqlite_raw_table.cc
+++ b/src/trace_processor/sqlite/sqlite_raw_table.cc
@@ -515,6 +515,9 @@
       writer_->AppendString(str.c_str(), str.size());
       break;
     }
+    case Variadic::kNull:
+      writer_->AppendLiteral("[NULL]");
+      break;
   }
 }
 
@@ -555,10 +558,8 @@
 
   auto str = serializer_.SerializeToString(row);
   if (str.get() == nullptr) {
-    char buffer[1024];
-    snprintf(buffer, base::ArraySize(buffer),
-             "to_ftrace: Cannot serialize row with id %u", row);
-    sqlite3_result_error(ctx, buffer, -1);
+    base::StackString<128> err("to_ftrace: Cannot serialize row id %u", row);
+    sqlite3_result_error(ctx, err.c_str(), -1);
     return;
   }
 
diff --git a/src/trace_processor/sqlite/sqlite_utils.h b/src/trace_processor/sqlite/sqlite_utils.h
index 889ad95..750fb5c 100644
--- a/src/trace_processor/sqlite/sqlite_utils.h
+++ b/src/trace_processor/sqlite/sqlite_utils.h
@@ -370,11 +370,12 @@
 
   // Support names which are table valued functions with arguments.
   std::string table_name = raw_table_name.substr(0, raw_table_name.find('('));
-  int n = snprintf(sql, sizeof(sql), kRawSql, table_name.c_str());
-  PERFETTO_DCHECK(n >= 0 || static_cast<size_t>(n) < sizeof(sql));
+  size_t n = base::SprintfTrunc(sql, sizeof(sql), kRawSql, table_name.c_str());
+  PERFETTO_DCHECK(n > 0);
 
   sqlite3_stmt* raw_stmt = nullptr;
-  int err = sqlite3_prepare_v2(db, sql, n, &raw_stmt, nullptr);
+  int err =
+      sqlite3_prepare_v2(db, sql, static_cast<int>(n), &raw_stmt, nullptr);
   if (err != SQLITE_OK) {
     return util::ErrStatus("Preparing database failed");
   }
diff --git a/src/trace_processor/storage/trace_storage.h b/src/trace_processor/storage/trace_storage.h
index e2af2ce..f092c74 100644
--- a/src/trace_processor/storage/trace_storage.h
+++ b/src/trace_processor/storage/trace_storage.h
@@ -688,6 +688,8 @@
         v.json_value = opt_value ? *opt_value : kNullStringId;
         break;
       }
+      case Variadic::Type::kNull:
+        break;
     }
     return v;
   }
diff --git a/src/trace_processor/tables/profiler_tables.h b/src/trace_processor/tables/profiler_tables.h
index a42a6d9..74cc789 100644
--- a/src/trace_processor/tables/profiler_tables.h
+++ b/src/trace_processor/tables/profiler_tables.h
@@ -290,7 +290,8 @@
   C(int64_t, cumulative_alloc_size)                                       \
   C(base::Optional<ExperimentalFlamegraphNodesTable::Id>, parent_id)      \
   C(base::Optional<StringPool::Id>, source_file)                          \
-  C(base::Optional<uint32_t>, line_number)
+  C(base::Optional<uint32_t>, line_number)                                \
+  C(base::Optional<StringPool::Id>, upid_group)
 
 PERFETTO_TP_TABLE(PERFETTO_TP_EXPERIMENTAL_FLAMEGRAPH_NODES);
 
diff --git a/src/trace_processor/tables/slice_tables.h b/src/trace_processor/tables/slice_tables.h
index 8605ea9..9b00bb9 100644
--- a/src/trace_processor/tables/slice_tables.h
+++ b/src/trace_processor/tables/slice_tables.h
@@ -26,6 +26,8 @@
 
 // @name slice
 // @tablegroup Events
+// @param ts timestamp of the start of the slice (in nanoseconds)
+// @param dur duration of the slice (in nanoseconds)
 // @param arg_set_id {@joinable args.arg_set_id}
 #define PERFETTO_TP_SLICE_TABLE_DEF(NAME, PARENT, C) \
   NAME(SliceTable, "internal_slice")                 \
@@ -44,6 +46,7 @@
 PERFETTO_TP_TABLE(PERFETTO_TP_SLICE_TABLE_DEF);
 
 // @tablegroup Events
+// @param ts timestamp of the instant event (in nanoseconds)
 // @param arg_set_id {@joinable args.arg_set_id}
 #define PERFETTO_TP_INSTANT_TABLE_DEF(NAME, PARENT, C) \
   NAME(InstantTable, "instant")                        \
@@ -57,6 +60,8 @@
 PERFETTO_TP_TABLE(PERFETTO_TP_INSTANT_TABLE_DEF);
 
 // @tablegroup Events
+// @param ts timestamp of the start of the slice (in nanoseconds)
+// @param dur duration of the slice (in nanoseconds)
 // @param utid {@joinable thread.utid}
 #define PERFETTO_TP_SCHED_SLICE_TABLE_DEF(NAME, PARENT, C) \
   NAME(SchedSliceTable, "sched_slice")                     \
diff --git a/src/trace_processor/trace_processor_impl.cc b/src/trace_processor/trace_processor_impl.cc
index ef98469..6d02dfa 100644
--- a/src/trace_processor/trace_processor_impl.cc
+++ b/src/trace_processor/trace_processor_impl.cc
@@ -599,6 +599,9 @@
     case Variadic::kReal:
       sqlite3_result_double(ctx, opt_value->real_value);
       break;
+    case Variadic::kNull:
+      sqlite3_result_null(ctx);
+      break;
   }
 }
 
diff --git a/src/trace_processor/trace_processor_shell.cc b/src/trace_processor/trace_processor_shell.cc
index 8caa693..5454b7e 100644
--- a/src/trace_processor/trace_processor_shell.cc
+++ b/src/trace_processor/trace_processor_shell.cc
@@ -651,10 +651,10 @@
                            base::TimeNanos t_load,
                            base::TimeNanos t_run) {
   char buf[128];
-  int count = snprintf(buf, sizeof(buf), "%" PRId64 ",%" PRId64,
-                       static_cast<int64_t>(t_load.count()),
-                       static_cast<int64_t>(t_run.count()));
-  if (count < 0) {
+  size_t count = base::SprintfTrunc(buf, sizeof(buf), "%" PRId64 ",%" PRId64,
+                                    static_cast<int64_t>(t_load.count()),
+                                    static_cast<int64_t>(t_run.count()));
+  if (count == 0) {
     return util::ErrStatus("Failed to write perf data");
   }
 
@@ -662,7 +662,7 @@
   if (!fd) {
     return util::ErrStatus("Failed to open perf file");
   }
-  base::WriteAll(fd.get(), buf, static_cast<size_t>(count));
+  base::WriteAll(fd.get(), buf, count);
   return util::OkStatus();
 }
 
diff --git a/src/trace_processor/types/variadic.h b/src/trace_processor/types/variadic.h
index 7a929e5..8cf7cac 100644
--- a/src/trace_processor/types/variadic.h
+++ b/src/trace_processor/types/variadic.h
@@ -32,11 +32,12 @@
     kPointer,
     kBool,
     kJson,
-    kMaxType = kJson,
+    kNull,
+    kMaxType = kNull,
   };
 
   static constexpr const char* const kTypeNames[] = {
-      "int", "uint", "string", "real", "pointer", "bool", "json"};
+      "int", "uint", "string", "real", "pointer", "bool", "json", "null"};
 
   static Variadic Integer(int64_t int_value) {
     Variadic variadic;
@@ -95,6 +96,12 @@
     return variadic;
   }
 
+  static Variadic Null() {
+    Variadic variadic;
+    variadic.type = Type::kNull;
+    return variadic;
+  }
+
   // Used in tests.
   bool operator==(const Variadic& other) const {
     if (type == other.type) {
@@ -113,6 +120,8 @@
           return bool_value == other.bool_value;
         case kJson:
           return json_value == other.json_value;
+        case kNull:
+          return true;
       }
     }
     return false;
diff --git a/src/trace_processor/util/debug_annotation_parser_unittest.cc b/src/trace_processor/util/debug_annotation_parser_unittest.cc
index e3f3380..b23518a 100644
--- a/src/trace_processor/util/debug_annotation_parser_unittest.cc
+++ b/src/trace_processor/util/debug_annotation_parser_unittest.cc
@@ -99,6 +99,12 @@
     return true;
   }
 
+  void AddNull(const Key& key) override {
+    std::stringstream ss;
+    ss << key.flat_key << " " << key.key << " [NULL]";
+    args_.push_back(ss.str());
+  }
+
   size_t GetArrayEntryIndex(const std::string& array_key) final {
     return array_indices_[array_key];
   }
diff --git a/src/trace_processor/util/proto_to_args_parser.cc b/src/trace_processor/util/proto_to_args_parser.cc
index 96e066d..a17b5dc 100644
--- a/src/trace_processor/util/proto_to_args_parser.cc
+++ b/src/trace_processor/util/proto_to_args_parser.cc
@@ -104,9 +104,12 @@
 
   std::unordered_map<size_t, int> repeated_field_index;
 
+  bool empty_message = true;
+
   protozero::ProtoDecoder decoder(cb);
   for (protozero::Field f = decoder.ReadField(); f.valid();
        f = decoder.ReadField()) {
+    empty_message = false;
     auto field = descriptor.FindFieldByTag(f.id());
     if (!field) {
       // Unknown field, possibly an unknown extension.
@@ -131,6 +134,10 @@
     }
   }
 
+  if (empty_message) {
+    delegate.AddNull(key_prefix_);
+  }
+
   return base::OkStatus();
 }
 
diff --git a/src/trace_processor/util/proto_to_args_parser.h b/src/trace_processor/util/proto_to_args_parser.h
index 629ab1f..2ec4f1c 100644
--- a/src/trace_processor/util/proto_to_args_parser.h
+++ b/src/trace_processor/util/proto_to_args_parser.h
@@ -86,6 +86,7 @@
     // Returns whether an entry was added or not.
     virtual bool AddJson(const Key& key,
                          const protozero::ConstChars& value) = 0;
+    virtual void AddNull(const Key& key) = 0;
 
     virtual size_t GetArrayEntryIndex(const std::string& array_key) = 0;
     virtual size_t IncrementArrayEntryIndex(const std::string& array_key) = 0;
diff --git a/src/trace_processor/util/proto_to_args_parser_unittest.cc b/src/trace_processor/util/proto_to_args_parser_unittest.cc
index 8171742..47d2d27 100644
--- a/src/trace_processor/util/proto_to_args_parser_unittest.cc
+++ b/src/trace_processor/util/proto_to_args_parser_unittest.cc
@@ -99,6 +99,12 @@
     return true;
   }
 
+  void AddNull(const Key& key) override {
+    std::stringstream ss;
+    ss << key.flat_key << " " << key.key << " [NULL]";
+    args_.push_back(ss.str());
+  }
+
   size_t GetArrayEntryIndex(const std::string&) final { return 0; }
 
   size_t IncrementArrayEntryIndex(const std::string&) final { return 0; }
@@ -439,6 +445,29 @@
   EXPECT_THAT(args(), testing::ElementsAre("arg arg override-for-field"));
 }
 
+TEST_F(ProtoToArgsParserTest, EmptyMessage) {
+  using namespace protozero::test::protos::pbzero;
+  protozero::HeapBuffered<NestedA> msg{kChunkSize, kChunkSize};
+  msg->set_super_nested();
+
+  auto binary_proto = msg.SerializeAsArray();
+
+  DescriptorPool pool;
+  auto status = pool.AddFromFileDescriptorSet(kTestMessagesDescriptor.data(),
+                                              kTestMessagesDescriptor.size());
+  ASSERT_TRUE(status.ok()) << "Failed to parse kTestMessagesDescriptor: "
+                           << status.message();
+
+  ProtoToArgsParser parser(pool);
+  status = parser.ParseMessage(
+      protozero::ConstBytes{binary_proto.data(), binary_proto.size()},
+      ".protozero.test.protos.NestedA", nullptr, *this);
+  EXPECT_TRUE(status.ok())
+      << "InternProtoFieldsIntoArgsTable failed with error: "
+      << status.message();
+  EXPECT_THAT(args(), testing::ElementsAre("super_nested super_nested [NULL]"));
+}
+
 }  // namespace
 }  // namespace util
 }  // namespace trace_processor
diff --git a/src/trace_processor/util/proto_to_json.cc b/src/trace_processor/util/proto_to_json.cc
index ea57dd6..700357f 100644
--- a/src/trace_processor/util/proto_to_json.cc
+++ b/src/trace_processor/util/proto_to_json.cc
@@ -53,9 +53,8 @@
 
           // Print |c| as a hex character. We reserve 3 bytes of space: 2 for
           // the hex code and one for the null terminator.
-          char buf[3];
-          snprintf(buf, sizeof(buf), "%02X", c);
-          ret += buf;
+          base::StackString<3> buf("%02X", c);
+          ret += buf.c_str();
         } else {
           // Everything else can be passed through directly.
           ret += c;
diff --git a/src/traced/probes/ftrace/cpu_reader.cc b/src/traced/probes/ftrace/cpu_reader.cc
index c5f9168..21139c6 100644
--- a/src/traced/probes/ftrace/cpu_reader.cc
+++ b/src/traced/probes/ftrace/cpu_reader.cc
@@ -25,6 +25,7 @@
 
 #include "perfetto/base/build_config.h"
 #include "perfetto/base/logging.h"
+#include "perfetto/ext/base/crash_keys.h"
 #include "perfetto/ext/base/metatrace.h"
 #include "perfetto/ext/base/optional.h"
 #include "perfetto/ext/base/utils.h"
@@ -61,6 +62,8 @@
 constexpr uint32_t kTypeTimeExtend = 30;
 constexpr uint32_t kTypeTimeStamp = 31;
 
+base::CrashKey g_crash_key_cpu("ftrace_cpu");
+
 struct EventHeader {
   uint32_t type_or_length : 5;
   uint32_t time_delta : 27;
@@ -160,6 +163,7 @@
     size_t max_pages,
     const std::set<FtraceDataSource*>& started_data_sources) {
   PERFETTO_DCHECK(max_pages > 0 && parsing_buf_size_pages > 0);
+  auto scoped_key = g_crash_key_cpu.SetScoped(static_cast<int>(cpu_));
   metatrace::ScopedEvent evt(metatrace::TAG_FTRACE,
                              metatrace::FTRACE_CPU_READ_CYCLE);
 
diff --git a/src/traced/probes/ftrace/cpu_reader_benchmark.cc b/src/traced/probes/ftrace/cpu_reader_benchmark.cc
index 6a647a9..f6f7038 100644
--- a/src/traced/probes/ftrace/cpu_reader_benchmark.cc
+++ b/src/traced/probes/ftrace/cpu_reader_benchmark.cc
@@ -328,7 +328,8 @@
   while (state.KeepRunning()) {
     writer.Reset(&stream);
 
-    CompactSchedBuffer compact_buffer;
+    std::unique_ptr<CompactSchedBuffer> compact_buffer(
+        new CompactSchedBuffer());
     const uint8_t* parse_pos = page.get();
     perfetto::base::Optional<CpuReader::PageHeader> page_header =
         CpuReader::ParsePageHeader(&parse_pos, table->page_header_size_len());
@@ -337,7 +338,7 @@
       return;
 
     CpuReader::ParsePagePayload(parse_pos, &page_header.value(), table,
-                                &ds_config, &compact_buffer, &writer,
+                                &ds_config, compact_buffer.get(), &writer,
                                 &metadata);
 
     metadata.Clear();
diff --git a/src/traced/probes/ftrace/cpu_reader_unittest.cc b/src/traced/probes/ftrace/cpu_reader_unittest.cc
index 2669d4e..ba5cc87 100644
--- a/src/traced/probes/ftrace/cpu_reader_unittest.cc
+++ b/src/traced/probes/ftrace/cpu_reader_unittest.cc
@@ -386,7 +386,7 @@
       table->EventToFtraceId(GroupAndName("ftrace", "print")));
 
   FtraceMetadata metadata{};
-  CompactSchedBuffer compact_buffer;
+  std::unique_ptr<CompactSchedBuffer> compact_buffer(new CompactSchedBuffer());
   const uint8_t* parse_pos = page.get();
   base::Optional<CpuReader::PageHeader> page_header =
       CpuReader::ParsePageHeader(&parse_pos, table->page_header_size_len());
@@ -399,7 +399,7 @@
   EXPECT_TRUE(parse_pos + page_header->size < page_end);
 
   size_t evt_bytes = CpuReader::ParsePagePayload(
-      parse_pos, &page_header.value(), table, &ds_config, &compact_buffer,
+      parse_pos, &page_header.value(), table, &ds_config, compact_buffer.get(),
       bundle_provider.writer(), &metadata);
 
   EXPECT_EQ(evt_bytes, 44ul);
@@ -514,7 +514,8 @@
       table->EventToFtraceId(GroupAndName("ftrace", "print")));
 
   FtraceMetadata metadata{};
-  CompactSchedBuffer compact_buffer;
+  std::unique_ptr<CompactSchedBuffer> compact_buffer(new CompactSchedBuffer());
+
   const uint8_t* parse_pos = page.get();
   base::Optional<CpuReader::PageHeader> page_header =
       CpuReader::ParsePageHeader(&parse_pos, table->page_header_size_len());
@@ -526,7 +527,7 @@
   EXPECT_TRUE(parse_pos + page_header->size < page_end);
 
   CpuReader::ParsePagePayload(parse_pos, &page_header.value(), table,
-                              &ds_config, &compact_buffer,
+                              &ds_config, compact_buffer.get(),
                               bundle_provider.writer(), &metadata);
 
   auto bundle = bundle_provider.ParseProto();
@@ -563,7 +564,7 @@
       table->EventToFtraceId(GroupAndName("ftrace", "print")));
 
   FtraceMetadata metadata{};
-  CompactSchedBuffer compact_buffer;
+  std::unique_ptr<CompactSchedBuffer> compact_buffer(new CompactSchedBuffer());
   const uint8_t* parse_pos = page.get();
   base::Optional<CpuReader::PageHeader> page_header =
       CpuReader::ParsePageHeader(&parse_pos, table->page_header_size_len());
@@ -575,7 +576,7 @@
   EXPECT_TRUE(parse_pos + page_header->size < page_end);
 
   size_t evt_bytes = CpuReader::ParsePagePayload(
-      parse_pos, &page_header.value(), table, &ds_config, &compact_buffer,
+      parse_pos, &page_header.value(), table, &ds_config, compact_buffer.get(),
       bundle_provider.writer(), &metadata);
 
   ASSERT_EQ(0u, evt_bytes);
@@ -601,7 +602,7 @@
   FtraceDataSourceConfig ds_config = EmptyConfig();
 
   FtraceMetadata metadata{};
-  CompactSchedBuffer compact_buffer;
+  std::unique_ptr<CompactSchedBuffer> compact_buffer(new CompactSchedBuffer());
   const uint8_t* parse_pos = page.get();
   base::Optional<CpuReader::PageHeader> page_header =
       CpuReader::ParsePageHeader(&parse_pos, table->page_header_size_len());
@@ -610,7 +611,7 @@
   EXPECT_FALSE(page_header->lost_events);
 
   size_t evt_bytes = CpuReader::ParsePagePayload(
-      parse_pos, &page_header.value(), table, &ds_config, &compact_buffer,
+      parse_pos, &page_header.value(), table, &ds_config, compact_buffer.get(),
       bundle_provider.writer(), &metadata);
 
   EXPECT_LT(0u, evt_bytes);
@@ -666,7 +667,7 @@
       table->EventToFtraceId(GroupAndName("ftrace", "print")));
 
   FtraceMetadata metadata{};
-  CompactSchedBuffer compact_buffer;
+  std::unique_ptr<CompactSchedBuffer> compact_buffer(new CompactSchedBuffer());
   const uint8_t* parse_pos = page.get();
   base::Optional<CpuReader::PageHeader> page_header =
       CpuReader::ParsePageHeader(&parse_pos, table->page_header_size_len());
@@ -678,7 +679,7 @@
   EXPECT_TRUE(parse_pos + page_header->size < page_end);
 
   size_t evt_bytes = CpuReader::ParsePagePayload(
-      parse_pos, &page_header.value(), table, &ds_config, &compact_buffer,
+      parse_pos, &page_header.value(), table, &ds_config, compact_buffer.get(),
       bundle_provider.writer(), &metadata);
 
   EXPECT_LT(0u, evt_bytes);
@@ -775,7 +776,7 @@
       table->EventToFtraceId(GroupAndName("sched", "sched_switch")));
 
   FtraceMetadata metadata{};
-  CompactSchedBuffer compact_buffer;
+  std::unique_ptr<CompactSchedBuffer> compact_buffer(new CompactSchedBuffer());
   const uint8_t* parse_pos = page.get();
   base::Optional<CpuReader::PageHeader> page_header =
       CpuReader::ParsePageHeader(&parse_pos, table->page_header_size_len());
@@ -787,7 +788,7 @@
   EXPECT_TRUE(parse_pos + page_header->size < page_end);
 
   size_t evt_bytes = CpuReader::ParsePagePayload(
-      parse_pos, &page_header.value(), table, &ds_config, &compact_buffer,
+      parse_pos, &page_header.value(), table, &ds_config, compact_buffer.get(),
       bundle_provider.writer(), &metadata);
 
   EXPECT_LT(0u, evt_bytes);
@@ -825,7 +826,7 @@
       table->EventToFtraceId(GroupAndName("sched", "sched_switch")));
 
   FtraceMetadata metadata{};
-  CompactSchedBuffer compact_buffer;
+  std::unique_ptr<CompactSchedBuffer> compact_buffer(new CompactSchedBuffer());
   const uint8_t* parse_pos = page.get();
   base::Optional<CpuReader::PageHeader> page_header =
       CpuReader::ParsePageHeader(&parse_pos, table->page_header_size_len());
@@ -837,7 +838,7 @@
   EXPECT_TRUE(parse_pos + page_header->size < page_end);
 
   size_t evt_bytes = CpuReader::ParsePagePayload(
-      parse_pos, &page_header.value(), table, &ds_config, &compact_buffer,
+      parse_pos, &page_header.value(), table, &ds_config, compact_buffer.get(),
       bundle_provider.writer(), &metadata);
 
   EXPECT_LT(0u, evt_bytes);
@@ -850,11 +851,11 @@
   bundle_provider.ResetWriter();
 
   // Instead, sched switch fields were buffered:
-  EXPECT_LT(0u, compact_buffer.sched_switch().size());
-  EXPECT_LT(0u, compact_buffer.interner().interned_comms_size());
+  EXPECT_LT(0u, compact_buffer->sched_switch().size());
+  EXPECT_LT(0u, compact_buffer->interner().interned_comms_size());
 
   // Write the buffer out & check the serialized format:
-  compact_buffer.WriteAndReset(bundle_provider.writer());
+  compact_buffer->WriteAndReset(bundle_provider.writer());
   bundle_provider.writer()->Finalize();
   bundle = bundle_provider.ParseProto();
   ASSERT_TRUE(bundle);
@@ -1167,9 +1168,10 @@
 
   // Prepare a buffer with 8 contiguous pages, with the above contents.
   static constexpr size_t kTestPages = 8;
-  uint8_t buf[base::kPageSize * kTestPages] = {};
+
+  std::unique_ptr<uint8_t[]> buf(new uint8_t[base::kPageSize * kTestPages]());
   for (size_t i = 0; i < kTestPages; i++) {
-    void* dest = buf + (i * base::kPageSize);
+    void* dest = buf.get() + (i * base::kPageSize);
     memcpy(dest, static_cast<const void*>(test_page_order[i]), base::kPageSize);
   }
 
@@ -1182,8 +1184,8 @@
 
   TraceWriterForTesting trace_writer;
   CpuReader::ProcessPagesForDataSource(
-      &trace_writer, &metadata, /*cpu=*/1, &ds_config, buf, kTestPages, table,
-      /*symbolizer=*/nullptr, protos::pbzero::FTRACE_CLOCK_UNSPECIFIED);
+      &trace_writer, &metadata, /*cpu=*/1, &ds_config, buf.get(), kTestPages,
+      table, /*symbolizer=*/nullptr, protos::pbzero::FTRACE_CLOCK_UNSPECIFIED);
 
   // Each packet should contain the parsed contents of a contiguous run of pages
   // without data loss.
@@ -1395,7 +1397,7 @@
       table->EventToFtraceId(GroupAndName("sched", "sched_switch")));
 
   FtraceMetadata metadata{};
-  CompactSchedBuffer compact_buffer;
+  std::unique_ptr<CompactSchedBuffer> compact_buffer(new CompactSchedBuffer());
   const uint8_t* parse_pos = page.get();
   base::Optional<CpuReader::PageHeader> page_header =
       CpuReader::ParsePageHeader(&parse_pos, table->page_header_size_len());
@@ -1407,7 +1409,7 @@
   EXPECT_TRUE(parse_pos + page_header->size < page_end);
 
   size_t evt_bytes = CpuReader::ParsePagePayload(
-      parse_pos, &page_header.value(), table, &ds_config, &compact_buffer,
+      parse_pos, &page_header.value(), table, &ds_config, compact_buffer.get(),
       bundle_provider.writer(), &metadata);
 
   ASSERT_LT(0u, evt_bytes);
@@ -1884,7 +1886,7 @@
       table->EventToFtraceId(GroupAndName("sched", "sched_switch")));
 
   FtraceMetadata metadata{};
-  CompactSchedBuffer compact_buffer;
+  std::unique_ptr<CompactSchedBuffer> compact_buffer(new CompactSchedBuffer());
   const uint8_t* parse_pos = page.get();
   base::Optional<CpuReader::PageHeader> page_header =
       CpuReader::ParsePageHeader(&parse_pos, table->page_header_size_len());
@@ -1896,7 +1898,7 @@
   EXPECT_TRUE(parse_pos + page_header->size < page_end);
 
   size_t evt_bytes = CpuReader::ParsePagePayload(
-      parse_pos, &page_header.value(), table, &ds_config, &compact_buffer,
+      parse_pos, &page_header.value(), table, &ds_config, compact_buffer.get(),
       bundle_provider.writer(), &metadata);
 
   EXPECT_LT(0u, evt_bytes);
@@ -1973,14 +1975,14 @@
       table->EventToFtraceId(GroupAndName("power", "suspend_resume")));
 
   FtraceMetadata metadata{};
-  CompactSchedBuffer compact_buffer;
+  std::unique_ptr<CompactSchedBuffer> compact_buffer(new CompactSchedBuffer());
   const uint8_t* parse_pos = page.get();
   base::Optional<CpuReader::PageHeader> page_header =
       CpuReader::ParsePageHeader(&parse_pos, table->page_header_size_len());
   ASSERT_TRUE(page_header.has_value());
 
   CpuReader::ParsePagePayload(
-      parse_pos, &page_header.value(), table, &ds_config, &compact_buffer,
+      parse_pos, &page_header.value(), table, &ds_config, compact_buffer.get(),
       bundle_provider.writer(), &metadata);
   auto bundle = bundle_provider.ParseProto();
   ASSERT_TRUE(bundle);
@@ -2417,7 +2419,7 @@
       table->EventToFtraceId(GroupAndName("sched", "sched_switch")));
 
   FtraceMetadata metadata{};
-  CompactSchedBuffer compact_buffer;
+  std::unique_ptr<CompactSchedBuffer> compact_buffer(new CompactSchedBuffer());
   const uint8_t* parse_pos = page.get();
   base::Optional<CpuReader::PageHeader> page_header =
       CpuReader::ParsePageHeader(&parse_pos, table->page_header_size_len());
@@ -2429,7 +2431,7 @@
   EXPECT_TRUE(parse_pos + page_header->size < page_end);
 
   size_t evt_bytes = CpuReader::ParsePagePayload(
-      parse_pos, &page_header.value(), table, &ds_config, &compact_buffer,
+      parse_pos, &page_header.value(), table, &ds_config, compact_buffer.get(),
       bundle_provider.writer(), &metadata);
 
   EXPECT_LT(0u, evt_bytes);
diff --git a/src/traced/probes/ftrace/event_info.cc b/src/traced/probes/ftrace/event_info.cc
index 9422d52..9d6ac9d 100644
--- a/src/traced/probes/ftrace/event_info.cc
+++ b/src/traced/probes/ftrace/event_info.cc
@@ -6218,6 +6218,30 @@
        338,
        kUnsetSize},
       {"scm_call_end", "scm", {}, kUnsetFtraceId, 339, kUnsetSize},
+      {"mm_vmscan_direct_reclaim_begin",
+       "vmscan",
+       {
+           {kUnsetOffset, kUnsetSize, FtraceFieldType::kInvalidFtraceFieldType,
+            "order", 1, ProtoSchemaType::kInt32,
+            TranslationStrategy::kInvalidTranslationStrategy},
+           {kUnsetOffset, kUnsetSize, FtraceFieldType::kInvalidFtraceFieldType,
+            "may_writepage", 2, ProtoSchemaType::kInt32,
+            TranslationStrategy::kInvalidTranslationStrategy},
+           {kUnsetOffset, kUnsetSize, FtraceFieldType::kInvalidFtraceFieldType,
+            "gfp_flags", 3, ProtoSchemaType::kInt32,
+            TranslationStrategy::kInvalidTranslationStrategy},
+       },
+       kUnsetFtraceId,
+       338,
+       kUnsetSize},
+      {"mm_vmscan_direct_reclaim_end",
+       "vmscan",
+       {{kUnsetOffset, kUnsetSize, FtraceFieldType::kInvalidFtraceFieldType,
+         "nr_reclaimed", 1, ProtoSchemaType::kUint64,
+         TranslationStrategy::kInvalidTranslationStrategy}},
+       kUnsetFtraceId,
+       339,
+       kUnsetSize},
       {"tracing_mark_write",
        "sde",
        {
diff --git a/src/traced/probes/ftrace/ftrace_config_muxer.cc b/src/traced/probes/ftrace/ftrace_config_muxer.cc
index 75c4466..a19564f 100644
--- a/src/traced/probes/ftrace/ftrace_config_muxer.cc
+++ b/src/traced/probes/ftrace/ftrace_config_muxer.cc
@@ -24,6 +24,7 @@
 #include <algorithm>
 #include <iterator>
 
+#include "perfetto/base/compiler.h"
 #include "perfetto/ext/base/utils.h"
 #include "protos/perfetto/trace/ftrace/sched.pbzero.h"
 #include "src/traced/probes/ftrace/atrace_wrapper.h"
@@ -92,6 +93,14 @@
   *out = std::move(v);
 }
 
+// Deliberately kept out of line: routing every insertion through one function
+// keeps the inlined std::set code out of each call site (binary/stack size).
+void PERFETTO_NO_INLINE InsertEvent(const char* group,
+                                    const char* name,
+                                    std::set<GroupAndName>* dst) {
+  dst->emplace(group, name);
+}
+
 }  // namespace
 
 std::set<GroupAndName> FtraceConfigMuxer::GetFtraceEvents(
@@ -122,7 +131,7 @@
     }
   }
   if (RequiresAtrace(request)) {
-    events.insert(GroupAndName("ftrace", "print"));
+    InsertEvent("ftrace", "print", &events);
 
     // Ideally we should keep this code in sync with:
     // platform/frameworks/native/cmds/atrace/atrace.cpp
@@ -132,53 +141,53 @@
     for (const std::string& category : request.atrace_categories()) {
       if (category == "gfx") {
         AddEventGroup(table, "mdss", &events);
-        events.insert(GroupAndName("mdss", "rotator_bw_ao_as_context"));
-        events.insert(GroupAndName("mdss", "mdp_trace_counter"));
-        events.insert(GroupAndName("mdss", "tracing_mark_write"));
-        events.insert(GroupAndName("mdss", "mdp_cmd_wait_pingpong"));
-        events.insert(GroupAndName("mdss", "mdp_cmd_kickoff"));
-        events.insert(GroupAndName("mdss", "mdp_cmd_release_bw"));
-        events.insert(GroupAndName("mdss", "mdp_cmd_readptr_done"));
-        events.insert(GroupAndName("mdss", "mdp_cmd_pingpong_done"));
-        events.insert(GroupAndName("mdss", "mdp_misr_crc"));
-        events.insert(GroupAndName("mdss", "mdp_compare_bw"));
-        events.insert(GroupAndName("mdss", "mdp_perf_update_bus"));
-        events.insert(GroupAndName("mdss", "mdp_video_underrun_done"));
-        events.insert(GroupAndName("mdss", "mdp_commit"));
-        events.insert(GroupAndName("mdss", "mdp_mixer_update"));
-        events.insert(GroupAndName("mdss", "mdp_perf_prefill_calc"));
-        events.insert(GroupAndName("mdss", "mdp_perf_set_ot"));
-        events.insert(GroupAndName("mdss", "mdp_perf_set_wm_levels"));
-        events.insert(GroupAndName("mdss", "mdp_perf_set_panic_luts"));
-        events.insert(GroupAndName("mdss", "mdp_perf_set_qos_luts"));
-        events.insert(GroupAndName("mdss", "mdp_sspp_change"));
-        events.insert(GroupAndName("mdss", "mdp_sspp_set"));
+        InsertEvent("mdss", "rotator_bw_ao_as_context", &events);
+        InsertEvent("mdss", "mdp_trace_counter", &events);
+        InsertEvent("mdss", "tracing_mark_write", &events);
+        InsertEvent("mdss", "mdp_cmd_wait_pingpong", &events);
+        InsertEvent("mdss", "mdp_cmd_kickoff", &events);
+        InsertEvent("mdss", "mdp_cmd_release_bw", &events);
+        InsertEvent("mdss", "mdp_cmd_readptr_done", &events);
+        InsertEvent("mdss", "mdp_cmd_pingpong_done", &events);
+        InsertEvent("mdss", "mdp_misr_crc", &events);
+        InsertEvent("mdss", "mdp_compare_bw", &events);
+        InsertEvent("mdss", "mdp_perf_update_bus", &events);
+        InsertEvent("mdss", "mdp_video_underrun_done", &events);
+        InsertEvent("mdss", "mdp_commit", &events);
+        InsertEvent("mdss", "mdp_mixer_update", &events);
+        InsertEvent("mdss", "mdp_perf_prefill_calc", &events);
+        InsertEvent("mdss", "mdp_perf_set_ot", &events);
+        InsertEvent("mdss", "mdp_perf_set_wm_levels", &events);
+        InsertEvent("mdss", "mdp_perf_set_panic_luts", &events);
+        InsertEvent("mdss", "mdp_perf_set_qos_luts", &events);
+        InsertEvent("mdss", "mdp_sspp_change", &events);
+        InsertEvent("mdss", "mdp_sspp_set", &events);
         AddEventGroup(table, "mali", &events);
-        events.insert(GroupAndName("mali", "tracing_mark_write"));
+        InsertEvent("mali", "tracing_mark_write", &events);
 
         AddEventGroup(table, "sde", &events);
-        events.insert(GroupAndName("sde", "tracing_mark_write"));
-        events.insert(GroupAndName("sde", "sde_perf_update_bus"));
-        events.insert(GroupAndName("sde", "sde_perf_set_qos_luts"));
-        events.insert(GroupAndName("sde", "sde_perf_set_ot"));
-        events.insert(GroupAndName("sde", "sde_perf_set_danger_luts"));
-        events.insert(GroupAndName("sde", "sde_perf_crtc_update"));
-        events.insert(GroupAndName("sde", "sde_perf_calc_crtc"));
-        events.insert(GroupAndName("sde", "sde_evtlog"));
-        events.insert(GroupAndName("sde", "sde_encoder_underrun"));
-        events.insert(GroupAndName("sde", "sde_cmd_release_bw"));
+        InsertEvent("sde", "tracing_mark_write", &events);
+        InsertEvent("sde", "sde_perf_update_bus", &events);
+        InsertEvent("sde", "sde_perf_set_qos_luts", &events);
+        InsertEvent("sde", "sde_perf_set_ot", &events);
+        InsertEvent("sde", "sde_perf_set_danger_luts", &events);
+        InsertEvent("sde", "sde_perf_crtc_update", &events);
+        InsertEvent("sde", "sde_perf_calc_crtc", &events);
+        InsertEvent("sde", "sde_evtlog", &events);
+        InsertEvent("sde", "sde_encoder_underrun", &events);
+        InsertEvent("sde", "sde_cmd_release_bw", &events);
 
         AddEventGroup(table, "dpu", &events);
-        events.insert(GroupAndName("dpu", "tracing_mark_write"));
+        InsertEvent("dpu", "tracing_mark_write", &events);
 
         AddEventGroup(table, "g2d", &events);
-        events.insert(GroupAndName("g2d", "tracing_mark_write"));
-        events.insert(GroupAndName("g2d", "g2d_perf_update_qos"));
+        InsertEvent("g2d", "tracing_mark_write", &events);
+        InsertEvent("g2d", "g2d_perf_update_qos", &events);
         continue;
       }
 
       if (category == "ion") {
-        events.insert(GroupAndName("kmem", "ion_alloc_buffer_start"));
+        InsertEvent("kmem", "ion_alloc_buffer_start", &events);
         continue;
       }
 
@@ -186,102 +195,102 @@
       // is high-volume, but mostly redundant when sched_waking is also enabled.
       // The event can still be enabled explicitly when necessary.
       if (category == "sched") {
-        events.insert(GroupAndName("sched", "sched_switch"));
-        events.insert(GroupAndName("sched", "sched_waking"));
-        events.insert(GroupAndName("sched", "sched_blocked_reason"));
-        events.insert(GroupAndName("sched", "sched_cpu_hotplug"));
-        events.insert(GroupAndName("sched", "sched_pi_setprio"));
-        events.insert(GroupAndName("sched", "sched_process_exit"));
+        InsertEvent("sched", "sched_switch", &events);
+        InsertEvent("sched", "sched_waking", &events);
+        InsertEvent("sched", "sched_blocked_reason", &events);
+        InsertEvent("sched", "sched_cpu_hotplug", &events);
+        InsertEvent("sched", "sched_pi_setprio", &events);
+        InsertEvent("sched", "sched_process_exit", &events);
         AddEventGroup(table, "cgroup", &events);
-        events.insert(GroupAndName("cgroup", "cgroup_transfer_tasks"));
-        events.insert(GroupAndName("cgroup", "cgroup_setup_root"));
-        events.insert(GroupAndName("cgroup", "cgroup_rmdir"));
-        events.insert(GroupAndName("cgroup", "cgroup_rename"));
-        events.insert(GroupAndName("cgroup", "cgroup_remount"));
-        events.insert(GroupAndName("cgroup", "cgroup_release"));
-        events.insert(GroupAndName("cgroup", "cgroup_mkdir"));
-        events.insert(GroupAndName("cgroup", "cgroup_destroy_root"));
-        events.insert(GroupAndName("cgroup", "cgroup_attach_task"));
-        events.insert(GroupAndName("oom", "oom_score_adj_update"));
-        events.insert(GroupAndName("task", "task_rename"));
-        events.insert(GroupAndName("task", "task_newtask"));
+        InsertEvent("cgroup", "cgroup_transfer_tasks", &events);
+        InsertEvent("cgroup", "cgroup_setup_root", &events);
+        InsertEvent("cgroup", "cgroup_rmdir", &events);
+        InsertEvent("cgroup", "cgroup_rename", &events);
+        InsertEvent("cgroup", "cgroup_remount", &events);
+        InsertEvent("cgroup", "cgroup_release", &events);
+        InsertEvent("cgroup", "cgroup_mkdir", &events);
+        InsertEvent("cgroup", "cgroup_destroy_root", &events);
+        InsertEvent("cgroup", "cgroup_attach_task", &events);
+        InsertEvent("oom", "oom_score_adj_update", &events);
+        InsertEvent("task", "task_rename", &events);
+        InsertEvent("task", "task_newtask", &events);
 
         AddEventGroup(table, "systrace", &events);
-        events.insert(GroupAndName("systrace", "0"));
+        InsertEvent("systrace", "0", &events);
 
         AddEventGroup(table, "scm", &events);
-        events.insert(GroupAndName("scm", "scm_call_start"));
-        events.insert(GroupAndName("scm", "scm_call_end"));
+        InsertEvent("scm", "scm_call_start", &events);
+        InsertEvent("scm", "scm_call_end", &events);
         continue;
       }
 
       if (category == "irq") {
         AddEventGroup(table, "irq", &events);
-        events.insert(GroupAndName("irq", "tasklet_hi_exit"));
-        events.insert(GroupAndName("irq", "tasklet_hi_entry"));
-        events.insert(GroupAndName("irq", "tasklet_exit"));
-        events.insert(GroupAndName("irq", "tasklet_entry"));
-        events.insert(GroupAndName("irq", "softirq_raise"));
-        events.insert(GroupAndName("irq", "softirq_exit"));
-        events.insert(GroupAndName("irq", "softirq_entry"));
-        events.insert(GroupAndName("irq", "irq_handler_exit"));
-        events.insert(GroupAndName("irq", "irq_handler_entry"));
+        InsertEvent("irq", "tasklet_hi_exit", &events);
+        InsertEvent("irq", "tasklet_hi_entry", &events);
+        InsertEvent("irq", "tasklet_exit", &events);
+        InsertEvent("irq", "tasklet_entry", &events);
+        InsertEvent("irq", "softirq_raise", &events);
+        InsertEvent("irq", "softirq_exit", &events);
+        InsertEvent("irq", "softirq_entry", &events);
+        InsertEvent("irq", "irq_handler_exit", &events);
+        InsertEvent("irq", "irq_handler_entry", &events);
         AddEventGroup(table, "ipi", &events);
-        events.insert(GroupAndName("ipi", "ipi_raise"));
-        events.insert(GroupAndName("ipi", "ipi_exit"));
-        events.insert(GroupAndName("ipi", "ipi_entry"));
+        InsertEvent("ipi", "ipi_raise", &events);
+        InsertEvent("ipi", "ipi_exit", &events);
+        InsertEvent("ipi", "ipi_entry", &events);
         continue;
       }
 
       if (category == "irqoff") {
-        events.insert(GroupAndName("preemptirq", "irq_enable"));
-        events.insert(GroupAndName("preemptirq", "irq_disable"));
+        InsertEvent("preemptirq", "irq_enable", &events);
+        InsertEvent("preemptirq", "irq_disable", &events);
         continue;
       }
 
       if (category == "preemptoff") {
-        events.insert(GroupAndName("preemptirq", "preempt_enable"));
-        events.insert(GroupAndName("preemptirq", "preempt_disable"));
+        InsertEvent("preemptirq", "preempt_enable", &events);
+        InsertEvent("preemptirq", "preempt_disable", &events);
         continue;
       }
 
       if (category == "i2c") {
         AddEventGroup(table, "i2c", &events);
-        events.insert(GroupAndName("i2c", "i2c_read"));
-        events.insert(GroupAndName("i2c", "i2c_write"));
-        events.insert(GroupAndName("i2c", "i2c_result"));
-        events.insert(GroupAndName("i2c", "i2c_reply"));
-        events.insert(GroupAndName("i2c", "smbus_read"));
-        events.insert(GroupAndName("i2c", "smbus_write"));
-        events.insert(GroupAndName("i2c", "smbus_result"));
-        events.insert(GroupAndName("i2c", "smbus_reply"));
+        InsertEvent("i2c", "i2c_read", &events);
+        InsertEvent("i2c", "i2c_write", &events);
+        InsertEvent("i2c", "i2c_result", &events);
+        InsertEvent("i2c", "i2c_reply", &events);
+        InsertEvent("i2c", "smbus_read", &events);
+        InsertEvent("i2c", "smbus_write", &events);
+        InsertEvent("i2c", "smbus_result", &events);
+        InsertEvent("i2c", "smbus_reply", &events);
         continue;
       }
 
       if (category == "freq") {
-        events.insert(GroupAndName("power", "cpu_frequency"));
-        events.insert(GroupAndName("power", "gpu_frequency"));
-        events.insert(GroupAndName("power", "clock_set_rate"));
-        events.insert(GroupAndName("power", "clock_disable"));
-        events.insert(GroupAndName("power", "clock_enable"));
-        events.insert(GroupAndName("clk", "clk_set_rate"));
-        events.insert(GroupAndName("clk", "clk_disable"));
-        events.insert(GroupAndName("clk", "clk_enable"));
-        events.insert(GroupAndName("power", "cpu_frequency_limits"));
-        events.insert(GroupAndName("power", "suspend_resume"));
-        events.insert(GroupAndName("cpuhp", "cpuhp_enter"));
-        events.insert(GroupAndName("cpuhp", "cpuhp_exit"));
-        events.insert(GroupAndName("cpuhp", "cpuhp_pause"));
+        InsertEvent("power", "cpu_frequency", &events);
+        InsertEvent("power", "gpu_frequency", &events);
+        InsertEvent("power", "clock_set_rate", &events);
+        InsertEvent("power", "clock_disable", &events);
+        InsertEvent("power", "clock_enable", &events);
+        InsertEvent("clk", "clk_set_rate", &events);
+        InsertEvent("clk", "clk_disable", &events);
+        InsertEvent("clk", "clk_enable", &events);
+        InsertEvent("power", "cpu_frequency_limits", &events);
+        InsertEvent("power", "suspend_resume", &events);
+        InsertEvent("cpuhp", "cpuhp_enter", &events);
+        InsertEvent("cpuhp", "cpuhp_exit", &events);
+        InsertEvent("cpuhp", "cpuhp_pause", &events);
         AddEventGroup(table, "msm_bus", &events);
-        events.insert(GroupAndName("msm_bus", "bus_update_request_end"));
-        events.insert(GroupAndName("msm_bus", "bus_update_request"));
-        events.insert(GroupAndName("msm_bus", "bus_rules_matches"));
-        events.insert(GroupAndName("msm_bus", "bus_max_votes"));
-        events.insert(GroupAndName("msm_bus", "bus_client_status"));
-        events.insert(GroupAndName("msm_bus", "bus_bke_params"));
-        events.insert(GroupAndName("msm_bus", "bus_bimc_config_limiter"));
-        events.insert(GroupAndName("msm_bus", "bus_avail_bw"));
-        events.insert(GroupAndName("msm_bus", "bus_agg_bw"));
+        InsertEvent("msm_bus", "bus_update_request_end", &events);
+        InsertEvent("msm_bus", "bus_update_request", &events);
+        InsertEvent("msm_bus", "bus_rules_matches", &events);
+        InsertEvent("msm_bus", "bus_max_votes", &events);
+        InsertEvent("msm_bus", "bus_client_status", &events);
+        InsertEvent("msm_bus", "bus_bke_params", &events);
+        InsertEvent("msm_bus", "bus_bimc_config_limiter", &events);
+        InsertEvent("msm_bus", "bus_avail_bw", &events);
+        InsertEvent("msm_bus", "bus_agg_bw", &events);
         continue;
       }
 
@@ -291,21 +300,21 @@
       }
 
       if (category == "idle") {
-        events.insert(GroupAndName("power", "cpu_idle"));
+        InsertEvent("power", "cpu_idle", &events);
         continue;
       }
 
       if (category == "disk") {
-        events.insert(GroupAndName("f2fs", "f2fs_sync_file_enter"));
-        events.insert(GroupAndName("f2fs", "f2fs_sync_file_exit"));
-        events.insert(GroupAndName("f2fs", "f2fs_write_begin"));
-        events.insert(GroupAndName("f2fs", "f2fs_write_end"));
-        events.insert(GroupAndName("ext4", "ext4_da_write_begin"));
-        events.insert(GroupAndName("ext4", "ext4_da_write_end"));
-        events.insert(GroupAndName("ext4", "ext4_sync_file_enter"));
-        events.insert(GroupAndName("ext4", "ext4_sync_file_exit"));
-        events.insert(GroupAndName("block", "block_rq_issue"));
-        events.insert(GroupAndName("block", "block_rq_complete"));
+        InsertEvent("f2fs", "f2fs_sync_file_enter", &events);
+        InsertEvent("f2fs", "f2fs_sync_file_exit", &events);
+        InsertEvent("f2fs", "f2fs_write_begin", &events);
+        InsertEvent("f2fs", "f2fs_write_end", &events);
+        InsertEvent("ext4", "ext4_da_write_begin", &events);
+        InsertEvent("ext4", "ext4_da_write_end", &events);
+        InsertEvent("ext4", "ext4_sync_file_enter", &events);
+        InsertEvent("ext4", "ext4_sync_file_exit", &events);
+        InsertEvent("block", "block_rq_issue", &events);
+        InsertEvent("block", "block_rq_complete", &events);
         continue;
       }
 
@@ -322,19 +331,19 @@
       if (category == "sync") {
         // linux kernel < 4.9
         AddEventGroup(table, "sync", &events);
-        events.insert(GroupAndName("sync", "sync_pt"));
-        events.insert(GroupAndName("sync", "sync_timeline"));
-        events.insert(GroupAndName("sync", "sync_wait"));
+        InsertEvent("sync", "sync_pt", &events);
+        InsertEvent("sync", "sync_timeline", &events);
+        InsertEvent("sync", "sync_wait", &events);
         // linux kernel == 4.9.x
         AddEventGroup(table, "fence", &events);
-        events.insert(GroupAndName("fence", "fence_annotate_wait_on"));
-        events.insert(GroupAndName("fence", "fence_destroy"));
-        events.insert(GroupAndName("fence", "fence_emit"));
-        events.insert(GroupAndName("fence", "fence_enable_signal"));
-        events.insert(GroupAndName("fence", "fence_init"));
-        events.insert(GroupAndName("fence", "fence_signaled"));
-        events.insert(GroupAndName("fence", "fence_wait_end"));
-        events.insert(GroupAndName("fence", "fence_wait_start"));
+        InsertEvent("fence", "fence_annotate_wait_on", &events);
+        InsertEvent("fence", "fence_destroy", &events);
+        InsertEvent("fence", "fence_emit", &events);
+        InsertEvent("fence", "fence_enable_signal", &events);
+        InsertEvent("fence", "fence_init", &events);
+        InsertEvent("fence", "fence_signaled", &events);
+        InsertEvent("fence", "fence_wait_end", &events);
+        InsertEvent("fence", "fence_wait_start", &events);
         // linux kernel > 4.9
         AddEventGroup(table, "dma_fence", &events);
         continue;
@@ -342,20 +351,20 @@
 
       if (category == "workq") {
         AddEventGroup(table, "workqueue", &events);
-        events.insert(GroupAndName("workqueue", "workqueue_queue_work"));
-        events.insert(GroupAndName("workqueue", "workqueue_execute_start"));
-        events.insert(GroupAndName("workqueue", "workqueue_execute_end"));
-        events.insert(GroupAndName("workqueue", "workqueue_activate_work"));
+        InsertEvent("workqueue", "workqueue_queue_work", &events);
+        InsertEvent("workqueue", "workqueue_execute_start", &events);
+        InsertEvent("workqueue", "workqueue_execute_end", &events);
+        InsertEvent("workqueue", "workqueue_activate_work", &events);
         continue;
       }
 
       if (category == "memreclaim") {
-        events.insert(GroupAndName("vmscan", "mm_vmscan_direct_reclaim_begin"));
-        events.insert(GroupAndName("vmscan", "mm_vmscan_direct_reclaim_end"));
-        events.insert(GroupAndName("vmscan", "mm_vmscan_kswapd_wake"));
-        events.insert(GroupAndName("vmscan", "mm_vmscan_kswapd_sleep"));
+        InsertEvent("vmscan", "mm_vmscan_direct_reclaim_begin", &events);
+        InsertEvent("vmscan", "mm_vmscan_direct_reclaim_end", &events);
+        InsertEvent("vmscan", "mm_vmscan_kswapd_wake", &events);
+        InsertEvent("vmscan", "mm_vmscan_kswapd_sleep", &events);
         AddEventGroup(table, "lowmemorykiller", &events);
-        events.insert(GroupAndName("lowmemorykiller", "lowmemory_kill"));
+        InsertEvent("lowmemorykiller", "lowmemory_kill", &events);
         continue;
       }
 
@@ -363,27 +372,26 @@
         AddEventGroup(table, "regulator", &events);
-        events.insert(
-            GroupAndName("regulator", "regulator_set_voltage_complete"));
+        InsertEvent("regulator", "regulator_set_voltage_complete", &events);
-        events.insert(GroupAndName("regulator", "regulator_set_voltage"));
-        events.insert(GroupAndName("regulator", "regulator_enable_delay"));
-        events.insert(GroupAndName("regulator", "regulator_enable_complete"));
-        events.insert(GroupAndName("regulator", "regulator_enable"));
-        events.insert(GroupAndName("regulator", "regulator_disable_complete"));
-        events.insert(GroupAndName("regulator", "regulator_disable"));
+        InsertEvent("regulator", "regulator_set_voltage", &events);
+        InsertEvent("regulator", "regulator_enable_delay", &events);
+        InsertEvent("regulator", "regulator_enable_complete", &events);
+        InsertEvent("regulator", "regulator_enable", &events);
+        InsertEvent("regulator", "regulator_disable_complete", &events);
+        InsertEvent("regulator", "regulator_disable", &events);
         continue;
       }
 
       if (category == "binder_driver") {
-        events.insert(GroupAndName("binder", "binder_transaction"));
-        events.insert(GroupAndName("binder", "binder_transaction_received"));
-        events.insert(GroupAndName("binder", "binder_transaction_alloc_buf"));
-        events.insert(GroupAndName("binder", "binder_set_priority"));
+        InsertEvent("binder", "binder_transaction", &events);
+        InsertEvent("binder", "binder_transaction_received", &events);
+        InsertEvent("binder", "binder_transaction_alloc_buf", &events);
+        InsertEvent("binder", "binder_set_priority", &events);
         continue;
       }
 
       if (category == "binder_lock") {
-        events.insert(GroupAndName("binder", "binder_lock"));
-        events.insert(GroupAndName("binder", "binder_locked"));
-        events.insert(GroupAndName("binder", "binder_unlock"));
+        InsertEvent("binder", "binder_lock", &events);
+        InsertEvent("binder", "binder_locked", &events);
+        InsertEvent("binder", "binder_unlock", &events);
         continue;
       }
 
@@ -391,26 +400,26 @@
         AddEventGroup(table, "filemap", &events);
         events.insert(
             GroupAndName("filemap", "mm_filemap_delete_from_page_cache"));
-        events.insert(GroupAndName("filemap", "mm_filemap_add_to_page_cache"));
-        events.insert(GroupAndName("filemap", "filemap_set_wb_err"));
-        events.insert(GroupAndName("filemap", "file_check_and_advance_wb_err"));
+        InsertEvent("filemap", "mm_filemap_add_to_page_cache", &events);
+        InsertEvent("filemap", "filemap_set_wb_err", &events);
+        InsertEvent("filemap", "file_check_and_advance_wb_err", &events);
         continue;
       }
 
       if (category == "memory") {
-        events.insert(GroupAndName("kmem", "rss_stat"));
-        events.insert(GroupAndName("kmem", "ion_heap_grow"));
-        events.insert(GroupAndName("kmem", "ion_heap_shrink"));
+        InsertEvent("kmem", "rss_stat", &events);
+        InsertEvent("kmem", "ion_heap_grow", &events);
+        InsertEvent("kmem", "ion_heap_shrink", &events);
         // ion_stat supersedes ion_heap_grow / shrink for kernel 4.19+
-        events.insert(GroupAndName("ion", "ion_stat"));
-        events.insert(GroupAndName("mm_event", "mm_event_record"));
-        events.insert(GroupAndName("dmabuf_heap", "dma_heap_stat"));
+        InsertEvent("ion", "ion_stat", &events);
+        InsertEvent("mm_event", "mm_event_record", &events);
+        InsertEvent("dmabuf_heap", "dma_heap_stat", &events);
         continue;
       }
 
       if (category == "thermal") {
-        events.insert(GroupAndName("thermal", "thermal_temperature"));
-        events.insert(GroupAndName("thermal", "cdev_update"));
+        InsertEvent("thermal", "thermal_temperature", &events);
+        InsertEvent("thermal", "cdev_update", &events);
         continue;
       }
     }
diff --git a/src/traced/probes/ftrace/ftrace_procfs.cc b/src/traced/probes/ftrace/ftrace_procfs.cc
index 9176bae..e0046ab 100644
--- a/src/traced/probes/ftrace/ftrace_procfs.cc
+++ b/src/traced/probes/ftrace/ftrace_procfs.cc
@@ -283,9 +283,7 @@
 bool FtraceProcfs::WriteNumberToFile(const std::string& path, size_t value) {
   // 2^65 requires 20 digits to write.
   char buf[21];
-  int res = snprintf(buf, 21, "%zu", value);
-  if (res < 0 || res >= 21)
-    return false;
+  snprintf(buf, sizeof(buf), "%zu", value);
   return WriteToFile(path, std::string(buf));
 }
 
diff --git a/src/traced/probes/power/android_power_data_source.cc b/src/traced/probes/power/android_power_data_source.cc
index 1dccb1a..dac5bde 100644
--- a/src/traced/probes/power/android_power_data_source.cc
+++ b/src/traced/probes/power/android_power_data_source.cc
@@ -50,7 +50,7 @@
 // static
 const ProbesDataSource::Descriptor AndroidPowerDataSource::descriptor = {
     /*name*/ "android.power",
-    /*flags*/ Descriptor::kFlagsNone,
+    /*flags*/ Descriptor::kHandlesIncrementalState,
 };
 
 // Dynamically loads the libperfetto_android_internal.so library which
@@ -140,8 +140,6 @@
     std::unique_ptr<TraceWriter> writer)
     : ProbesDataSource(session_id, &descriptor),
       task_runner_(task_runner),
-      rail_descriptors_logged_(false),
-      energy_consumer_loggged_(false),
       writer_(std::move(writer)),
       weak_factory_(this) {
   using protos::pbzero::AndroidPowerConfig;
@@ -201,9 +199,19 @@
       },
       poll_interval_ms_ - static_cast<uint32_t>(now_ms % poll_interval_ms_));
 
+  if (should_emit_descriptors_) {
+    // We write incremental state cleared in its own packet to avoid the subtle
+    // code we'd need if we were to set this on the first enabled data source.
+    auto packet = writer_->NewTracePacket();
+    packet->set_sequence_flags(
+        protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED);
+  }
+
   WriteBatteryCounters();
   WritePowerRailsData();
   WriteEnergyEstimationBreakdown();
+
+  should_emit_descriptors_ = false;
 }
 
 void AndroidPowerDataSource::WriteBatteryCounters() {
@@ -252,15 +260,14 @@
 
   auto packet = writer_->NewTracePacket();
   packet->set_timestamp(static_cast<uint64_t>(base::GetBootTimeNs().count()));
-  auto* rails_proto = packet->set_power_rails();
+  packet->set_sequence_flags(
+      protos::pbzero::TracePacket::SEQ_NEEDS_INCREMENTAL_STATE);
 
-  if (!rail_descriptors_logged_) {
-    // We only add the rail descriptors to the first package, to avoid logging
-    // all rail names etc. on each one.
-    rail_descriptors_logged_ = true;
+  auto* rails_proto = packet->set_power_rails();
+  if (should_emit_descriptors_) {
     auto rail_descriptors = lib_->GetRailDescriptors();
     if (rail_descriptors.empty()) {
-      // No rails to collect data for. Don't try again in the next iteration.
+      // No rails to collect data for. Don't try again.
       rails_collection_enabled_ = false;
       return;
     }
@@ -291,8 +298,7 @@
   protos::pbzero::AndroidEnergyEstimationBreakdown* energy_estimation_proto =
       nullptr;
 
-  if (!energy_consumer_loggged_) {
-    energy_consumer_loggged_ = true;
+  if (should_emit_descriptors_) {
     packet = writer_->NewTracePacket();
     energy_estimation_proto = packet->set_android_energy_estimation_breakdown();
     auto* descriptor_proto =
@@ -316,6 +322,9 @@
       }
       packet = writer_->NewTracePacket();
       packet->set_timestamp(timestamp);
+      packet->set_sequence_flags(
+          protos::pbzero::TracePacket::SEQ_NEEDS_INCREMENTAL_STATE);
+
       energy_estimation_proto =
           packet->set_android_energy_estimation_breakdown();
       energy_estimation_proto->set_energy_consumer_id(
@@ -336,4 +345,8 @@
   writer_->Flush(callback);
 }
 
+void AndroidPowerDataSource::ClearIncrementalState() {
+  should_emit_descriptors_ = true;
+}
+
 }  // namespace perfetto
diff --git a/src/traced/probes/power/android_power_data_source.h b/src/traced/probes/power/android_power_data_source.h
index 4fecfda..56b02f6 100644
--- a/src/traced/probes/power/android_power_data_source.h
+++ b/src/traced/probes/power/android_power_data_source.h
@@ -48,6 +48,7 @@
   // ProbesDataSource implementation.
   void Start() override;
   void Flush(FlushRequestID, std::function<void()> callback) override;
+  void ClearIncrementalState() override;
 
  private:
   struct DynamicLibLoader;
@@ -57,13 +58,19 @@
   void WritePowerRailsData();
   void WriteEnergyEstimationBreakdown();
 
-  base::TaskRunner* const task_runner_;
-  uint32_t poll_interval_ms_ = 0;
+  // Battery counters.
   std::bitset<8> counters_enabled_;
-  bool rails_collection_enabled_;
-  bool rail_descriptors_logged_;
-  bool energy_consumer_loggged_;
-  bool energy_breakdown_collection_enabled_;
+
+  // Power rails.
+  bool rails_collection_enabled_ = false;
+
+  // Energy estimation.
+  bool energy_breakdown_collection_enabled_ = false;
+
+  uint32_t poll_interval_ms_ = 0;
+  bool should_emit_descriptors_ = true;
+
+  base::TaskRunner* const task_runner_;
   std::unique_ptr<TraceWriter> writer_;
   std::unique_ptr<DynamicLibLoader> lib_;
   base::WeakPtrFactory<AndroidPowerDataSource> weak_factory_;  // Keep last.
diff --git a/src/traced/probes/ps/process_stats_data_source.cc b/src/traced/probes/ps/process_stats_data_source.cc
index 9dafa72..4ac0a63 100644
--- a/src/traced/probes/ps/process_stats_data_source.cc
+++ b/src/traced/probes/ps/process_stats_data_source.cc
@@ -23,6 +23,7 @@
 
 #include "perfetto/base/task_runner.h"
 #include "perfetto/base/time.h"
+#include "perfetto/ext/base/crash_keys.h"
 #include "perfetto/ext/base/file_utils.h"
 #include "perfetto/ext/base/hash.h"
 #include "perfetto/ext/base/metatrace.h"
@@ -54,6 +55,10 @@
 // was provided in the config. The cache is trimmed if it exceeds this size.
 const size_t kThreadTimeInStateCacheSize = 10000;
 
+// TODO(b/189749310): For debugging of b/189749310. Remove by Jan 2022.
+base::CrashKey g_crash_key_proc_file("proc_file");
+base::CrashKey g_crash_key_proc_count("proc_count");
+
 int32_t ReadNextNumericDir(DIR* dirp) {
   while (struct dirent* dir_ent = readdir(dirp)) {
     if (dir_ent->d_type != DT_DIR)
@@ -152,9 +157,8 @@
     return;
   while (int32_t pid = ReadNextNumericDir(*proc_dir)) {
     WriteProcessOrThread(pid);
-    char task_path[255];
-    sprintf(task_path, "/proc/%d/task", pid);
-    base::ScopedDir task_dir(opendir(task_path));
+    base::StackString<128> task_path("/proc/%d/task", pid);
+    base::ScopedDir task_dir(opendir(task_path.c_str()));
     if (!task_dir)
       continue;
 
@@ -279,17 +283,19 @@
 
 std::string ProcessStatsDataSource::ReadProcPidFile(int32_t pid,
                                                     const std::string& file) {
+  base::StackString<128> path("/proc/%" PRId32 "/%s", pid, file.c_str());
+  auto scoped_key = g_crash_key_proc_file.SetScoped(path.string_view());
+  g_crash_key_proc_count.Set(g_crash_key_proc_count.int_value() + 1);
   std::string contents;
   contents.reserve(4096);
-  if (!base::ReadFile("/proc/" + std::to_string(pid) + "/" + file, &contents))
+  if (!base::ReadFile(path.c_str(), &contents))
     return "";
   return contents;
 }
 
 base::ScopedDir ProcessStatsDataSource::OpenProcTaskDir(int32_t pid) {
-  char task_path[255];
-  sprintf(task_path, "/proc/%d/task", pid);
-  return base::ScopedDir(opendir(task_path));
+  base::StackString<128> task_path("/proc/%d/task", pid);
+  return base::ScopedDir(opendir(task_path.c_str()));
 }
 
 std::string ProcessStatsDataSource::ReadProcStatusEntry(const std::string& buf,
@@ -367,6 +373,7 @@
     base::WeakPtr<ProcessStatsDataSource> weak_this) {
   if (!weak_this)
     return;
+  g_crash_key_proc_count.Clear();
   ProcessStatsDataSource& thiz = *weak_this;
   uint32_t period_ms = thiz.poll_period_ms_;
   uint32_t delay_ms =
diff --git a/src/traced/probes/sys_stats/sys_stats_data_source.cc b/src/traced/probes/sys_stats/sys_stats_data_source.cc
index 3d75ff2..a79cb40 100644
--- a/src/traced/probes/sys_stats/sys_stats_data_source.cc
+++ b/src/traced/probes/sys_stats/sys_stats_data_source.cc
@@ -31,6 +31,7 @@
 #include "perfetto/ext/base/metatrace.h"
 #include "perfetto/ext/base/scoped_file.h"
 #include "perfetto/ext/base/string_splitter.h"
+#include "perfetto/ext/base/string_utils.h"
 #include "perfetto/ext/base/utils.h"
 #include "perfetto/ext/traced/sys_stats_counters.h"
 
@@ -248,16 +249,15 @@
     const std::string& deviceName) {
   const char* devfreq_base_path = "/sys/class/devfreq";
   const char* freq_file_name = "cur_freq";
-  char cur_freq_path[256];
-  snprintf(cur_freq_path, sizeof(cur_freq_path), "%s/%s/%s", devfreq_base_path,
-           deviceName.c_str(), freq_file_name);
-  base::ScopedFile fd = OpenReadOnly(cur_freq_path);
+  base::StackString<256> cur_freq_path("%s/%s/%s", devfreq_base_path,
+                                       deviceName.c_str(), freq_file_name);
+  base::ScopedFile fd = OpenReadOnly(cur_freq_path.c_str());
   if (!fd && !devfreq_error_logged_) {
     devfreq_error_logged_ = true;
-    PERFETTO_PLOG("Failed to open %s", cur_freq_path);
+    PERFETTO_PLOG("Failed to open %s", cur_freq_path.c_str());
     return "";
   }
-  size_t rsize = ReadFile(&fd, cur_freq_path);
+  size_t rsize = ReadFile(&fd, cur_freq_path.c_str());
   if (!rsize)
     return "";
   return static_cast<char*>(read_buf_.Get());
diff --git a/src/traced/service/builtin_producer.cc b/src/traced/service/builtin_producer.cc
index 1e10ed1..4bd945b 100644
--- a/src/traced/service/builtin_producer.cc
+++ b/src/traced/service/builtin_producer.cc
@@ -30,6 +30,11 @@
 #include "perfetto/tracing/core/data_source_descriptor.h"
 #include "src/tracing/core/metatrace_writer.h"
 
+// This translation unit is only ever used in Android in-tree builds.
+// These producers are here to dynamically start heapprofd and other services
+// via sysprops when a trace that requests them is active. That can only happen
+// in in-tree builds of Android.
+
 #if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
 #include <sys/system_properties.h>
 #endif
diff --git a/src/traced/service/service.cc b/src/traced/service/service.cc
index f0a6c4a..9a5ecf4 100644
--- a/src/traced/service/service.cc
+++ b/src/traced/service/service.cc
@@ -39,6 +39,10 @@
 #include <unistd.h>
 #endif
 
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
+#include <sys/system_properties.h>
+#endif
+
 namespace perfetto {
 namespace {
 #if defined(PERFETTO_SET_SOCKET_PERMISSIONS)
@@ -195,8 +199,13 @@
     return 1;
   }
 
+  // Advertise builtin producers only on in-tree builds. These producers serve
+  // only to dynamically start heapprofd and other services via sysprops, but
+  // that can only ever happen in in-tree builds.
+#if PERFETTO_BUILDFLAG(PERFETTO_ANDROID_BUILD)
   BuiltinProducer builtin_producer(&task_runner, /*lazy_stop_delay_ms=*/30000);
   builtin_producer.ConnectInProcess(svc->service());
+#endif
 
   // Set the CPU limit and start the watchdog running. The memory limit will
   // be set inside the service code as it relies on the size of buffers.
@@ -216,6 +225,14 @@
     PERFETTO_CHECK(base::CloseFile(notif_fd) == 0);
   }
 
+#if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
+  // Notify init (perfetto.rc) that traced has been started. Used only by
+  // the perfetto_trace_on_boot init service.
+  if (__system_property_set("sys.trace.traced_started", "1") != 0) {
+    PERFETTO_PLOG("Failed to set property sys.trace.traced_started");
+  }
+#endif
+
   PERFETTO_ILOG("Started traced, listening on %s %s", GetProducerSocket(),
                 GetConsumerSocket());
   task_runner.Run();
diff --git a/src/tracing/console_interceptor.cc b/src/tracing/console_interceptor.cc
index 99ce956..ec924c0 100644
--- a/src/tracing/console_interceptor.cc
+++ b/src/tracing/console_interceptor.cc
@@ -16,10 +16,17 @@
 
 #include "perfetto/tracing/console_interceptor.h"
 
+#include <stdarg.h>
+
+#include <algorithm>
+#include <cmath>
+#include <tuple>
+
 #include "perfetto/ext/base/file_utils.h"
 #include "perfetto/ext/base/hash.h"
 #include "perfetto/ext/base/optional.h"
 #include "perfetto/ext/base/scoped_file.h"
+#include "perfetto/ext/base/string_utils.h"
 #include "perfetto/ext/base/utils.h"
 #include "perfetto/tracing/internal/track_event_internal.h"
 
@@ -35,10 +42,6 @@
 #include "protos/perfetto/trace/track_event/track_descriptor.pbzero.h"
 #include "protos/perfetto/trace/track_event/track_event.pbzero.h"
 
-#include <algorithm>
-#include <cmath>
-#include <tuple>
-
 namespace perfetto {
 
 // sRGB color.
@@ -177,18 +180,17 @@
 
   auto& tls = context_.GetThreadLocalState();
   std::array<char, 128> message_prefix{};
-  ssize_t written = 0;
+  size_t written = 0;
   if (tls.use_colors) {
-    written = snprintf(message_prefix.data(), message_prefix.size(),
-                       FMT_RGB_SET_BG " %s%s %-*.*s", track_color.r,
-                       track_color.g, track_color.b, kReset, kDim, title_width,
-                       title_width, title.data());
+    written = base::SprintfTrunc(message_prefix.data(), message_prefix.size(),
+                                 FMT_RGB_SET_BG " %s%s %-*.*s", track_color.r,
+                                 track_color.g, track_color.b, kReset, kDim,
+                                 title_width, title_width, title.data());
   } else {
-    written = snprintf(message_prefix.data(), message_prefix.size(), "%-*.*s",
-                       title_width + 2, title_width, title.data());
+    written = base::SprintfTrunc(message_prefix.data(), message_prefix.size(),
+                                 "%-*.*s", title_width + 2, title_width,
+                                 title.data());
   }
-  if (written < 0)
-    written = message_prefix.size();
   track.user_data.assign(message_prefix.begin(),
                          message_prefix.begin() + written);
 }
diff --git a/src/tracing/consumer_api_deprecated/consumer_api_deprecated.cc b/src/tracing/consumer_api_deprecated/consumer_api_deprecated.cc
index 1245873..0a82042 100644
--- a/src/tracing/consumer_api_deprecated/consumer_api_deprecated.cc
+++ b/src/tracing/consumer_api_deprecated/consumer_api_deprecated.cc
@@ -34,6 +34,7 @@
 
 #include "perfetto/base/build_config.h"
 #include "perfetto/ext/base/scoped_file.h"
+#include "perfetto/ext/base/string_utils.h"
 #include "perfetto/ext/base/temp_file.h"
 #include "perfetto/ext/base/thread_checker.h"
 #include "perfetto/ext/base/unix_task_runner.h"
@@ -149,10 +150,10 @@
     return false;
 
 #if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
-  char memfd_name[64];
-  snprintf(memfd_name, sizeof(memfd_name), "perfetto_trace_%" PRId64, handle_);
-  buf_fd_.reset(
-      static_cast<int>(syscall(__NR_memfd_create, memfd_name, MFD_CLOEXEC)));
+
+  base::StackString<64> memfd_name("perfetto_trace_%" PRId64, handle_);
+  buf_fd_.reset(static_cast<int>(
+      syscall(__NR_memfd_create, memfd_name.c_str(), MFD_CLOEXEC)));
 #else
   // Fallback for testing on Linux/mac.
   buf_fd_ = base::TempFile::CreateUnlinked().ReleaseFD();
diff --git a/src/tracing/event_context.cc b/src/tracing/event_context.cc
index a4f1ade..42b196b 100644
--- a/src/tracing/event_context.cc
+++ b/src/tracing/event_context.cc
@@ -16,6 +16,7 @@
 
 #include "perfetto/tracing/event_context.h"
 
+#include "perfetto/tracing/internal/track_event_interned_fields.h"
 #include "protos/perfetto/trace/interned_data/interned_data.pbzero.h"
 #include "protos/perfetto/trace/track_event/track_event.pbzero.h"
 
@@ -50,4 +51,12 @@
   serialized_interned_data.Reset();
 }
 
+protos::pbzero::DebugAnnotation* EventContext::AddDebugAnnotation(
+    const char* name) {
+  auto annotation = event()->add_debug_annotations();
+  annotation->set_name_iid(
+      internal::InternedDebugAnnotationName::Get(this, name));
+  return annotation;
+}
+
 }  // namespace perfetto
diff --git a/src/tracing/internal/in_process_tracing_backend.cc b/src/tracing/internal/in_process_tracing_backend.cc
index 1df0f4b..d71a62f 100644
--- a/src/tracing/internal/in_process_tracing_backend.cc
+++ b/src/tracing/internal/in_process_tracing_backend.cc
@@ -85,11 +85,6 @@
 std::unique_ptr<ProducerEndpoint> InProcessTracingBackend::ConnectProducer(
     const ConnectProducerArgs& args) {
   PERFETTO_DCHECK(args.task_runner->RunsTasksOnCurrentThread());
-
-  // This should never happen as we can have at most one in-process backend.
-  if (service_)
-    PERFETTO_FATAL("InProcessTracingBackend initialized twice");
-
   return GetOrCreateService(args.task_runner)
       ->ConnectProducer(args.producer, /*uid=*/0, args.producer_name,
                         args.shmem_size_hint_bytes,
diff --git a/src/tracing/internal/tracing_muxer_impl.cc b/src/tracing/internal/tracing_muxer_impl.cc
index 3a4cc6d..c5f3af9 100644
--- a/src/tracing/internal/tracing_muxer_impl.cc
+++ b/src/tracing/internal/tracing_muxer_impl.cc
@@ -128,6 +128,9 @@
   return hasher.digest();
 }
 
+// Holds an earlier TracingMuxerImpl instance after ResetForTesting() is called.
+static TracingMuxerImpl* g_prev_instance{};
+
 }  // namespace
 
 // ----- Begin of TracingMuxerImpl::ProducerImpl
@@ -139,7 +142,9 @@
       backend_id_(backend_id),
       shmem_batch_commits_duration_ms_(shmem_batch_commits_duration_ms) {}
 
-TracingMuxerImpl::ProducerImpl::~ProducerImpl() = default;
+TracingMuxerImpl::ProducerImpl::~ProducerImpl() {
+  muxer_ = nullptr;
+}
 
 void TracingMuxerImpl::ProducerImpl::Initialize(
     std::unique_ptr<ProducerEndpoint> endpoint) {
@@ -153,6 +158,10 @@
   // that |task_runner| is assumed to outlive tracing sessions on all threads.)
   auto* task_runner = muxer_->task_runner_.get();
   auto deleter = [task_runner](ProducerEndpoint* e) {
+    if (task_runner->RunsTasksOnCurrentThread()) {
+      delete e;
+      return;
+    }
     task_runner->PostTask([e] { delete e; });
   };
   std::shared_ptr<ProducerEndpoint> service(endpoint.release(), deleter);
@@ -174,23 +183,37 @@
 
 void TracingMuxerImpl::ProducerImpl::OnDisconnect() {
   PERFETTO_DCHECK_THREAD(thread_checker_);
+  // If we're being destroyed, bail out.
+  if (!muxer_)
+    return;
   connected_ = false;
   // Active data sources for this producer will be stopped by
   // DestroyStoppedTraceWritersForCurrentThread() since the reconnected producer
   // will have a different connection id (even before it has finished
   // connecting).
   registered_data_sources_.reset();
-  // Keep the old service around as a dead connection in case it has active
-  // trace writers. We can't clear |service_| here because other threads may be
-  // concurrently creating new trace writers. The reconnection below will
-  // atomically swap the new service in place of the old one.
-  dead_services_.push_back(service_);
+  DisposeConnection();
+
   // Try reconnecting the producer.
   muxer_->OnProducerDisconnected(this);
 }
 
+void TracingMuxerImpl::ProducerImpl::DisposeConnection() {
+  // Keep the old service around as a dead connection in case it has active
+  // trace writers. If any tracing sessions were created, we can't clear
+  // |service_| here because other threads may be concurrently creating new
+  // trace writers. Any reconnection attempt will atomically swap the new
+  // service in place of the old one.
+  if (did_setup_tracing_) {
+    dead_services_.push_back(service_);
+  } else {
+    service_.reset();
+  }
+}
+
 void TracingMuxerImpl::ProducerImpl::OnTracingSetup() {
   PERFETTO_DCHECK_THREAD(thread_checker_);
+  did_setup_tracing_ = true;
   service_->MaybeSharedMemoryArbiter()->SetBatchCommitsDuration(
       shmem_batch_commits_duration_ms_);
 }
@@ -199,18 +222,24 @@
     DataSourceInstanceID id,
     const DataSourceConfig& cfg) {
   PERFETTO_DCHECK_THREAD(thread_checker_);
+  if (!muxer_)
+    return;
   muxer_->SetupDataSource(backend_id_, connection_id_, id, cfg);
 }
 
 void TracingMuxerImpl::ProducerImpl::StartDataSource(DataSourceInstanceID id,
                                                      const DataSourceConfig&) {
   PERFETTO_DCHECK_THREAD(thread_checker_);
+  if (!muxer_)
+    return;
   muxer_->StartDataSource(backend_id_, id);
   service_->NotifyDataSourceStarted(id);
 }
 
 void TracingMuxerImpl::ProducerImpl::StopDataSource(DataSourceInstanceID id) {
   PERFETTO_DCHECK_THREAD(thread_checker_);
+  if (!muxer_)
+    return;
   muxer_->StopDataSource_AsyncBegin(backend_id_, id);
 }
 
@@ -226,12 +255,14 @@
     const DataSourceInstanceID* instances,
     size_t instance_count) {
   PERFETTO_DCHECK_THREAD(thread_checker_);
+  if (!muxer_)
+    return;
   for (size_t inst_idx = 0; inst_idx < instance_count; inst_idx++) {
     muxer_->ClearDataSourceIncrementalState(backend_id_, instances[inst_idx]);
   }
 }
 
-void TracingMuxerImpl::ProducerImpl::SweepDeadServices() {
+bool TracingMuxerImpl::ProducerImpl::SweepDeadServices() {
   PERFETTO_DCHECK_THREAD(thread_checker_);
   auto is_unused = [](const std::shared_ptr<ProducerEndpoint>& endpoint) {
     auto* arbiter = endpoint->MaybeSharedMemoryArbiter();
@@ -245,6 +276,7 @@
     }
     it = next_it;
   }
+  return dead_services_.empty();
 }
 
 // ----- End of TracingMuxerImpl::ProducerImpl methods.
@@ -259,7 +291,9 @@
       backend_id_(backend_id),
       session_id_(session_id) {}
 
-TracingMuxerImpl::ConsumerImpl::~ConsumerImpl() = default;
+TracingMuxerImpl::ConsumerImpl::~ConsumerImpl() {
+  muxer_ = nullptr;
+}
 
 void TracingMuxerImpl::ConsumerImpl::Initialize(
     std::unique_ptr<ConsumerEndpoint> endpoint) {
@@ -300,6 +334,9 @@
 
 void TracingMuxerImpl::ConsumerImpl::OnDisconnect() {
   PERFETTO_DCHECK_THREAD(thread_checker_);
+  // If we're being destroyed, bail out.
+  if (!muxer_)
+    return;
 #if PERFETTO_BUILDFLAG(PERFETTO_OS_ANDROID)
   if (!connected_ && backend_type_ == kSystemBackend) {
     PERFETTO_ELOG(
@@ -770,8 +807,7 @@
   if (static_state->index != kMaxDataSources)
     return true;
 
-  static std::atomic<uint32_t> last_id{};
-  uint32_t new_index = last_id++;
+  uint32_t new_index = next_data_source_index_++;
   if (new_index >= kMaxDataSources) {
     PERFETTO_DLOG(
         "RegisterDataSource failed: too many data sources already registered");
@@ -885,6 +921,7 @@
           std::is_same<decltype(internal_state->data_source_instance_id),
                        DataSourceInstanceID>::value,
           "data_source_instance_id type mismatch");
+      internal_state->muxer_id_for_testing = muxer_id_for_testing_;
       internal_state->backend_id = backend_id;
       internal_state->backend_connection_id = backend_connection_id;
       internal_state->data_source_instance_id = instance_id;
@@ -1021,6 +1058,7 @@
     std::lock_guard<std::recursive_mutex> guard(ds.internal_state->lock);
     ds.internal_state->trace_lambda_enabled = false;
     ds.internal_state->data_source.reset();
+    ds.internal_state->interceptor.reset();
   }
 
   // The other fields of internal_state are deliberately *not* cleared.
@@ -1131,7 +1169,9 @@
         continue;
 
       DataSourceState* ds_state = static_state->TryGet(inst);
-      if (ds_state && ds_state->backend_id == ds_tls.backend_id &&
+      if (ds_state &&
+          ds_state->muxer_id_for_testing == ds_tls.muxer_id_for_testing &&
+          ds_state->backend_id == ds_tls.backend_id &&
           ds_state->backend_connection_id == ds_tls.backend_connection_id &&
           ds_state->buffer_id == ds_tls.buffer_id &&
           ds_state->data_source_instance_id == ds_tls.data_source_instance_id) {
@@ -1485,6 +1525,17 @@
   TracingMuxer::generation_++;
 }
 
+void TracingMuxerImpl::SweepDeadBackends() {
+  PERFETTO_DCHECK_THREAD(thread_checker_);
+  for (auto it = dead_backends_.begin(); it != dead_backends_.end();) {
+    auto next_it = it;
+    next_it++;
+    if (it->producer->SweepDeadServices())
+      dead_backends_.erase(it);
+    it = next_it;
+  }
+}
+
 TracingMuxerImpl::FindDataSourceRes TracingMuxerImpl::FindDataSource(
     TracingBackendId backend_id,
     DataSourceInstanceID instance_id) {
@@ -1613,10 +1664,94 @@
       new TracingSessionImpl(this, session_id, requested_backend_type));
 }
 
+// static
 void TracingMuxerImpl::InitializeInstance(const TracingInitArgs& args) {
   if (instance_ != TracingMuxerFake::Get())
     PERFETTO_FATAL("Tracing already initialized");
-  new TracingMuxerImpl(args);
+  // If we previously had a TracingMuxerImpl instance which was reset,
+  // reinitialize and reuse it instead of trying to create a new one. See
+  // ResetForTesting().
+  if (g_prev_instance) {
+    auto* muxer = g_prev_instance;
+    g_prev_instance = nullptr;
+    instance_ = muxer;
+    muxer->task_runner_->PostTask([muxer, args] { muxer->Initialize(args); });
+  } else {
+    new TracingMuxerImpl(args);
+  }
+}
+
+// static
+void TracingMuxerImpl::ResetForTesting() {
+  // Ideally we'd tear down the entire TracingMuxerImpl, but the lifetimes of
+  // various objects make that a non-starter. In particular:
+  //
+  // 1) Any thread that has entered a trace event has a TraceWriter, which holds
+  //    a reference back to ProducerImpl::service_.
+  //
+  // 2) ProducerImpl::service_ has a reference back to the ProducerImpl.
+  //
+  // 3) ProducerImpl holds a reference to TracingMuxerImpl::task_runner_, which
+  //    in turn depends on TracingMuxerImpl itself.
+  //
+  // Because of this, it's not safe to deallocate TracingMuxerImpl until all
+  // threads have dropped their TraceWriters. Since we can't really ask the
+  // caller to guarantee this, we'll instead reset enough of the muxer's state
+  // so that it can be reinitialized later and ensure all necessary objects from
+  // the old state remain alive until all references have gone away.
+  auto* muxer = reinterpret_cast<TracingMuxerImpl*>(instance_);
+
+  base::WaitableEvent reset_done;
+  auto do_reset = [muxer, &reset_done] {
+    // Unregister all data sources so they don't interfere with any future
+    // tracing sessions.
+    for (RegisteredDataSource& rds : muxer->data_sources_) {
+      for (RegisteredBackend& backend : muxer->backends_) {
+        if (!backend.producer->service_)
+          continue;
+        backend.producer->service_->UnregisterDataSource(rds.descriptor.name());
+      }
+    }
+    for (auto& backend : muxer->backends_) {
+      // Check that no consumer session is currently active on any backend.
+      for (auto& consumer : backend.consumers)
+        PERFETTO_CHECK(!consumer->service_);
+      backend.producer->muxer_ = nullptr;
+      backend.producer->DisposeConnection();
+      muxer->dead_backends_.push_back(std::move(backend));
+    }
+    muxer->backends_.clear();
+    muxer->interceptors_.clear();
+
+    for (auto& ds : muxer->data_sources_) {
+      ds.static_state->~DataSourceStaticState();
+      new (ds.static_state) DataSourceStaticState{};
+    }
+    muxer->data_sources_.clear();
+    muxer->next_data_source_index_ = 0;
+
+    // Free all backends without active trace writers or other inbound
+    // references. Note that even if all the backends get swept, the muxer still
+    // needs to stay around since |task_runner_| is assumed to be long-lived.
+    muxer->SweepDeadBackends();
+
+    // Make sure we eventually discard any per-thread trace writers from the
+    // previous instance.
+    muxer->muxer_id_for_testing_++;
+
+    g_prev_instance = muxer;
+    instance_ = TracingMuxerFake::Get();
+    reset_done.Notify();
+  };
+
+  // Some tests run the muxer and the test on the same thread. In these cases,
+  // we can reset synchronously.
+  if (muxer->task_runner_->RunsTasksOnCurrentThread()) {
+    do_reset();
+  } else {
+    muxer->task_runner_->PostTask(std::move(do_reset));
+    reset_done.Wait();
+  }
 }
 
 TracingMuxer::~TracingMuxer() = default;
diff --git a/src/tracing/internal/tracing_muxer_impl.h b/src/tracing/internal/tracing_muxer_impl.h
index d0898a7..874e073 100644
--- a/src/tracing/internal/tracing_muxer_impl.h
+++ b/src/tracing/internal/tracing_muxer_impl.h
@@ -97,6 +97,7 @@
   using TracingSessionGlobalID = uint64_t;
 
   static void InitializeInstance(const TracingInitArgs&);
+  static void ResetForTesting();
 
   // TracingMuxer implementation.
   bool RegisterDataSource(const DataSourceDescriptor&,
@@ -178,6 +179,7 @@
     void RegisterDataSource(const DataSourceDescriptor&,
                             DataSourceFactory,
                             DataSourceStaticState*);
+    void DisposeConnection();
 
     // perfetto::Producer implementation.
     void OnConnect() override;
@@ -191,12 +193,13 @@
     void Flush(FlushRequestID, const DataSourceInstanceID*, size_t) override;
     void ClearIncrementalState(const DataSourceInstanceID*, size_t) override;
 
-    void SweepDeadServices();
+    bool SweepDeadServices();
 
     PERFETTO_THREAD_CHECKER(thread_checker_)
-    TracingMuxerImpl* const muxer_;
+    TracingMuxerImpl* muxer_;
     TracingBackendId const backend_id_;
     bool connected_ = false;
+    bool did_setup_tracing_ = false;
     uint32_t connection_id_ = 0;
 
     const uint32_t shmem_batch_commits_duration_ms_ = 0;
@@ -258,7 +261,7 @@
     // Will eventually inform the |muxer_| when it is safe to remove |this|.
     void Disconnect();
 
-    TracingMuxerImpl* const muxer_;
+    TracingMuxerImpl* muxer_;
     BackendType const backend_type_;
     TracingBackendId const backend_id_;
     TracingSessionGlobalID const session_id_;
@@ -383,6 +386,7 @@
   void InitializeConsumer(TracingSessionGlobalID session_id);
   void OnConsumerDisconnected(ConsumerImpl* consumer);
   void OnProducerDisconnected(ProducerImpl* producer);
+  void SweepDeadBackends();
 
   struct FindDataSourceRes {
     FindDataSourceRes() = default;
@@ -396,6 +400,7 @@
   };
   FindDataSourceRes FindDataSource(TracingBackendId, DataSourceInstanceID);
 
+  // WARNING: If you add new state here, be sure to update ResetForTesting.
   std::unique_ptr<base::TaskRunner> task_runner_;
   std::vector<RegisteredDataSource> data_sources_;
   std::vector<RegisteredBackend> backends_;
@@ -403,11 +408,18 @@
   TracingPolicy* policy_ = nullptr;
 
   std::atomic<TracingSessionGlobalID> next_tracing_session_id_{};
+  std::atomic<uint32_t> next_data_source_index_{};
+  uint32_t muxer_id_for_testing_{};
 
   // Maximum number of times we will try to reconnect producer backend.
   // Should only be modified for testing purposes.
   std::atomic<uint32_t> max_producer_reconnections_{100u};
 
+  // After ResetForTesting() is called, holds tracing backends which need to be
+  // kept alive until all inbound references have gone away. See
+  // SweepDeadBackends().
+  std::list<RegisteredBackend> dead_backends_;
+
   PERFETTO_THREAD_CHECKER(thread_checker_)
 };
 
diff --git a/src/tracing/test/api_integrationtest.cc b/src/tracing/test/api_integrationtest.cc
index 50b1463..676a3a0 100644
--- a/src/tracing/test/api_integrationtest.cc
+++ b/src/tracing/test/api_integrationtest.cc
@@ -344,6 +344,92 @@
 
 TestTracingPolicy* g_test_tracing_policy = new TestTracingPolicy();  // Leaked.
 
+// Test helper that mirrors the incremental (interned) state carried by the
+// packets of a trace, so that iids found in track events can be mapped back to
+// strings. Callers feed packets in trace order to ClearIfNeeded() and Parse().
+class ParsedIncrementalState {
+ public:
+  // Forgets all interned strings and seen tracks when |packet| has the
+  // SEQ_INCREMENTAL_STATE_CLEARED flag set, and records that a clear happened.
+  void ClearIfNeeded(const perfetto::protos::gen::TracePacket& packet) {
+    if (packet.sequence_flags() &
+        perfetto::protos::pbzero::TracePacket::SEQ_INCREMENTAL_STATE_CLEARED) {
+      incremental_state_was_cleared_ = true;
+      categories_.clear();
+      event_names_.clear();
+      debug_annotation_names_.clear();
+      seen_tracks_.clear();
+    }
+  }
+
+  // Adds the interned categories, event names and debug annotation names of
+  // |packet| to the tables. Re-defining an iid that is already known is
+  // reported as a test failure.
+  void Parse(const perfetto::protos::gen::TracePacket& packet) {
+    // Update incremental state.
+    if (packet.has_interned_data()) {
+      const auto& interned_data = packet.interned_data();
+      for (const auto& it : interned_data.event_categories()) {
+        EXPECT_EQ(categories_.find(it.iid()), categories_.end());
+        categories_[it.iid()] = it.name();
+      }
+      for (const auto& it : interned_data.event_names()) {
+        EXPECT_EQ(event_names_.find(it.iid()), event_names_.end());
+        event_names_[it.iid()] = it.name();
+      }
+      for (const auto& it : interned_data.debug_annotation_names()) {
+        EXPECT_EQ(debug_annotation_names_.find(it.iid()),
+                  debug_annotation_names_.end());
+        debug_annotation_names_[it.iid()] = it.name();
+      }
+    }
+  }
+
+  bool HasSeenTrack(uint64_t uuid) const {
+    return seen_tracks_.count(uuid) != 0;
+  }
+
+  void InsertTrack(uint64_t uuid) { seen_tracks_.insert(uuid); }
+
+  // The lookups below return an empty string for an iid that has not been
+  // interned. They must not insert a placeholder entry (as operator[] would):
+  // a later, legitimate definition of that iid would then fail the
+  // duplicate-iid EXPECT_EQ in Parse().
+  std::string GetCategory(uint64_t iid) const {
+    return Lookup(categories_, iid);
+  }
+
+  // Returns the interned name when |event| has a name_iid, otherwise its
+  // inline name.
+  std::string GetEventName(
+      const perfetto::protos::gen::TrackEvent& event) const {
+    if (event.has_name_iid())
+      return Lookup(event_names_, event.name_iid());
+    return event.name();
+  }
+
+  std::string GetDebugAnnotationName(uint64_t iid) const {
+    return Lookup(debug_annotation_names_, iid);
+  }
+
+  // True once any packet passed to ClearIfNeeded() had the cleared flag set.
+  bool WasCleared() const { return incremental_state_was_cleared_; }
+
+ private:
+  using InternTable = std::map<uint64_t, std::string>;
+
+  static std::string Lookup(const InternTable& table, uint64_t iid) {
+    auto entry = table.find(iid);
+    return entry == table.end() ? std::string() : entry->second;
+  }
+
+  bool incremental_state_was_cleared_ = false;
+  InternTable categories_;
+  InternTable event_names_;
+  InternTable debug_annotation_names_;
+  std::set<uint64_t> seen_tracks_;
+};
+
 // -------------------------
 // Declaration of test class
 // -------------------------
@@ -356,30 +417,16 @@
     g_test_tracing_policy->should_allow_consumer_connection = true;
 
     // Start a fresh system service for this test, tearing down any previous
-    // service that was running.
-    uint32_t supported_backends =
-        perfetto::kInProcessBackend | perfetto::kSystemBackend;
-    if (!perfetto::test::StartSystemService())
-      supported_backends &= ~perfetto::kSystemBackend;
-
-    // If the system backend wasn't supported, skip all system backend tests.
-    auto backend = GetParam();
-    if (!(supported_backends & backend))
+    // service that was running. If the system backend isn't supported, skip all
+    // system backend tests.
+    if (GetParam() == perfetto::kSystemBackend &&
+        !perfetto::test::StartSystemService()) {
       GTEST_SKIP();
-
-    static bool was_initialized;
-    if (!was_initialized) {
-      EXPECT_FALSE(perfetto::Tracing::IsInitialized());
-      was_initialized = true;
-    } else {
-      EXPECT_TRUE(perfetto::Tracing::IsInitialized());
     }
 
-    // Since the client API can only be initialized once per process, initialize
-    // both the in-process and system backends for every test here. The actual
-    // service to be used is chosen by the test parameter.
+    EXPECT_FALSE(perfetto::Tracing::IsInitialized());
     TracingInitArgs args;
-    args.backends = supported_backends;
+    args.backends = GetParam();
     args.tracing_policy = g_test_tracing_policy;
     perfetto::Tracing::Initialize(args);
     RegisterDataSource<MockDataSource>("my_data_source");
@@ -397,7 +444,14 @@
     perfetto::test::DisableReconnectLimit();
   }
 
-  void TearDown() override { instance = nullptr; }
+  // Clears |sessions_| and then resets the tracing library, so that the next
+  // SetUp() starts from an uninitialized state and can call Initialize() with
+  // the backend selected by its own test parameter.
+  void TearDown() override {
+    instance = nullptr;
+    sessions_.clear();
+    perfetto::Tracing::ResetForTesting();
+  }
 
   template <typename DataSourceType>
   TestDataSourceHandle* RegisterDataSource(std::string name) {
@@ -507,31 +558,21 @@
 
     // Read back the trace, maintaining interning tables as we go.
     std::vector<std::string> slices;
-    std::map<uint64_t, std::string> categories;
-    std::map<uint64_t, std::string> event_names;
-    std::map<uint64_t, std::string> debug_annotation_names;
-    std::set<uint64_t> seen_tracks;
     perfetto::protos::gen::Trace parsed_trace;
     EXPECT_TRUE(
         parsed_trace.ParseFromArray(raw_trace.data(), raw_trace.size()));
 
-    bool incremental_state_was_cleared = false;
+    ParsedIncrementalState incremental_state;
+
     uint32_t sequence_id = 0;
     for (const auto& packet : parsed_trace.packet()) {
-      if (packet.sequence_flags() & perfetto::protos::pbzero::TracePacket::
-                                        SEQ_INCREMENTAL_STATE_CLEARED) {
-        incremental_state_was_cleared = true;
-        categories.clear();
-        event_names.clear();
-        debug_annotation_names.clear();
-        seen_tracks.clear();
-      }
+      incremental_state.ClearIfNeeded(packet);
 
       if (packet.has_track_descriptor()) {
         // Make sure we haven't seen any events on this track before the
         // descriptor was written.
-        EXPECT_EQ(seen_tracks.find(packet.track_descriptor().uuid()),
-                  seen_tracks.end());
+        EXPECT_FALSE(
+            incremental_state.HasSeenTrack(packet.track_descriptor().uuid()));
       }
 
       if (!packet.has_track_event())
@@ -544,28 +585,13 @@
         EXPECT_EQ(sequence_id, packet.trusted_packet_sequence_id());
       }
 
-      // Update incremental state.
-      if (packet.has_interned_data()) {
-        const auto& interned_data = packet.interned_data();
-        for (const auto& it : interned_data.event_categories()) {
-          EXPECT_EQ(categories.find(it.iid()), categories.end());
-          categories[it.iid()] = it.name();
-        }
-        for (const auto& it : interned_data.event_names()) {
-          EXPECT_EQ(event_names.find(it.iid()), event_names.end());
-          event_names[it.iid()] = it.name();
-        }
-        for (const auto& it : interned_data.debug_annotation_names()) {
-          EXPECT_EQ(debug_annotation_names.find(it.iid()),
-                    debug_annotation_names.end());
-          debug_annotation_names[it.iid()] = it.name();
-        }
-      }
+      incremental_state.Parse(packet);
+
       const auto& track_event = packet.track_event();
       std::string slice;
 
       if (track_event.has_track_uuid()) {
-        seen_tracks.insert(track_event.track_uuid());
+        incremental_state.InsertTrack(track_event.track_uuid());
         std::stringstream track;
         track << "[track=" << track_event.track_uuid() << "]";
         slice += track.str();
@@ -623,11 +649,12 @@
       }
       size_t category_count = 0;
       for (const auto& it : track_event.category_iids())
-        slice += (category_count++ ? "," : ":") + categories[it];
+        slice +=
+            (category_count++ ? "," : ":") + incremental_state.GetCategory(it);
       for (const auto& it : track_event.categories())
         slice += (category_count++ ? ",$" : ":$") + it;
-      if (track_event.has_name_iid())
-        slice += "." + event_names[track_event.name_iid()];
+      if (track_event.has_name() || track_event.has_name_iid())
+        slice += "." + incremental_state.GetEventName(track_event);
 
       if (track_event.debug_annotations_size()) {
         slice += "(";
@@ -636,7 +663,8 @@
           if (!first_annotation) {
             slice += ",";
           }
-          slice += debug_annotation_names[it.name_iid()] + "=";
+          slice +=
+              incremental_state.GetDebugAnnotationName(it.name_iid()) + "=";
           std::stringstream value;
           if (it.has_bool_value()) {
             value << "(bool)" << it.bool_value();
@@ -661,9 +689,37 @@
         slice += ")";
       }
 
+      if (track_event.flow_ids_size()) {
+        slice += "(flow_ids=";
+        std::stringstream value;
+        bool first_annotation = true;
+        for (uint64_t id : track_event.flow_ids()) {
+          if (!first_annotation) {
+            value << ",";
+          }
+          first_annotation = false;
+          value << id;
+        }
+        slice += value.str() + ")";
+      }
+
+      if (track_event.terminating_flow_ids_size()) {
+        slice += "(terminating_flow_ids=";
+        std::stringstream value;
+        bool first_annotation = true;
+        for (uint64_t id : track_event.terminating_flow_ids()) {
+          if (!first_annotation) {
+            value << ",";
+          }
+          value << id;
+          first_annotation = false;
+        }
+        slice += value.str() + ")";
+      }
+
       slices.push_back(slice);
     }
-    EXPECT_TRUE(incremental_state_was_cleared);
+    EXPECT_TRUE(incremental_state.WasCleared());
     return slices;
   }
 
@@ -1852,6 +1908,59 @@
   }
 };
 
+// Returns a callback that takes the EventContext by reference and writes a
+// log message with source_location_iid 1 and body_iid 2 to the event.
+auto GetWriteLogMessageRefLambda = []() {
+  return [](perfetto::EventContext& ctx) {
+    auto* log = ctx.event()->set_log_message();
+    log->set_source_location_iid(1);
+    log->set_body_iid(2);
+  };
+};
+
+// Runs |checker| on each track event of |type| in |raw_trace| whose name is
+// |event_name| (any name if null), and expects at least one such event.
+void CheckTypedArguments(
+    const std::vector<char>& raw_trace,
+    const char* event_name,
+    perfetto::protos::gen::TrackEvent::Type type,
+    std::function<void(const perfetto::protos::gen::TrackEvent&)> checker) {
+  perfetto::protos::gen::Trace parsed_trace;
+  ASSERT_TRUE(parsed_trace.ParseFromArray(raw_trace.data(), raw_trace.size()));
+
+  ParsedIncrementalState interned;
+  bool found_slice = false;
+  for (const auto& pkt : parsed_trace.packet()) {
+    // Keep the interning tables current before resolving any event name.
+    interned.ClearIfNeeded(pkt);
+    interned.Parse(pkt);
+    if (!pkt.has_track_event())
+      continue;
+    const auto& event = pkt.track_event();
+    // The name is only resolved for events of the requested type.
+    const bool skip =
+        event.type() != type ||
+        (event_name && interned.GetEventName(event) != event_name);
+    if (skip)
+      continue;
+
+    checker(event);
+    found_slice = true;
+  }
+  EXPECT_TRUE(found_slice);
+}
+
+// Expects >=1 TYPE_SLICE_BEGIN event, each with log_message iids 1 and 2.
+void CheckLogMessagePresent(const std::vector<char>& raw_trace) {
+  using perfetto::protos::gen::TrackEvent;
+  CheckTypedArguments(raw_trace, nullptr, TrackEvent::TYPE_SLICE_BEGIN,
+                      [](const TrackEvent& ev) {
+                        EXPECT_TRUE(ev.has_log_message());
+                        EXPECT_EQ(1u, ev.log_message().source_location_iid());
+                        EXPECT_EQ(2u, ev.log_message().body_iid());
+                      });
+}
+
 }  // namespace
 
 TEST_P(PerfettoApiTest, InlineTrackEventTypedArgs_NestedSingle) {
@@ -1866,31 +1973,12 @@
 
   tracing_session->get()->StopBlocking();
   std::vector<char> raw_trace = tracing_session->get()->ReadTraceBlocking();
-  std::string trace(raw_trace.data(), raw_trace.size());
-
-  perfetto::protos::gen::Trace parsed_trace;
-  ASSERT_TRUE(parsed_trace.ParseFromArray(raw_trace.data(), raw_trace.size()));
-
-  bool found_args = false;
-  for (const auto& packet : parsed_trace.packet()) {
-    if (!packet.has_track_event())
-      continue;
-    const auto& track_event = packet.track_event();
-    if (track_event.type() !=
-        perfetto::protos::gen::TrackEvent::TYPE_SLICE_BEGIN) {
-      continue;
-    }
-
-    EXPECT_TRUE(track_event.has_log_message());
-    const auto& log = track_event.log_message();
-    EXPECT_EQ(1u, log.source_location_iid());
-    EXPECT_EQ(2u, log.body_iid());
-    found_args = true;
-  }
-  EXPECT_TRUE(found_args);
+
+  // Find typed argument.
+  CheckLogMessagePresent(raw_trace);
 }
 
-TEST_P(PerfettoApiTest, InlineTrackEventTypedAndUntypedArgs) {
+TEST_P(PerfettoApiTest, TrackEventArgs_TypedAndUntyped) {
   // Create a new trace session.
   auto* tracing_session = NewTraceWithCategories({"foo"});
   tracing_session->get()->StartBlocking();
@@ -1905,34 +1990,15 @@
   std::vector<char> raw_trace = tracing_session->get()->ReadTraceBlocking();
   std::string trace(raw_trace.data(), raw_trace.size());
 
-  perfetto::protos::gen::Trace parsed_trace;
-  ASSERT_TRUE(parsed_trace.ParseFromArray(raw_trace.data(), raw_trace.size()));
-
   // Find typed argument.
-  bool found_args = false;
-  for (const auto& packet : parsed_trace.packet()) {
-    if (!packet.has_track_event())
-      continue;
-    const auto& track_event = packet.track_event();
-    if (track_event.type() !=
-        perfetto::protos::gen::TrackEvent::TYPE_SLICE_BEGIN) {
-      continue;
-    }
-
-    EXPECT_TRUE(track_event.has_log_message());
-    const auto& log = track_event.log_message();
-    EXPECT_EQ(1u, log.source_location_iid());
-    EXPECT_EQ(2u, log.body_iid());
-    found_args = true;
-  }
-  EXPECT_TRUE(found_args);
+  CheckLogMessagePresent(raw_trace);
 
   // Find untyped argument.
   EXPECT_THAT(ReadSlicesFromTrace(raw_trace),
               ElementsAre("B:foo.E(arg=(string)value)"));
 }
 
-TEST_P(PerfettoApiTest, InlineTrackEventUntypedAndTypedArgs) {
+TEST_P(PerfettoApiTest, TrackEventArgs_UntypedAndTyped) {
   // Create a new trace session.
   auto* tracing_session = NewTraceWithCategories({"foo"});
   tracing_session->get()->StartBlocking();
@@ -1945,35 +2011,269 @@
   tracing_session->get()->StopBlocking();
 
   std::vector<char> raw_trace = tracing_session->get()->ReadTraceBlocking();
-  std::string trace(raw_trace.data(), raw_trace.size());
-
-  perfetto::protos::gen::Trace parsed_trace;
-  ASSERT_TRUE(parsed_trace.ParseFromArray(raw_trace.data(), raw_trace.size()));
 
   // Find typed argument.
-  bool found_args = false;
-  for (const auto& packet : parsed_trace.packet()) {
-    if (!packet.has_track_event())
-      continue;
-    const auto& track_event = packet.track_event();
-    if (track_event.type() !=
-        perfetto::protos::gen::TrackEvent::TYPE_SLICE_BEGIN) {
-      continue;
-    }
-
-    EXPECT_TRUE(track_event.has_log_message());
-    const auto& log = track_event.log_message();
-    EXPECT_EQ(1u, log.source_location_iid());
-    EXPECT_EQ(2u, log.body_iid());
-    found_args = true;
-  }
-  EXPECT_TRUE(found_args);
+  CheckLogMessagePresent(raw_trace);
 
   // Find untyped argument.
   EXPECT_THAT(ReadSlicesFromTrace(raw_trace),
               ElementsAre("B:foo.E(arg=(string)value)"));
 }
 
+TEST_P(PerfettoApiTest, TrackEventArgs_UntypedAndRefLambda) {
+  // New session. The event passes a debug annotation, then a lambda.
+  auto* tracing_session = NewTraceWithCategories({"foo"});
+  tracing_session->get()->StartBlocking();
+
+  TRACE_EVENT_BEGIN("foo", "E", "arg", "value", GetWriteLogMessageRefLambda());
+  TRACE_EVENT_END("foo");
+
+  tracing_session->get()->StopBlocking();
+
+  std::vector<char> raw_trace = tracing_session->get()->ReadTraceBlocking();
+
+  // The lambda's log message (iids 1 and 2) must have been written.
+  CheckLogMessagePresent(raw_trace);
+
+  // The debug annotation must have been written as well.
+  EXPECT_THAT(ReadSlicesFromTrace(raw_trace),
+              ElementsAre("B:foo.E(arg=(string)value)"));
+}
+
+TEST_P(PerfettoApiTest, TrackEventArgs_RefLambdaAndUntyped) {
+  // New session. The event passes a lambda, then a debug annotation.
+  auto* tracing_session = NewTraceWithCategories({"foo"});
+  tracing_session->get()->StartBlocking();
+
+  TRACE_EVENT_BEGIN("foo", "E", GetWriteLogMessageRefLambda(), "arg", "value");
+  TRACE_EVENT_END("foo");
+
+  tracing_session->get()->StopBlocking();
+
+  std::vector<char> raw_trace = tracing_session->get()->ReadTraceBlocking();
+
+  // The lambda's log message (iids 1 and 2) must have been written.
+  CheckLogMessagePresent(raw_trace);
+
+  // The debug annotation must have been written as well.
+  EXPECT_THAT(ReadSlicesFromTrace(raw_trace),
+              ElementsAre("B:foo.E(arg=(string)value)"));
+}
+
+TEST_P(PerfettoApiTest, TrackEventArgs_RefLambdaAndTyped) {
+  // New session. The event passes a lambda, then a typed kLogMessage field.
+  auto* tracing_session = NewTraceWithCategories({"foo"});
+  tracing_session->get()->StartBlocking();
+
+  TRACE_EVENT_BEGIN(
+      "foo", "E",
+      [](perfetto::EventContext& ctx) {
+        ctx.AddDebugAnnotation("arg", "value");
+      },
+      perfetto::protos::pbzero::TrackEvent::kLogMessage, LogMessage());
+  TRACE_EVENT_END("foo");
+
+  tracing_session->get()->StopBlocking();
+
+  std::vector<char> raw_trace = tracing_session->get()->ReadTraceBlocking();
+
+  // The typed log message must have been written.
+  CheckLogMessagePresent(raw_trace);
+
+  // The debug annotation added by the lambda must have been written too.
+  EXPECT_THAT(ReadSlicesFromTrace(raw_trace),
+              ElementsAre("B:foo.E(arg=(string)value)"));
+}
+
+TEST_P(PerfettoApiTest, TrackEventArgs_TypedAndRefLambda) {
+  // New session. The event passes a typed kLogMessage field, then a lambda.
+  auto* tracing_session = NewTraceWithCategories({"foo"});
+  tracing_session->get()->StartBlocking();
+
+  TRACE_EVENT_BEGIN("foo", "E",
+                    perfetto::protos::pbzero::TrackEvent::kLogMessage,
+                    LogMessage(), [](perfetto::EventContext& ctx) {
+                      ctx.AddDebugAnnotation("arg", "value");
+                    });
+  TRACE_EVENT_END("foo");
+
+  tracing_session->get()->StopBlocking();
+
+  std::vector<char> raw_trace = tracing_session->get()->ReadTraceBlocking();
+
+  // The typed log message must have been written.
+  CheckLogMessagePresent(raw_trace);
+
+  // The debug annotation added by the lambda must have been written too.
+  EXPECT_THAT(ReadSlicesFromTrace(raw_trace),
+              ElementsAre("B:foo.E(arg=(string)value)"));
+}
+
+TEST_P(PerfettoApiTest, TrackEventArgs_RefLambdaAndRefLambda) {
+  // New session. The event passes two lambdas taking EventContext&.
+  auto* tracing_session = NewTraceWithCategories({"foo"});
+  tracing_session->get()->StartBlocking();
+
+  TRACE_EVENT_BEGIN(
+      "foo", "E",
+      [](perfetto::EventContext& ctx) {
+        ctx.AddDebugAnnotation("arg1", "value1");
+      },
+      [](perfetto::EventContext& ctx) {
+        ctx.AddDebugAnnotation("arg2", "value2");
+      });
+  TRACE_EVENT_END("foo");
+
+  tracing_session->get()->StopBlocking();
+
+  std::vector<char> raw_trace = tracing_session->get()->ReadTraceBlocking();
+
+  // Both lambdas must have added their debug annotation, in argument order.
+  EXPECT_THAT(ReadSlicesFromTrace(raw_trace),
+              ElementsAre("B:foo.E(arg1=(string)value1,arg2=(string)value2)"));
+}
+
+TEST_P(PerfettoApiTest, TrackEventArgs_RefLambdaAndLambda) {
+  // New session. One lambda takes EventContext&, the other takes it by value.
+  auto* tracing_session = NewTraceWithCategories({"foo"});
+  tracing_session->get()->StartBlocking();
+
+  TRACE_EVENT_BEGIN(
+      "foo", "E",
+      [](perfetto::EventContext& ctx) {
+        ctx.AddDebugAnnotation("arg1", "value1");
+      },
+      [](perfetto::EventContext ctx) {
+        ctx.AddDebugAnnotation("arg2", "value2");
+      });
+  TRACE_EVENT_END("foo");
+
+  tracing_session->get()->StopBlocking();
+
+  std::vector<char> raw_trace = tracing_session->get()->ReadTraceBlocking();
+
+  // Both lambdas must have added their debug annotation, in argument order.
+  EXPECT_THAT(ReadSlicesFromTrace(raw_trace),
+              ElementsAre("B:foo.E(arg1=(string)value1,arg2=(string)value2)"));
+}
+
+TEST_P(PerfettoApiTest, TrackEventArgs_RefLambda) {
+  // New session. The event passes a single lambda taking EventContext&.
+  auto* tracing_session = NewTraceWithCategories({"foo"});
+  tracing_session->get()->StartBlocking();
+
+  TRACE_EVENT_BEGIN("foo", "E", [](perfetto::EventContext& ctx) {
+    ctx.AddDebugAnnotation("arg", "value");
+  });
+  TRACE_EVENT_END("foo");
+
+  tracing_session->get()->StopBlocking();
+
+  std::vector<char> raw_trace = tracing_session->get()->ReadTraceBlocking();
+
+  // The debug annotation added by the lambda must have been written.
+  EXPECT_THAT(ReadSlicesFromTrace(raw_trace),
+              ElementsAre("B:foo.E(arg=(string)value)"));
+}
+
+TEST_P(PerfettoApiTest, TrackEventArgs_Flow_Global) {
+  // Create a new trace session.
+  auto* tracing_session = NewTraceWithCategories({"foo"});
+  tracing_session->get()->StartBlocking();
+
+  TRACE_EVENT_INSTANT("foo", "E1", perfetto::Flow::Global(42));
+  TRACE_EVENT_INSTANT("foo", "E2", perfetto::TerminatingFlow::Global(42));
+
+  tracing_session->get()->StopBlocking();
+
+  std::vector<char> raw_trace = tracing_session->get()->ReadTraceBlocking();
+
+  // E1 must carry exactly one flow id, 42.
+  CheckTypedArguments(
+      raw_trace, "E1", perfetto::protos::gen::TrackEvent::TYPE_INSTANT,
+      [](const perfetto::protos::gen::TrackEvent& track_event) {
+        EXPECT_THAT(track_event.flow_ids(), testing::ElementsAre(42));
+      });
+}
+
+TEST_P(PerfettoApiTest, TrackEventArgs_MultipleFlows) {
+  // New session. Each scoped event below carries one or more flow ids.
+  auto* tracing_session = NewTraceWithCategories({"foo"});
+  tracing_session->get()->StartBlocking();
+
+  {
+    TRACE_EVENT("foo", "E1", perfetto::Flow::Global(1),
+                perfetto::Flow::Global(2), perfetto::Flow::Global(3));
+  }
+  {
+    TRACE_EVENT("foo", "E2", perfetto::Flow::Global(1),
+                perfetto::TerminatingFlow::Global(2));
+  }
+  { TRACE_EVENT("foo", "E3", perfetto::TerminatingFlow::Global(3)); }
+
+  tracing_session->get()->StopBlocking();
+
+  std::vector<char> raw_trace = tracing_session->get()->ReadTraceBlocking();
+  EXPECT_THAT(ReadSlicesFromTrace(raw_trace),
+              ElementsAre("B:foo.E1(flow_ids=1,2,3)", "E",
+                          "B:foo.E2(flow_ids=1)(terminating_flow_ids=2)", "E",
+                          "B:foo.E3(terminating_flow_ids=3)"));
+}
+
+TEST_P(PerfettoApiTest, TrackEventArgs_Flow_ProcessScoped) {
+  // Create a new trace session.
+  auto* tracing_session = NewTraceWithCategories({"foo"});
+  tracing_session->get()->StartBlocking();
+
+  TRACE_EVENT_INSTANT("foo", "E1", perfetto::Flow::ProcessScoped(1));
+  TRACE_EVENT_INSTANT("foo", "E2", perfetto::TerminatingFlow::ProcessScoped(1));
+  TRACE_EVENT_INSTANT("foo", "Flush");
+
+  tracing_session->get()->StopBlocking();
+
+  std::vector<char> raw_trace = tracing_session->get()->ReadTraceBlocking();
+
+  // E1 must carry one flow id and E2 one terminating flow id.
+  CheckTypedArguments(raw_trace, "E1",
+                      perfetto::protos::gen::TrackEvent::TYPE_INSTANT,
+                      [](const perfetto::protos::gen::TrackEvent& track_event) {
+                        EXPECT_EQ(track_event.flow_ids_size(), 1);
+                      });
+  CheckTypedArguments(raw_trace, "E2",
+                      perfetto::protos::gen::TrackEvent::TYPE_INSTANT,
+                      [](const perfetto::protos::gen::TrackEvent& track_event) {
+                        EXPECT_EQ(track_event.terminating_flow_ids_size(), 1);
+                      });
+}
+
+TEST_P(PerfettoApiTest, TrackEventArgs_Flow_FromPointer) {
+  // Create a new trace session.
+  auto* tracing_session = NewTraceWithCategories({"foo"});
+  tracing_session->get()->StartBlocking();
+
+  int a;
+  int* ptr = &a;
+  TRACE_EVENT_INSTANT("foo", "E1", perfetto::Flow::FromPointer(ptr));
+  TRACE_EVENT_INSTANT("foo", "E2", perfetto::TerminatingFlow::FromPointer(ptr));
+  TRACE_EVENT_INSTANT("foo", "Flush");
+
+  tracing_session->get()->StopBlocking();
+
+  std::vector<char> raw_trace = tracing_session->get()->ReadTraceBlocking();
+
+  // E1 must carry one flow id and E2 one terminating flow id.
+  CheckTypedArguments(raw_trace, "E1",
+                      perfetto::protos::gen::TrackEvent::TYPE_INSTANT,
+                      [](const perfetto::protos::gen::TrackEvent& track_event) {
+                        EXPECT_EQ(track_event.flow_ids_size(), 1);
+                      });
+  CheckTypedArguments(raw_trace, "E2",
+                      perfetto::protos::gen::TrackEvent::TYPE_INSTANT,
+                      [](const perfetto::protos::gen::TrackEvent& track_event) {
+                        EXPECT_EQ(track_event.terminating_flow_ids_size(), 1);
+                      });
+}
+
 struct InternedLogMessageBody
     : public perfetto::TrackEventInternedDataIndex<
           InternedLogMessageBody,
@@ -2380,6 +2680,9 @@
                     [&](perfetto::TracedValue context) {
                       std::move(context).WriteInt64(42);
                     });
+  TRACE_EVENT_BEGIN("test", "E", [&](perfetto::EventContext ctx) {
+    ctx.AddDebugAnnotation("debug_annotation", "value");
+  });
   perfetto::TrackEvent::Flush();
 
   tracing_session->get()->StopBlocking();
@@ -2394,7 +2697,8 @@
           "B:test.E(ptr_arg=(pointer)baadf00d)",
           "B:test.E(size_t_arg=(uint)42)", "B:test.E(ptrdiff_t_arg=(int)-7)",
           "B:test.E(enum_arg=(uint)1)", "B:test.E(signed_enum_arg=(int)-1)",
-          "B:test.E(class_enum_arg=(int)0)", "B:test.E(traced_value=(int)42)"));
+          "B:test.E(class_enum_arg=(int)0)", "B:test.E(traced_value=(int)42)",
+          "B:test.E(debug_annotation=(string)value)"));
 }
 
 TEST_P(PerfettoApiTest, TrackEventCustomDebugAnnotations) {
diff --git a/src/tracing/tracing.cc b/src/tracing/tracing.cc
index 019341e..090c5cb 100644
--- a/src/tracing/tracing.cc
+++ b/src/tracing/tracing.cc
@@ -45,7 +45,7 @@
   // Make sure the headers and implementation files agree on the build config.
   PERFETTO_CHECK(args.dcheck_is_on_ == PERFETTO_DCHECK_IS_ON());
   if (args.log_message_callback) {
-    SetLogMessageCallback(args.log_message_callback);
+    base::SetLogMessageCallback(args.log_message_callback);
   }
   internal::TracingMuxerImpl::InitializeInstance(args);
   internal::TrackRegistry::InitializeInstance();
@@ -58,6 +58,19 @@
   return g_was_initialized;
 }
 
+// static
+// Undoes Initialize() so that it can be called again: clears the log message
+// callback, resets the muxer and the track registry and marks tracing as
+// uninitialized. Does nothing if tracing is not currently initialized.
+void Tracing::ResetForTesting() {
+  if (!g_was_initialized)
+    return;
+  base::SetLogMessageCallback(nullptr);
+  internal::TracingMuxerImpl::ResetForTesting();
+  internal::TrackRegistry::ResetForTesting();
+  g_was_initialized = false;
+}
+
 //  static
 std::unique_ptr<TracingSession> Tracing::NewTrace(BackendType backend) {
   return static_cast<internal::TracingMuxerImpl*>(internal::TracingMuxer::Get())
diff --git a/src/tracing/track.cc b/src/tracing/track.cc
index c1e7e01..6e69c25 100644
--- a/src/tracing/track.cc
+++ b/src/tracing/track.cc
@@ -175,6 +175,13 @@
   }
 }
 
+// Deletes the registry pointed to by |instance_| and nulls the pointer. This
+// is a no-op when |instance_| is already null.
+void TrackRegistry::ResetForTesting() {
+  delete instance_;
+  instance_ = nullptr;
+}
+
 void TrackRegistry::UpdateTrack(Track track,
                                 const std::string& serialized_desc) {
   std::lock_guard<std::mutex> lock(mutex_);
diff --git a/test/cts/AndroidTest.xml b/test/cts/AndroidTest.xml
index 800a6d4..2a9276c 100644
--- a/test/cts/AndroidTest.xml
+++ b/test/cts/AndroidTest.xml
@@ -26,6 +26,7 @@
         <option name="test-file-name" value="CtsPerfettoDebuggableApp.apk" />
         <option name="test-file-name" value="CtsPerfettoReleaseApp.apk" />
         <option name="test-file-name" value="CtsPerfettoProfileableApp.apk" />
+        <option name="test-file-name" value="CtsPerfettoNonProfileableApp.apk" />
     </target_preparer>
     <target_preparer class="com.android.compatibility.common.tradefed.targetprep.FilePusher">
         <option name="cleanup" value="true" />
diff --git a/test/cts/device_feature_test_cts.cc b/test/cts/device_feature_test_cts.cc
index 3ed5bc1..6ed16c3 100644
--- a/test/cts/device_feature_test_cts.cc
+++ b/test/cts/device_feature_test_cts.cc
@@ -21,9 +21,9 @@
 namespace perfetto {
 
 TEST(PerfettoDeviceFeatureTest, TestMaxCpusForAtraceChmod) {
-  // Check that there are no more than 16 CPUs so that the assumption in the
+  // Check that there are no more than 24 CPUs so that the assumption in the
   // atrace.rc for clearing CPU buffers is valid.
-  ASSERT_LE(sysconf(_SC_NPROCESSORS_CONF), 16);
+  ASSERT_LE(sysconf(_SC_NPROCESSORS_CONF), 24);
 }
 
 }  // namespace perfetto
diff --git a/test/cts/heapprofd_test_cts.cc b/test/cts/heapprofd_test_cts.cc
index 69aa344..a8f4641 100644
--- a/test/cts/heapprofd_test_cts.cc
+++ b/test/cts/heapprofd_test_cts.cc
@@ -234,5 +234,29 @@
   StopApp(app_name);
 }
 
+// Runtime profiling of the non-profileable test app: no profile contents are
+// expected on user builds, the expected allocations on all other builds.
+TEST(HeapprofdCtsTest, NonProfileableAppRuntime) {
+  std::string app_name = "android.perfetto.cts.app.nonprofileable";
+  const auto& packets = ProfileRuntime(app_name);
+  if (IsUserBuild())
+    AssertNoProfileContents(packets);
+  else
+    AssertExpectedAllocationsPresent(packets);
+  StopApp(app_name);
+}
+
+// Startup profiling of the non-profileable test app: no profile contents are
+// expected on user builds, the expected allocations on all other builds.
+TEST(HeapprofdCtsTest, NonProfileableAppStartup) {
+  std::string app_name = "android.perfetto.cts.app.nonprofileable";
+  const auto& packets = ProfileStartup(app_name);
+  if (IsUserBuild())
+    AssertNoProfileContents(packets);
+  else
+    AssertExpectedAllocationsPresent(packets);
+  StopApp(app_name);
+}
+
 }  // namespace
 }  // namespace perfetto
diff --git a/test/cts/test_apps/Android.bp b/test/cts/test_apps/Android.bp
index 4263879..1172be5 100644
--- a/test/cts/test_apps/Android.bp
+++ b/test/cts/test_apps/Android.bp
@@ -80,3 +80,23 @@
     ],
     jni_uses_platform_apis: true,
 }
+
+android_test_helper_app {
+    name: "CtsPerfettoNonProfileableApp",
+    // tag this module as a cts test artifact
+    test_suites: [
+        "cts",
+        "vts10",
+        "general-tests",
+    ],
+
+    manifest: "AndroidManifest_nonprofileable.xml",
+
+    compile_multilib: "both",
+    srcs: ["src/**/*.java"],
+    sdk_version: "current",
+    jni_libs: [
+        "libperfettocts_native",
+    ],
+    jni_uses_platform_apis: true,
+}
diff --git a/test/cts/test_apps/AndroidManifest_nonprofileable.xml b/test/cts/test_apps/AndroidManifest_nonprofileable.xml
new file mode 100755
index 0000000..c4a716d
--- /dev/null
+++ b/test/cts/test_apps/AndroidManifest_nonprofileable.xml
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ -->
+
+<manifest xmlns:android="http://schemas.android.com/apk/res/android"
+    package="android.perfetto.cts.app.nonprofileable">
+
+    <application>
+        <profileable android:shell="true" android:enabled="false"/>
+        <activity
+          android:name="android.perfetto.cts.app.MainActivity"
+          android:exported="true">
+        </activity>
+        <activity-alias
+          android:name="android.perfetto.cts.app.nonprofileable.MainActivity"
+          android:targetActivity="android.perfetto.cts.app.MainActivity"
+          android:exported="true">
+            <intent-filter>
+                <action android:name="android.intent.action.MAIN" />
+                <category android:name="android.intent.category.LAUNCHER" />
+            </intent-filter>
+        </activity-alias>
+        <activity
+          android:name="android.perfetto.cts.app.BusyWaitActivity"
+          android:exported="true">
+        </activity>
+        <activity-alias
+          android:name="android.perfetto.cts.app.nonprofileable.BusyWaitActivity"
+          android:targetActivity="android.perfetto.cts.app.BusyWaitActivity"
+          android:exported="true">
+            <intent-filter>
+                <action android:name="android.intent.action.MAIN" />
+                <category android:name="android.intent.category.LAUNCHER" />
+            </intent-filter>
+        </activity-alias>
+    </application>
+</manifest>
+
diff --git a/test/cts/traced_perf_test_cts.cc b/test/cts/traced_perf_test_cts.cc
index 7b867cd..831f8b3 100644
--- a/test/cts/traced_perf_test_cts.cc
+++ b/test/cts/traced_perf_test_cts.cc
@@ -184,6 +184,23 @@
   StopApp(app_name);
 }
 
+TEST(TracedPerfCtsTest, SystemWideNonProfileableApp) {
+  if (!HasPerfLsmHooks())
+    GTEST_SKIP() << "skipped due to lack of perf_event_open LSM hooks";
+
+  // Stacks for this app are expected only when not running a user build.
+  std::string app_name = "android.perfetto.cts.app.nonprofileable";
+  const auto& packets = ProfileSystemWide(app_name);
+  int app_pid = PidForProcessName(app_name);
+  ASSERT_GT(app_pid, 0) << "failed to find pid for target process";
+  if (IsUserBuild())
+    AssertNoStacksForPid(packets, app_pid);
+  else
+    AssertHasSampledStacksForPid(packets, app_pid);
+  PERFETTO_CHECK(IsAppRunning(app_name));
+  StopApp(app_name);
+}
+
 TEST(TracedPerfCtsTest, SystemWideReleaseApp) {
   if (!HasPerfLsmHooks())
     GTEST_SKIP() << "skipped due to lack of perf_event_open LSM hooks";
diff --git a/test/trace_processor/profiling/callstack_sampling_flamegraph.out b/test/trace_processor/profiling/callstack_sampling_flamegraph.out
index f98a857..d16a96e 100644
--- a/test/trace_processor/profiling/callstack_sampling_flamegraph.out
+++ b/test/trace_processor/profiling/callstack_sampling_flamegraph.out
@@ -1,11 +1,11 @@
-"id","type","depth","name","map_name","count","cumulative_count","size","cumulative_size","alloc_count","cumulative_alloc_count","alloc_size","cumulative_alloc_size","parent_id","source_file","line_number"
-0,"experimental_flamegraph_nodes",0,"__start_thread","/apex/com.android.runtime/lib64/bionic/libc.so",0,560,0,560,0,0,0,0,"[NULL]","[NULL]","[NULL]"
-1,"experimental_flamegraph_nodes",1,"_ZL15__pthread_startPv","/apex/com.android.runtime/lib64/bionic/libc.so",0,560,0,560,0,0,0,0,0,"[NULL]","[NULL]"
-2,"experimental_flamegraph_nodes",2,"_ZN3art6Thread14CreateCallbackEPv","/apex/com.android.art/lib64/libart.so",0,301,0,301,0,0,0,0,1,"[NULL]","[NULL]"
-3,"experimental_flamegraph_nodes",3,"_ZN3art35InvokeVirtualOrInterfaceWithJValuesIPNS_9ArtMethodEEENS_6JValueERKNS_33ScopedObjectAccessAlreadyRunnableEP8_jobjectT_PK6jvalue","/apex/com.android.art/lib64/libart.so",0,301,0,301,0,0,0,0,2,"[NULL]","[NULL]"
-4,"experimental_flamegraph_nodes",4,"_ZN3art9ArtMethod6InvokeEPNS_6ThreadEPjjPNS_6JValueEPKc","/apex/com.android.art/lib64/libart.so",0,301,0,301,0,0,0,0,3,"[NULL]","[NULL]"
-5,"experimental_flamegraph_nodes",5,"art_quick_invoke_stub","/apex/com.android.art/lib64/libart.so",0,301,0,301,0,0,0,0,4,"[NULL]","[NULL]"
-6,"experimental_flamegraph_nodes",6,"android.os.HandlerThread.run","/system/framework/arm64/boot-framework.oat",0,43,0,43,0,0,0,0,5,"[NULL]","[NULL]"
-7,"experimental_flamegraph_nodes",7,"android.os.Looper.loop","/system/framework/arm64/boot-framework.oat",0,43,0,43,0,0,0,0,6,"[NULL]","[NULL]"
-8,"experimental_flamegraph_nodes",8,"android.os.Looper.loopOnce","/system/framework/arm64/boot-framework.oat",1,43,1,43,0,0,0,0,7,"[NULL]","[NULL]"
-9,"experimental_flamegraph_nodes",9,"android.os.Handler.dispatchMessage","/system/framework/arm64/boot-framework.oat",0,35,0,35,0,0,0,0,8,"[NULL]","[NULL]"
+"id","type","depth","name","map_name","count","cumulative_count","size","cumulative_size","alloc_count","cumulative_alloc_count","alloc_size","cumulative_alloc_size","parent_id","source_file","line_number","upid_group"
+0,"experimental_flamegraph_nodes",0,"__start_thread","/apex/com.android.runtime/lib64/bionic/libc.so",0,560,0,560,0,0,0,0,"[NULL]","[NULL]","[NULL]","[NULL]"
+1,"experimental_flamegraph_nodes",1,"_ZL15__pthread_startPv","/apex/com.android.runtime/lib64/bionic/libc.so",0,560,0,560,0,0,0,0,0,"[NULL]","[NULL]","[NULL]"
+2,"experimental_flamegraph_nodes",2,"_ZN3art6Thread14CreateCallbackEPv","/apex/com.android.art/lib64/libart.so",0,301,0,301,0,0,0,0,1,"[NULL]","[NULL]","[NULL]"
+3,"experimental_flamegraph_nodes",3,"_ZN3art35InvokeVirtualOrInterfaceWithJValuesIPNS_9ArtMethodEEENS_6JValueERKNS_33ScopedObjectAccessAlreadyRunnableEP8_jobjectT_PK6jvalue","/apex/com.android.art/lib64/libart.so",0,301,0,301,0,0,0,0,2,"[NULL]","[NULL]","[NULL]"
+4,"experimental_flamegraph_nodes",4,"_ZN3art9ArtMethod6InvokeEPNS_6ThreadEPjjPNS_6JValueEPKc","/apex/com.android.art/lib64/libart.so",0,301,0,301,0,0,0,0,3,"[NULL]","[NULL]","[NULL]"
+5,"experimental_flamegraph_nodes",5,"art_quick_invoke_stub","/apex/com.android.art/lib64/libart.so",0,301,0,301,0,0,0,0,4,"[NULL]","[NULL]","[NULL]"
+6,"experimental_flamegraph_nodes",6,"android.os.HandlerThread.run","/system/framework/arm64/boot-framework.oat",0,43,0,43,0,0,0,0,5,"[NULL]","[NULL]","[NULL]"
+7,"experimental_flamegraph_nodes",7,"android.os.Looper.loop","/system/framework/arm64/boot-framework.oat",0,43,0,43,0,0,0,0,6,"[NULL]","[NULL]","[NULL]"
+8,"experimental_flamegraph_nodes",8,"android.os.Looper.loopOnce","/system/framework/arm64/boot-framework.oat",1,43,1,43,0,0,0,0,7,"[NULL]","[NULL]","[NULL]"
+9,"experimental_flamegraph_nodes",9,"android.os.Handler.dispatchMessage","/system/framework/arm64/boot-framework.oat",0,35,0,35,0,0,0,0,8,"[NULL]","[NULL]","[NULL]"
diff --git a/test/trace_processor/profiling/callstack_sampling_flamegraph_multi_process.out b/test/trace_processor/profiling/callstack_sampling_flamegraph_multi_process.out
new file mode 100644
index 0000000..a0b68b3
--- /dev/null
+++ b/test/trace_processor/profiling/callstack_sampling_flamegraph_multi_process.out
@@ -0,0 +1,4 @@
+"count","description"
+658,"BothProcesses"
+483,"FirstProcess"
+175,"SecondProcess"
diff --git a/test/trace_processor/profiling/callstack_sampling_flamegraph_multi_process.sql b/test/trace_processor/profiling/callstack_sampling_flamegraph_multi_process.sql
new file mode 100644
index 0000000..510b9d3
--- /dev/null
+++ b/test/trace_processor/profiling/callstack_sampling_flamegraph_multi_process.sql
@@ -0,0 +1,17 @@
+select count(*) as count, 'BothProcesses' as description from experimental_flamegraph
+where upid_group = '30,41'  -- the two upids that are also queried one by one below
+  and profile_type = 'perf'
+  and ts <= 7689491063351
+  and size > 0
+union all
+select count(*) as count, 'FirstProcess' as description from experimental_flamegraph
+where upid = 30
+  and profile_type = 'perf'
+  and ts <= 7689491063351
+  and size > 0
+union all
+select count(*) as count, 'SecondProcess' as description from experimental_flamegraph
+where upid = 41
+  and profile_type = 'perf'
+  and ts <= 7689491063351
+  and size > 0;
diff --git a/test/trace_processor/profiling/heap_profile_flamegraph_system-server-native-profile.out b/test/trace_processor/profiling/heap_profile_flamegraph_system-server-native-profile.out
index b521691..511538a 100644
--- a/test/trace_processor/profiling/heap_profile_flamegraph_system-server-native-profile.out
+++ b/test/trace_processor/profiling/heap_profile_flamegraph_system-server-native-profile.out
@@ -1,11 +1,11 @@
-"id","type","depth","name","map_name","count","cumulative_count","size","cumulative_size","alloc_count","cumulative_alloc_count","alloc_size","cumulative_alloc_size","parent_id","source_file","line_number"
-0,"experimental_flamegraph_nodes",0,"__start_thread","/apex/com.android.runtime/lib64/bionic/libc.so",0,8,0,84848,0,210,0,1084996,"[NULL]","[NULL]","[NULL]"
-1,"experimental_flamegraph_nodes",1,"_ZL15__pthread_startPv","/apex/com.android.runtime/lib64/bionic/libc.so",0,8,0,84848,0,210,0,1084996,0,"[NULL]","[NULL]"
-2,"experimental_flamegraph_nodes",2,"_ZN7android14AndroidRuntime15javaThreadShellEPv","/system/lib64/libandroid_runtime.so",0,5,0,27704,0,77,0,348050,1,"[NULL]","[NULL]"
-3,"experimental_flamegraph_nodes",3,"_ZN7android6Thread11_threadLoopEPv","/system/lib64/libutils.so",0,5,0,27704,0,77,0,348050,2,"[NULL]","[NULL]"
-4,"experimental_flamegraph_nodes",4,"_ZN7android10PoolThread10threadLoopEv","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,3,"[NULL]","[NULL]"
-5,"experimental_flamegraph_nodes",5,"_ZN7android14IPCThreadState14joinThreadPoolEb","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,4,"[NULL]","[NULL]"
-6,"experimental_flamegraph_nodes",6,"_ZN7android14IPCThreadState20getAndExecuteCommandEv","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,5,"[NULL]","[NULL]"
-7,"experimental_flamegraph_nodes",7,"_ZN7android14IPCThreadState14executeCommandEi","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,6,"[NULL]","[NULL]"
-8,"experimental_flamegraph_nodes",8,"_ZN7android7BBinder8transactEjRKNS_6ParcelEPS1_j","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,7,"[NULL]","[NULL]"
-9,"experimental_flamegraph_nodes",9,"_ZN11JavaBBinder10onTransactEjRKN7android6ParcelEPS1_j","/system/lib64/libandroid_runtime.so",0,0,0,0,0,60,0,262730,8,"[NULL]","[NULL]"
+"id","type","depth","name","map_name","count","cumulative_count","size","cumulative_size","alloc_count","cumulative_alloc_count","alloc_size","cumulative_alloc_size","parent_id","source_file","line_number","upid_group"
+0,"experimental_flamegraph_nodes",0,"__start_thread","/apex/com.android.runtime/lib64/bionic/libc.so",0,8,0,84848,0,210,0,1084996,"[NULL]","[NULL]","[NULL]","[NULL]"
+1,"experimental_flamegraph_nodes",1,"_ZL15__pthread_startPv","/apex/com.android.runtime/lib64/bionic/libc.so",0,8,0,84848,0,210,0,1084996,0,"[NULL]","[NULL]","[NULL]"
+2,"experimental_flamegraph_nodes",2,"_ZN7android14AndroidRuntime15javaThreadShellEPv","/system/lib64/libandroid_runtime.so",0,5,0,27704,0,77,0,348050,1,"[NULL]","[NULL]","[NULL]"
+3,"experimental_flamegraph_nodes",3,"_ZN7android6Thread11_threadLoopEPv","/system/lib64/libutils.so",0,5,0,27704,0,77,0,348050,2,"[NULL]","[NULL]","[NULL]"
+4,"experimental_flamegraph_nodes",4,"_ZN7android10PoolThread10threadLoopEv","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,3,"[NULL]","[NULL]","[NULL]"
+5,"experimental_flamegraph_nodes",5,"_ZN7android14IPCThreadState14joinThreadPoolEb","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,4,"[NULL]","[NULL]","[NULL]"
+6,"experimental_flamegraph_nodes",6,"_ZN7android14IPCThreadState20getAndExecuteCommandEv","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,5,"[NULL]","[NULL]","[NULL]"
+7,"experimental_flamegraph_nodes",7,"_ZN7android14IPCThreadState14executeCommandEi","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,6,"[NULL]","[NULL]","[NULL]"
+8,"experimental_flamegraph_nodes",8,"_ZN7android7BBinder8transactEjRKNS_6ParcelEPS1_j","/system/lib64/libbinder.so",0,1,0,4096,0,64,0,279182,7,"[NULL]","[NULL]","[NULL]"
+9,"experimental_flamegraph_nodes",9,"_ZN11JavaBBinder10onTransactEjRKN7android6ParcelEPS1_j","/system/lib64/libandroid_runtime.so",0,0,0,0,0,60,0,262730,8,"[NULL]","[NULL]","[NULL]"
diff --git a/test/trace_processor/profiling/index b/test/trace_processor/profiling/index
index f594f4f..dad2297 100644
--- a/test/trace_processor/profiling/index
+++ b/test/trace_processor/profiling/index
@@ -53,3 +53,4 @@
 # trace_processor_shell.
 ../../data/heapprofd_standalone_client_example-trace stack_profile_symbols.sql stack_profile_symbols.out
 ../../data/callstack_sampling.pftrace callstack_sampling_flamegraph.sql callstack_sampling_flamegraph.out
+../../data/callstack_sampling.pftrace callstack_sampling_flamegraph_multi_process.sql callstack_sampling_flamegraph_multi_process.out
diff --git a/test/trace_processor/span_join/index b/test/trace_processor/span_join/index
index b0635e2..774b414 100644
--- a/test/trace_processor/span_join/index
+++ b/test/trace_processor/span_join/index
@@ -10,14 +10,25 @@
 ../../data/android_sched_and_ps.pb span_join_unpartitioned_empty.sql span_join_unpartitioned_empty.out
 
 # Outer join
-../../data/android_sched_and_ps.pb span_outer_join.sql span_outer_join.out
-../../data/android_sched_and_ps.pb span_outer_join_empty.sql span_outer_join_empty_android_sched_and_ps.out
+../common/synth_1.py span_outer_join.sql span_outer_join.out
+../common/synth_1.py span_outer_join_empty.sql span_outer_join_empty.out
+# Outer join: both inputs unpartitioned.
+../common/synth_1.py span_outer_join_unpartitioned.sql span_outer_join_unpartitioned.out
+../common/synth_1.py span_outer_join_unpartitioned_empty.sql span_outer_join_unpartitioned_empty.out
+../common/synth_1.py span_outer_join_unpartitioned_left_empty.sql span_outer_join_unpartitioned_left_empty.out
+../common/synth_1.py span_outer_join_unpartitioned_right_empty.sql span_outer_join_unpartitioned_right_empty.out
+# Outer join: one partitioned input and one unpartitioned input.
+../common/synth_1.py span_outer_join_mixed.sql span_outer_join_mixed.out
+../common/synth_1.py span_outer_join_mixed_empty.sql span_outer_join_mixed_empty.out
+../common/synth_1.py span_outer_join_mixed_left_empty.sql span_outer_join_mixed_left_empty.out
+../common/synth_1.py span_outer_join_mixed_left_empty_rev.sql span_outer_join_mixed_left_empty_rev.out
+../common/synth_1.py span_outer_join_mixed_right_empty.sql span_outer_join_mixed_right_empty.out
+../common/synth_1.py span_outer_join_mixed_right_empty_rev.sql span_outer_join_mixed_right_empty_rev.out
 
 # Left join
-../../data/android_sched_and_ps.pb span_left_join.sql span_left_join.out
-../../data/android_sched_and_ps.pb span_left_join_unpartitioned.sql span_left_join_unpartitioned.out
-../../data/android_sched_and_ps.pb span_left_join_left_unpartitioned.sql span_left_join_left_unpartitioned.out
-../../data/android_sched_and_ps.pb span_left_join_left_partitioned.sql span_left_join_left_partitioned.out
-../../data/android_sched_and_ps.pb span_left_join_empty_right.sql span_left_join_empty_right.out
-../../data/android_sched_and_ps.pb span_left_join_unordered.sql span_left_join_unordered_android_sched_and_ps.out
-
+../common/synth_1.py span_left_join.sql span_left_join.out
+../common/synth_1.py span_left_join_unpartitioned.sql span_left_join_unpartitioned.out
+../common/synth_1.py span_left_join_left_unpartitioned.sql span_left_join_left_unpartitioned.out
+../common/synth_1.py span_left_join_left_partitioned.sql span_left_join_left_partitioned.out
+../common/synth_1.py span_left_join_empty_right.sql span_left_join_empty_right.out
+../common/synth_1.py span_left_join_unordered.sql span_left_join_unordered_android_sched_and_ps.out
diff --git a/test/trace_processor/span_join/span_outer_join_empty_android_sched_and_ps.out b/test/trace_processor/span_join/span_outer_join_empty.out
similarity index 100%
rename from test/trace_processor/span_join/span_outer_join_empty_android_sched_and_ps.out
rename to test/trace_processor/span_join/span_outer_join_empty.out
diff --git a/test/trace_processor/span_join/span_outer_join_mixed.out b/test/trace_processor/span_join/span_outer_join_mixed.out
new file mode 100644
index 0000000..f2e5496
--- /dev/null
+++ b/test/trace_processor/span_join/span_outer_join_mixed.out
@@ -0,0 +1,23 @@
+
+
+
+
+
+"ts","dur","part","a","b"
+50,50,1,"[NULL]",14
+100,50,1,10,14
+150,350,1,10,"[NULL]"
+500,50,1,11,"[NULL]"
+550,50,1,11,15
+600,50,1,"[NULL]",16
+900,500,1,"[NULL]",17
+50,100,2,"[NULL]",14
+500,50,2,12,"[NULL]"
+550,50,2,"[NULL]",15
+600,50,2,"[NULL]",16
+900,500,2,"[NULL]",17
+50,100,3,"[NULL]",14
+550,50,3,"[NULL]",15
+600,50,3,13,16
+650,50,3,13,"[NULL]"
+900,500,3,"[NULL]",17
diff --git a/test/trace_processor/span_join/span_outer_join_mixed.sql b/test/trace_processor/span_join/span_outer_join_mixed.sql
new file mode 100644
index 0000000..b05b18e
--- /dev/null
+++ b/test/trace_processor/span_join/span_outer_join_mixed.sql
@@ -0,0 +1,49 @@
+--
+-- Copyright 2021 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+CREATE TABLE t1(
+  ts BIG INT,
+  dur BIG INT,
+  part BIG INT,
+  a BIG INT,
+  PRIMARY KEY (part, ts)
+) WITHOUT ROWID;
+
+CREATE TABLE t2(
+  ts BIG INT,
+  dur BIG INT,
+  b BIG INT,
+  PRIMARY KEY (ts)
+) WITHOUT ROWID;
+
+-- Partitioned input: spans spread over partitions 1, 2 and 3.
+INSERT INTO t1(ts, dur, part, a)
+VALUES
+  (100, 400, 1, 10),
+  (500, 100, 1, 11),
+  (500, 50, 2, 12),
+  (600, 100, 3, 13);
+
+-- Unpartitioned input: four spans without a partition column.
+INSERT INTO t2(ts, dur, b)
+VALUES
+  (50, 100, 14),
+  (550, 50, 15),
+  (600, 50, 16),
+  (900, 500, 17);
+
+CREATE VIRTUAL TABLE sp USING span_outer_join(t1 PARTITIONED part, t2);
+
+SELECT * FROM sp;
diff --git a/test/trace_processor/span_join/span_outer_join_mixed_empty.out b/test/trace_processor/span_join/span_outer_join_mixed_empty.out
new file mode 100644
index 0000000..29a8f1b
--- /dev/null
+++ b/test/trace_processor/span_join/span_outer_join_mixed_empty.out
@@ -0,0 +1,4 @@
+
+
+
+"ts","dur","part"
diff --git a/test/trace_processor/span_join/span_outer_join_mixed_empty.sql b/test/trace_processor/span_join/span_outer_join_mixed_empty.sql
new file mode 100644
index 0000000..f5dc5ba
--- /dev/null
+++ b/test/trace_processor/span_join/span_outer_join_mixed_empty.sql
@@ -0,0 +1,33 @@
+--
+-- Copyright 2021 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+CREATE TABLE t1(
+  ts BIG INT,
+  dur BIG INT,
+  part BIG INT,
+  PRIMARY KEY (part, ts)
+) WITHOUT ROWID;
+
+CREATE TABLE t2(
+  ts BIG INT,
+  dur BIG INT,
+  PRIMARY KEY (ts)
+) WITHOUT ROWID;
+
+-- No rows are inserted: both the partitioned t1 and the unpartitioned t2 stay empty.
+
+CREATE VIRTUAL TABLE sp USING span_outer_join(t1 PARTITIONED part, t2);
+
+SELECT * FROM sp;
diff --git a/test/trace_processor/span_join/span_outer_join_mixed_left_empty.out b/test/trace_processor/span_join/span_outer_join_mixed_left_empty.out
new file mode 100644
index 0000000..75e52f0
--- /dev/null
+++ b/test/trace_processor/span_join/span_outer_join_mixed_left_empty.out
@@ -0,0 +1,5 @@
+
+
+
+
+"ts","dur","part"
diff --git a/test/trace_processor/span_join/span_outer_join_mixed_left_empty.sql b/test/trace_processor/span_join/span_outer_join_mixed_left_empty.sql
new file mode 100644
index 0000000..2eeae27
--- /dev/null
+++ b/test/trace_processor/span_join/span_outer_join_mixed_left_empty.sql
@@ -0,0 +1,37 @@
+--
+-- Copyright 2021 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+CREATE TABLE t1(
+  ts BIG INT,
+  dur BIG INT,
+  part BIG INT,
+  PRIMARY KEY (part, ts)
+) WITHOUT ROWID;
+
+CREATE TABLE t2(
+  ts BIG INT,
+  dur BIG INT,
+  PRIMARY KEY (ts)
+) WITHOUT ROWID;
+
+-- Only the unpartitioned t2 gets rows; the partitioned t1 (first join input) stays empty.
+INSERT INTO t2(ts, dur) VALUES
+  (100, 400),
+  (500, 50),
+  (600, 100);
+
+CREATE VIRTUAL TABLE sp USING span_outer_join(t1 PARTITIONED part, t2);
+
+SELECT * FROM sp;
diff --git a/test/trace_processor/span_join/span_outer_join_mixed_left_empty_rev.out b/test/trace_processor/span_join/span_outer_join_mixed_left_empty_rev.out
new file mode 100644
index 0000000..f43c5e2
--- /dev/null
+++ b/test/trace_processor/span_join/span_outer_join_mixed_left_empty_rev.out
@@ -0,0 +1,8 @@
+
+
+
+
+"ts","dur","part"
+100,400,0
+100,50,1
+600,100,1
diff --git a/test/trace_processor/span_join/span_outer_join_mixed_left_empty_rev.sql b/test/trace_processor/span_join/span_outer_join_mixed_left_empty_rev.sql
new file mode 100644
index 0000000..5926fb3
--- /dev/null
+++ b/test/trace_processor/span_join/span_outer_join_mixed_left_empty_rev.sql
@@ -0,0 +1,37 @@
+--
+-- Copyright 2021 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+CREATE TABLE t1(
+  ts BIG INT,
+  dur BIG INT,
+  part BIG INT,
+  PRIMARY KEY (part, ts)
+) WITHOUT ROWID;
+
+CREATE TABLE t2(
+  ts BIG INT,
+  dur BIG INT,
+  PRIMARY KEY (ts)
+) WITHOUT ROWID;
+
+-- Only the partitioned t1 gets rows; the unpartitioned t2 (first join input) stays empty.
+INSERT INTO t1(ts, dur, part) VALUES
+  (100, 400, 0),
+  (100, 50, 1),
+  (600, 100, 1);
+
+CREATE VIRTUAL TABLE sp USING span_outer_join(t2, t1 PARTITIONED part);
+
+SELECT * FROM sp;
diff --git a/test/trace_processor/span_join/span_outer_join_mixed_right_empty.out b/test/trace_processor/span_join/span_outer_join_mixed_right_empty.out
new file mode 100644
index 0000000..31a5fbb
--- /dev/null
+++ b/test/trace_processor/span_join/span_outer_join_mixed_right_empty.out
@@ -0,0 +1,8 @@
+
+
+
+
+"ts","dur","part","b"
+100,400,0,"[NULL]"
+100,50,1,"[NULL]"
+600,100,1,"[NULL]"
diff --git a/test/trace_processor/span_join/span_outer_join_mixed_right_empty.sql b/test/trace_processor/span_join/span_outer_join_mixed_right_empty.sql
new file mode 100644
index 0000000..3001d28
--- /dev/null
+++ b/test/trace_processor/span_join/span_outer_join_mixed_right_empty.sql
@@ -0,0 +1,38 @@
+--
+-- Copyright 2021 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+CREATE TABLE t1(
+  ts BIG INT,
+  dur BIG INT,
+  part BIG INT,
+  PRIMARY KEY (part, ts)
+) WITHOUT ROWID;
+
+CREATE TABLE t2(
+  ts BIG INT,
+  dur BIG INT,
+  b BIG INT,
+  PRIMARY KEY (ts)
+) WITHOUT ROWID;
+
+-- Only the partitioned t1 gets rows; the unpartitioned t2 (second join input) stays empty.
+INSERT INTO t1(ts, dur, part) VALUES
+  (100, 400, 0),
+  (100, 50, 1),
+  (600, 100, 1);
+
+CREATE VIRTUAL TABLE sp USING span_outer_join(t1 PARTITIONED part, t2);
+
+SELECT * FROM sp;
diff --git a/test/trace_processor/span_join/span_outer_join_mixed_right_empty_rev.out b/test/trace_processor/span_join/span_outer_join_mixed_right_empty_rev.out
new file mode 100644
index 0000000..724e3b4
--- /dev/null
+++ b/test/trace_processor/span_join/span_outer_join_mixed_right_empty_rev.out
@@ -0,0 +1,5 @@
+
+
+
+
+"ts","dur","part","b"
diff --git a/test/trace_processor/span_join/span_outer_join_mixed_right_empty_rev.sql b/test/trace_processor/span_join/span_outer_join_mixed_right_empty_rev.sql
new file mode 100644
index 0000000..f325cc4
--- /dev/null
+++ b/test/trace_processor/span_join/span_outer_join_mixed_right_empty_rev.sql
@@ -0,0 +1,38 @@
+--
+-- Copyright 2021 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+CREATE TABLE t1(
+  ts BIG INT,
+  dur BIG INT,
+  part BIG INT,
+  PRIMARY KEY (part, ts)
+) WITHOUT ROWID;
+
+CREATE TABLE t2(
+  ts BIG INT,
+  dur BIG INT,
+  b BIG INT,
+  PRIMARY KEY (ts)
+) WITHOUT ROWID;
+
+-- Only the unpartitioned t2 gets rows (b is left unset); the partitioned t1 (second join input) stays empty.
+INSERT INTO t2(ts, dur) VALUES
+  (100, 400),
+  (500, 50),
+  (600, 100);
+
+CREATE VIRTUAL TABLE sp USING span_outer_join(t2, t1 PARTITIONED part);
+
+SELECT * FROM sp;
diff --git a/test/trace_processor/span_join/span_outer_join_unpartitioned.out b/test/trace_processor/span_join/span_outer_join_unpartitioned.out
new file mode 100644
index 0000000..d668103
--- /dev/null
+++ b/test/trace_processor/span_join/span_outer_join_unpartitioned.out
@@ -0,0 +1,14 @@
+
+
+
+
+
+"ts","dur","a","b"
+50,50,"[NULL]",4
+100,50,1,4
+150,350,1,"[NULL]"
+500,50,2,"[NULL]"
+550,50,"[NULL]",5
+600,50,3,6
+650,50,3,"[NULL]"
+900,500,"[NULL]",7
diff --git a/test/trace_processor/span_join/span_outer_join_unpartitioned.sql b/test/trace_processor/span_join/span_outer_join_unpartitioned.sql
new file mode 100644
index 0000000..ce4fa79
--- /dev/null
+++ b/test/trace_processor/span_join/span_outer_join_unpartitioned.sql
@@ -0,0 +1,47 @@
+--
+-- Copyright 2021 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+CREATE TABLE t1(
+  ts BIG INT,
+  dur BIG INT,
+  a BIG INT,
+  PRIMARY KEY (ts)
+) WITHOUT ROWID;
+
+CREATE TABLE t2(
+  ts BIG INT,
+  dur BIG INT,
+  b BIG INT,
+  PRIMARY KEY (ts)
+) WITHOUT ROWID;
+
+-- First input: three spans carrying column a.
+INSERT INTO t1(ts, dur, a)
+VALUES
+  (100, 400, 1),
+  (500, 50, 2),
+  (600, 100, 3);
+
+-- Second input: four spans carrying column b.
+INSERT INTO t2(ts, dur, b)
+VALUES
+  (50, 100, 4),
+  (550, 50, 5),
+  (600, 50, 6),
+  (900, 500, 7);
+
+CREATE VIRTUAL TABLE sp USING span_outer_join(t1, t2);
+
+SELECT * FROM sp;
diff --git a/test/trace_processor/span_join/span_outer_join_unpartitioned_empty.out b/test/trace_processor/span_join/span_outer_join_unpartitioned_empty.out
new file mode 100644
index 0000000..d02fdd7
--- /dev/null
+++ b/test/trace_processor/span_join/span_outer_join_unpartitioned_empty.out
@@ -0,0 +1,4 @@
+
+
+
+"ts","dur"
diff --git a/test/trace_processor/span_join/span_outer_join_unpartitioned_empty.sql b/test/trace_processor/span_join/span_outer_join_unpartitioned_empty.sql
new file mode 100644
index 0000000..8155974
--- /dev/null
+++ b/test/trace_processor/span_join/span_outer_join_unpartitioned_empty.sql
@@ -0,0 +1,32 @@
+--
+-- Copyright 2021 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+CREATE TABLE t1(
+  ts BIG INT,
+  dur BIG INT,
+  PRIMARY KEY (ts)
+) WITHOUT ROWID;
+
+CREATE TABLE t2(
+  ts BIG INT,
+  dur BIG INT,
+  PRIMARY KEY (ts)
+) WITHOUT ROWID;
+
+-- No rows are inserted: both unpartitioned inputs stay empty.
+
+CREATE VIRTUAL TABLE sp USING span_outer_join(t1, t2);
+
+SELECT * FROM sp;
diff --git a/test/trace_processor/span_join/span_outer_join_unpartitioned_left_empty.out b/test/trace_processor/span_join/span_outer_join_unpartitioned_left_empty.out
new file mode 100644
index 0000000..4ea997f
--- /dev/null
+++ b/test/trace_processor/span_join/span_outer_join_unpartitioned_left_empty.out
@@ -0,0 +1,8 @@
+
+
+
+
+"ts","dur"
+100,400
+500,50
+600,100
diff --git a/test/trace_processor/span_join/span_outer_join_unpartitioned_left_empty.sql b/test/trace_processor/span_join/span_outer_join_unpartitioned_left_empty.sql
new file mode 100644
index 0000000..ab41a28
--- /dev/null
+++ b/test/trace_processor/span_join/span_outer_join_unpartitioned_left_empty.sql
@@ -0,0 +1,37 @@
+--
+-- Copyright 2021 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+CREATE TABLE t1(
+  ts BIG INT,
+  dur BIG INT,
+  PRIMARY KEY (ts)
+) WITHOUT ROWID;
+
+CREATE TABLE t2(
+  ts BIG INT,
+  dur BIG INT,
+  PRIMARY KEY (ts)
+) WITHOUT ROWID;
+
+-- Only t2 (second join input) gets rows; t1 stays empty.
+INSERT INTO t2(ts, dur)
+VALUES
+  (100, 400),
+  (500, 50),
+  (600, 100);
+
+CREATE VIRTUAL TABLE sp USING span_outer_join(t1, t2);
+
+SELECT * FROM sp;
diff --git a/test/trace_processor/span_join/span_outer_join_unpartitioned_right_empty.out b/test/trace_processor/span_join/span_outer_join_unpartitioned_right_empty.out
new file mode 100644
index 0000000..4ea997f
--- /dev/null
+++ b/test/trace_processor/span_join/span_outer_join_unpartitioned_right_empty.out
@@ -0,0 +1,8 @@
+
+
+
+
+"ts","dur"
+100,400
+500,50
+600,100
diff --git a/test/trace_processor/span_join/span_outer_join_unpartitioned_right_empty.sql b/test/trace_processor/span_join/span_outer_join_unpartitioned_right_empty.sql
new file mode 100644
index 0000000..1e3d959
--- /dev/null
+++ b/test/trace_processor/span_join/span_outer_join_unpartitioned_right_empty.sql
@@ -0,0 +1,37 @@
+--
+-- Copyright 2021 The Android Open Source Project
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     https://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+CREATE TABLE t1(
+  ts BIG INT,
+  dur BIG INT,
+  PRIMARY KEY (ts)
+) WITHOUT ROWID;
+
+CREATE TABLE t2(
+  ts BIG INT,
+  dur BIG INT,
+  PRIMARY KEY (ts)
+) WITHOUT ROWID;
+
+-- Only t1 (first join input) gets rows; t2 stays empty.
+INSERT INTO t1(ts, dur)
+VALUES
+  (100, 400),
+  (500, 50),
+  (600, 100);
+
+CREATE VIRTUAL TABLE sp USING span_outer_join(t1, t2);
+
+SELECT * FROM sp;
diff --git a/tools/busy_threads/busy_threads.cc b/tools/busy_threads/busy_threads.cc
index 3a58b50..7d745ac 100644
--- a/tools/busy_threads/busy_threads.cc
+++ b/tools/busy_threads/busy_threads.cc
@@ -25,6 +25,7 @@
 #include "perfetto/ext/base/file_utils.h"
 #include "perfetto/ext/base/getopt.h"
 #include "perfetto/ext/base/scoped_file.h"
+#include "perfetto/ext/base/string_utils.h"
 
 #define PERFETTO_HAVE_PTHREADS                \
   (PERFETTO_BUILDFLAG(PERFETTO_OS_LINUX) ||   \
@@ -43,10 +44,9 @@
 
 void SetRandomThreadName(uint32_t thread_name_count) {
 #if PERFETTO_HAVE_PTHREADS
-  char name[16] = {};
-  snprintf(name, sizeof(name), "busy-%" PRIu32,
-           static_cast<uint32_t>(rand()) % thread_name_count);
-  pthread_setname_np(pthread_self(), name);
+  base::StackString<16> name("busy-%" PRIu32,
+                             static_cast<uint32_t>(rand()) % thread_name_count);
+  pthread_setname_np(pthread_self(), name.c_str());
 #endif
 }
 
diff --git a/tools/compact_reencode/main.cc b/tools/compact_reencode/main.cc
index 19d4c22..df662ac 100644
--- a/tools/compact_reencode/main.cc
+++ b/tools/compact_reencode/main.cc
@@ -79,11 +79,12 @@
   if (bundle.has_cpu())
     bundle_out->set_cpu(bundle.cpu());
 
-  protozero::PackedVarInt switch_timestamp;
-  protozero::PackedVarInt switch_prev_state;
-  protozero::PackedVarInt switch_next_pid;
-  protozero::PackedVarInt switch_next_prio;
-  protozero::PackedVarInt switch_next_comm_index;
+  using protozero::PackedVarInt;
+  std::unique_ptr<PackedVarInt> switch_timestamp(new PackedVarInt());
+  std::unique_ptr<PackedVarInt> switch_prev_state(new PackedVarInt());
+  std::unique_ptr<PackedVarInt> switch_next_pid(new PackedVarInt());
+  std::unique_ptr<PackedVarInt> switch_next_prio(new PackedVarInt());
+  std::unique_ptr<PackedVarInt> switch_next_comm_index(new PackedVarInt());
 
   uint64_t last_switch_timestamp = 0;
 
@@ -99,11 +100,11 @@
   };
 
   // sched_waking pieces
-  protozero::PackedVarInt waking_timestamp;
-  protozero::PackedVarInt waking_pid;
-  protozero::PackedVarInt waking_target_cpu;
-  protozero::PackedVarInt waking_prio;
-  protozero::PackedVarInt waking_comm_index;
+  std::unique_ptr<PackedVarInt> waking_timestamp(new PackedVarInt());
+  std::unique_ptr<PackedVarInt> waking_pid(new PackedVarInt());
+  std::unique_ptr<PackedVarInt> waking_target_cpu(new PackedVarInt());
+  std::unique_ptr<PackedVarInt> waking_prio(new PackedVarInt());
+  std::unique_ptr<PackedVarInt> waking_comm_index(new PackedVarInt());
 
   uint64_t last_waking_timestamp = 0;
 
@@ -112,31 +113,31 @@
     if (!event.has_sched_switch() && !event.has_sched_waking()) {
       CopyField(bundle_out, event_it.field());
     } else if (event.has_sched_switch()) {
-      switch_timestamp.Append(event.timestamp() - last_switch_timestamp);
+      switch_timestamp->Append(event.timestamp() - last_switch_timestamp);
       last_switch_timestamp = event.timestamp();
 
       protos::pbzero::SchedSwitchFtraceEvent::Decoder sswitch(
           event.sched_switch());
 
       auto iid = intern(sswitch.next_comm().ToStdString());
-      switch_next_comm_index.Append(iid);
+      switch_next_comm_index->Append(iid);
 
-      switch_next_pid.Append(sswitch.next_pid());
-      switch_next_prio.Append(sswitch.next_prio());
-      switch_prev_state.Append(sswitch.prev_state());
+      switch_next_pid->Append(sswitch.next_pid());
+      switch_next_prio->Append(sswitch.next_prio());
+      switch_prev_state->Append(sswitch.prev_state());
     } else {
-      waking_timestamp.Append(event.timestamp() - last_waking_timestamp);
+      waking_timestamp->Append(event.timestamp() - last_waking_timestamp);
       last_waking_timestamp = event.timestamp();
 
       protos::pbzero::SchedWakingFtraceEvent::Decoder swaking(
           event.sched_waking());
 
       auto iid = intern(swaking.comm().ToStdString());
-      waking_comm_index.Append(iid);
+      waking_comm_index->Append(iid);
 
-      waking_pid.Append(swaking.pid());
-      waking_target_cpu.Append(swaking.target_cpu());
-      waking_prio.Append(swaking.prio());
+      waking_pid->Append(swaking.pid());
+      waking_target_cpu->Append(swaking.target_cpu());
+      waking_prio->Append(swaking.prio());
     }
   }
 
@@ -145,17 +146,17 @@
   for (const auto& s : string_table)
     compact_sched->add_intern_table(s.data(), s.size());
 
-  compact_sched->set_switch_timestamp(switch_timestamp);
-  compact_sched->set_switch_next_comm_index(switch_next_comm_index);
-  compact_sched->set_switch_next_pid(switch_next_pid);
-  compact_sched->set_switch_next_prio(switch_next_prio);
-  compact_sched->set_switch_prev_state(switch_prev_state);
+  compact_sched->set_switch_timestamp(*switch_timestamp);
+  compact_sched->set_switch_next_comm_index(*switch_next_comm_index);
+  compact_sched->set_switch_next_pid(*switch_next_pid);
+  compact_sched->set_switch_next_prio(*switch_next_prio);
+  compact_sched->set_switch_prev_state(*switch_prev_state);
 
-  compact_sched->set_waking_timestamp(waking_timestamp);
-  compact_sched->set_waking_pid(waking_pid);
-  compact_sched->set_waking_target_cpu(waking_target_cpu);
-  compact_sched->set_waking_prio(waking_prio);
-  compact_sched->set_waking_comm_index(waking_comm_index);
+  compact_sched->set_waking_timestamp(*waking_timestamp);
+  compact_sched->set_waking_pid(*waking_pid);
+  compact_sched->set_waking_target_cpu(*waking_target_cpu);
+  compact_sched->set_waking_prio(*waking_prio);
+  compact_sched->set_waking_comm_index(*waking_comm_index);
 }
 
 std::string ReEncode(const std::string& raw) {
diff --git a/tools/gen_bazel b/tools/gen_bazel
index fa77498..3a1b8ae 100755
--- a/tools/gen_bazel
+++ b/tools/gen_bazel
@@ -40,7 +40,6 @@
     'host_os="linux"',
     'is_debug=false',
     'is_perfetto_build_generator=true',
-    'enable_perfetto_watchdog=true',
     'monolithic_binaries=true',
     'target_os="linux"',
     'enable_perfetto_heapprofd=false',
diff --git a/tools/heap_profile b/tools/heap_profile
index 387d767..588f151 100755
--- a/tools/heap_profile
+++ b/tools/heap_profile
@@ -576,7 +576,7 @@
 
 
 # BEGIN_SECTION_GENERATED_BY(roll-prebuilts)
-# Revision: 387c10f55b96e95f96ec9248c3af28772bccfff0
+# Revision: v20.1
 PERFETTO_PREBUILT_MANIFEST = [{
     'tool':
         'trace_to_text',
@@ -585,11 +585,11 @@
     'file_name':
         'trace_to_text',
     'file_size':
-        7087080,
+        7136208,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/387c10f55b96e95f96ec9248c3af28772bccfff0/mac-amd64/trace_to_text',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v20.1/mac-amd64/trace_to_text',
     'sha256':
-        '19126f67c1edd3e525c63d3cc5085ea965d77868b91d163052f8f608cf496cd8',
+        '2a4cdaf2884639a5c80ba99f8da82e2684151d319524eb9f40498418c1df2b4e',
     'platform':
         'darwin',
     'machine': ['x86_64']
@@ -601,11 +601,11 @@
     'file_name':
         'trace_to_text.exe',
     'file_size':
-        6703616,
+        6580224,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/387c10f55b96e95f96ec9248c3af28772bccfff0/windows-amd64/trace_to_text.exe',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v20.1/windows-amd64/trace_to_text.exe',
     'sha256':
-        'fc776c807682c1de0785468190fe9795fda97f2a9c32dad15affe24b81dad817',
+        '6bda73d8e887a45716fd9a6fd80033d9ea02253f59d9c7693765134a43076aa4',
     'platform':
         'win32',
     'machine': ['amd64']
@@ -617,11 +617,11 @@
     'file_name':
         'trace_to_text',
     'file_size':
-        7437720,
+        7652480,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/387c10f55b96e95f96ec9248c3af28772bccfff0/linux-amd64/trace_to_text',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v20.1/linux-amd64/trace_to_text',
     'sha256':
-        '8112656e058a8ebbd6f7251452527c51c0a3e88bc160a33d95e3da61228d1285',
+        '9b59a562e9289fa683dfff296183e35bc95fe3eba7b382e07e32d0b9c6c19eca',
     'platform':
         'linux',
     'machine': ['x86_64']
diff --git a/tools/install-build-deps b/tools/install-build-deps
index e1e90d8..44cf0b9 100755
--- a/tools/install-build-deps
+++ b/tools/install-build-deps
@@ -126,8 +126,8 @@
         'linux', 'x64'),
     Dependency(
         'buildtools/win/clang.tgz',
-        'https://commondatastorage.googleapis.com/chromium-browser-clang/Linux_x64/clang-llvmorg-14-init-3191-g0e03450a-1.tgz',
-        'dd7479d43ce61401e057a5dee8b7e32bc2bd0d0e15d4f46c6858daf9170c9978',
+        'https://commondatastorage.googleapis.com/chromium-browser-clang/Win/clang-llvmorg-14-init-3191-g0e03450a-1.tgz',
+        '4292d191742e7120200c63224f02e1f2f2a7be8b57c0f18edf6ca0955bdd43df',
         'windows', 'x64'),
 ]
 
diff --git a/tools/package-prebuilts-for-github-release b/tools/package-prebuilts-for-github-release
new file mode 100755
index 0000000..d7a9414
--- /dev/null
+++ b/tools/package-prebuilts-for-github-release
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+# Copyright (C) 2021 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Pulls LUCI-generated binaries and generates .zip files for GitHub releases.
+
+Usage: ./tools/package-prebuilts-for-github-release v20.0
+
+This will generate one .zip file for every os-arch combo (e.g. android-arm.zip)
+into /tmp/perfetto-prebuilts-v20.0/ .
+"""
+
+import argparse
+import subprocess
+import os
+import sys
+
+
+def exec(*args):
+  print(' '.join(args))
+  subprocess.check_call(args)
+
+
+def main():
+  parser = argparse.ArgumentParser(epilog='Example %s v19.0' % __file__)
+  parser.add_argument('version')
+
+  args = parser.parse_args()
+  tmpdir = '/tmp/perfetto-prebuilts-' + args.version
+  src = 'gs://perfetto-luci-artifacts/%s/' % args.version
+  os.makedirs(tmpdir, exist_ok=True)
+  os.chdir(tmpdir)
+  exec('gsutil', '-m', 'rsync', '-rc', src, tmpdir + '/')
+  zips = []
+  for arch in os.listdir(tmpdir):
+    if not os.path.isdir(arch):
+      continue
+    exec('zip', '-9r', '%s.zip' % arch, arch)
+    zips.append(arch + '.zip')
+  print('')
+  print('%d zip files saved in %s (%s)' % (len(zips), tmpdir, ','.join(zips)))
+
+if __name__ == '__main__':
+  sys.exit(main())
diff --git a/tools/record_android_trace b/tools/record_android_trace
index 4dcb96d..78272fc 100755
--- a/tools/record_android_trace
+++ b/tools/record_android_trace
@@ -19,6 +19,7 @@
 import hashlib
 import http.server
 import os
+import re
 import shutil
 import socketserver
 import subprocess
@@ -245,7 +246,18 @@
   with open(args.config or os.devnull, 'rb') as f:
     print('Running ' + ' '.join(cmd))
     proc = adb('shell', *cmd, stdin=f, stdout=subprocess.PIPE)
-    bg_pid = proc.communicate()[0].decode().strip()
+    proc_out = proc.communicate()[0].decode().strip()
+    # On older versions of Android (x86_64 emulator running API 22) the output
+    # looks like:
+    #   WARNING: linker: /data/local/tmp/tracebox: unused DT entry: ...
+    #   WARNING: ... (other 2 WARNING: linker: lines)
+    #   1234  <-- The actual pid we want.
+    match = re.search(r'^(\d+)$', proc_out, re.M)
+    if match is None:
+      prt('Failed to read the pid from perfetto --background', ANSI.RED)
+      prt(proc_out)
+      sys.exit(1)
+    bg_pid = match.group(1)
     exit_code = proc.wait()
 
   if exit_code != 0:
@@ -260,11 +272,16 @@
   adb_failure_count = 0
   while ctrl_c_count < 2:
     try:
-      poll = adb('shell', 'test -d /proc/%s || exit 42' % bg_pid)
-      poll_res = poll.wait()
-      if poll_res == 42:
+      # On older Android devices adbd doesn't propagate the exit code. Hence
+      # the RUN/TERM parts.
+      poll = adb(
+          'shell',
+          'test -d /proc/%s && echo RUN || echo TERM' % bg_pid,
+          stdout=subprocess.PIPE)
+      poll_res = poll.communicate()[0].decode().strip()
+      if poll_res == 'TERM':
         break  # Process terminated
-      if poll_res == 0:
+      if poll_res == 'RUN':
         # The 'perfetto' cmdline client is still running. If previously we had
         # an ADB error, tell the user now it's all right again.
         if adb_failure_count > 0:
@@ -363,7 +380,7 @@
 
 
 # BEGIN_SECTION_GENERATED_BY(roll-prebuilts)
-# Revision: 387c10f55b96e95f96ec9248c3af28772bccfff0
+# Revision: v20.1
 PERFETTO_PREBUILT_MANIFEST = [{
     'tool':
         'tracebox',
@@ -374,9 +391,9 @@
     'file_size':
         1021824,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/387c10f55b96e95f96ec9248c3af28772bccfff0/android-arm/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v20.1/android-arm/tracebox',
     'sha256':
-        'de6362a183adf3d70ece245c083fd50b170061ffbcd878765052eea774198c4e'
+        'e6fdfcb3660073a04cc12c91cc69c924784aaf7db34fdb642bd69ad34cea2073'
 }, {
     'tool':
         'tracebox',
@@ -387,9 +404,9 @@
     'file_size':
         1559112,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/387c10f55b96e95f96ec9248c3af28772bccfff0/android-arm64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v20.1/android-arm64/tracebox',
     'sha256':
-        '90ee3858410ad3e57cd946d3734cba69c22f423c0d4d8d35c61c14c317b5293a'
+        'c825b09de85e24e0ac24b574fce6e4252867133b44ee78358c986da27b1f3258'
 }, {
     'tool':
         'tracebox',
@@ -400,9 +417,9 @@
     'file_size':
         1587028,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/387c10f55b96e95f96ec9248c3af28772bccfff0/android-x86/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v20.1/android-x86/tracebox',
     'sha256':
-        'c95b3d794e05462bc89d133ba7aca8a33032bab95d70ac43b24dbefe2db8c996'
+        '46356225173099dbd702b2553d59a6567c1843b4e0e7aabc34f05464113c2280'
 }, {
     'tool':
         'tracebox',
@@ -413,9 +430,9 @@
     'file_size':
         1829448,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/387c10f55b96e95f96ec9248c3af28772bccfff0/android-x64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v20.1/android-x64/tracebox',
     'sha256':
-        'd1da3f3c95bd8685b5bd75af19137c8d952fc88bb691dfc2a7e00706f3837074'
+        '38ef019e53aaf5a10ca421e1ad91937ccb756cba859d9697e024eefe12d3dbc5'
 }]
 
 
diff --git a/tools/roll-prebuilts b/tools/roll-prebuilts
index abb1a29..7bebe02 100755
--- a/tools/roll-prebuilts
+++ b/tools/roll-prebuilts
@@ -162,18 +162,15 @@
 
 
 def main():
-  parser = argparse.ArgumentParser()
-  parser.add_argument('-r', '--revision')
+  usage = '%s v20.0 | 0a1b2c3d\n\n' % __file__
+  usage += 'To list available revisions run\n'
+  usage += 'gsutil ls gs://perfetto-luci-artifacts/\n'
+  usage += 'or visit https://chrome-infra-packages.appspot.com/p/perfetto\n'
+  parser = argparse.ArgumentParser(usage=usage)
+  parser.add_argument('version')
   args = parser.parse_args()
 
-  if args.revision is None:
-    print('Error: must pass --revision=v1.2 or --revision=0a1b2c3d\n')
-    print('To list available revisions run')
-    print('gsutil ls gs://perfetto-luci-artifacts/')
-    print('or visit https://chrome-infra-packages.appspot.com/p/perfetto')
-    return 1
-
-  git_revision = args.revision
+  git_revision = args.version
   for spec in SCRIPTS_TO_UPDATE:
     logging.info('Updating %s', spec['script'])
     update_script(git_revision, spec['tool'], spec['script'], spec['archs'])
diff --git a/tools/trace_processor b/tools/trace_processor
index 8e17af5..6e9a85b 100755
--- a/tools/trace_processor
+++ b/tools/trace_processor
@@ -27,7 +27,7 @@
 TOOL_NAME = 'trace_processor_shell'
 
 # BEGIN_SECTION_GENERATED_BY(roll-prebuilts)
-# Revision: 387c10f55b96e95f96ec9248c3af28772bccfff0
+# Revision: v20.1
 PERFETTO_PREBUILT_MANIFEST = [{
     'tool':
         'trace_processor_shell',
@@ -36,11 +36,11 @@
     'file_name':
         'trace_processor_shell',
     'file_size':
-        7038736,
+        7087856,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/387c10f55b96e95f96ec9248c3af28772bccfff0/mac-amd64/trace_processor_shell',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v20.1/mac-amd64/trace_processor_shell',
     'sha256':
-        '2d5d054d64af5c2b0f5649bbe653bb5d71a2b8465fc69d00d52abb818a093f83',
+        '01ede10c037ce128b2d4ad2736369bb7f1b7ba98b1ab9991ad5c6bfec29cbabf',
     'platform':
         'darwin',
     'machine': ['x86_64']
@@ -52,11 +52,11 @@
     'file_name':
         'trace_processor_shell.exe',
     'file_size':
-        6671872,
+        6556160,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/387c10f55b96e95f96ec9248c3af28772bccfff0/windows-amd64/trace_processor_shell.exe',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v20.1/windows-amd64/trace_processor_shell.exe',
     'sha256':
-        'f1438f5731b97770e4f80a6fbeb04841ed1ad6738dc15defeb69395e889581c6',
+        '126a92f1ec8b15953ca570ca5ad66b03c67a91796a84e702361ee84a0fab6c26',
     'platform':
         'win32',
     'machine': ['amd64']
@@ -68,11 +68,11 @@
     'file_name':
         'trace_processor_shell',
     'file_size':
-        7386768,
+        7610536,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/387c10f55b96e95f96ec9248c3af28772bccfff0/linux-amd64/trace_processor_shell',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v20.1/linux-amd64/trace_processor_shell',
     'sha256':
-        '643e9b4bbea808434b4630522241f9306d17f4669b8ccb3cf9a52d61b3871f5d',
+        '052ae0ec898e9977126418beb29c27405b96a019c1b182ab230ecf3eafdb5900',
     'platform':
         'linux',
     'machine': ['x86_64']
@@ -84,11 +84,11 @@
     'file_name':
         'trace_processor_shell',
     'file_size':
-        4640056,
+        4857436,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/387c10f55b96e95f96ec9248c3af28772bccfff0/linux-arm/trace_processor_shell',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v20.1/linux-arm/trace_processor_shell',
     'sha256':
-        '6118b0b863a9c9f04ea368fbec6aa6f482ccabb5abd7e8b94b31483bdb0f9d56',
+        'a0543d49aaa8fd9da145ed3b82a48905b73a09b96df30b009dc9d62e405269f9',
     'platform':
         'linux',
     'machine': ['armv6l', 'armv7l', 'armv8l']
@@ -100,11 +100,11 @@
     'file_name':
         'trace_processor_shell',
     'file_size':
-        6171712,
+        6649456,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/387c10f55b96e95f96ec9248c3af28772bccfff0/linux-arm64/trace_processor_shell',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v20.1/linux-arm64/trace_processor_shell',
     'sha256':
-        'c1ed98c6104fb57b6452b1a01c3b8d8b9f25593eb85590f58de0efc8cf3c423d',
+        'be8270ac5c02ee82c6bf6474b551521e2e9f9939ef67a93db72cd34b1bb105b8',
     'platform':
         'linux',
     'machine': ['aarch64']
diff --git a/tools/tracebox b/tools/tracebox
index 158ff73..919ce72 100755
--- a/tools/tracebox
+++ b/tools/tracebox
@@ -27,7 +27,7 @@
 TOOL_NAME = 'tracebox'
 
 # BEGIN_SECTION_GENERATED_BY(roll-prebuilts)
-# Revision: 387c10f55b96e95f96ec9248c3af28772bccfff0
+# Revision: v20.1
 PERFETTO_PREBUILT_MANIFEST = [{
     'tool':
         'tracebox',
@@ -36,11 +36,11 @@
     'file_name':
         'tracebox',
     'file_size':
-        1315792,
+        1315800,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/387c10f55b96e95f96ec9248c3af28772bccfff0/mac-amd64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v20.1/mac-amd64/tracebox',
     'sha256':
-        '8460e9e87b41ee5011df50d1a63edd02b4f58003f1ffdd2c0e9f8dc283491e2b',
+        '111aef8fcc958dfe51e55a6ab5ac4eeb0416f0b0af1a3d6048cbc648916b7937',
     'platform':
         'darwin',
     'machine': ['x86_64']
@@ -52,11 +52,11 @@
     'file_name':
         'tracebox',
     'file_size':
-        1750688,
+        1729912,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/387c10f55b96e95f96ec9248c3af28772bccfff0/linux-amd64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v20.1/linux-amd64/tracebox',
     'sha256':
-        '05fa0a739ce40d38cf8b65c3f43f8f509991394c69f0f17e68c3f46076904716',
+        '4d918920efb27f6434046dfa51ef82a2432109659cec746192f749d0d273a35e',
     'platform':
         'linux',
     'machine': ['x86_64']
@@ -68,11 +68,11 @@
     'file_name':
         'tracebox',
     'file_size':
-        1750688,
+        1729912,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/387c10f55b96e95f96ec9248c3af28772bccfff0/linux-amd64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v20.1/linux-amd64/tracebox',
     'sha256':
-        '05fa0a739ce40d38cf8b65c3f43f8f509991394c69f0f17e68c3f46076904716',
+        '4d918920efb27f6434046dfa51ef82a2432109659cec746192f749d0d273a35e',
     'platform':
         'linux',
     'machine': ['x86_64']
@@ -84,11 +84,11 @@
     'file_name':
         'tracebox',
     'file_size':
-        990096,
+        988288,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/387c10f55b96e95f96ec9248c3af28772bccfff0/linux-arm/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v20.1/linux-arm/tracebox',
     'sha256':
-        '4777d8034bf9cd4f2ea8ce28d62aef7facfd4cd7e22e1de415d178d287a437e5',
+        '5d8b336ca5ef731deaf328c01c89ac3d863bcac8a3bf8549d2730a85bee5242e',
     'platform':
         'linux',
     'machine': ['armv6l', 'armv7l', 'armv8l']
@@ -100,11 +100,11 @@
     'file_name':
         'tracebox',
     'file_size':
-        1493456,
+        1589656,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/387c10f55b96e95f96ec9248c3af28772bccfff0/linux-arm64/tracebox',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v20.1/linux-arm64/tracebox',
     'sha256':
-        '799d657730e1edfc647a5ac344c3840beaf192e432faaff395ba55cacabdb01e',
+        '4c09b4b648cdf901b2b7d1454199e96aaeb8c41ddf6a713920f946b49448bf3c',
     'platform':
         'linux',
     'machine': ['aarch64']
diff --git a/tools/traceconv b/tools/traceconv
index ab2fd98..610eb3c 100755
--- a/tools/traceconv
+++ b/tools/traceconv
@@ -27,7 +27,7 @@
 TOOL_NAME = 'trace_to_text'
 
 # BEGIN_SECTION_GENERATED_BY(roll-prebuilts)
-# Revision: 387c10f55b96e95f96ec9248c3af28772bccfff0
+# Revision: v20.1
 PERFETTO_PREBUILT_MANIFEST = [{
     'tool':
         'trace_to_text',
@@ -36,11 +36,11 @@
     'file_name':
         'trace_to_text',
     'file_size':
-        7087080,
+        7136208,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/387c10f55b96e95f96ec9248c3af28772bccfff0/mac-amd64/trace_to_text',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v20.1/mac-amd64/trace_to_text',
     'sha256':
-        '19126f67c1edd3e525c63d3cc5085ea965d77868b91d163052f8f608cf496cd8',
+        '2a4cdaf2884639a5c80ba99f8da82e2684151d319524eb9f40498418c1df2b4e',
     'platform':
         'darwin',
     'machine': ['x86_64']
@@ -52,11 +52,11 @@
     'file_name':
         'trace_to_text',
     'file_size':
-        7437720,
+        7652480,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/387c10f55b96e95f96ec9248c3af28772bccfff0/linux-amd64/trace_to_text',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v20.1/linux-amd64/trace_to_text',
     'sha256':
-        '8112656e058a8ebbd6f7251452527c51c0a3e88bc160a33d95e3da61228d1285',
+        '9b59a562e9289fa683dfff296183e35bc95fe3eba7b382e07e32d0b9c6c19eca',
     'platform':
         'linux',
     'machine': ['x86_64']
@@ -68,11 +68,11 @@
     'file_name':
         'trace_to_text.exe',
     'file_size':
-        6703616,
+        6580224,
     'url':
-        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/387c10f55b96e95f96ec9248c3af28772bccfff0/windows-amd64/trace_to_text.exe',
+        'https://commondatastorage.googleapis.com/perfetto-luci-artifacts/v20.1/windows-amd64/trace_to_text.exe',
     'sha256':
-        'fc776c807682c1de0785468190fe9795fda97f2a9c32dad15affe24b81dad817',
+        '6bda73d8e887a45716fd9a6fd80033d9ea02253f59d9c7693765134a43076aa4',
     'platform':
         'win32',
     'machine': ['amd64']
diff --git a/tools/write_version_header.py b/tools/write_version_header.py
index e8ddcb9..7dc8783 100755
--- a/tools/write_version_header.py
+++ b/tools/write_version_header.py
@@ -81,6 +81,7 @@
 
 def main():
   parser = argparse.ArgumentParser()
+  parser.add_argument('--check_git', action='store_true')
   parser.add_argument(
       '--no_git',
       action='store_true',
@@ -91,6 +92,11 @@
   parser.add_argument('--changelog', help='Path to CHANGELOG.')
   args = parser.parse_args()
 
+  if args.check_git:
+    has_git = os.path.exists(os.path.join(PROJECT_ROOT, '.git', 'HEAD'))
+    print('1' if has_git else '0')
+    return 0
+
   release = get_latest_release(args.changelog)
 
   if args.no_git:
diff --git a/ui/release/channels.json b/ui/release/channels.json
index 0ba34d0..ade4052 100644
--- a/ui/release/channels.json
+++ b/ui/release/channels.json
@@ -2,11 +2,11 @@
   "channels": [
     {
       "name": "stable",
-      "rev": "d996f9e27fef14001401a4ae3c82dc24e71517ee"
+      "rev": "e923f820f4b479d3def9f68088ef710adc0464dd"
     },
     {
       "name": "canary",
-      "rev": "bbb025d604073a713c6c345828aca916b5fa119c"
+      "rev": "909884449a9da4fa70a86577f0d869b81214c04b"
     },
     {
       "name": "autopush",
diff --git a/ui/src/assets/common.scss b/ui/src/assets/common.scss
index e03adcf..3e36ee2 100644
--- a/ui/src/assets/common.scss
+++ b/ui/src/assets/common.scss
@@ -209,6 +209,10 @@
   width: 100%;
 }
 
+.x-scrollable {
+  overflow-x: auto;
+}
+
 .query-table {
     width: 100%;
     font-size: 14px;
diff --git a/ui/src/assets/record.scss b/ui/src/assets/record.scss
index f732e25..39e0c42 100644
--- a/ui/src/assets/record.scss
+++ b/ui/src/assets/record.scss
@@ -311,11 +311,16 @@
     padding: 7px;
     width: 75px;
 
-    &:hover {
+    &:hover:enabled {
       box-shadow: 0 0 4px 0px #999;
     }
 
-    &.load {
+    &:not(:enabled) {
+      background-color: hsl(0, 0%, 83%);
+      color: hsl(0, 0%, 50%);
+    }
+
+    &.load:enabled {
       background-color: hsl(88, 50%, 67%);
     }
 
@@ -325,8 +330,20 @@
 
     &.save {
       width: 160px;
-      background-color: hsl(197, 50%, 67%);
+
+      &:enabled {
+        background-color: hsl(197, 50%, 67%);
+      }
     }
+
+    &.reset {
+      width: 300px;
+      background-color: hsl(0, 50%, 67%);
+    }
+  }
+
+  .reset-wrapper {
+    padding: 1em;
   }
 
   .input-config {
diff --git a/ui/src/common/actions.ts b/ui/src/common/actions.ts
index 9a14db8..41bc4f3 100644
--- a/ui/src/common/actions.ts
+++ b/ui/src/common/actions.ts
@@ -46,6 +46,7 @@
   createEmptyState,
   EngineMode,
   FlamegraphStateViewingOption,
+  LoadedConfig,
   LogsPagination,
   NewEngineMode,
   OmniboxState,
@@ -458,8 +459,11 @@
     }
   },
 
-  setRecordConfig(state: StateDraft, args: {config: RecordConfig;}): void {
+  setRecordConfig(
+      state: StateDraft,
+      args: {config: RecordConfig, configType?: LoadedConfig}): void {
     state.recordConfig = args.config;
+    state.lastLoadedConfig = args.configType || {type: 'NONE'};
   },
 
   selectNote(state: StateDraft, args: {id: string}): void {
diff --git a/ui/src/common/engine.ts b/ui/src/common/engine.ts
index 065df54..aea57a9 100644
--- a/ui/src/common/engine.ts
+++ b/ui/src/common/engine.ts
@@ -191,7 +191,9 @@
       case TPM.TPM_COMPUTE_METRIC:
         const metricRes = assertExists(rpc.metricResult) as ComputeMetricResult;
         if (metricRes.error && metricRes.error.length > 0) {
-          throw new QueryError(`ComputeMetric() error: ${metricRes.error}`);
+          throw new QueryError(`ComputeMetric() error: ${metricRes.error}`, {
+            query: 'COMPUTE_METRIC',
+          });
         }
         assertExists(this.pendingComputeMetrics.shift()).resolve(metricRes);
         break;
@@ -292,7 +294,9 @@
     rpc.queryArgs = new QueryArgs();
     rpc.queryArgs.sqlQuery = sqlQuery;
     rpc.queryArgs.timeQueuedNs = Math.floor(performance.now() * 1e6);
-    const result = createQueryResult();
+    const result = createQueryResult({
+      query: sqlQuery,
+    });
     this.pendingQueries.push(result);
     this.rpcSendRequest(rpc);
     return result;
diff --git a/ui/src/common/http_rpc_engine.ts b/ui/src/common/http_rpc_engine.ts
index d404ea9..ba9c72e 100644
--- a/ui/src/common/http_rpc_engine.ts
+++ b/ui/src/common/http_rpc_engine.ts
@@ -87,7 +87,7 @@
     const httpRpcState: HttpRpcState = {connected: false};
     console.info(
         `It's safe to ignore the ERR_CONNECTION_REFUSED on ${RPC_URL} below. ` +
-        `That might happen while probing the exernal native accelerator. The ` +
+        `That might happen while probing the external native accelerator. The ` +
         `error is non-fatal and unlikely to be the culprit for any UI bug.`);
     try {
       const resp = await fetchWithTimeout(
diff --git a/ui/src/common/query_result.ts b/ui/src/common/query_result.ts
index 70186cf..dd80b4d 100644
--- a/ui/src/common/query_result.ts
+++ b/ui/src/common/query_result.ts
@@ -60,7 +60,25 @@
 
 export type ColumnType = string|number|null;
 
-export class QueryError extends Error {}
+// Info that could help debug a query error. For example the query
+// in question, the stack where the query was issued, the active
+// plugin etc.
+export interface QueryErrorInfo {
+  query: string;
+}
+
+export class QueryError extends Error {
+  readonly query: string;
+
+  constructor(message: string, info: QueryErrorInfo) {
+    super(message);
+    this.query = info.query;
+  }
+
+  toString() {
+    return `Query: ${this.query}\n` + super.toString();
+  }
+}
 
 // One row extracted from an SQL result:
 export interface Row {
@@ -199,6 +217,11 @@
   private _error?: string;
   private _numRows = 0;
   private _isComplete = false;
+  private _errorInfo: QueryErrorInfo;
+
+  constructor(errorInfo: QueryErrorInfo) {
+    this._errorInfo = errorInfo;
+  }
 
   // --- QueryResult implementation.
 
@@ -337,7 +360,7 @@
     if (this._error === undefined) {
       promise.resolve(arg);
     } else {
-      promise.reject(new QueryError(this._error));
+      promise.reject(new QueryError(this._error, this._errorInfo));
     }
   }
 }
@@ -703,9 +726,13 @@
 // 2. Clients that know how to handle the streaming can use it straight away.
 class WaitableQueryResultImpl implements QueryResult, WritableQueryResult,
                                          PromiseLike<QueryResult> {
-  private impl = new QueryResultImpl();
+  private impl: QueryResultImpl;
   private thenCalled = false;
 
+  constructor(errorInfo: QueryErrorInfo) {
+    this.impl = new QueryResultImpl(errorInfo);
+  }
+
   // QueryResult implementation. Proxies all calls to the impl object.
   iter<T extends Row>(spec: T) {
      return this.impl.iter(spec);
@@ -758,7 +785,7 @@
   }
 }
 
-export function createQueryResult(): QueryResult&Promise<QueryResult>&
-    WritableQueryResult {
-  return new WaitableQueryResultImpl();
+export function createQueryResult(errorInfo: QueryErrorInfo): QueryResult&
+    Promise<QueryResult>&WritableQueryResult {
+  return new WaitableQueryResultImpl(errorInfo);
 }
diff --git a/ui/src/common/query_result_unittest.ts b/ui/src/common/query_result_unittest.ts
index 2b95c0b..b71f26b 100644
--- a/ui/src/common/query_result_unittest.ts
+++ b/ui/src/common/query_result_unittest.ts
@@ -32,7 +32,7 @@
     batch: [batch],
   });
 
-  const qr = createQueryResult();
+  const qr = createQueryResult({query: 'Some query'});
   qr.appendResultBatch(QueryResultProto.encode(resProto).finish());
   expect(qr.isComplete()).toBe(true);
   expect(qr.numRows()).toBe(1);
@@ -81,7 +81,7 @@
     batch: [batch],
   });
 
-  const qr = createQueryResult();
+  const qr = createQueryResult({query: 'Some query'});
   qr.appendResultBatch(QueryResultProto.encode(resProto).finish());
   const actual: string[] = [];
   for (const iter = qr.iter({n: NUM}); iter.valid(); iter.next()) {
@@ -112,7 +112,7 @@
     batch: [batch],
   });
 
-  const qr = createQueryResult();
+  const qr = createQueryResult({query: 'Some query'});
   qr.appendResultBatch(QueryResultProto.encode(resProto).finish());
   const actual: number[] = [];
   for (const iter = qr.iter({n: NUM}); iter.valid(); iter.next()) {
@@ -140,7 +140,7 @@
     batch: [batch],
   });
 
-  const qr = createQueryResult();
+  const qr = createQueryResult({query: 'Some query'});
   qr.appendResultBatch(QueryResultProto.encode(resProto).finish());
   const actual: string[] = [];
   for (const iter = qr.iter({s: STR}); iter.valid(); iter.next()) {
@@ -165,7 +165,7 @@
     batch: [batch],
   });
 
-  const qr = createQueryResult();
+  const qr = createQueryResult({query: 'Some query'});
   qr.appendResultBatch(QueryResultProto.encode(resProto).finish());
   const actualNums = new Array<number|null>();
   const actualStrings = new Array<string|null>();
@@ -192,7 +192,7 @@
     batch: [{isLastBatch: true}],
     error: 'Oh dear, this SQL query is too complicated, I give up',
   });
-  const qr = createQueryResult();
+  const qr = createQueryResult({query: 'Some query'});
   qr.appendResultBatch(QueryResultProto.encode(resProto).finish());
   expect(qr.error()).toContain('Oh dear');
   expect(qr.isComplete()).toBe(true);
@@ -216,7 +216,7 @@
     ],
     error: 'I tried, I was getting there, but then I failed',
   });
-  const qr = createQueryResult();
+  const qr = createQueryResult({query: 'Some query'});
   qr.appendResultBatch(QueryResultProto.encode(resProto).finish());
   expect(qr.error()).toContain('I failed');
   const rows: number[] = [];
@@ -245,7 +245,7 @@
     }],
   });
 
-  const qr = createQueryResult();
+  const qr = createQueryResult({query: 'Some query'});
   expect(qr.isComplete()).toBe(false);
 
   qr.appendResultBatch(QueryResultProto.encode(batch1).finish());
@@ -281,7 +281,7 @@
     batch: [batch],
   });
 
-  const qr = createQueryResult();
+  const qr = createQueryResult({query: 'Some query'});
   qr.appendResultBatch(QueryResultProto.encode(resProto).finish());
   expect(qr.isComplete()).toBe(true);
   expect(qr.numRows()).toBe(1);
diff --git a/ui/src/common/state.ts b/ui/src/common/state.ts
index 2905967..5fc9f89 100644
--- a/ui/src/common/state.ts
+++ b/ui/src/common/state.ts
@@ -12,7 +12,10 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-import {validateRecordConfig} from '../controller/validate_config';
+import {createEmptyRecordConfig} from '../controller/validate_config';
+import {autosaveConfigStore} from '../frontend/record_config';
+
+import {featureFlags} from './feature_flags';
 import {
   AggregationAttrs,
   PivotAttrs,
@@ -65,8 +68,10 @@
 // 6: Common PivotTableConfig and pivot table specific PivotTableState.
 // 7: Split Chrome categories in two and add 'symbolize ksyms' flag.
 // 8: Rename several variables
 // "[...]HeapProfileFlamegraph[...]" -> "[...]Flamegraph[...]".
+// 9: Add a field to track last loaded recording profile name
+// 10: Change last loaded profile tracking type to accommodate auto-save.
-export const STATE_VERSION = 8;
+export const STATE_VERSION = 10;
 
 export const SCROLLING_TRACK_GROUP = 'ScrollingTracks';
 
@@ -316,6 +321,22 @@
   selectedTrackIds?: number[];
 }
 
+export interface LoadedConfigNone {
+  type: 'NONE';
+}
+
+export interface LoadedConfigAutomatic {
+  type: 'AUTOMATIC';
+}
+
+export interface LoadedConfigNamed {
+  type: 'NAMED';
+  name: string;
+}
+
+export type LoadedConfig =
+    LoadedConfigNone|LoadedConfigAutomatic|LoadedConfigNamed;
+
 export interface State {
   // tslint:disable-next-line:no-any
   [key: string]: any;
@@ -329,6 +350,7 @@
    */
   recordConfig: RecordConfig;
   displayConfigAsPbtxt: boolean;
+  lastLoadedConfig: LoadedConfig;
 
   /**
    * Open traces.
@@ -427,10 +449,6 @@
   return target.os === 'CrOS';
 }
 
-export function isLinuxTarget(target: RecordingTarget) {
-  return target.os === 'L';
-}
-
 export function isAdbTarget(target: RecordingTarget):
     target is AdbRecordingTarget {
   return !!(target as AdbRecordingTarget).serial;
@@ -445,7 +463,10 @@
       return true;
     }
   }
-  return false;
+  if (config.chromeCategoriesSelected.length > 0) {
+    return true;
+  }
+  return config.chromeHighOverheadCategoriesSelected.length > 0;
 }
 
 export interface RecordConfig {
@@ -523,10 +544,6 @@
   symbolizeKsyms: boolean;
 }
 
-export function createEmptyRecordConfig(): RecordConfig {
-  return validateRecordConfig({});
-}
-
 export function getDefaultRecordingTargets(): RecordingTarget[] {
   return [
     {os: 'Q', name: 'Android Q+'},
@@ -782,6 +799,14 @@
   ];
 }
 
+const AUTOLOAD_STARTED_CONFIG_FLAG = featureFlags.register({
+  id: 'autoloadStartedConfig',
+  name: 'Auto-load last used recording config',
+  description: 'Starting a recording automatically saves its configuration. ' +
+      'This flag controls whether this config is automatically loaded.',
+  defaultValue: false,
+});
+
 export function createEmptyState(): State {
   return {
     version: STATE_VERSION,
@@ -805,8 +830,11 @@
     pivotTableConfig: {},
     pivotTable: {},
 
-    recordConfig: createEmptyRecordConfig(),
+    recordConfig: AUTOLOAD_STARTED_CONFIG_FLAG.get() ?
+        autosaveConfigStore.get() :
+        createEmptyRecordConfig(),
     displayConfigAsPbtxt: false,
+    lastLoadedConfig: {type: 'NONE'},
 
     frontendLocalState: {
       omniboxState: {
diff --git a/ui/src/controller/aggregation/aggregation_controller.ts b/ui/src/controller/aggregation/aggregation_controller.ts
index 3f4b126..06c3cee 100644
--- a/ui/src/controller/aggregation/aggregation_controller.ts
+++ b/ui/src/controller/aggregation/aggregation_controller.ts
@@ -39,16 +39,8 @@
 }
 
 function isAreaEqual(area: Area, previousArea?: Area) {
-  if (previousArea === undefined) {
-    return false;
-  }
-  if (previousArea === undefined) {
-    return true;
-  }
-  if (area.startSec !== previousArea.startSec) {
-    return false;
-  }
-  if (area.endSec !== previousArea.endSec) {
+  if (previousArea === undefined || area.startSec !== previousArea.startSec ||
+      area.endSec !== previousArea.endSec) {
     return false;
   }
   return area.tracks.every((element, i) => element === previousArea.tracks[i]);
diff --git a/ui/src/controller/aggregation/frame_aggregation_controller.ts b/ui/src/controller/aggregation/frame_aggregation_controller.ts
new file mode 100644
index 0000000..cca2110
--- /dev/null
+++ b/ui/src/controller/aggregation/frame_aggregation_controller.ts
@@ -0,0 +1,113 @@
+// Copyright (C) 2021 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import {ColumnDef} from '../../common/aggregation_data';
+import {Engine} from '../../common/engine';
+import {Area, Sorting} from '../../common/state';
+import {toNs} from '../../common/time';
+import {
+  ACTUAL_FRAMES_SLICE_TRACK_KIND,
+  Config
+} from '../../tracks/actual_frames/common';
+import {globals} from '../globals';
+
+import {AggregationController} from './aggregation_controller';
+
+// Aggregation of the actual-frame-timeline slices in the selected area,
+// grouped by jank type: occurrence count plus min/mean/max slice duration.
+export class FrameAggregationController extends AggregationController {
+  // Drops and recreates the SQL view named after |this.kind| for the frame
+  // tracks selected in |area|. Resolves to false (leaving no view behind) when
+  // the area contains no track of kind ACTUAL_FRAMES_SLICE_TRACK_KIND, and to
+  // true once the view has been created.
+  async createAggregateView(engine: Engine, area: Area) {
+    await engine.query(`drop view if exists ${this.kind};`);
+
+    const selectedSqlTrackIds = [];
+    for (const trackId of area.tracks) {
+      const track = globals.state.tracks[trackId];
+      // Track will be undefined for track groups.
+      if (track !== undefined &&
+          track.kind === ACTUAL_FRAMES_SLICE_TRACK_KIND) {
+        selectedSqlTrackIds.push((track.config as Config).trackIds);
+      }
+    }
+    if (selectedSqlTrackIds.length === 0) return false;
+
+    // A slice is counted when it overlaps the selected time range.
+    const query = `create view ${this.kind} as
+        SELECT
+        jank_type,
+        count(1) as occurrences,
+        MIN(dur) as minDur,
+        AVG(dur) as meanDur,
+        MAX(dur) as maxDur
+        FROM actual_frame_timeline_slice
+        WHERE track_id IN (${selectedSqlTrackIds}) AND
+        ts + dur > ${toNs(area.startSec)} AND
+        ts < ${toNs(area.endSec)} group by jank_type`;
+
+    await engine.query(query);
+    return true;
+  }
+
+  getTabName() {
+    return 'Frames';
+  }
+
+  async getExtra() {}
+
+  getDefaultSorting(): Sorting {
+    return {column: 'occurrences', direction: 'DESC'};
+  }
+
+  // Column layout of the aggregation table. The duration columns are backed by
+  // Float64Array: they hold nanosecond durations (and a fractional AVG), which
+  // a Uint16Array would wrap modulo 65536 and truncate to an integer.
+  getColumnDefinitions(): ColumnDef[] {
+    return [
+      {
+        title: 'Jank Type',
+        kind: 'STRING',
+        columnConstructor: Uint16Array,
+        columnId: 'jank_type',
+      },
+      {
+        title: 'Min duration',
+        kind: 'NUMBER',
+        columnConstructor: Float64Array,
+        columnId: 'minDur',
+      },
+      {
+        title: 'Max duration',
+        kind: 'NUMBER',
+        columnConstructor: Float64Array,
+        columnId: 'maxDur',
+      },
+      {
+        title: 'Mean duration',
+        kind: 'NUMBER',
+        columnConstructor: Float64Array,
+        columnId: 'meanDur',
+      },
+      {
+        title: 'Occurrences',
+        kind: 'NUMBER',
+        columnConstructor: Uint16Array,
+        columnId: 'occurrences',
+        sum: true
+      }
+    ];
+  }
+}
diff --git a/ui/src/controller/consumer_port_types.ts b/ui/src/controller/consumer_port_types.ts
index 4e72f5b..8b3cd45 100644
--- a/ui/src/controller/consumer_port_types.ts
+++ b/ui/src/controller/consumer_port_types.ts
@@ -59,11 +59,6 @@
   return obj.type === 'GetTraceStatsResponse';
 }
 
-export function isGetCategoriesResponse(obj: Typed):
-    obj is GetCategoriesResponse {
-  return obj.type === 'GetCategoriesResponse';
-}
-
 export function isFreeBuffersResponse(obj: Typed): obj is FreeBuffersResponse {
   return obj.type === 'FreeBuffersResponse';
 }
diff --git a/ui/src/controller/flamegraph_controller.ts b/ui/src/controller/flamegraph_controller.ts
index bafad23..7459c3f 100644
--- a/ui/src/controller/flamegraph_controller.ts
+++ b/ui/src/controller/flamegraph_controller.ts
@@ -87,125 +87,102 @@
 
   run() {
     const selection = globals.state.currentFlamegraphState;
-
-    if (!selection) return;
-
-    if (this.shouldRequestData(selection)) {
-      if (this.requestingData) {
-        this.queuedRequest = true;
-      } else {
-        this.requestingData = true;
-        const selectedFlamegraphState: FlamegraphState =
-            this.copyFlamegraphState(selection);
-
-        this.getFlamegraphMetadata(
-                selection.type,
-                selectedFlamegraphState.ts,
-                selectedFlamegraphState.upid)
-            .then(result => {
-              if (result !== undefined) {
-                Object.assign(this.flamegraphDetails, result);
-              }
-
-              // TODO(hjd): Clean this up.
-              if (this.lastSelectedFlamegraphState &&
-                  this.lastSelectedFlamegraphState.focusRegex !==
-                      selection.focusRegex) {
-                this.flamegraphDatasets.clear();
-              }
-
-              this.lastSelectedFlamegraphState =
-                  this.copyFlamegraphState(selection);
-
-              const expandedId = selectedFlamegraphState.expandedCallsite ?
-                  selectedFlamegraphState.expandedCallsite.id :
-                  -1;
-              const rootSize =
-                  selectedFlamegraphState.expandedCallsite === undefined ?
-                  undefined :
-                  selectedFlamegraphState.expandedCallsite.totalSize;
-
-              const key = `${selectedFlamegraphState.upid};${
-                  selectedFlamegraphState.ts}`;
-
-              this.getFlamegraphData(
-                      key,
-                      selectedFlamegraphState.viewingOption ?
-                          selectedFlamegraphState.viewingOption :
-                          DEFAULT_VIEWING_OPTION,
-                      selection.ts,
-                      selectedFlamegraphState.upid,
-                      selectedFlamegraphState.type,
-                      selectedFlamegraphState.focusRegex)
-                  .then(flamegraphData => {
-                    if (flamegraphData !== undefined && selection &&
-                        selection.kind === selectedFlamegraphState.kind &&
-                        selection.id === selectedFlamegraphState.id &&
-                        selection.ts === selectedFlamegraphState.ts) {
-                      const expandedFlamegraphData =
-                          expandCallsites(flamegraphData, expandedId);
-                      this.prepareAndMergeCallsites(
-                          expandedFlamegraphData,
-                          this.lastSelectedFlamegraphState!.viewingOption,
-                          rootSize,
-                          this.lastSelectedFlamegraphState!.expandedCallsite);
-                    }
-                  })
-                  .finally(() => {
-                    this.requestingData = false;
-                    if (this.queuedRequest) {
-                      this.queuedRequest = false;
-                      this.run();
-                    }
-                  });
-            });
-      }
+    if (!selection || !this.shouldRequestData(selection)) {
+      return;
     }
+    if (this.requestingData) {
+      this.queuedRequest = true;
+      return;
+    }
+    this.requestingData = true;
+
+    this.assembleFlamegraphDetails(selection);
   }
 
-  private copyFlamegraphState(flamegraphState: FlamegraphState):
-      FlamegraphState {
-    return {
-      kind: flamegraphState.kind,
-      id: flamegraphState.id,
-      upid: flamegraphState.upid,
-      ts: flamegraphState.ts,
-      type: flamegraphState.type,
-      expandedCallsite: flamegraphState.expandedCallsite,
-      viewingOption: flamegraphState.viewingOption,
-      focusRegex: flamegraphState.focusRegex,
-    };
+  // Fetches metadata and flamegraph data for |selection| and publishes them.
+  // The try block spans every await so that a rejected query still resets
+  // requestingData and re-runs a queued request instead of wedging run().
+  private async assembleFlamegraphDetails(selection: FlamegraphState) {
+    // Shallow copy of the selection as it was when this request started.
+    const snapshot = {...selection};
+    try {
+      const flamegraphMetadata = await this.getFlamegraphMetadata(
+          selection.type, snapshot.ts, snapshot.upid);
+      if (flamegraphMetadata !== undefined) {
+        Object.assign(this.flamegraphDetails, flamegraphMetadata);
+      }
+
+      // TODO(hjd): Clean this up.
+      if (this.lastSelectedFlamegraphState &&
+          this.lastSelectedFlamegraphState.focusRegex !==
+              selection.focusRegex) {
+        this.flamegraphDatasets.clear();
+      }
+
+      this.lastSelectedFlamegraphState = {...selection};
+
+      const expandedId =
+          snapshot.expandedCallsite ? snapshot.expandedCallsite.id : -1;
+      const rootSize = snapshot.expandedCallsite === undefined ?
+          undefined :
+          snapshot.expandedCallsite.totalSize;
+
+      const key = `${snapshot.upid};${snapshot.ts}`;
+
+      const flamegraphData = await this.getFlamegraphData(
+          key,
+          snapshot.viewingOption ? snapshot.viewingOption :
+                                   DEFAULT_VIEWING_OPTION,
+          selection.ts,
+          snapshot.upid,
+          snapshot.type,
+          snapshot.focusRegex);
+      if (flamegraphData !== undefined && selection &&
+          selection.kind === snapshot.kind && selection.id === snapshot.id &&
+          selection.ts === snapshot.ts) {
+        const expandedFlamegraphData =
+            expandCallsites(flamegraphData, expandedId);
+        this.prepareAndMergeCallsites(
+            expandedFlamegraphData,
+            this.lastSelectedFlamegraphState.viewingOption,
+            rootSize,
+            this.lastSelectedFlamegraphState.expandedCallsite);
+      }
+    } finally {
+      this.requestingData = false;
+      if (this.queuedRequest) {
+        this.queuedRequest = false;
+        this.run();
+      }
+    }
   }
 
   private shouldRequestData(selection: FlamegraphState) {
     return selection.kind === 'FLAMEGRAPH_STATE' &&
         (this.lastSelectedFlamegraphState === undefined ||
-         (this.lastSelectedFlamegraphState !== undefined &&
-          (this.lastSelectedFlamegraphState.id !== selection.id ||
-           this.lastSelectedFlamegraphState.ts !== selection.ts ||
-           this.lastSelectedFlamegraphState.type !== selection.type ||
-           this.lastSelectedFlamegraphState.upid !== selection.upid ||
-           this.lastSelectedFlamegraphState.viewingOption !==
-               selection.viewingOption ||
-           this.lastSelectedFlamegraphState.focusRegex !==
-               selection.focusRegex ||
-           this.lastSelectedFlamegraphState.expandedCallsite !==
-               selection.expandedCallsite)));
+         (this.lastSelectedFlamegraphState.id !== selection.id ||
+          this.lastSelectedFlamegraphState.ts !== selection.ts ||
+          this.lastSelectedFlamegraphState.type !== selection.type ||
+          this.lastSelectedFlamegraphState.upid !== selection.upid ||
+          this.lastSelectedFlamegraphState.viewingOption !==
+              selection.viewingOption ||
+          this.lastSelectedFlamegraphState.focusRegex !==
+              selection.focusRegex ||
+          this.lastSelectedFlamegraphState.expandedCallsite !==
+              selection.expandedCallsite));
   }
 
   private prepareAndMergeCallsites(
       flamegraphData: CallsiteInfo[],
       viewingOption: string|undefined = DEFAULT_VIEWING_OPTION,
       rootSize?: number, expandedCallsite?: CallsiteInfo) {
-    const mergedFlamegraphData = mergeCallsites(
+    this.flamegraphDetails.flamegraph = mergeCallsites(
         flamegraphData, this.getMinSizeDisplayed(flamegraphData, rootSize));
-    this.flamegraphDetails.flamegraph = mergedFlamegraphData;
     this.flamegraphDetails.expandedCallsite = expandedCallsite;
     this.flamegraphDetails.viewingOption = viewingOption;
     publishFlamegraphDetails(this.flamegraphDetails);
   }
 
-
   async getFlamegraphData(
       baseKey: string, viewingOption: string, ts: number, upid: number,
       type: string, focusRegex: string): Promise<CallsiteInfo[]> {
diff --git a/ui/src/controller/record_controller.ts b/ui/src/controller/record_controller.ts
index 1b2c363..644e5af 100644
--- a/ui/src/controller/record_controller.ts
+++ b/ui/src/controller/record_controller.ts
@@ -161,13 +161,6 @@
     ftraceEvents.add('raw_syscalls/sys_exit');
   }
 
-  if (procThreadAssociationFtrace) {
-    ftraceEvents.add('sched/sched_process_exit');
-    ftraceEvents.add('sched/sched_process_free');
-    ftraceEvents.add('task/task_newtask');
-    ftraceEvents.add('task/task_rename');
-  }
-
   if (uiCfg.batteryDrain) {
     const ds = new TraceConfig.DataSource();
     ds.config = new DataSourceConfig();
@@ -197,7 +190,7 @@
   let sysStatsCfg: SysStatsConfig|undefined = undefined;
 
   if (uiCfg.cpuCoarse) {
-    if (sysStatsCfg === undefined) sysStatsCfg = new SysStatsConfig();
+    sysStatsCfg = new SysStatsConfig();
     sysStatsCfg.statPeriodMs = uiCfg.cpuCoarsePollMs;
     sysStatsCfg.statCounters = [
       SysStatsConfig.StatCounters.STAT_CPU_TIMES,
@@ -216,6 +209,13 @@
     ftraceEvents.add('kmem/ion_heap_shrink');
   }
 
+  if (procThreadAssociationFtrace) {
+    ftraceEvents.add('sched/sched_process_exit');
+    ftraceEvents.add('sched/sched_process_free');
+    ftraceEvents.add('task/task_newtask');
+    ftraceEvents.add('task/task_rename');
+  }
+
   if (uiCfg.meminfo) {
     if (sysStatsCfg === undefined) sysStatsCfg = new SysStatsConfig();
     sysStatsCfg.meminfoPeriodMs = uiCfg.meminfoPeriodMs;
@@ -398,7 +398,7 @@
   }
 
   if (chromeCategories.size !== 0) {
-    let chromeRecordMode = '';
+    let chromeRecordMode;
     if (uiCfg.mode === 'STOP_WHEN_FULL') {
       chromeRecordMode = 'record-until-full';
     } else {
@@ -785,7 +785,7 @@
   // protocol. Actually, there is no full consumer_port implementation, but
   // only the support to start tracing and fetch the file.
   async getTargetController(target: RecordingTarget): Promise<RpcConsumerPort> {
-    const identifier = this.getTargetIdentifier(target);
+    const identifier = RecordController.getTargetIdentifier(target);
 
     // The reason why caching the target 'record controller' Promise is that
     // multiple rcp calls can happen while we are trying to understand if an
@@ -819,7 +819,7 @@
     return controllerPromise;
   }
 
-  private getTargetIdentifier(target: RecordingTarget): string {
+  private static getTargetIdentifier(target: RecordingTarget): string {
     return isAdbTarget(target) ? target.serial : target.os;
   }
 
diff --git a/ui/src/controller/record_controller_jsdomtest.ts b/ui/src/controller/record_controller_jsdomtest.ts
index 916054b..09916a2 100644
--- a/ui/src/controller/record_controller_jsdomtest.ts
+++ b/ui/src/controller/record_controller_jsdomtest.ts
@@ -14,9 +14,9 @@
 
 import {assertExists} from '../base/logging';
 import {TraceConfig} from '../common/protos';
-import {createEmptyRecordConfig} from '../common/state';
 
 import {genConfigProto, toPbtxt} from './record_controller';
+import {createEmptyRecordConfig} from './validate_config';
 
 test('encodeConfig', () => {
   const config = createEmptyRecordConfig();
diff --git a/ui/src/controller/selection_controller.ts b/ui/src/controller/selection_controller.ts
index 4bb8d45..7ea4a32 100644
--- a/ui/src/controller/selection_controller.ts
+++ b/ui/src/controller/selection_controller.ts
@@ -214,11 +214,12 @@
     const result = await this.args.engine.query(query);
     const it = result.iter({
       name: STR,
-      value: STR,
+      value: STR_NULL,
     });
     for (; it.valid(); it.next()) {
       const name = it.name;
-      const value = it.value;
+      // Only a null value maps to 'NULL'; an empty string is kept as-is.
+      const value = it.value === null ? 'NULL' : it.value;
       if (name === 'destination slice id' && !isNaN(Number(value))) {
         const destTrackId = await this.getDestTrackId(value);
         args.set(
diff --git a/ui/src/controller/trace_controller.ts b/ui/src/controller/trace_controller.ts
index fd2b3b3..a5e0a84 100644
--- a/ui/src/controller/trace_controller.ts
+++ b/ui/src/controller/trace_controller.ts
@@ -49,6 +49,9 @@
   CpuByProcessAggregationController
 } from './aggregation/cpu_by_process_aggregation_controller';
 import {
+  FrameAggregationController
+} from './aggregation/frame_aggregation_controller';
+import {
   SliceAggregationController
 } from './aggregation/slice_aggregation_controller';
 import {
@@ -105,6 +108,7 @@
   'android_batt',
   'android_sysui_cuj',
   'android_jank',
+  'android_camera',
   'trace_metadata',
 ];
 const FLAGGED_METRICS: Array<[Flag, string]> = METRICS.map(m => {
@@ -216,6 +220,10 @@
             'counter_aggregation',
             CounterAggregationController,
             {engine, kind: 'counter_aggregation'}));
+        childControllers.push(Child(
+            'frame_aggregation',
+            FrameAggregationController,
+            {engine, kind: 'frame_aggregation'}));
         childControllers.push(Child('search', SearchController, {
           engine,
           app: globals,
@@ -245,6 +253,10 @@
     return;
   }
 
+  onDestroy() {
+    frontendGlobals.engines.delete(this.engineId);
+  }
+
   private async loadTrace(): Promise<EngineMode> {
     this.updateStatus('Creating trace processor');
     // Check if there is any instance of the trace_processor_shell running in
@@ -254,25 +266,26 @@
     if (globals.state.newEngineMode === 'USE_HTTP_RPC_IF_AVAILABLE') {
       useRpc = (await HttpRpcEngine.checkConnection()).connected;
     }
+    let engine;
     if (useRpc) {
       console.log('Opening trace using native accelerator over HTTP+RPC');
       engineMode = 'HTTP_RPC';
-      const engine =
-          new HttpRpcEngine(this.engineId, LoadingManager.getInstance);
+      engine = new HttpRpcEngine(this.engineId, LoadingManager.getInstance);
       engine.errorHandler = (err) => {
         globals.dispatch(
             Actions.setEngineFailed({mode: 'HTTP_RPC', failure: `${err}`}));
         throw err;
       };
-      this.engine = engine;
     } else {
       console.log('Opening trace using built-in WASM engine');
       engineMode = 'WASM';
       const enginePort = resetEngineWorker();
-      this.engine = new WasmEngineProxy(
+      engine = new WasmEngineProxy(
           this.engineId, enginePort, LoadingManager.getInstance);
     }
+    this.engine = engine;
 
+    frontendGlobals.engines.set(this.engineId, engine);
     globals.dispatch(Actions.setEngineReady({
       engineId: this.engineId,
       ready: false,
@@ -569,11 +582,11 @@
     }
     const traceUuid = result.firstRow({uuid: STR}).uuid;
     const engineConfig = assertExists(globals.state.engines[engine.id]);
-    if (!cacheTrace(engineConfig.source, traceUuid)) {
-      // If the trace is not cacheable (has been opened from URL or RPC) don't
-      // append a ?trace_id to the URL. Doing so would cause an error if the
-      // tab is discarded or the user hits the reload button because the trace
-      // is not in the cache.
+    if (!(await cacheTrace(engineConfig.source, traceUuid))) {
+      // If the trace is not cacheable (i.e. it has been opened from URL or
+      // RPC) only append '?trace_id' to the URL, without the trace_id value.
+      // Doing otherwise would cause an error if the tab is discarded or the
+      // user hits the reload button because the trace is not in the cache.
       return '';
     }
     return traceUuid;
diff --git a/ui/src/controller/track_decider.ts b/ui/src/controller/track_decider.ts
index eb7f9de..dbb7bb6 100644
--- a/ui/src/controller/track_decider.ts
+++ b/ui/src/controller/track_decider.ts
@@ -1015,6 +1015,10 @@
       union
       select upid, utid from sched join thread using(utid) group by utid
       union
+      select distinct(process.upid), 0 as utid from process
+        join thread on process.upid = thread.upid
+        join perf_sample on thread.utid = perf_sample.utid
+      union
       select upid, utid from (
         select distinct(utid) from cpu_profile_stack_sample
       ) join thread using(utid)
@@ -1100,6 +1104,9 @@
           summaryTrackId,
           name,
           id: pUuid,
+          // Perf profiling tracks remain collapsed, otherwise we would have too
+          // many expanded process tracks for some perf traces, leading to
+          // jankiness.
           collapsed: !hasHeapProfiles,
         });
 
diff --git a/ui/src/controller/validate_config.ts b/ui/src/controller/validate_config.ts
index b5838c8..dbc2564 100644
--- a/ui/src/controller/validate_config.ts
+++ b/ui/src/controller/validate_config.ts
@@ -185,3 +185,7 @@
     symbolizeKsyms: v.boolean('symbolizeKsyms'),
   };
 }
+
+export function createEmptyRecordConfig(): RecordConfig {
+  return validateRecordConfig({});
+}
diff --git a/ui/src/frontend/checkerboard.ts b/ui/src/frontend/checkerboard.ts
index d271759..7ce182b 100644
--- a/ui/src/frontend/checkerboard.ts
+++ b/ui/src/frontend/checkerboard.ts
@@ -33,11 +33,12 @@
   if (LOADING_TEXT_WIDTH === 0) {
     LOADING_TEXT_WIDTH = ctx.measureText(LOADING_TEXT).width;
   }
-  ctx.fillText(
-      LOADING_TEXT,
-      leftPx + widthPx / 2 - LOADING_TEXT_WIDTH,
-      heightPx / 2,
-      widthPx);
+  if (LOADING_TEXT_WIDTH <= widthPx) {
+    ctx.fillText(
+        LOADING_TEXT,
+        leftPx + widthPx / 2 - LOADING_TEXT_WIDTH / 2,
+        heightPx / 2);
+  }
   ctx.textBaseline = oldBaseline;
 }
 
diff --git a/ui/src/frontend/chrome_slice_panel.ts b/ui/src/frontend/chrome_slice_panel.ts
index 0fa99bb..8188561 100644
--- a/ui/src/frontend/chrome_slice_panel.ts
+++ b/ui/src/frontend/chrome_slice_panel.ts
@@ -16,11 +16,12 @@
 
 import {Actions} from '../common/actions';
 import {Arg, ArgsTree, isArgTreeArray, isArgTreeMap} from '../common/arg_types';
-import {timeToCode, toNs} from '../common/time';
+import {timeToCode} from '../common/time';
 
 import {globals, SliceDetails} from './globals';
-import {Panel, PanelSize} from './panel';
+import {PanelSize} from './panel';
 import {verticalScrollToTrack} from './scroll_helper';
+import {SlicePanel} from './slice_panel';
 
 // Table row contents is one of two things:
 // 1. Key-value pair
@@ -122,7 +123,7 @@
   }
 }
 
-export class ChromeSliceDetailsPanel extends Panel {
+export class ChromeSliceDetailsPanel extends SlicePanel {
   view() {
     const sliceInfo = globals.sliceDetails;
     if (sliceInfo.ts !== undefined && sliceInfo.dur !== undefined &&
@@ -136,9 +137,7 @@
               sliceInfo.category);
       builder.add('Start time', timeToCode(sliceInfo.ts));
       builder.add(
-          'Duration',
-          toNs(sliceInfo.dur) === -1 ? '-1 (Did not end)' :
-                                       timeToCode(sliceInfo.dur));
+          'Duration', this.computeDuration(sliceInfo.ts, sliceInfo.dur));
       builder.add(
           'Slice ID', sliceInfo.id ? sliceInfo.id.toString() : 'Unknown');
       if (sliceInfo.description) {
diff --git a/ui/src/frontend/details_panel.ts b/ui/src/frontend/details_panel.ts
index 2e8ce7d..9f95ab3 100644
--- a/ui/src/frontend/details_panel.ts
+++ b/ui/src/frontend/details_panel.ts
@@ -38,7 +38,7 @@
 import {ColumnDisplay, ColumnPicker} from './pivot_table_editor';
 import {PivotTableHelper} from './pivot_table_helper';
 import {QueryTable} from './query_table';
-import {SliceDetailsPanel} from './slice_panel';
+import {SliceDetailsPanel} from './slice_details_panel';
 import {ThreadStatePanel} from './thread_state_panel';
 
 const UP_ICON = 'keyboard_arrow_up';
@@ -214,8 +214,6 @@
 
 export class DetailsPanel implements m.ClassComponent {
   private detailsHeight = DEFAULT_DETAILS_HEIGHT_PX;
-  // Used to set details panel to default height on selection.
-  private showDetailsPanel = true;
 
   view() {
     interface DetailsPanel {
@@ -366,8 +364,6 @@
       });
     }
 
-    this.showDetailsPanel = detailsPanels.length > 0;
-
     const currentTabDetails =
         detailsPanels.filter(tab => tab.key === globals.state.currentTab)[0];
 
@@ -381,7 +377,7 @@
         {
           style: {
             height: `${this.detailsHeight}px`,
-            display: this.showDetailsPanel ? null : 'none'
+            display: detailsPanels.length > 0 ? null : 'none'
           }
         },
         m(DragHandle, {
diff --git a/ui/src/frontend/globals.ts b/ui/src/frontend/globals.ts
index 904a371..ec9b061 100644
--- a/ui/src/frontend/globals.ts
+++ b/ui/src/frontend/globals.ts
@@ -20,6 +20,7 @@
   ConversionJobName,
   ConversionJobStatus
 } from '../common/conversion_jobs';
+import {Engine} from '../common/engine';
 import {MetricResult} from '../common/metric_data';
 import {CurrentSearchResults, SearchSummary} from '../common/search_data';
 import {CallsiteInfo, createEmptyState, State} from '../common/state';
@@ -206,6 +207,8 @@
     count: new Uint8Array(0),
   };
 
+  engines = new Map<string, Engine>();
+
   initialize(dispatch: Dispatch, router: Router) {
     this._dispatch = dispatch;
     this._router = router;
@@ -232,6 +235,7 @@
     this._threadStateDetails = {};
     this._flamegraphDetails = {};
     this._cpuProfileDetails = {};
+    this.engines.clear();
   }
 
   get router(): Router {
diff --git a/ui/src/frontend/index.ts b/ui/src/frontend/index.ts
index 8a86027..b264c3c 100644
--- a/ui/src/frontend/index.ts
+++ b/ui/src/frontend/index.ts
@@ -133,6 +133,9 @@
     ],
     'script-src': [
       `'self'`,
+      // TODO(b/201596551): this is required for Wasm after crrev.com/c/3179051
+      // and should be replaced with 'wasm-unsafe-eval'.
+      `'unsafe-eval'`,
       'https://*.google.com',
       'https://*.googleusercontent.com',
       'https://www.googletagmanager.com',
diff --git a/ui/src/frontend/keyboard_event_handler.ts b/ui/src/frontend/keyboard_event_handler.ts
index cb94e2b..17a60dc 100644
--- a/ui/src/frontend/keyboard_event_handler.ts
+++ b/ui/src/frontend/keyboard_event_handler.ts
@@ -185,7 +185,11 @@
       endTs = startTs + slice.dur;
     } else if (slice.ts) {
       startTs = slice.ts + globals.state.traceTime.startSec;
-      endTs = startTs + INSTANT_FOCUS_DURATION_S;
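+      // Handles a) slice.dur === -1 (unfinished slice) and b) slice.dur === 0
+      // (instant event). NOTE(review): SlicePanel.computeDuration() tests
+      // toNs(dur) === -1 instead; confirm which unit slice.dur carries here.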
+      endTs = slice.dur === -1 ? globals.state.traceTime.endSec :
+                                 startTs + INSTANT_FOCUS_DURATION_S;
     }
   } else if (selection.kind === 'THREAD_STATE') {
     const threadState = globals.threadStateDetails;
diff --git a/ui/src/frontend/query_table.ts b/ui/src/frontend/query_table.ts
index 6b48524..aaf4742 100644
--- a/ui/src/frontend/query_table.ts
+++ b/ui/src/frontend/query_table.ts
@@ -159,9 +159,15 @@
               },
               'Close'),
             ),
+        // TODO(rsavitski): the x-scrollable works for the
+        // dedicated query page, but is insufficient in the case of
+        // the results being presented within the bottom details
+        // pane in the timeline view. In that case, the
+        // details-panel-container enforces non-scrollability.
+        // Ideally we'd want to make that case scrollable as well.
         resp.error ?
             m('.query-error', `SQL error: ${resp.error}`) :
-            m('.query-table-container',
+            m('.query-table-container.x-scrollable',
               m('table.query-table', m('thead', header), m('tbody', rows))));
   }
 
diff --git a/ui/src/frontend/record_config.ts b/ui/src/frontend/record_config.ts
index 2676b31..e81d239 100644
--- a/ui/src/frontend/record_config.ts
+++ b/ui/src/frontend/record_config.ts
@@ -13,9 +13,14 @@
 // limitations under the License.
 
 import {RecordConfig} from '../common/state';
-import {JsonObject, validateRecordConfig} from '../controller/validate_config';
+import {
+  createEmptyRecordConfig,
+  JsonObject,
+  validateRecordConfig
+} from '../controller/validate_config';
 
 const LOCAL_STORAGE_RECORD_CONFIGS_KEY = 'recordConfigs';
+const LOCAL_STORAGE_AUTOSAVE_CONFIG_KEY = 'autosaveConfig';
 
 class NamedRecordConfig {
   title: string;
@@ -37,9 +42,11 @@
 
 export class RecordConfigStore {
   recordConfigs: NamedRecordConfig[];
+  recordConfigNames: Set<string>;
 
   constructor() {
     this.recordConfigs = [];
+    this.recordConfigNames = new Set();
     this.reloadFromLocalStorage();
   }
 
@@ -48,10 +55,13 @@
     // modifications of local storage from a different tab.
     this.reloadFromLocalStorage();
 
-    const config = new NamedRecordConfig(
-        title ? title : new Date().toJSON(), recordConfig, new Date().toJSON());
+    const savedTitle = title ? title : new Date().toJSON();
+    const config =
+        new NamedRecordConfig(savedTitle, recordConfig, new Date().toJSON());
 
     this.recordConfigs.push(config);
+    this.recordConfigNames.add(savedTitle);
+
     window.localStorage.setItem(
         LOCAL_STORAGE_RECORD_CONFIGS_KEY, JSON.stringify(this.recordConfigs));
   }
@@ -70,6 +80,7 @@
     }
 
     if (idx !== -1) {
+      this.recordConfigNames.delete(this.recordConfigs[idx].title);
       this.recordConfigs.splice(idx, 1);
       window.localStorage.setItem(
           LOCAL_STORAGE_RECORD_CONFIGS_KEY, JSON.stringify(this.recordConfigs));
@@ -81,6 +92,7 @@
 
   private clearRecordConfigs(): void {
     this.recordConfigs = [];
+    this.recordConfigNames.clear();
     window.localStorage.setItem(
         LOCAL_STORAGE_RECORD_CONFIGS_KEY, JSON.stringify([]));
   }
@@ -90,6 +102,8 @@
         window.localStorage.getItem(LOCAL_STORAGE_RECORD_CONFIGS_KEY);
 
     if (configsLocalStorage) {
+      this.recordConfigNames.clear();
+
       try {
         const validConfigLocalStorage: NamedRecordConfig[] = [];
         const parsedConfigsLocalStorage = JSON.parse(configsLocalStorage);
@@ -104,6 +118,7 @@
           if (!NamedRecordConfig.isValid(parsedConfigsLocalStorage[i])) {
             continue;
           }
+          this.recordConfigNames.add(parsedConfigsLocalStorage[i].title);
           validConfigLocalStorage.push(new NamedRecordConfig(
               parsedConfigsLocalStorage[i].title,
               parsedConfigsLocalStorage[i].config,
@@ -121,8 +136,49 @@
       this.clearRecordConfigs();
     }
   }
+
+  canSave(title: string) {
+    return !this.recordConfigNames.has(title);
+  }
 }
 
 // This class is a singleton to avoid many instances
 // conflicting as they attempt to edit localStorage.
 export const recordConfigStore = new RecordConfigStore();
+
+// Keeps the config of the most recently started recording in localStorage.
+export class AutosaveConfigStore {
+  config: RecordConfig;
+
+  // True once a config was restored from storage or saved; gates the
+  // "Latest started recording" entry in the recording profiles list.
+  hasSavedConfig: boolean;
+
+  constructor() {
+    this.hasSavedConfig = false;
+    this.config = createEmptyRecordConfig();
+    const savedItem =
+        window.localStorage.getItem(LOCAL_STORAGE_AUTOSAVE_CONFIG_KEY);
+    try {
+      const parsed = JSON.parse(savedItem || 'null');
+      if (parsed === null || typeof parsed !== 'object') return;
+      this.config = validateRecordConfig(parsed as JsonObject);
+      this.hasSavedConfig = true;
+    } catch (e) {
+      // Unreadable autosave entry: keep the empty config instead of throwing.
+    }
+  }
+
+  get(): RecordConfig {
+    return this.config;
+  }
+
+  save(newConfig: RecordConfig) {
+    window.localStorage.setItem(
+        LOCAL_STORAGE_AUTOSAVE_CONFIG_KEY, JSON.stringify(newConfig));
+    this.config = newConfig;
+    this.hasSavedConfig = true;
+  }
+}
+
+export const autosaveConfigStore = new AutosaveConfigStore();
diff --git a/ui/src/frontend/record_page.ts b/ui/src/frontend/record_page.ts
index ec62542..f6d2e26 100644
--- a/ui/src/frontend/record_page.ts
+++ b/ui/src/frontend/record_page.ts
@@ -29,15 +29,18 @@
   isAndroidTarget,
   isChromeTarget,
   isCrOSTarget,
+  LoadedConfig,
   MAX_TIME,
+  RecordConfig,
   RecordingTarget,
   RecordMode
 } from '../common/state';
 import {AdbOverWebUsb} from '../controller/adb';
+import {createEmptyRecordConfig} from '../controller/validate_config';
 
 import {globals} from './globals';
 import {createPage, PageAttrs} from './pages';
-import {recordConfigStore} from './record_config';
+import {autosaveConfigStore, recordConfigStore} from './record_config';
 import {
   CodeSnippet,
   Dropdown,
@@ -899,19 +902,39 @@
       recordingLog());
 }
 
+function loadedConfigEqual(cfg1: LoadedConfig, cfg2: LoadedConfig): boolean {
+  return cfg1.type === 'NAMED' && cfg2.type === 'NAMED' ?
+      cfg1.name === cfg2.name :
+      cfg1.type === cfg2.type;
+}
+
+function loadConfigButton(
+    config: RecordConfig, configType: LoadedConfig): m.Vnode {
+  return m(
+      'button',
+      {
+        class: 'config-button load',
+        disabled: loadedConfigEqual(configType, globals.state.lastLoadedConfig),
+        onclick: () => {
+          globals.dispatch(Actions.setRecordConfig({config, configType}));
+          globals.rafScheduler.scheduleFullRedraw();
+        }
+      },
+      'load');
+}
+
 function displayRecordConfigs() {
-  return recordConfigStore.recordConfigs.map((item) => {
-    return m('.config', [
+  const configs = [];
+  if (autosaveConfigStore.hasSavedConfig) {
+    configs.push(m('.config', [
+      m('span.title-config', m('strong', 'Latest started recording')),
+      loadConfigButton(autosaveConfigStore.get(), {type: 'AUTOMATIC'}),
+    ]));
+  }
+  for (const item of recordConfigStore.recordConfigs) {
+    configs.push(m('.config', [
       m('span.title-config', item.title),
-      m('button',
-        {
-          class: 'config-button load',
-          onclick: () => {
-            globals.dispatch(Actions.setRecordConfig({config: item.config}));
-            globals.rafScheduler.scheduleFullRedraw();
-          }
-        },
-        'load'),
+      loadConfigButton(item.config, {type: 'NAMED', name: item.title}),
       m('button',
         {
           class: 'config-button delete',
@@ -921,15 +944,9 @@
           }
         },
         'delete'),
-    ]);
-  });
-}
-
-function getSavedConfigList() {
-  if (recordConfigStore.recordConfigs.length === 0) {
-    return [];
+    ]));
   }
-  return displayRecordConfigs();
+  return configs;
 }
 
 export const ConfigTitleState = {
@@ -946,6 +963,7 @@
 };
 
 function Configurations(cssClass: string) {
+  const canSave = recordConfigStore.canSave(ConfigTitleState.getTitle());
   return m(
       `.record-section${cssClass}`,
       m('header', 'Save and load configurations'),
@@ -956,11 +974,14 @@
             placeholder: 'Title for config',
             oninput() {
               ConfigTitleState.setTitle(this.value);
+              globals.rafScheduler.scheduleFullRedraw();
             }
           }),
           m('button',
             {
               class: 'config-button save',
+              disabled: !canSave,
+              title: canSave ? '' : 'Duplicate name, saving disabled',
               onclick: () => {
                 recordConfigStore.save(
                     globals.state.recordConfig, ConfigTitleState.getTitle());
@@ -970,7 +991,23 @@
             },
             'Save current config')
         ]),
-      getSavedConfigList());
+      m('.reset-wrapper',
+        m('button',
+          {
+            class: 'config-button reset',
+            onclick: () => {
+              if (confirm(
+                      'Current configuration will be cleared. Are you sure?')) {
+                globals.dispatch(Actions.setRecordConfig({
+                  config: createEmptyRecordConfig(),
+                  configType: {type: 'NONE'}
+                }));
+                globals.rafScheduler.scheduleFullRedraw();
+              }
+            }
+          },
+          'Clear current config')),
+      displayRecordConfigs());
 }
 
 function BufferUsageProgressBar() {
@@ -1171,6 +1208,7 @@
 function onStartRecordingPressed() {
   location.href = '#!/record/instructions';
   globals.rafScheduler.scheduleFullRedraw();
+  autosaveConfigStore.save(globals.state.recordConfig);
 
   const target = globals.state.recordingTarget;
   if (isAndroidTarget(target) || isChromeTarget(target)) {
diff --git a/ui/src/frontend/slice_details_panel.ts b/ui/src/frontend/slice_details_panel.ts
new file mode 100644
index 0000000..9e9c462
--- /dev/null
+++ b/ui/src/frontend/slice_details_panel.ts
@@ -0,0 +1,165 @@
+// Copyright (C) 2019 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import * as m from 'mithril';
+
+import {Actions} from '../common/actions';
+import {drawDoubleHeadedArrow} from '../common/canvas_utils';
+import {translateState} from '../common/thread_state';
+import {timeToCode, toNs} from '../common/time';
+
+import {globals, SliceDetails, ThreadDesc} from './globals';
+import {PanelSize} from './panel';
+import {scrollToTrackAndTs} from './scroll_helper';
+import {SlicePanel} from './slice_panel';
+
+export class SliceDetailsPanel extends SlicePanel {
+  // Renders the panel heading and the details table for the selected slice.
+  view() {
+    const sliceInfo = globals.sliceDetails;
+    if (sliceInfo.utid === undefined) return;
+    const threadInfo = globals.threads.get(sliceInfo.utid);
+    // wakerUtid may be 0 (falsy): compare to undefined, as renderCanvas() does.
+    const hasWakeup = sliceInfo.wakeupTs && sliceInfo.wakerUtid !== undefined;
+    return m(
+        '.details-panel',
+        m('.details-panel-heading',
+          m('h2.split', `Slice Details`),
+          hasWakeup ? m('h2.split', 'Scheduling Latency') : ''),
+        this.getDetails(sliceInfo, threadInfo));
+  }
+
+  getDetails(sliceInfo: SliceDetails, threadInfo: ThreadDesc|undefined) {
+    if (!threadInfo || sliceInfo.ts === undefined ||
+        sliceInfo.dur === undefined) {
+      return null;
+    } else {
+      return m(
+          '.details-table',
+          m('table.half-width',
+            [
+              m('tr',
+                m('th', `Process`),
+                m('td', `${threadInfo.procName} [${threadInfo.pid}]`)),
+              m('tr',
+                m('th', `Thread`),
+                m('td',
+                  `${threadInfo.threadName} [${threadInfo.tid}]`,
+                  m('i.material-icons.grey',
+                    {onclick: () => this.goToThread(), title: 'Go to thread'},
+                    'call_made'))),
+              m('tr', m('th', `Cmdline`), m('td', threadInfo.cmdline)),
+              m('tr',
+                m('th', `Start time`),
+                m('td', `${timeToCode(sliceInfo.ts)}`)),
+              m('tr',
+                m('th', `Duration`),
+                m('td',
+                  `${this.computeDuration(sliceInfo.ts, sliceInfo.dur)}`)),
+              m('tr', m('th', `Prio`), m('td', `${sliceInfo.priority}`)),
+              m('tr',
+                m('th', `End State`),
+                m('td', translateState(sliceInfo.endState))),
+              m('tr',
+                m('th', `Slice ID`),
+                m('td', sliceInfo.id ? sliceInfo.id.toString() : 'Unknown'))
+            ]),
+      );
+    }
+  }
+
+  goToThread() {
+    const sliceInfo = globals.sliceDetails;
+    if (sliceInfo.utid === undefined) return;
+    const threadInfo = globals.threads.get(sliceInfo.utid);
+
+    if (sliceInfo.id === undefined || sliceInfo.ts === undefined ||
+        sliceInfo.dur === undefined || sliceInfo.cpu === undefined ||
+        threadInfo === undefined) {
+      return;
+    }
+
+    let trackId: string|number|undefined;
+    for (const track of Object.values(globals.state.tracks)) {
+      if (track.kind === 'ThreadStateTrack' &&
+          (track.config as {utid: number}).utid === threadInfo.utid) {
+        trackId = track.id;
+      }
+    }
+
+    if (trackId && sliceInfo.threadStateId) {
+      globals.makeSelection(Actions.selectThreadState({
+        id: sliceInfo.threadStateId,
+        trackId: trackId.toString(),
+      }));
+
+      scrollToTrackAndTs(
+          trackId, toNs(sliceInfo.ts + globals.state.traceTime.startSec), true);
+    }
+  }
+
+
+  renderCanvas(ctx: CanvasRenderingContext2D, size: PanelSize) {
+    const details = globals.sliceDetails;
+    // Show expanded details on the scheduling of the currently selected slice.
+    if (details.wakeupTs && details.wakerUtid !== undefined) {
+      const threadInfo = globals.threads.get(details.wakerUtid);
+      // Draw diamond and vertical line.
+      const startDraw = {x: size.width / 2 + 20, y: 52};
+      ctx.beginPath();
+      ctx.moveTo(startDraw.x, startDraw.y + 28);
+      ctx.fillStyle = 'black';
+      ctx.lineTo(startDraw.x + 6, startDraw.y + 20);
+      ctx.lineTo(startDraw.x, startDraw.y + 12);
+      ctx.lineTo(startDraw.x - 6, startDraw.y + 20);
+      ctx.fill();
+      ctx.closePath();
+      ctx.fillRect(startDraw.x - 1, startDraw.y, 2, 100);
+
+      // Wakeup explanation text.
+      ctx.font = '13px Roboto Condensed';
+      ctx.fillStyle = '#3c4b5d';
+      if (threadInfo) {
+        const displayText = `Wakeup @ ${
+            timeToCode(
+                details.wakeupTs - globals.state.traceTime.startSec)} on CPU ${
+            details.wakerCpu} by`;
+        const processText = `P: ${threadInfo.procName} [${threadInfo.pid}]`;
+        const threadText = `T: ${threadInfo.threadName} [${threadInfo.tid}]`;
+        ctx.fillText(displayText, startDraw.x + 20, startDraw.y + 20);
+        ctx.fillText(processText, startDraw.x + 20, startDraw.y + 37);
+        ctx.fillText(threadText, startDraw.x + 20, startDraw.y + 55);
+      }
+
+      // Draw latency arrow and explanation text.
+      drawDoubleHeadedArrow(ctx, startDraw.x, startDraw.y + 80, 60, true);
+      if (details.ts) {
+        const displayLatency = `Scheduling latency: ${
+            timeToCode(
+                details.ts -
+                (details.wakeupTs - globals.state.traceTime.startSec))}`;
+        ctx.fillText(displayLatency, startDraw.x + 70, startDraw.y + 86);
+        const explain1 =
+            'This is the interval from when the task became eligible to run';
+        const explain2 =
+            '(e.g. because of notifying a wait queue it was suspended on) to';
+        const explain3 = 'when it started running.';
+        ctx.font = '10px Roboto Condensed';
+        ctx.fillText(explain1, startDraw.x + 70, startDraw.y + 86 + 16);
+        ctx.fillText(explain2, startDraw.x + 70, startDraw.y + 86 + 16 + 12);
+        ctx.fillText(explain3, startDraw.x + 70, startDraw.y + 86 + 16 + 24);
+      }
+    }
+  }
+}
diff --git a/ui/src/frontend/slice_panel.ts b/ui/src/frontend/slice_panel.ts
index e83c9bf..3f25b95 100644
--- a/ui/src/frontend/slice_panel.ts
+++ b/ui/src/frontend/slice_panel.ts
@@ -1,4 +1,4 @@
-// Copyright (C) 2019 The Android Open Source Project
+// Copyright (C) 2021 The Android Open Source Project
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use size file except in compliance with the License.
@@ -12,152 +12,15 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-import * as m from 'mithril';
-
-import {Actions} from '../common/actions';
-import {drawDoubleHeadedArrow} from '../common/canvas_utils';
-import {translateState} from '../common/thread_state';
 import {timeToCode, toNs} from '../common/time';
 
-import {globals, SliceDetails, ThreadDesc} from './globals';
-import {Panel, PanelSize} from './panel';
-import {scrollToTrackAndTs} from './scroll_helper';
+import {globals} from './globals';
+import {Panel} from './panel';
 
-export class SliceDetailsPanel extends Panel {
-  view() {
-    const sliceInfo = globals.sliceDetails;
-    if (sliceInfo.utid === undefined) return;
-    const threadInfo = globals.threads.get(sliceInfo.utid);
-
-    return m(
-        '.details-panel',
-        m('.details-panel-heading',
-          m('h2.split', `Slice Details`),
-          (sliceInfo.wakeupTs && sliceInfo.wakerUtid) ?
-              m('h2.split', 'Scheduling Latency') :
-              ''),
-        this.getDetails(sliceInfo, threadInfo));
+export abstract class SlicePanel extends Panel {
+  protected computeDuration(ts: number, dur: number) {
+    const t = globals.state.traceTime;  // callers pass ts relative to startSec
+    return toNs(dur) !== -1 ? timeToCode(dur) :
+        `${timeToCode(t.endSec - t.startSec - ts)} (Did not end)`;
   }
-
-  getDetails(sliceInfo: SliceDetails, threadInfo: ThreadDesc|undefined) {
-    if (!threadInfo || sliceInfo.ts === undefined ||
-        sliceInfo.dur === undefined) {
-      return null;
-    } else {
-      return m(
-          '.details-table',
-          m('table.half-width',
-            [
-              m('tr',
-                m('th', `Process`),
-                m('td', `${threadInfo.procName} [${threadInfo.pid}]`)),
-              m('tr',
-                m('th', `Thread`),
-                m('td',
-                  `${threadInfo.threadName} [${threadInfo.tid}]`,
-                  m('i.material-icons.grey',
-                    {onclick: () => this.goToThread(), title: 'Go to thread'},
-                    'call_made'))),
-              m('tr', m('th', `Cmdline`), m('td', threadInfo.cmdline)),
-              m('tr',
-                m('th', `Start time`),
-                m('td', `${timeToCode(sliceInfo.ts)}`)),
-              m('tr',
-                m('th', `Duration`),
-                m('td', `${timeToCode(sliceInfo.dur)}`)),
-              m('tr', m('th', `Prio`), m('td', `${sliceInfo.priority}`)),
-              m('tr',
-                m('th', `End State`),
-                m('td', translateState(sliceInfo.endState))),
-              m('tr',
-                m('th', `Slice ID`),
-                m('td', sliceInfo.id ? sliceInfo.id.toString() : 'Unknown'))
-            ]),
-      );
-    }
-  }
-
-  goToThread() {
-    const sliceInfo = globals.sliceDetails;
-    if (sliceInfo.utid === undefined) return;
-    const threadInfo = globals.threads.get(sliceInfo.utid);
-
-    if (sliceInfo.id === undefined || sliceInfo.ts === undefined ||
-        sliceInfo.dur === undefined || sliceInfo.cpu === undefined ||
-        threadInfo === undefined) {
-      return;
-    }
-
-    let trackId: string|number|undefined;
-    for (const track of Object.values(globals.state.tracks)) {
-      if (track.kind === 'ThreadStateTrack' &&
-          (track.config as {utid: number}).utid === threadInfo.utid) {
-        trackId = track.id;
-      }
-    }
-
-    if (trackId && sliceInfo.threadStateId) {
-      globals.makeSelection(Actions.selectThreadState({
-        id: sliceInfo.threadStateId,
-        trackId: trackId.toString(),
-      }));
-
-      scrollToTrackAndTs(
-          trackId, toNs(sliceInfo.ts + globals.state.traceTime.startSec), true);
-    }
-  }
-
-
-  renderCanvas(ctx: CanvasRenderingContext2D, size: PanelSize) {
-    const details = globals.sliceDetails;
-    // Show expanded details on the scheduling of the currently selected slice.
-    if (details.wakeupTs && details.wakerUtid !== undefined) {
-      const threadInfo = globals.threads.get(details.wakerUtid);
-      // Draw diamond and vertical line.
-      const startDraw = {x: size.width / 2 + 20, y: 52};
-      ctx.beginPath();
-      ctx.moveTo(startDraw.x, startDraw.y + 28);
-      ctx.fillStyle = 'black';
-      ctx.lineTo(startDraw.x + 6, startDraw.y + 20);
-      ctx.lineTo(startDraw.x, startDraw.y + 12);
-      ctx.lineTo(startDraw.x - 6, startDraw.y + 20);
-      ctx.fill();
-      ctx.closePath();
-      ctx.fillRect(startDraw.x - 1, startDraw.y, 2, 100);
-
-      // Wakeup explanation text.
-      ctx.font = '13px Roboto Condensed';
-      ctx.fillStyle = '#3c4b5d';
-      if (threadInfo) {
-        const displayText = `Wakeup @ ${
-            timeToCode(
-                details.wakeupTs - globals.state.traceTime.startSec)} on CPU ${
-            details.wakerCpu} by`;
-        const processText = `P: ${threadInfo.procName} [${threadInfo.pid}]`;
-        const threadText = `T: ${threadInfo.threadName} [${threadInfo.tid}]`;
-        ctx.fillText(displayText, startDraw.x + 20, startDraw.y + 20);
-        ctx.fillText(processText, startDraw.x + 20, startDraw.y + 37);
-        ctx.fillText(threadText, startDraw.x + 20, startDraw.y + 55);
-      }
-
-      // Draw latency arrow and explanation text.
-      drawDoubleHeadedArrow(ctx, startDraw.x, startDraw.y + 80, 60, true);
-      if (details.ts) {
-        const displayLatency = `Scheduling latency: ${
-            timeToCode(
-                details.ts -
-                (details.wakeupTs - globals.state.traceTime.startSec))}`;
-        ctx.fillText(displayLatency, startDraw.x + 70, startDraw.y + 86);
-        const explain1 =
-            'This is the interval from when the task became eligible to run';
-        const explain2 =
-            '(e.g. because of notifying a wait queue it was suspended on) to';
-        const explain3 = 'when it started running.';
-        ctx.font = '10px Roboto Condensed';
-        ctx.fillText(explain1, startDraw.x + 70, startDraw.y + 86 + 16);
-        ctx.fillText(explain2, startDraw.x + 70, startDraw.y + 86 + 16 + 12);
-        ctx.fillText(explain3, startDraw.x + 70, startDraw.y + 86 + 16 + 24);
-      }
-    }
-  }
-}
+}
\ No newline at end of file
diff --git a/ui/src/frontend/track.ts b/ui/src/frontend/track.ts
index fdef99b..fed8355 100644
--- a/ui/src/frontend/track.ts
+++ b/ui/src/frontend/track.ts
@@ -13,11 +13,13 @@
 // limitations under the License.
 
 import * as m from 'mithril';
+
 import {assertExists} from '../base/logging';
+import {Engine} from '../common/engine';
 import {TrackState} from '../common/state';
 import {TrackData} from '../common/track_data';
-import {checkerboard} from './checkerboard';
 
+import {checkerboard} from './checkerboard';
 import {globals} from './globals';
 import {TrackButtonAttrs} from './track_panel';
 
@@ -26,6 +28,7 @@
  */
 export interface NewTrackArgs {
   trackId: string;
+  engine: Engine;
 }
 
 /**
@@ -57,6 +60,7 @@
 export abstract class Track<Config = {}, Data extends TrackData = TrackData> {
   // The UI-generated track ID (not to be confused with the SQL track.id).
   private trackId: string;
+  protected readonly engine: Engine;
 
   // Caches the last state.track[this.trackId]. This is to deal with track
   // deletion, see comments in trackState() below.
@@ -64,6 +68,7 @@
 
   constructor(args: NewTrackArgs) {
     this.trackId = args.trackId;
+    this.engine = args.engine;
     this.lastTrackState = assertExists(globals.state.tracks[this.trackId]);
   }
 
diff --git a/ui/src/frontend/track_group_panel.ts b/ui/src/frontend/track_group_panel.ts
index 2171bbf..e842d94 100644
--- a/ui/src/frontend/track_group_panel.ts
+++ b/ui/src/frontend/track_group_panel.ts
@@ -49,14 +49,18 @@
   private readonly trackGroupId: string;
   private shellWidth = 0;
   private backgroundColor = '#ffffff';  // Updated from CSS later.
-  private summaryTrack: Track;
+  private summaryTrack: Track|undefined;
 
   constructor({attrs}: m.CVnode<Attrs>) {
     super();
     this.trackGroupId = attrs.trackGroupId;
     const trackCreator = trackRegistry.get(this.summaryTrackState.kind);
-    this.summaryTrack =
-        trackCreator.create({trackId: this.summaryTrackState.id});
+    const engineId = this.summaryTrackState.engineId;
+    const engine = globals.engines.get(engineId);
+    if (engine !== undefined) {
+      this.summaryTrack =
+          trackCreator.create({trackId: this.summaryTrackState.id, engine});
+    }
   }
 
   get trackGroupState(): TrackGroupState {
diff --git a/ui/src/frontend/track_panel.ts b/ui/src/frontend/track_panel.ts
index c89b061..084342b 100644
--- a/ui/src/frontend/track_panel.ts
+++ b/ui/src/frontend/track_panel.ts
@@ -279,26 +279,44 @@
 }
 
 export class TrackPanel extends Panel<TrackPanelAttrs> {
-  private track: Track;
-  private trackState: TrackState;
+  // TODO(hjd): It would be nicer if these could not be undefined here.
+  // We should implement a NullTrack which can be used if the trackState
+  // has disappeared.
+  private track: Track|undefined;
+  private trackState: TrackState|undefined;
+
   constructor(vnode: m.CVnode<TrackPanelAttrs>) {
     super();
     const trackId = vnode.attrs.id;
-    this.trackState = globals.state.tracks[trackId];
-    const trackCreator = trackRegistry.get(this.trackState.kind);
-    this.track = trackCreator.create({trackId});
+    const trackState = globals.state.tracks[trackId];
+    if (trackState === undefined) {
+      return;
+    }
+    const engine = globals.engines.get(trackState.engineId);
+    if (engine === undefined) {
+      return;
+    }
+    const trackCreator = trackRegistry.get(trackState.kind);
+    this.track = trackCreator.create({trackId, engine});
+    this.trackState = trackState;
   }
 
   view() {
+    if (this.track === undefined || this.trackState === undefined) {
+      return m('div', 'No such track');
+    }
     return m(TrackComponent, {trackState: this.trackState, track: this.track});
   }
 
   highlightIfTrackSelected(ctx: CanvasRenderingContext2D, size: PanelSize) {
     const localState = globals.frontendLocalState;
     const selection = globals.state.currentSelection;
-    if (!selection || selection.kind !== 'AREA') return;
+    const trackState = this.trackState;
+    if (!selection || selection.kind !== 'AREA' || trackState === undefined) {
+      return;
+    }
     const selectedArea = globals.state.areas[selection.areaId];
-    if (selectedArea.tracks.includes(this.trackState.id)) {
+    if (selectedArea.tracks.includes(trackState.id)) {
       const timeScale = localState.timeScale;
       ctx.fillStyle = 'rgba(131, 152, 230, 0.3)';
       ctx.fillRect(
@@ -320,7 +338,9 @@
         size.height);
 
     ctx.translate(TRACK_SHELL_WIDTH, 0);
-    this.track.render(ctx);
+    if (this.track !== undefined) {
+      this.track.render(ctx);
+    }
     ctx.restore();
 
     this.highlightIfTrackSelected(ctx, size);
@@ -392,6 +412,9 @@
 
   getSliceRect(tStart: number, tDur: number, depth: number): SliceRect
       |undefined {
+    if (this.track === undefined) {
+      return undefined;
+    }
     return this.track.getSliceRect(tStart, tDur, depth);
   }
 }
diff --git a/ui/src/tracks/thread_state/controller.ts b/ui/src/tracks/thread_state/controller.ts
index ca8355f..bc7c917 100644
--- a/ui/src/tracks/thread_state/controller.ts
+++ b/ui/src/tracks/thread_state/controller.ts
@@ -68,6 +68,7 @@
       select
         (ts + ${bucketNs / 2}) / ${bucketNs} * ${bucketNs} as tsq,
         ts,
+        state = 'S' as is_sleep,
         max(dur) as dur,
         ifnull(cast(cpu as integer), -1) as cpu,
         state,
@@ -77,7 +78,7 @@
       where
         ts >= ${startNs - this.maxDurNs} and
         ts <= ${endNs}
-      group by tsq
+      group by tsq, is_sleep
       order by tsq
     `;
 
diff --git a/ui/src/tracks/thread_state/frontend.ts b/ui/src/tracks/thread_state/frontend.ts
index 62cd005..9372588 100644
--- a/ui/src/tracks/thread_state/frontend.ts
+++ b/ui/src/tracks/thread_state/frontend.ts
@@ -69,6 +69,14 @@
     ctx.font = '10px Roboto Condensed';
 
     for (let i = 0; i < data.starts.length; i++) {
+      // NOTE: Unlike userspace and scheduling slices, thread state slices are
+      // allowed to overlap; specifically, sleeping slices are allowed to
+      // overlap with non-sleeping slices. We do this because otherwise
+      // sleeping slices generally dominate traces making it seem like there are
+      // no running/runnable etc. slices until you zoom in. By drawing both,
+      // we get a more accurate representation of the trace and prevent weird
+      // artifacts when zooming.
+      // See b/201793731 for an example of why we do this.
       const tStart = data.starts[i];
       const tEnd = data.ends[i];
       const state = data.strings[data.state[i]];
@@ -80,6 +88,7 @@
       if (state === 'x') continue;
       const rectStart = timeScale.timeToPx(tStart);
       const rectEnd = timeScale.timeToPx(tEnd);
+      const rectWidth = rectEnd - rectStart;
 
       const currentSelection = globals.state.currentSelection;
       const isSelected = currentSelection &&
@@ -94,7 +103,6 @@
       }
       ctx.fillStyle = colorStr;
 
-      const rectWidth = rectEnd - rectStart;
       ctx.fillRect(rectStart, MARGIN_TOP, rectWidth, RECT_HEIGHT);
 
       // Don't render text when we have less than 10px to play with.