[sysmem] Add sysmem unused_page_check
We'll be checking unused pages to make sure they're
not being modified by DMA-write-after-free. The
logging to Cobalt will be buffered to avoid spamming
Cobalt.
Bug: fxbug.dev/86491
Change-Id: I5bd391900de0f3d56fdf59e34e54916dc5938e15
Reviewed-on: https://fuchsia-review.googlesource.com/c/cobalt-registry/+/594001
Reviewed-by: Cameron Dale <camrdale@google.com>
Reviewed-by: John Bauman <jbauman@google.com>
Privacy-Approval: Cameron Dale <camrdale@google.com>
Commit-Queue: Dustin Green <dustingreen@google.com>
(cherry picked from commit e315d0481fb36fe978b75f2e53b855d66ce5f4a1)
diff --git a/fuchsia/media/metrics.yaml b/fuchsia/media/metrics.yaml
index f62d037..fc06267 100644
--- a/fuchsia/media/metrics.yaml
+++ b/fuchsia/media/metrics.yaml
@@ -392,7 +392,6 @@
meta_data:
max_release_stage: GA
expiration_date: "2021/08/18"
- # also_log_locally: true
################################################################################
# audio_objects_created
@@ -624,7 +623,6 @@
meta_data:
max_release_stage: GA
expiration_date: "2021/08/18"
- # also_log_locally: true
- id: 109
metric_name: stream_processor_events_2_migrated
@@ -665,7 +663,6 @@
meta_data:
max_release_stage: GA
expiration_date: "2021/08/18"
- # also_log_locally: true
################################################################################
# audio_thermal_state_duration
diff --git a/fuchsia/sysmem/metrics.yaml b/fuchsia/sysmem/metrics.yaml
new file mode 100644
index 0000000..3809b24
--- /dev/null
+++ b/fuchsia/sysmem/metrics.yaml
@@ -0,0 +1,139 @@
+# Copyright 2021 The Fuchsia Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+################################################################################
+# Cobalt Project: sysmem
+################################################################################
+
+metric_definitions:
+
+#####################################################################
+# Next Metric ID: 3
+#####################################################################
+
+################################################################################
+# unused_page_check_old
+#
+# TODO: Log directly to unused_page_check, once it's clear we won't be merging
+# this (and associated CLs) into any branch that wants to log via Cobalt 1.0.
+#
+# When a sysmem page becomes unused, sysmem puts a pattern into the page. When
+# the pattern is later checked, this metric tracks successful page checks and
+# failed page checks.
+#
+# Pages are checked when allocations occur, and every 10 minutes. Successful
+# checks are buffered for 30 minutes before being flushed to Cobalt. Failed
+# checks shouldn't happen, but if they do, they're buffered for 5 seconds before
+# being flushed to Cobalt.
+#
+# Expected frequency: Batch logging frequency capped at no more than once every
+# 5 seconds.
+################################################################################
+- id: 1
+ replacement_metric_id: 2
+ metric_name: unused_page_check_old
+ metric_type: EVENT_COUNT
+ metric_dimensions: &upc_dimensions
+ - dimension: event
+ event_codes:
+ # We performed a pattern check (typically of several pages) and the
+ # pattern was intact ...
+ 1: PatternCheckOk
+ # ... or the pattern was not intact.
+ 2: PatternCheckFailed
+ # Pattern checking is enabled and can log to Cobalt. A device sends
+ # this event as soon as sysmem is able to create a Cobalt logger, if
+ # and only if the device has pattern checking enabled. The intent is
+ # a sanity check that we're getting full-stack metric connectivity
+ # (for this particular metric) even if many devices don't see much
+ # sysmem churn.
+ 3: Connectivity
+ reports:
+ - report_name: device_histogram
+ id: 1
+ report_type: PER_DEVICE_HISTOGRAM
+ int_buckets: &upcdh_buckets
+ exponential:
+ floor: 0
+ num_buckets: 48
+ initial_step: 1
+ step_multiplier: 2
+ window_size: [1]
+ aggregation_type: SUM
+ system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
+ - report_name: device_stats
+ id: 2
+ report_type: PER_DEVICE_NUMERIC_STATS
+ window_size: [1]
+ aggregation_type: SUM
+ system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
+ - report_name: fleet_histogram
+ id: 3
+ report_type: INT_RANGE_HISTOGRAM
+ int_buckets: &upcfh_buckets
+ exponential:
+ floor: 0
+ num_buckets: 63
+ initial_step: 1
+ step_multiplier: 2
+ system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
+ - report_name: fleet_stats
+ id: 4
+ report_type: NUMERIC_AGGREGATION
+ percentiles: [0, 1, 2, 3, 4, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 96, 97, 98, 99, 100]
+ system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
+ meta_data:
+ max_release_stage: GA
+ expiration_date: "2022/10/14"
+
+################################################################################
+# unused_page_check
+#
+# When a sysmem page becomes unused, sysmem puts a pattern into the page. When
+# the pattern is later checked, this metric tracks successful page checks and
+# failed page checks.
+#
+# Pages are checked when allocations occur, and every 10 minutes. Successful
+# checks are buffered for 30 minutes before being flushed to Cobalt. Failed
+# checks shouldn't happen, but if they do, they're buffered for 5 seconds before
+# being flushed to Cobalt.
+#
+# Expected frequency: Batch logging frequency capped at no more than once every
+# 5 seconds.
+################################################################################
+- id: 2
+ metric_name: unused_page_check
+ metric_type: OCCURRENCE
+ metric_semantics: [USAGE_COUNTING]
+ metric_dimensions: *upc_dimensions
+ reports:
+ - report_name: device_histogram
+ id: 1
+ report_type: UNIQUE_DEVICE_HISTOGRAMS
+ system_profile_selection: SELECT_LAST
+ int_buckets: *upcdh_buckets
+ local_aggregation_period: WINDOW_1_DAY
+ privacy_level: NO_ADDED_PRIVACY
+ system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
+ - report_name: device_stats
+ id: 2
+ report_type: UNIQUE_DEVICE_NUMERIC_STATS
+ local_aggregation_period: WINDOW_1_DAY
+ privacy_level: NO_ADDED_PRIVACY
+ system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
+ - report_name: fleet_histogram
+ id: 3
+ report_type: HOURLY_VALUE_HISTOGRAMS
+ system_profile_selection: SELECT_LAST
+ int_buckets: *upcfh_buckets
+ privacy_level: NO_ADDED_PRIVACY
+ system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
+ - report_name: fleet_stats
+ id: 4
+ report_type: HOURLY_VALUE_NUMERIC_STATS
+ privacy_level: NO_ADDED_PRIVACY
+ system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
+ meta_data:
+ max_release_stage: GA
+ expiration_date: "2022/10/14"
diff --git a/projects.yaml b/projects.yaml
index 9f85098..7dcf774 100644
--- a/projects.yaml
+++ b/projects.yaml
@@ -26,7 +26,7 @@
# When adding new projects, also make sure to update the internal list of
# Cobalt projects: go/cobalt-internal-registry-metadata
- # Next project ID: 15
+ # Next project ID: 16
projects:
################################################################################
@@ -111,3 +111,6 @@
- project_name: audio_drivers
project_id: 14
project_contact: fuchsia-cobalt-dashboard-owners@google.com
+ - project_name: sysmem
+ project_id: 15
+ project_contact: fuchsia-cobalt-dashboard-owners@google.com