blob: b551c5336070c8d87744be0a4a5fa110a614c6c2 [file] [log] [blame]
# Copyright 2019 The Fuchsia Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
################################################################################
# Cobalt Project: local_storage
################################################################################
metric_definitions:
################################################################################
# Corruption Event
#
# A corruption event logs a '1' every time a component detects corruption. This
# can be either data or metadata corruption depending on the source.
################################################################################
- id: 48
metric_name: corruption_events_migrated
# This will always log a single event (1).
metric_type: OCCURRENCE
metric_semantics: [USAGE_COUNTING]
# Each event carries two dimensions: "source" names the storage component
# reporting the corruption, and "corruption_type" says whether data or
# metadata was affected.
metric_dimensions:
- dimension: "source"
event_codes:
0: Unknown
1: Fvm
2: BlobFs
3: MinFs
4: FxFs
- dimension: "corruption_type"
event_codes:
0: Unknown
1: Data
2: Metadata
reports:
############################################################################
# A fleet-wide summary: per-device numeric stats, aggregated locally over a
# one-day window.
# NOTE(review): this report's id is 2 while no report with id 1 is visible on
# this metric; presumably id 1 was retired. Confirm before reusing it.
############################################################################
- report_name: corruption_per_device
id: 2
report_type: UNIQUE_DEVICE_NUMERIC_STATS
local_aggregation_period: WINDOW_1_DAY
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
################################################################################
# Compression format
#
# Total file sizes (in bytes) per compression format. Calculated on mount.
#
# Currently only supported by blobfs. When blobfs is starting up after boot, we
# compute the total (uncompressed) sizes of all blobs per compression format,
# and log the counts to Cobalt.
#
################################################################################
- id: 49
metric_name: compression_format_migrated
# Integer byte count, tagged by the reporting filesystem ("source") and by the
# compression format whose blobs are being totalled ("format").
metric_type: INTEGER
metric_units: BYTES
metric_semantics: [DATA_SIZE]
metric_dimensions:
- dimension: "source"
event_codes:
0: Unknown
1: BlobFs
- dimension: "format"
event_codes:
0: Unknown
1: Uncompressed
2: LZ4
3: ZSTD
4: ZSTDSeekable
5: ZSTDChunked
reports:
############################################################################
# A per-device histogram report.
############################################################################
- report_name: compression_per_device_histogram
id: 1
report_type: UNIQUE_DEVICE_HISTOGRAMS
system_profile_selection: SELECT_LAST
##########################################################################
# Since we only log the count once per boot, we use MAX here in case a
# device reboots several times during the day.
##########################################################################
local_aggregation_procedure: MAX_PROCEDURE
# 100 linear buckets, each 20 MiB wide. No floor is set for these buckets;
# presumably it defaults to 0, which would put the top bucket boundary at
# 2000 MiB. TODO confirm.
int_buckets:
linear:
step_size: 20971520 # 20 MiB (20 * 1024 * 1024 bytes)
num_buckets: 100
local_aggregation_period: WINDOW_1_DAY
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
################################################################################
# Page fault latency
#
# Histogram of latencies in nanoseconds for satisfying a page fault.
#
# Each time a page fault occurs (a client accesses a not-present page in a
# pager-backed VMO), the latency of the userspace fault handling (in
# nanoseconds) is measured and added to the histogram.
#
# Blobfs currently flushes this metric to Cobalt every five minutes (at the
# same time all other metrics are flushed.)
#
# Currently only supported by blobfs, and only when blobfs.userpager=true is set
# in the build.
#
################################################################################
- id: 50
metric_name: page_fault_latency_migrated
# Histogram-valued metric: each logged sample is a set of bucket counts over
# the int_buckets defined directly below.
metric_type: INTEGER_HISTOGRAM
metric_units: NANOSECONDS
metric_semantics: [LATENCY]
int_buckets:
exponential:
# Bucket lower bounds, following the convention of the bucket comments on
# the NAND metrics in this file:
# 0, 10us, 20us, 40us... 40.96ms, 81.92ms
floor: 0
num_buckets: 15
initial_step: 10000
step_multiplier: 2
# Samples are tagged by reporting filesystem, blob compression format, and
# read-ahead size.
metric_dimensions:
- dimension: "source"
event_codes:
0: Unknown
1: BlobFs
- dimension: "format"
event_codes:
0: Unknown
1: Uncompressed
2: LZ4
3: ZSTD
4: ZSTDSeekable
5: ZSTDChunked
# NOTE(review): unlike the other dimensions here, code 0 is a real value
# (8KiB) rather than Unknown.
- dimension: "read_ahead_size"
event_codes:
0: 8KiB
1: 16KiB
2: 32KiB
3: 64KiB
4: 128KiB
5: 256KiB
6: 512KiB
7: LargerThan512KiB
reports:
############################################################################
# A fleet-wide histogram report
############################################################################
- report_name: page_fault_latency_histogram
id: 1
report_type: FLEETWIDE_HISTOGRAMS
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
################################################################################
# On-disk version
#
# The component string field tracks the version in one of two formats:
# * `${major}/${oldest_rev}` for Blobfs, Minfs
# * `${major}.${minor}` for Fxfs
# A dimension is used to track the real storage sub-component. The enumeration of possible version
# number combinations is tracked by versions.txt. This metric is incremented upon every mount.
################################################################################
- id: 61
metric_name: version_migrated
metric_type: STRING
# File enumerating the version strings that may be logged for this metric.
string_candidate_file: fuchsia/local_storage/versions.txt
metric_semantics: [USAGE_COUNTING]
metric_dimensions:
# Anchored as source_dimension so that later metrics in this file can reuse
# the same storage sub-component codes via *source_dimension.
- &source_dimension
dimension: source
event_codes:
0: unknown
1: fvm
2: blobfs
3: minfs
4: fxfs
reports:
# Counts of the logged version strings.
- report_name: version_mount_counts
id: 1
report_type: STRING_COUNTS
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
# NOTE(review): presumably caps the number of distinct strings buffered per
# device; confirm against the Cobalt report documentation.
string_buffer_max: 10
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
#####################################################################
# Total inodes
#
# Calculated on mount.
#
# Total number of inodes, either free or in use, in the system.
# This metric will help us understand the size of the inode table. This number
# might also help us better understand other fragmentation metrics, namely
# file_in_use and extent_containers_in_use.
#####################################################################
- id: 62
metric_name: total_inodes_migrated
metric_type: INTEGER
# Anchored as fragmentation_dimensions and reused by the other fragmentation
# metrics below; it contains only the shared source dimension.
metric_dimensions: &fragmentation_dimensions
- *source_dimension
metric_units_other: "inodes"
metric_semantics: [MEMORY_USAGE]
reports:
# Per-device numeric stats over the daily maximum of the logged inode count.
- report_name: total_inodes_count
id: 1
report_type: UNIQUE_DEVICE_NUMERIC_STATS
local_aggregation_procedure: MAX_PROCEDURE
local_aggregation_period: WINDOW_1_DAY
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
#####################################################################
# Files in use
#
# Calculated on mount.
#
# Total number of files, (or blobs in case of blobfs), in use in the system.
# This number may vary largely across builds or when the device has additional
# packages installed other than system base packages.
#####################################################################
- id: 63
metric_name: file_in_use_migrated
metric_type: INTEGER
# Alias of the fragmentation_dimensions anchor defined on
# total_inodes_migrated (source dimension only).
metric_dimensions: *fragmentation_dimensions
metric_units_other: "files"
metric_semantics: [MEMORY_USAGE]
reports:
# Per-device numeric stats over the daily maximum of the logged file count.
- report_name: files_in_use_count
id: 1
report_type: UNIQUE_DEVICE_NUMERIC_STATS
local_aggregation_procedure: MAX_PROCEDURE
local_aggregation_period: WINDOW_1_DAY
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
#####################################################################
# Extent containers in use
#
# Calculated on mount.
#
# A file/blob may use one or more extents, each a pointer to a set of blocks.
# Extent containers are collections of one or more extents. A larger number of
# extent containers in use indicates higher fragmentation of the storage.
#####################################################################
- id: 64
metric_name: extent_containers_in_use_migrated
metric_type: INTEGER
# Alias of the fragmentation_dimensions anchor defined on
# total_inodes_migrated (source dimension only).
metric_dimensions: *fragmentation_dimensions
metric_units_other: "extent containers"
metric_semantics: [MEMORY_USAGE]
reports:
# Per-device numeric stats over the daily maximum of the logged
# extent-container count.
- report_name: extent_containers_in_use_count
id: 1
report_type: UNIQUE_DEVICE_NUMERIC_STATS
local_aggregation_procedure: MAX_PROCEDURE
local_aggregation_period: WINDOW_1_DAY
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
################################################################################
# Extents per file fragmentation
#
# A histogram of extents used by a file/blob calculated on mount.
#
# The histogram presents fragmentation at file/blob level. This metric will help
# in understanding how defragmentation of certain files will help either to
# recover extents or to improve read performance.
################################################################################
- id: 65
metric_name: extents_per_file_migrated
metric_type: INTEGER_HISTOGRAM
# Alias of the fragmentation_dimensions anchor defined on
# total_inodes_migrated (source dimension only).
metric_dimensions: *fragmentation_dimensions
# NOTE(review): the unit string is "extent containers", identical to
# extent_containers_in_use_migrated, although this metric is named for and
# described as counting extents per file. Possibly a copy-paste; confirm
# before changing, since the string may be consumed downstream.
metric_units_other: "extent containers"
metric_semantics: [MEMORY_USAGE]
int_buckets:
exponential:
# Bucket lower bounds, following the convention of the bucket comments on
# the NAND metrics in this file:
# 0, 10, 20, 40... 1280, 2560
floor: 0
num_buckets: 10
initial_step: 10
step_multiplier: 2
reports:
############################################################################
# A fleet-wide histogram report
############################################################################
- report_name: fragmentation_extents_per_file_histogram
id: 1
report_type: FLEETWIDE_HISTOGRAMS
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
################################################################################
# In use block fragmentation
#
# A histogram of used contiguous blocks (representing contiguous offsets of a
# file) calculated on mount.
#
# The histogram shows used block fragmentation. This metric will help
# in understanding the effects of the block allocation policy. The metric will
# also help in targeting what blocks to move if we decide to defragment the
# system.
################################################################################
- id: 66
metric_name: in_use_fragments_migrated
metric_type: INTEGER_HISTOGRAM
# Alias of the fragmentation_dimensions anchor defined on
# total_inodes_migrated (source dimension only).
metric_dimensions: *fragmentation_dimensions
metric_units_other: "fragments"
metric_semantics: [MEMORY_USAGE]
int_buckets:
exponential:
# Bucket lower bounds, following the convention of the bucket comments on
# the NAND metrics in this file:
# 0, 10, 20, 40... 1280, 2560
floor: 0
num_buckets: 10
initial_step: 10
step_multiplier: 2
reports:
############################################################################
# A fleet-wide histogram report
############################################################################
- report_name: fragmentation_in_use_fragments_histogram
id: 1
report_type: FLEETWIDE_HISTOGRAMS
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
################################################################################
# Free fragments.
#
# A histogram of contiguous free blocks calculated on each mount.
#
# The histogram shows free block fragmentation. This metric will help
# in understanding the likelihood of running out of extents during an OTA.
################################################################################
- id: 67
metric_name: free_fragments_migrated
metric_type: INTEGER_HISTOGRAM
# Alias of the fragmentation_dimensions anchor defined on
# total_inodes_migrated (source dimension only).
metric_dimensions: *fragmentation_dimensions
metric_units_other: "fragments"
metric_semantics: [MEMORY_USAGE]
int_buckets:
exponential:
# Bucket lower bounds, following the convention of the bucket comments on
# the NAND metrics in this file:
# 0, 10, 20, 40... 1280, 2560
floor: 0
num_buckets: 10
initial_step: 10
step_multiplier: 2
reports:
############################################################################
# A fleet-wide histogram report
############################################################################
- report_name: fragmentation_free_fragments_histogram
id: 1
report_type: FLEETWIDE_HISTOGRAMS
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
################################################################################
# Vnode Operation Latency
#
# A histogram of the latency of Vnode-level operations, in microseconds, logged
# every few minutes. This is a replacement for metrics 35-47.
################################################################################
- id: 70
metric_name: vnode_operation_latency
metric_type: INTEGER_HISTOGRAM
# Two dimensions: the shared source dimension (aliased from version_migrated)
# plus the Vnode operation being timed. The pair is anchored as
# vnode_op_dimensions for reuse.
metric_dimensions: &vnode_op_dimensions
- *source_dimension
- dimension: operation
event_codes:
0: unknown
1: append
2: close
3: create
4: get_attr
5: link
6: lookup
7: read
8: read_dir
9: set_attr
10: sync
11: truncate
12: unlink
13: write
metric_units: MICROSECONDS
metric_semantics: [LATENCY]
int_buckets:
exponential:
# Bucket lower bounds in microseconds, following the convention of the bucket
# comments on the NAND metrics in this file:
# 0, 5, 10, 20... 40960, 81920
floor: 0
initial_step: 5
step_multiplier: 2
num_buckets: 16
reports:
############################################################################
# A fleet-wide histogram report
############################################################################
- report_name: histogram
id: 1
report_type: FLEETWIDE_HISTOGRAMS
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
# Next ID: 71
################################################################################
# FTL and Nand Metrics
# Reserved Range: [2000, 3000)
################################################################################
# FTL: Wear Count
#
# A counter representing the highest wear count of the FTL driver.
#
# A NAND block has an expected lifetime in erase cycles. An erase must occur
# before anything can be written (updating data).
#
# This count is the highest number of erase cycles per block, across all
# erase blocks in the NAND device.
#
# This metric allows observing how the FTL wear leveling algorithm is performing,
# such that the device lifetime is maximized, because the higher the wear the
# more probable the block is expected to fail.
#
# Sampling Frequency: 1 sample per hour.
################################################################################
- id: 2003
metric_name: ftl_wear_count_migrated
# Integer sample with no dimensions; two reports are derived from it.
metric_type: INTEGER
metric_units_other: wear count
metric_semantics: [USAGE_COUNTING]
reports:
############################################################################
# A fleet-wide summary: per-device numeric stats over the daily maximum of
# the logged wear count.
############################################################################
- report_name: ftl_wear_count_per_device
id: 1
report_type: UNIQUE_DEVICE_NUMERIC_STATS
local_aggregation_procedure: MAX_PROCEDURE
local_aggregation_period: WINDOW_1_DAY
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
############################################################################
# A fleet-wide histogram of the logged wear counts.
# NOTE(review): the report name says per_device, but the report type is
# FLEETWIDE_HISTOGRAMS and no local aggregation is configured. Confirm which
# is intended.
############################################################################
- report_name: ftl_wear_count_histogram_per_device
id: 2
report_type: FLEETWIDE_HISTOGRAMS
int_buckets:
exponential:
# 0, 1, 2, 4, 8... 131072, 262144
floor: 0
num_buckets: 20
initial_step: 1
step_multiplier: 2
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
################################################################################
# FTL: Block Operation Count
#
# A counter representing the number of block operations the FTL receives.
#
# Allows measuring the IO pressure on the FTL. When combined with the
# |ftl_nand_operation_count| an approximate operation amplification
# can be estimated.
#
# Sampling Frequency: 1 sample per hour.
#
# |operation_type|: Refers to the type of block operation received in the FTL Layer.
#
################################################################################
- id: 2004
metric_name: ftl_block_operation_count_migrated
metric_type: INTEGER
metric_semantics: [USAGE_COUNTING]
metric_units_other: "block operations"
metric_dimensions:
# Type of block operation received by the FTL.
- dimension: operation_type
event_codes:
0: Unknown
1: BLOCK_READ
2: BLOCK_WRITE
3: BLOCK_FLUSH
4: BLOCK_TRIM
reports:
############################################################################
# A fleet-wide summary: per-device numeric stats over the daily sum of the
# logged counts.
############################################################################
- report_name: ftl_block_operation_per_device
id: 1
report_type: UNIQUE_DEVICE_NUMERIC_STATS
local_aggregation_procedure: SUM_PROCEDURE
local_aggregation_period: WINDOW_1_DAY
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
############################################################################
# Same as above, but over the daily maximum of the logged counts rather than
# their sum.
############################################################################
- report_name: ftl_block_max_operations_per_device
id: 2
report_type: UNIQUE_DEVICE_NUMERIC_STATS
local_aggregation_procedure: MAX_PROCEDURE
local_aggregation_period: WINDOW_1_DAY
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
############################################################################
# A fleet-wide histogram of the logged counts.
# NOTE(review): the report name says per_device, but the report type is
# FLEETWIDE_HISTOGRAMS. Confirm which is intended.
############################################################################
- report_name: ftl_block_operation_histogram_per_device
id: 3
report_type: FLEETWIDE_HISTOGRAMS
int_buckets:
exponential:
# 0, 1, 2, 4, 8... 131072, 262144
floor: 0
num_buckets: 20
initial_step: 1
step_multiplier: 2
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
################################################################################
# FTL: NAND Operation Count
#
# A counter representing the number of NAND operations the FTL issues.
#
# Allows measuring the IO pressure on the NAND. When combined with the
# |ftl_block_operation_count| an approximate operation amplification
# can be estimated.
#
# Sampling Frequency: 1 sample per hour.
#
# |operation_type|: Refers to the type of NAND operation issued to the NAND layer by the ftl,
# in response to a given operation type.
# |source_operation_type|: Refers to the type of block operation received in the FTL Layer.
#
#
################################################################################
- id: 2005
metric_name: ftl_nand_operation_count_migrated
metric_type: INTEGER
metric_units_other: "nand operations"
metric_semantics: [USAGE_COUNTING]
metric_dimensions:
# Type of NAND operation issued to the NAND layer by the FTL.
- dimension: operation_type
event_codes:
0: Unknown
1: NAND_READ
2: NAND_WRITE
3: NAND_ERASE
# Type of block operation, received by the FTL, that triggered the NAND
# operation. The codes match operation_type on
# ftl_block_operation_count_migrated.
- dimension: source_operation_type
event_codes:
0: Unknown
1: BLOCK_READ
2: BLOCK_WRITE
3: BLOCK_FLUSH
4: BLOCK_TRIM
reports:
############################################################################
# A fleet-wide summary: per-device numeric stats over the daily sum of the
# logged counts.
############################################################################
- report_name: ftl_nand_operations_per_device
id: 1
report_type: UNIQUE_DEVICE_NUMERIC_STATS
local_aggregation_procedure: SUM_PROCEDURE
local_aggregation_period: WINDOW_1_DAY
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
############################################################################
# Same as above, but over the daily maximum of the logged counts rather than
# their sum.
############################################################################
- report_name: ftl_nand_operation_max_per_device
id: 2
report_type: UNIQUE_DEVICE_NUMERIC_STATS
local_aggregation_procedure: MAX_PROCEDURE
local_aggregation_period: WINDOW_1_DAY
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
############################################################################
# A fleet-wide histogram of the logged counts.
# NOTE(review): the report name says per_device, but the report type is
# FLEETWIDE_HISTOGRAMS. Confirm which is intended.
############################################################################
- report_name: ftl_nand_operation_histogram_per_device
id: 3
report_type: FLEETWIDE_HISTOGRAMS
int_buckets:
exponential:
# 0, 1, 2, 4, 8... 131072, 262144
floor: 0
num_buckets: 20
initial_step: 1
step_multiplier: 2
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
################################################################################
# Nand bit flips corrected by ECC per call.
#
# A histogram of the number of bitflips corrected by ECC on a read. Excess bit
# flips that cannot be corrected will be set as the max number of correctable
# plus one. For all current use cases, 30 is the max correctable bits.
#
# Provides insight into the frequency and extremity of bit flips observed.
################################################################################
- id: 2006
metric_name: nand_read_ecc_bit_flips
metric_type: INTEGER_HISTOGRAM
metric_semantics: [USAGE_COUNTING]
metric_units_other: "bitflips"
int_buckets:
linear:
# 0, 1, 2... 29, 30, 31
# One bucket per bit-flip count. With 30 as the maximum correctable count,
# 31 (max plus one) is the value used for reads with uncorrectable excess
# bit flips.
step_size: 1
num_buckets: 32
reports:
############################################################################
# A fleet-wide histogram of bit flips on each read.
############################################################################
- report_name: nand_ecc_bit_flips_histogram
id: 1
report_type: FLEETWIDE_HISTOGRAMS
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
################################################################################
# Repeated read attempts to nand before a successful read, or giving up.
#
# A histogram of the number of read attempts used on a single read, always at
# least 1 for succeeding on the first attempt, and if it gives up it reports
# MAX_ULONG and ends up in the overflow bucket.
#
# For insight into how many consecutive read attempts are actually productive,
# and how much benefit we get from our current maximum.
################################################################################
- id: 2007
metric_name: nand_read_attempts_per_read
metric_type: INTEGER_HISTOGRAM
metric_semantics: [USAGE_COUNTING]
metric_units_other: "attempts"
int_buckets:
exponential:
# 0, 1, 2, 4, 8... 64, 128
# A read that gives up is reported as MAX_ULONG, which falls outside these
# buckets and lands in the overflow bucket.
floor: 0
num_buckets: 9
initial_step: 1
step_multiplier: 2
reports:
############################################################################
# A fleet-wide histogram of the number of attempts required to read.
############################################################################
- report_name: attempts_histogram
id: 1
report_type: FLEETWIDE_HISTOGRAMS
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
################################################################################
# Number of failed read attempts to the nand.
#
# A count of how many read attempts are sent to the nand which result in
# failure.
#
# For insight into how fleetwide failures might be clustered in a subset of
# devices, and how bad some of those devices might be. Expecting daily values
# to be normally >1,000 but <100,000.
################################################################################
- id: 2008
metric_name: nand_read_attempt_failures
# Occurrence metric with no dimensions.
metric_type: OCCURRENCE
metric_semantics: [USAGE_COUNTING]
reports:
############################################################################
# A daily histogram of read failures per device.
############################################################################
- report_name: failures_histogram
id: 1
report_type: UNIQUE_DEVICE_HISTOGRAMS
local_aggregation_period: WINDOW_1_DAY
int_buckets:
exponential:
# 0, 1, 2, 4, 8... 32768, 65536
# Sized for the expected daily values of >1,000 but <100,000 per device.
floor: 0
num_buckets: 18
initial_step: 1
step_multiplier: 2
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_selection: SELECT_LAST
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
################################################################################
# Number of failed reads where we exhausted all attempts
#
# A count of reads for which none of the attempts sent to the nand succeeded.
#
# For insight into how fleetwide failures might be clustered in a subset of
# devices, and how bad some of those devices might be. Expecting daily values
# to be normally >1 but <100. Could in theory be higher if instances of this
# didn't usually result in the system becoming inoperable.
################################################################################
- id: 2009
metric_name: nand_read_attempts_exhausted
# Occurrence metric with no dimensions.
metric_type: OCCURRENCE
metric_semantics: [USAGE_COUNTING]
reports:
############################################################################
# A daily histogram of exhausted read attempts per device.
############################################################################
- report_name: exhausted_histogram
id: 1
report_type: UNIQUE_DEVICE_HISTOGRAMS
local_aggregation_period: WINDOW_1_DAY
int_buckets:
exponential:
# 0, 1, 2, 4, 8... 64, 128
# Sized for the expected daily values of >1 but <100 per device.
floor: 0
num_buckets: 9
initial_step: 1
step_multiplier: 2
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_selection: SELECT_LAST
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
################################################################################
# Number of bad blocks found on a device
#
# A count of how many blocks are found bad as marked by the initial factory
# mark, and how many are found and marked as bad during run time.
#
# For insight into the fleetwide state regarding the availability of the spare
# block pools, determined by how many blocks are currently marked bad. Also the
# split of initial bad blocks vs running bad blocks. For current use-cases 44
# is really the maximum.
################################################################################
- id: 2010
metric_name: ftl_bad_blocks
metric_type: INTEGER
metric_semantics: [USAGE_COUNTING]
metric_units_other: "blocks"
metric_dimensions:
# Distinguishes blocks carrying the initial factory bad mark (INITIAL) from
# blocks found and marked bad during run time (RUNNING).
- dimension: bad_block_type
event_codes:
0: Unknown
1: INITIAL
2: RUNNING
reports:
############################################################################
# A daily histogram of bad block information per device, built from each
# device's daily maximum logged value.
############################################################################
- report_name: bad_blocks_histogram
id: 1
report_type: UNIQUE_DEVICE_HISTOGRAMS
local_aggregation_period: WINDOW_1_DAY
local_aggregation_procedure: MAX_PROCEDURE
int_buckets:
exponential:
# 0, 1, 2, 4, 8... 32, 64
# Covers the stated practical maximum of 44 bad blocks.
floor: 0
num_buckets: 8
initial_step: 1
step_multiplier: 2
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_selection: SELECT_LAST
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
################################################################################
# Intermittent failures reading the last page in a map block
#
# Counts of occurrences where the FTL has encountered a failure reading a page
# in a map block that has subsequently succeeded. A dimension indicates the
# reason for the initial failure.
################################################################################
- id: 2011
metric_name: ftl_map_block_end_page_failures
metric_type: OCCURRENCE
metric_semantics: [USAGE_COUNTING]
metric_dimensions:
# Reason for the initial read failure.
- dimension: reason
event_codes:
# Whilst using event code 0 is not recommended, the reporting code always
# reports a code and reason 0 is used. This metric is also intended to
# be temporary.
0: Reason 0 (invalid page)
1: Reason 1 (erased page)
2: Reason 2 (uncorrectable ecc error)
3: Reason 3 (bad block-count)
4: Reason 4 (bad map page number)
5: Reason 5 (reserved for future use)
6: Reason 6 (reserved for future use)
7: Reason 7 (reserved for future use)
8: Reason 8 (reserved for future use)
9: Reason 9 (reserved for future use)
reports:
############################################################################
# Fleet-wide occurrence counts of these failures, broken down by reason.
############################################################################
- report_name: counts
id: 1
report_type: FLEETWIDE_OCCURRENCE_COUNTS
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
# Next ID: 2012
################################################################################
# FVM Metrics
# Reserved Range: [3000, 4000)
################################################################################
# FVM: Slice Allocation Counts
#
# Collects per-partition slice allocation counts from fvm. This assumes there are two fvm
# partitions - blobfs and minfs.
#
# Sampling Frequency: 1 sample per hour.
#
# |partition|: The partition allocating the slices.
################################################################################
- id: 3001
metric_name: fvm_slice_allocations_migrated
metric_type: OCCURRENCE
metric_semantics: [USAGE_COUNTING]
metric_dimensions:
# The partition allocating the slices.
# NOTE(review): unlike most dimensions in this file, code 0 is a real value
# (Blobfs) rather than Unknown, so an unset code would read as Blobfs.
# Confirm this is acceptable.
- dimension: partition
event_codes:
0: Blobfs
1: Minfs
reports:
# Hourly numeric stats of the logged slice allocation counts.
# NOTE(review): this report's id is 3000, whereas the other reports in this
# file use ids 1-3. Confirm this is intentional.
- report_name: fvm_slice_allocations
id: 3000
report_type: HOURLY_VALUE_NUMERIC_STATS
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
# Next ID: 3002
################################################################################
# Space Metrics
# Reserved Range: [4000, 5000)
################################################################################
# Data Bytes Used
#
# Used to track the number of bytes a partition is using for storing data.
# This is generally updated on a regular interval via Lapis, which queries
# filesystem information from fshost.
################################################################################
- id: 4001
metric_name: data_bytes_used_migrated
metric_type: INTEGER
metric_units: BYTES
metric_semantics: [USAGE_COUNTING]
# Anchored as space_dimensions and reused by the other space metrics below; it
# contains only the shared source dimension.
metric_dimensions: &space_dimensions
- *source_dimension
reports:
# Per-device numeric stats over the daily maximum of the logged byte count.
- report_name: per_device_daily_max
id: 1
report_type: UNIQUE_DEVICE_NUMERIC_STATS
local_aggregation_procedure: MAX_PROCEDURE
local_aggregation_period: WINDOW_1_DAY
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
################################################################################
# Data Bytes Allocated
#
# Used to track the number of bytes a partition has allocated (free + used) for
# storing data.
# This is generally updated on a regular interval via Lapis, which queries
# filesystem information from fshost.
################################################################################
- id: 4003
metric_name: data_bytes_allocated_migrated
metric_type: INTEGER
metric_units: BYTES
metric_semantics: [USAGE_COUNTING]
# Alias of the space_dimensions anchor defined on data_bytes_used_migrated
# (source dimension only).
metric_dimensions: *space_dimensions
reports:
# Per-device numeric stats over the daily maximum of the logged byte count.
- report_name: per_device_daily_max
id: 1
report_type: UNIQUE_DEVICE_NUMERIC_STATS
local_aggregation_procedure: MAX_PROCEDURE
local_aggregation_period: WINDOW_1_DAY
privacy_level: NO_ADDED_PRIVACY
privacy_mechanism: DE_IDENTIFICATION
system_profile_field: [BOARD_NAME, PRODUCT_NAME, SYSTEM_VERSION, CHANNEL]
meta_data:
max_release_stage: GA
expiration_date: "2025/04/05"
################################################################################
# Inodes Used
#
# Used to track the number of inodes a partition has in use.
# This is generally updated on a regular interval via Lapis, which queries
# filesystem information from fshost.
################################################################################
- id: 4005
  metric_name: inodes_used_migrated
  metric_type: INTEGER
  # Free-form unit label for this INTEGER metric.
  metric_units_other: "inodes"
  metric_semantics:
    - USAGE_COUNTING
  # Alias of the dimension list anchored as `space_dimensions`.
  metric_dimensions: *space_dimensions
  reports:
    # Aggregated with MAX_PROCEDURE over a WINDOW_1_DAY period.
    - report_name: per_device_daily_max
      id: 1
      report_type: UNIQUE_DEVICE_NUMERIC_STATS
      local_aggregation_procedure: MAX_PROCEDURE
      local_aggregation_period: WINDOW_1_DAY
      privacy_level: NO_ADDED_PRIVACY
      privacy_mechanism: DE_IDENTIFICATION
      system_profile_field:
        - BOARD_NAME
        - PRODUCT_NAME
        - SYSTEM_VERSION
        - CHANNEL
  meta_data:
    max_release_stage: GA
    expiration_date: "2025/04/05"
################################################################################
# Inodes Allocated
#
# Used to track the number of inodes a partition has capacity for (free + used).
# This is generally updated on a regular interval via Lapis, which queries
# filesystem information from fshost.
################################################################################
- id: 4007
  metric_name: inodes_allocated_migrated
  metric_type: INTEGER
  # Free-form unit label for this INTEGER metric.
  metric_units_other: "inodes"
  metric_semantics:
    - USAGE_COUNTING
  # Alias of the dimension list anchored as `space_dimensions`.
  metric_dimensions: *space_dimensions
  reports:
    # Aggregated with MAX_PROCEDURE over a WINDOW_1_DAY period.
    - report_name: per_device_daily_max
      id: 1
      report_type: UNIQUE_DEVICE_NUMERIC_STATS
      local_aggregation_procedure: MAX_PROCEDURE
      local_aggregation_period: WINDOW_1_DAY
      privacy_level: NO_ADDED_PRIVACY
      privacy_mechanism: DE_IDENTIFICATION
      system_profile_field:
        - BOARD_NAME
        - PRODUCT_NAME
        - SYSTEM_VERSION
        - CHANNEL
  meta_data:
    max_release_stage: GA
    expiration_date: "2025/04/05"
################################################################################
# Out-Of-Space Events
#
# Per-filesystem count of times a device fails to extend the underlying volume
# after running out of space.
#
# The count can increase by at most 1 every 5 minutes, so the daily max is 288
# (24 hours * 12 five-minute intervals per hour).
################################################################################
- id: 4008
  metric_name: out_of_space_events
  metric_type: OCCURRENCE
  metric_semantics:
    - USAGE_COUNTING
  metric_dimensions:
    - *source_dimension
  reports:
    # Per-device histogram of the occurrence count over a WINDOW_1_DAY period.
    - report_name: per_device_daily_counts
      id: 1
      report_type: UNIQUE_DEVICE_HISTOGRAMS
      system_profile_selection: SELECT_LAST
      local_aggregation_period: WINDOW_1_DAY
      # Exponential buckets: 0, 1, 2, 4, 8, 16, 32, 64, 128, 256.
      int_buckets:
        exponential:
          floor: 0
          num_buckets: 10
          initial_step: 1
          step_multiplier: 2
      privacy_level: NO_ADDED_PRIVACY
      privacy_mechanism: DE_IDENTIFICATION
      system_profile_field:
        - BOARD_NAME
        - PRODUCT_NAME
        - SYSTEM_VERSION
        - CHANNEL
  meta_data:
    max_release_stage: GA
    expiration_date: "2025/04/05"
# Next ID: 4009
################################################################################
# Fxfs Metrics
# Reserved Range: [8000, 9000)
################################################################################
################################################################################
# Bytes Reserved
#
# Number of bytes which are internally reserved by Fxfs. See Fxfs'
# object_manager.rs for details.
#
# Updated via Sampler which (at the time of writing) polls every 5 minutes.
################################################################################
- id: 8000
  metric_name: fxfs_bytes_reserved
  metric_type: INTEGER
  metric_units: BYTES
  metric_semantics:
    - DATA_SIZE
  reports:
    # Aggregated with MAX_PROCEDURE over a WINDOW_1_DAY period.
    - report_name: per_device_daily_max
      id: 1
      report_type: UNIQUE_DEVICE_NUMERIC_STATS
      local_aggregation_procedure: MAX_PROCEDURE
      local_aggregation_period: WINDOW_1_DAY
      privacy_level: NO_ADDED_PRIVACY
      privacy_mechanism: DE_IDENTIFICATION
      system_profile_field:
        - BOARD_NAME
        - PRODUCT_NAME
        - SYSTEM_VERSION
        - CHANNEL
  meta_data:
    max_release_stage: GA
    expiration_date: "2025/04/05"
# Same as above, but as a percentage of the total disk size (integer, 0-100).
- id: 8001
  metric_name: fxfs_bytes_reserved_percentage
  metric_type: INTEGER
  # Free-form unit label for this INTEGER metric.
  metric_units_other: "percent"
  metric_semantics:
    - DATA_SIZE
  reports:
    # Aggregated with MAX_PROCEDURE over a WINDOW_1_DAY period.
    - report_name: per_device_daily_max
      id: 1
      report_type: UNIQUE_DEVICE_NUMERIC_STATS
      local_aggregation_procedure: MAX_PROCEDURE
      local_aggregation_period: WINDOW_1_DAY
      privacy_level: NO_ADDED_PRIVACY
      privacy_mechanism: DE_IDENTIFICATION
      system_profile_field:
        - BOARD_NAME
        - PRODUCT_NAME
        - SYSTEM_VERSION
        - CHANNEL
  meta_data:
    max_release_stage: GA
    expiration_date: "2025/04/05"
################################################################################
# Bytes Borrowed
#
# Percentage of reserved bytes which are used for borrows. The complement of
# this percentage (100 minus it) gives the percentage which is used for
# metadata. See Fxfs' object_manager.rs for details.
#
# Updated via Sampler which (at the time of writing) polls every 5 minutes.
################################################################################
- id: 8002
  metric_name: fxfs_bytes_borrowed_to_reserved_percentage
  metric_type: INTEGER
  # Free-form unit label for this INTEGER metric.
  metric_units_other: "percent"
  metric_semantics:
    - DATA_SIZE
  reports:
    # Aggregated with MAX_PROCEDURE over a WINDOW_1_DAY period.
    - report_name: per_device_daily_max
      id: 1
      report_type: UNIQUE_DEVICE_NUMERIC_STATS
      local_aggregation_procedure: MAX_PROCEDURE
      local_aggregation_period: WINDOW_1_DAY
      privacy_level: NO_ADDED_PRIVACY
      privacy_mechanism: DE_IDENTIFICATION
      system_profile_field:
        - BOARD_NAME
        - PRODUCT_NAME
        - SYSTEM_VERSION
        - CHANNEL
  meta_data:
    max_release_stage: GA
    expiration_date: "2025/04/05"
################################################################################
# Journal Size
#
# Current size of the journal in bytes. (The journal may be slightly larger in
# practice, but the extra bytes are discardable at any time and will usually be
# trimmed on the next transaction). See Fxfs' journal.rs for details.
#
# Updated via Sampler which (at the time of writing) polls every 5 minutes.
################################################################################
- id: 8003
  metric_name: fxfs_journal_size
  metric_type: INTEGER
  metric_units: BYTES
  metric_semantics:
    - DATA_SIZE
  reports:
    # Aggregated with MAX_PROCEDURE over a WINDOW_1_DAY period.
    - report_name: per_device_daily_max
      id: 1
      report_type: UNIQUE_DEVICE_NUMERIC_STATS
      local_aggregation_procedure: MAX_PROCEDURE
      local_aggregation_period: WINDOW_1_DAY
      privacy_level: NO_ADDED_PRIVACY
      privacy_mechanism: DE_IDENTIFICATION
      system_profile_field:
        - BOARD_NAME
        - PRODUCT_NAME
        - SYSTEM_VERSION
        - CHANNEL
  meta_data:
    max_release_stage: GA
    expiration_date: "2025/04/05"
# Same as above, but as a percentage of the total disk size (integer, 0-100).
- id: 8004
  metric_name: fxfs_journal_size_percentage
  metric_type: INTEGER
  # Free-form unit label for this INTEGER metric.
  metric_units_other: "percent"
  metric_semantics:
    - DATA_SIZE
  reports:
    # Aggregated with MAX_PROCEDURE over a WINDOW_1_DAY period.
    - report_name: per_device_daily_max
      id: 1
      report_type: UNIQUE_DEVICE_NUMERIC_STATS
      local_aggregation_procedure: MAX_PROCEDURE
      local_aggregation_period: WINDOW_1_DAY
      privacy_level: NO_ADDED_PRIVACY
      privacy_mechanism: DE_IDENTIFICATION
      system_profile_field:
        - BOARD_NAME
        - PRODUCT_NAME
        - SYSTEM_VERSION
        - CHANNEL
  meta_data:
    max_release_stage: GA
    expiration_date: "2025/04/05"
# Next ID: 8005
################################################################################
# Temporary Metrics
# Reserved Range: [9000, 10000)
################################################################################
################################################################################
# Data partition upgrade
#
# Used to track the progress of a one-time data partition upgrade.
# Each `stage` is triggered as a one-off event when that stage begins, which
# occurs during boot. Since the overall procedure is intended to be a one-time
# event, this should only trigger once per device.
################################################################################
- id: 9003
  metric_name: data_partition_upgrade_migrated
  metric_type: OCCURRENCE
  metric_semantics:
    - USAGE_COUNTING
  metric_dimensions:
    # One event code per upgrade stage.
    - dimension: stage
      event_codes:
        0: Unknown
        1: Skipped
        2: DetectedFailedUpgrade
        3: ReadOldData
        4: WriteNewData
        5: Done
      # The highest code defined above is 5; max_event_code is set to 15.
      max_event_code: 15
  reports:
    - report_name: counts
      id: 1
      report_type: FLEETWIDE_OCCURRENCE_COUNTS
      privacy_level: NO_ADDED_PRIVACY
      privacy_mechanism: DE_IDENTIFICATION
      system_profile_field:
        - BOARD_NAME
        - PRODUCT_NAME
        - SYSTEM_VERSION
        - CHANNEL
  meta_data:
    max_release_stage: GA
    expiration_date: "2025/04/05"
################################################################################
# Recovered Out-Of-Space Events
#
# Indicates a successful volume extension after performing a corrective action.
# Used to track how many times a filesystem was successfully able to recover
# from a volume extension failure (e.g. by force-flushing the journal).
#
# The count can increase by at most 1 every 5 minutes, so the daily max is 288
# (24 hours * 12 five-minute intervals per hour).
################################################################################
- id: 9004
  metric_name: recovered_space_from_sync_events
  metric_type: OCCURRENCE
  metric_semantics:
    - USAGE_COUNTING
  metric_dimensions:
    - *source_dimension
  reports:
    # Per-device histogram of the occurrence count over a WINDOW_1_DAY period.
    - report_name: per_device_daily_counts
      id: 1
      report_type: UNIQUE_DEVICE_HISTOGRAMS
      system_profile_selection: SELECT_LAST
      local_aggregation_period: WINDOW_1_DAY
      # Exponential buckets: 0, 1, 2, 4, 8, 16, 32, 64, 128, 256.
      int_buckets:
        exponential:
          floor: 0
          num_buckets: 10
          initial_step: 1
          step_multiplier: 2
      privacy_level: NO_ADDED_PRIVACY
      privacy_mechanism: DE_IDENTIFICATION
      system_profile_field:
        - BOARD_NAME
        - PRODUCT_NAME
        - SYSTEM_VERSION
        - CHANNEL
  meta_data:
    max_release_stage: GA
    expiration_date: "2025/04/05"
################################################################################
# Minfs to Fxfs Migration Events
#
# Indicates the status of any attempted disk-based migration flows at boot time.
# Used to track how successful the minfs to fxfs workflow is and give an
# indication of how often it has failed (and fallen back to minfs) in the field.
#
# The `err_code` dimension maps ZX_ERR_* status codes to event codes.
################################################################################
- id: 9005
  metric_name: minfs_to_fxfs_migration
  metric_type: OCCURRENCE
  metric_semantics:
    - USAGE_COUNTING
  metric_dimensions:
    - dimension: err_code
      event_codes:
        0: SUCCESS
        1: OUT_OF_SPACE
        2: OTHER_ERROR
  reports:
    ############################################################################
    # Daily per-device histogram of migration outcomes, keyed by err_code.
    # NOTE(review): despite the report name, the err_code dimension also
    # includes SUCCESS (0) -- confirm whether successes are meant to be
    # counted in this report.
    ############################################################################
    - report_name: failures_histogram
      id: 1
      report_type: UNIQUE_DEVICE_HISTOGRAMS
      system_profile_selection: SELECT_LAST
      local_aggregation_period: WINDOW_1_DAY
      # Exponential buckets: 0, 1, 2, 4, 8... 32768, 65536.
      int_buckets:
        exponential:
          floor: 0
          num_buckets: 18
          initial_step: 1
          step_multiplier: 2
      privacy_level: NO_ADDED_PRIVACY
      privacy_mechanism: DE_IDENTIFICATION
      system_profile_field:
        - BOARD_NAME
        - PRODUCT_NAME
        - SYSTEM_VERSION
        - CHANNEL
  meta_data:
    max_release_stage: GA
    expiration_date: "2025/04/05"
# Next ID: 9006
# IDs of metrics that have been deleted. These IDs must never be reused.
# Wrapped across lines and grouped by ID range for readability.
deleted_metric_ids: [
  1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
  17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
  33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
  68, 69,
  2000, 2001, 2002,
  3000,
  4000, 4002, 4004, 4006,
  9000, 9001, 9002]