[counters] Support a "max" mode for kcounters
This adds a new mode for kcounters that treats the value as a maximum
across cores, rather than a sum. These are declared with KCOUNTER_MAX()
and modified with kcounter_max().
Additionally, adds two new max counters, one for live handles, and one
for maximum allocation size in cmpct_alloc().
While implementing this, I noticed that `kernel.handles.live` probably
doesn't behave quite as intended (to be confirmed), and filed ZX-3015
to track that.
ZX-3092 #comment [counters] Support a "max" mode for kcounters
ZX-3105 #comment [counters] Support a "max" mode for kcounters
Test: CQ, manual inspection of `k counters view`
Change-Id: I556fe74dfcb54e1fd61421fb13b6be65d4687243
diff --git a/kernel/include/kernel/atomic.h b/kernel/include/kernel/atomic.h
index e998c30..feae36d 100644
--- a/kernel/include/kernel/atomic.h
+++ b/kernel/include/kernel/atomic.h
@@ -130,6 +130,10 @@
return __atomic_load_n(ptr, __ATOMIC_SEQ_CST);
}
+static inline int64_t atomic_load_64_relaxed(volatile int64_t* ptr) {
+ return __atomic_load_n(ptr, __ATOMIC_RELAXED);
+}
+
static inline void atomic_store_64(volatile int64_t* ptr, int64_t newval) {
__atomic_store_n(ptr, newval, __ATOMIC_SEQ_CST);
}
@@ -188,6 +192,12 @@
return __atomic_fetch_add(ptr, val, __ATOMIC_RELAXED);
}
+static inline bool atomic_cmpxchg_64_relaxed(volatile int64_t* ptr, int64_t* oldval,
+ int64_t newval) {
+ return __atomic_compare_exchange_n(ptr, oldval, newval, false,
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+}
+
static inline bool atomic_cmpxchg_u64_relaxed(volatile uint64_t* ptr, uint64_t* oldval,
uint64_t newval) {
return __atomic_compare_exchange_n(ptr, oldval, newval, false,
diff --git a/kernel/kernel.ld b/kernel/kernel.ld
index 5389416..7db2aa3 100644
--- a/kernel/kernel.ld
+++ b/kernel/kernel.ld
@@ -221,13 +221,12 @@
KEEP(*(SORT_BY_NAME(.bss.kcounter.*)))
/*
- * Sanity check that the aggregate size of kcounters_arena
- * SMP_MAX_CPUS slots for each counter. The k_counter_desc structs
- * in .kcounter.desc are 8 bytes each, which matches the size of a
- * single counter. (It's only for this sanity check that we need
- * to care how big k_counter_desc is.)
+ * Sanity check that the aggregate size of kcounters_arena matches
+ * SMP_MAX_CPUS slots for each counter. The k_counter_desc structs in
+ * .kcounter.desc are 16 bytes each. (It's only for this sanity check
+ * that we need to care how big k_counter_desc is.)
*/
- ASSERT(. - kcounters_arena == SIZEOF(.kcounter.desc) * SMP_MAX_CPUS,
+ ASSERT(. - kcounters_arena == SIZEOF(.kcounter.desc) * 8 * SMP_MAX_CPUS / 16,
"kcounters_arena size mismatch");
*(.bss*)
diff --git a/kernel/lib/counters/counters.cpp b/kernel/lib/counters/counters.cpp
index 39c6924..276ffb3 100644
--- a/kernel/lib/counters/counters.cpp
+++ b/kernel/lib/counters/counters.cpp
@@ -147,17 +147,29 @@
static void dump_counter(const k_counter_desc* desc, bool verbose) {
size_t counter_index = kcounter_index(desc);
- uint64_t sum = 0;
+ uint64_t summary = 0;
uint64_t values[SMP_MAX_CPUS];
- for (size_t ix = 0; ix != SMP_MAX_CPUS; ++ix) {
- // This value is not atomically consistent, therefore is just
- // an approximation. TODO(cpu): for ARM this might need some magic.
- values[ix] = percpu[ix].counters[counter_index];
- sum += values[ix];
+
+ if (desc->type == k_counter_type::max) {
+ for (size_t ix = 0; ix != SMP_MAX_CPUS; ++ix) {
+ // This value is not atomically consistent, therefore is just
+ // an approximation. TODO(cpu): for ARM this might need some magic.
+ values[ix] = percpu[ix].counters[counter_index];
+ if (values[ix] > summary) {
+ summary = values[ix];
+ }
+ }
+ } else {
+ for (size_t ix = 0; ix != SMP_MAX_CPUS; ++ix) {
+ // This value is not atomically consistent, therefore is just
+ // an approximation. TODO(cpu): for ARM this might need some magic.
+ values[ix] = percpu[ix].counters[counter_index];
+ summary += values[ix];
+ }
}
- printf("[%.2zu] %s = %lu\n", counter_index, desc->name, sum);
- if (sum == 0u) {
+ printf("[%.2zu] %s = %lu\n", counter_index, desc->name, summary);
+ if (summary == 0u) {
return;
}
diff --git a/kernel/lib/counters/include/lib/counters.h b/kernel/lib/counters/include/lib/counters.h
index 88618e8..04d4c63 100644
--- a/kernel/lib/counters/include/lib/counters.h
+++ b/kernel/lib/counters/include/lib/counters.h
@@ -24,9 +24,16 @@
// Kernel counters public API:
// 1- define a new counter.
// KCOUNTER(counter_name, "<counter name>");
+// KCOUNTER_MAX(counter_name, "<counter name>");
//
// 2- counters start at zero, increment the counter:
// kcounter_add(counter_name, 1);
+// or
+// kcounter_max(counter_name, value);
+//
+// By default with KCOUNTER, the `k counters` presentation will calculate a
+// sum() of the per-core values. KCOUNTER_MAX() instead calculates the max()
+// of the counters across cores.
//
//
// Naming the counters
@@ -40,12 +47,19 @@
// operations are never used to set their value so they are both
// imprecise and reflect only the operations on a particular core.
+enum class k_counter_type : uint64_t {
+ sum = 1,
+ min = 2,
+ max = 3,
+};
+
struct k_counter_desc {
const char* name;
+ k_counter_type type;
};
-static_assert(sizeof(struct k_counter_desc) ==
- sizeof(((struct percpu){}).counters[0]),
- "the kernel.ld ASSERT knows that these sizes match");
+static_assert(
+ sizeof(struct k_counter_desc) == 16,
+ "kernel.ld uses this size to ASSERT that enough space has been reserved in the counters arena");
// Define the descriptor and reserve the arena space for the counters.
// Because of -fdata-sections, each kcounter_arena_* array will be
@@ -55,11 +69,15 @@
// slots used for this particular counter (that would have terrible
// cache effects); it just reserves enough space for counters_init() to
// dole out in per-CPU chunks.
-#define KCOUNTER(var, name) \
- __USED int64_t kcounter_arena_##var[SMP_MAX_CPUS] \
- __asm__("kcounter." name); \
- __USED __SECTION("kcountdesc." name) \
- static const struct k_counter_desc var[] = { { name } }
+#define KCOUNTER(var, name) \
+ __USED int64_t kcounter_arena_##var[SMP_MAX_CPUS] __asm__("kcounter." name); \
+ __USED __SECTION("kcountdesc." name) static const struct k_counter_desc var[] = { \
+ {name, k_counter_type::sum}}
+
+#define KCOUNTER_MAX(var, name) \
+ __USED int64_t kcounter_arena_##var[SMP_MAX_CPUS] __asm__("kcounter." name); \
+ __USED __SECTION("kcountdesc." name) static const struct k_counter_desc var[] = { \
+ {name, k_counter_type::max}}
// Via magic in kernel.ld, all the descriptors wind up in a contiguous
// array bounded by these two symbols, sorted by name.
@@ -89,3 +107,15 @@
*kcounter_slot(var) += add;
#endif
}
+
+// TODO(travisg|scottmg): Revisit, consider more efficient arm-specific
+// instruction sequence here.
+static inline void kcounter_max(const struct k_counter_desc* var, int64_t value) {
+ int64_t prev_value = atomic_load_64_relaxed(kcounter_slot(var));
+ while (prev_value < value && !atomic_cmpxchg_64_relaxed(kcounter_slot(var), &prev_value, value))
+ ;
+}
+
+static inline void kcounter_max_counter(const struct k_counter_desc* var, const struct k_counter_desc* other_var) {
+ kcounter_max(var, *kcounter_slot(other_var));
+}
diff --git a/kernel/lib/heap/cmpctmalloc/cmpctmalloc.cpp b/kernel/lib/heap/cmpctmalloc/cmpctmalloc.cpp
index 62ace3c..596d32e 100644
--- a/kernel/lib/heap/cmpctmalloc/cmpctmalloc.cpp
+++ b/kernel/lib/heap/cmpctmalloc/cmpctmalloc.cpp
@@ -18,10 +18,11 @@
#include <kernel/mutex.h>
#include <kernel/spinlock.h>
#include <kernel/thread.h>
-#include <vm/vm.h>
+#include <lib/counters.h>
#include <lib/heap.h>
#include <platform.h>
#include <trace.h>
+#include <vm/vm.h>
// Malloc implementation tuned for space.
//
@@ -115,6 +116,8 @@
#define LOCAL_TRACE 0
+KCOUNTER_MAX(max_allocation, "kernel.heap.max_allocation");
+
// Use HEAP_ENABLE_TESTS to enable internal testing. The tests are not useful
// when the target system is up. By that time we have done hundreds of allocations
// already.
@@ -902,6 +905,8 @@
return NULL;
}
+ kcounter_max(max_allocation, size);
+
// Large allocations are no longer allowed. See ZX-1318 for details.
if (size > (HEAP_LARGE_ALLOC_BYTES - sizeof(header_t))) {
return NULL;
diff --git a/kernel/object/handle.cpp b/kernel/object/handle.cpp
index a6c1cff..219307c 100644
--- a/kernel/object/handle.cpp
+++ b/kernel/object/handle.cpp
@@ -24,6 +24,7 @@
KCOUNTER(handle_count_made, "kernel.handles.made");
KCOUNTER(handle_count_duped, "kernel.handles.duped");
KCOUNTER(handle_count_live, "kernel.handles.live");
+KCOUNTER(handle_count_max_live, "kernel.handles.max_live");
// Masks for building a Handle's base_value, which ProcessDispatcher
// uses to create zx_handle_t values.
@@ -119,6 +120,7 @@
return nullptr;
kcounter_add(handle_count_made, 1);
kcounter_add(handle_count_live, 1);
+ kcounter_max_counter(handle_count_max_live, handle_count_live);
return HandleOwner(new (addr) Handle(ktl::move(dispatcher),
rights, base_value));
}
@@ -139,6 +141,7 @@
return nullptr;
kcounter_add(handle_count_duped, 1);
kcounter_add(handle_count_live, 1);
+ kcounter_max_counter(handle_count_max_live, handle_count_live);
return HandleOwner(new (addr) Handle(source, rights, base_value));
}