[kernel][vm] Keep per cpu counts of vm_page states
The purpose of this change is to improve the performance of
ZX_INFO_KMEM_STATS. By maintaining counters of vm_page states we no
longer need to walk all pages to compute ZX_INFO_KMEM_STATS.
A previous version of this change used global atomic integers, but
that proved to be too expensive (ZX-3916). Instead, we use per cpu
counters and access them with preemption disabled.
Move vm_page_state to its own file to prevent a circular dependency
between page and percpu.
Add a couple of static helper methods to Percpus to facilitate accessing
per cpu counters like vm_page_counts.
Ran zircon_benchmarks on a NUC to ensure this doesn't significantly
regress performance. Looked at Channel/WriteRead/64bytes as it seems
to be a bellwether.
Before --
Mean Std dev Min Max Median Unit Mean Mbytes/sec Test case
562 33 542 6403 555 nanoseconds 108.552 Channel/WriteRead/64bytes
277 19 263 4391 272 nanoseconds N/A Channel/WriteRead/64bytes.write
285 17 274 2680 282 nanoseconds N/A Channel/WriteRead/64bytes.read
After --
Mean Std dev Min Max Median Unit Mean Mbytes/sec Test case
571 13 558 6263 569 nanoseconds 106.880 Channel/WriteRead/64bytes
284 10 275 4458 282 nanoseconds N/A Channel/WriteRead/64bytes.write
287 7 279 1805 286 nanoseconds N/A Channel/WriteRead/64bytes.read
Bug: ZX-833 #comment per cpu counts
Test: booted and ran 'kstats -m'
Change-Id: I18083c4dbc3d0423cc59cf3700a93dc72de8d2e3
diff --git a/zircon/docs/syscalls/object_get_info.md b/zircon/docs/syscalls/object_get_info.md
index 2a18cfc..67410e6 100644
--- a/zircon/docs/syscalls/object_get_info.md
+++ b/zircon/docs/syscalls/object_get_info.md
@@ -608,11 +608,12 @@
*buffer* type: `zx_info_kmem_stats_t[1]`
-Returns information about kernel memory usage. It can be expensive to gather.
+Returns information about kernel memory usage.
```
typedef struct zx_info_kmem_stats {
// The total amount of physical memory available to the system.
+ // Note, the values below may not exactly add up to this total.
size_t total_bytes;
// The amount of unallocated memory.
diff --git a/zircon/kernel/include/kernel/percpu.h b/zircon/kernel/include/kernel/percpu.h
index 7f58450..a6865f3 100644
--- a/zircon/kernel/include/kernel/percpu.h
+++ b/zircon/kernel/include/kernel/percpu.h
@@ -14,6 +14,7 @@
#include <kernel/timer.h>
#include <list.h>
#include <sys/types.h>
+#include <vm/page_state.h>
#include <zircon/compiler.h>
struct percpu;
@@ -41,7 +42,19 @@
// Unused argument is so this can be passed to LK_INIT_HOOK() directly.
static void HeapInit(uint32_t);
- private:
+ // Call |func| with the current CPU's percpu struct with preemption disabled.
+ //
+ // |Func| should accept a |percpu*| and should not block.
+ template <typename Func>
+ static void WithCurrentPreemptDisable(Func&& func);
+
+ // Call |func| once per CPU with each CPU's percpu struct with preemption disabled.
+ //
+ // |Func| should accept a |percpu*| and should not block.
+ template <typename Func>
+ static void ForEachPreemptDisable(Func&& func);
+
+private:
// Number of percpu entries.
static size_t count_;
@@ -102,6 +115,12 @@
// each cpu has a dedicated thread for processing dpcs
thread_t* dpc_thread;
+ // Page state counts are percpu because they change frequently and we don't want to pay for
+ // synchronization.
+ //
+ // When accessing, be sure to do so with preemption disabled. See |WithCurrentPreemptDisable| and |ForEachPreemptDisable|.
+ vm_page_counts_t vm_page_counts;
+
// Initialize this percpu object, |cpu_num| will be used to initialize
// embedded objects.
void Init(cpu_num_t cpu_num);
@@ -128,3 +147,19 @@
return &Percpus::Get(arch_curr_cpu_num());
}
+template <typename Func>
+void Percpus::WithCurrentPreemptDisable(Func&& func) {
+ thread_preempt_disable();
+ func(&Percpus::Get(arch_curr_cpu_num()));
+ thread_preempt_reenable();
+}
+
+template <typename Func>
+void Percpus::ForEachPreemptDisable(Func&& func) {
+ thread_preempt_disable();
+ const size_t count = Percpus::Count();
+ for (cpu_num_t cpu_num = 0; cpu_num < count; ++cpu_num) {
+ func(&Percpus::Get(cpu_num));
+ }
+ thread_preempt_reenable();
+}
diff --git a/zircon/kernel/syscalls/object.cpp b/zircon/kernel/syscalls/object.cpp
index 92457fe..b009c50 100644
--- a/zircon/kernel/syscalls/object.cpp
+++ b/zircon/kernel/syscalls/object.cpp
@@ -509,46 +509,54 @@
// TODO: figure out a better handle to hang this off to and push this copy code into
// that dispatcher.
- size_t state_count[VM_PAGE_STATE_COUNT_] = {};
- pmm_count_total_states(state_count);
-
- size_t total = 0;
+ // |get_count| returns an estimate so the sum of the counts may not equal the total.
+ uint64_t state_count[VM_PAGE_STATE_COUNT_] = {};
for (uint32_t i = 0; i < VM_PAGE_STATE_COUNT_; i++) {
- total += state_count[i];
+ state_count[i] = vm_page_t::get_count(vm_page_state(i));
}
- size_t unused_size = 0;
- size_t free_heap_bytes = 0;
+ uint64_t unused_size = 0;
+ uint64_t free_heap_bytes = 0;
heap_get_info(&unused_size, &free_heap_bytes);
// Note that this intentionally uses uint64_t instead of
// size_t in case we ever have a 32-bit userspace but more
// than 4GB physical memory.
zx_info_kmem_stats_t stats = {};
- stats.total_bytes = total * PAGE_SIZE;
- size_t other_bytes = stats.total_bytes;
+ stats.total_bytes = pmm_count_total_bytes();
+
+ // Holds the sum of bytes in the broken out states. This sum could be less than the total
+ // because we aren't counting all possible states (e.g. VM_PAGE_STATE_ALLOC). This sum could
+ // be greater than the total because per-state counts are approximate.
+ uint64_t sum_bytes = 0;
stats.free_bytes = state_count[VM_PAGE_STATE_FREE] * PAGE_SIZE;
- other_bytes -= stats.free_bytes;
+ sum_bytes += stats.free_bytes;
stats.wired_bytes = state_count[VM_PAGE_STATE_WIRED] * PAGE_SIZE;
- other_bytes -= stats.wired_bytes;
+ sum_bytes += stats.wired_bytes;
stats.total_heap_bytes = state_count[VM_PAGE_STATE_HEAP] * PAGE_SIZE;
- other_bytes -= stats.total_heap_bytes;
+ sum_bytes += stats.total_heap_bytes;
stats.free_heap_bytes = free_heap_bytes;
stats.vmo_bytes = state_count[VM_PAGE_STATE_OBJECT] * PAGE_SIZE;
- other_bytes -= stats.vmo_bytes;
+ sum_bytes += stats.vmo_bytes;
stats.mmu_overhead_bytes = state_count[VM_PAGE_STATE_MMU] * PAGE_SIZE;
- other_bytes -= stats.mmu_overhead_bytes;
+ sum_bytes += stats.mmu_overhead_bytes;
stats.ipc_bytes = state_count[VM_PAGE_STATE_IPC] * PAGE_SIZE;
- other_bytes -= stats.ipc_bytes;
+ sum_bytes += stats.ipc_bytes;
- // All other VM_PAGE_STATE_* counts get lumped into other_bytes.
- stats.other_bytes = other_bytes;
+ // Is there unaccounted memory?
+ if (stats.total_bytes > sum_bytes) {
+ // Everything else gets counted as "other".
+ stats.other_bytes = stats.total_bytes - sum_bytes;
+ } else {
+ // One or more of our per-state counts may have been off. We'll ignore it.
+ stats.other_bytes = 0;
+ }
return single_record_result(
_buffer, buffer_size, _actual, _avail, &stats, sizeof(stats));
diff --git a/zircon/kernel/vm/include/vm/page.h b/zircon/kernel/vm/include/vm/page.h
index 482342b..326ebf5 100644
--- a/zircon/kernel/vm/include/vm/page.h
+++ b/zircon/kernel/vm/include/vm/page.h
@@ -7,28 +7,12 @@
#pragma once
-#include <fbl/algorithm.h>
#include <list.h>
#include <stdint.h>
#include <sys/types.h>
+#include <vm/page_state.h>
#include <zircon/compiler.h>
-enum vm_page_state : uint32_t {
- VM_PAGE_STATE_FREE = 0,
- VM_PAGE_STATE_ALLOC,
- VM_PAGE_STATE_OBJECT,
- VM_PAGE_STATE_WIRED,
- VM_PAGE_STATE_HEAP,
- VM_PAGE_STATE_MMU, // allocated to serve arch-specific mmu purposes
- VM_PAGE_STATE_IOMMU, // allocated for platform-specific iommu structures
- VM_PAGE_STATE_IPC,
-
- VM_PAGE_STATE_COUNT_
-};
-
-#define VM_PAGE_STATE_BITS 3
-static_assert((1u << VM_PAGE_STATE_BITS) >= VM_PAGE_STATE_COUNT_, "");
-
// core per page structure allocated at pmm arena creation time
typedef struct vm_page {
struct list_node queue_node;
@@ -66,6 +50,16 @@
void set_state(vm_page_state new_state);
+ // Return the approximate number of pages in state |state|.
+ //
+ // When called concurrently with |set_state|, the count may be off by a small amount.
+ static uint64_t get_count(vm_page_state state);
+
+ // Add |n| to the count of pages in state |state|.
+ //
+ // Should be used when first constructing pages.
+ static void add_to_initial_count(vm_page_state state, uint64_t n);
+
} vm_page_t;
// assert that the page structure isn't growing uncontrollably
diff --git a/zircon/kernel/vm/include/vm/page_state.h b/zircon/kernel/vm/include/vm/page_state.h
new file mode 100644
index 0000000..61f1e50
--- /dev/null
+++ b/zircon/kernel/vm/include/vm/page_state.h
@@ -0,0 +1,31 @@
+// Copyright 2019 The Fuchsia Authors
+//
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT
+#pragma once
+
+#include <stdint.h>
+
+// Defines the state of a VM page (|vm_page_t|).
+//
+// Be sure to keep this enum in sync with the definition of |vm_page_t|.
+enum vm_page_state : uint32_t {
+ VM_PAGE_STATE_FREE = 0,
+ VM_PAGE_STATE_ALLOC,
+ VM_PAGE_STATE_OBJECT,
+ VM_PAGE_STATE_WIRED,
+ VM_PAGE_STATE_HEAP,
+ VM_PAGE_STATE_MMU, // allocated to serve arch-specific mmu purposes
+ VM_PAGE_STATE_IOMMU, // allocated for platform-specific iommu structures
+ VM_PAGE_STATE_IPC,
+
+ VM_PAGE_STATE_COUNT_
+};
+
+#define VM_PAGE_STATE_BITS 3
+static_assert((1u << VM_PAGE_STATE_BITS) >= VM_PAGE_STATE_COUNT_, "");
+
+typedef struct vm_page_counts {
+ int64_t by_state[VM_PAGE_STATE_COUNT_];
+} vm_page_counts_t;
diff --git a/zircon/kernel/vm/include/vm/pmm.h b/zircon/kernel/vm/include/vm/pmm.h
index 480598d5..7ce8757 100644
--- a/zircon/kernel/vm/include/vm/pmm.h
+++ b/zircon/kernel/vm/include/vm/pmm.h
@@ -63,11 +63,6 @@
// Return amount of physical memory in system, in bytes.
uint64_t pmm_count_total_bytes();
-// Counts the number of pages in every state. For every page in every arena,
-// increments the corresponding VM_PAGE_STATE_*-indexed entry of
-// |state_count|. Does not zero out the entries first.
-void pmm_count_total_states(size_t state_count[VM_PAGE_STATE_COUNT_]) __NONNULL((1));
-
// virtual to physical
paddr_t vaddr_to_paddr(const void* va);
diff --git a/zircon/kernel/vm/page.cpp b/zircon/kernel/vm/page.cpp
index f456e25..7784f04 100644
--- a/zircon/kernel/vm/page.cpp
+++ b/zircon/kernel/vm/page.cpp
@@ -5,13 +5,15 @@
// license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT
+#include <vm/page.h>
+
#include <err.h>
#include <inttypes.h>
+#include <kernel/percpu.h>
#include <lib/console.h>
#include <stdio.h>
#include <string.h>
#include <trace.h>
-#include <vm/page.h>
#include <vm/physmap.h>
#include <vm/pmm.h>
#include <vm/vm.h>
@@ -48,7 +50,29 @@
constexpr uint32_t kMask = (1 << VM_PAGE_STATE_BITS) - 1;
DEBUG_ASSERT_MSG(new_state == (new_state & kMask), "invalid state %u\n", new_state);
+ const vm_page_state old_state = vm_page_state(state_priv);
state_priv = (new_state & kMask);
+
+ Percpus::WithCurrentPreemptDisable(
+ [&old_state, &new_state](percpu* p) {
+ p->vm_page_counts.by_state[old_state] -= 1;
+ p->vm_page_counts.by_state[new_state] += 1;
+ });
+}
+
+uint64_t vm_page::get_count(vm_page_state state) {
+ int64_t result = 0;
+ Percpus::ForEachPreemptDisable([&state, &result](percpu* p) {
+ result += p->vm_page_counts.by_state[state];
+ });
+ return result >= 0 ? result : 0;
+}
+
+void vm_page::add_to_initial_count(vm_page_state state, uint64_t n) {
+ Percpus::WithCurrentPreemptDisable(
+ [&state, &n](percpu* p) {
+ p->vm_page_counts.by_state[state] += n;
+ });
}
static int cmd_vm_page(int argc, const cmd_args* argv, uint32_t flags) {
diff --git a/zircon/kernel/vm/pmm.cpp b/zircon/kernel/vm/pmm.cpp
index 5f5002f..7635266 100644
--- a/zircon/kernel/vm/pmm.cpp
+++ b/zircon/kernel/vm/pmm.cpp
@@ -107,10 +107,6 @@
return pmm_node.CountTotalBytes();
}
-void pmm_count_total_states(size_t state_count[VM_PAGE_STATE_COUNT_]) {
- pmm_node.CountTotalStates(state_count);
-}
-
static void pmm_dump_timer(struct timer* t, zx_time_t now, void*) {
zx_time_t deadline = zx_time_add_duration(now, ZX_SEC(1));
timer_set_oneshot(t, deadline, &pmm_dump_timer, nullptr);
diff --git a/zircon/kernel/vm/pmm_arena.cpp b/zircon/kernel/vm/pmm_arena.cpp
index b70d48a1..3d2ecaa 100644
--- a/zircon/kernel/vm/pmm_arena.cpp
+++ b/zircon/kernel/vm/pmm_arena.cpp
@@ -54,6 +54,9 @@
page_array_ = (vm_page_t*)raw_page_array;
+ // we've just constructed |page_count| pages in the state VM_PAGE_STATE_FREE
+ vm_page::add_to_initial_count(VM_PAGE_STATE_FREE, page_count);
+
// compute the range of the array that backs the array itself
size_t array_start_index = (PAGE_ALIGN(range.pa) - info_.base) / PAGE_SIZE;
size_t array_end_index = array_start_index + page_array_size / PAGE_SIZE;
@@ -73,7 +76,6 @@
if (i >= array_start_index && i < array_end_index) {
p.set_state(VM_PAGE_STATE_WIRED);
} else {
- p.set_state(VM_PAGE_STATE_FREE);
list_add_tail(&list, &p.queue_node);
}
}
diff --git a/zircon/kernel/vm/pmm_node.cpp b/zircon/kernel/vm/pmm_node.cpp
index 24ea4c2..fe3696b 100644
--- a/zircon/kernel/vm/pmm_node.cpp
+++ b/zircon/kernel/vm/pmm_node.cpp
@@ -326,15 +326,6 @@
return arena_cumulative_size_;
}
-void PmmNode::CountTotalStates(uint64_t state_count[VM_PAGE_STATE_COUNT_]) const {
- // TODO(MG-833): This is extremely expensive, holding a global lock
- // and touching every page/arena. We should keep a running count instead.
- Guard<fbl::Mutex> guard{&lock_};
- for (auto& a : arena_list_) {
- a.CountStates(state_count);
- }
-}
-
void PmmNode::DumpFree() const TA_NO_THREAD_SAFETY_ANALYSIS {
auto megabytes_free = CountFreePages() / 256u;
printf(" %zu free MBs\n", megabytes_free);
diff --git a/zircon/kernel/vm/pmm_node.h b/zircon/kernel/vm/pmm_node.h
index a57e7af..6897cfb 100644
--- a/zircon/kernel/vm/pmm_node.h
+++ b/zircon/kernel/vm/pmm_node.h
@@ -38,7 +38,6 @@
uint64_t CountFreePages() const;
uint64_t CountTotalBytes() const;
- void CountTotalStates(uint64_t state_count[VM_PAGE_STATE_COUNT_]) const;
// printf free and overall state of the internal arenas
// NOTE: both functions skip mutexes and can be called inside timer or crash context