// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <assert.h>
#include <errno.h>
#include <getopt.h>
#include <inttypes.h>
#include <lib/fit/defer.h>
#include <lib/stdcompat/span.h>
#include <lib/zx/bti.h>
#include <lib/zx/channel.h>
#include <lib/zx/clock.h>
#include <lib/zx/event.h>
#include <lib/zx/exception.h>
#include <lib/zx/iommu.h>
#include <lib/zx/pager.h>
#include <lib/zx/port.h>
#include <lib/zx/thread.h>
#include <lib/zx/vmar.h>
#include <lib/zx/vmo.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <threads.h>
#include <unistd.h>
#include <zircon/compiler.h>
#include <zircon/errors.h>
#include <zircon/status.h>
#include <zircon/syscalls-next.h>
#include <zircon/syscalls.h>
#include <zircon/syscalls/debug.h>
#include <zircon/syscalls/exception.h>
#include <zircon/syscalls/iommu.h>
#include <zircon/syscalls/port.h>
#include <zircon/threads.h>
#include <zircon/types.h>
#include <algorithm>
#include <array>
#include <atomic>
#include <iterator>
#include <memory>
#include <optional>
#include <shared_mutex>
#include <thread>
#include <fbl/algorithm.h>
#include <fbl/array.h>
#include <fbl/auto_lock.h>
#include <fbl/mutex.h>
#include <fbl/ref_counted.h>
#include <fbl/ref_ptr.h>
#include "stress_test.h"
#define VMSTRESS_DEBUG 0
// Helper to generate values in the full inclusive range [a,b].
template <typename IntType = uint64_t>
static inline IntType uniform_rand_range(IntType a, IntType b, StressTest::Rng& rng) {
return std::uniform_int_distribution<IntType>(a, b)(rng);
}
// Helper to generate values in the common range [0, max(1, range)). That is, a range of size 0 is
// considered valid and always generates the result 0.
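// For example, uniform_rand<uint32_t>(10, rng) yields a value in [0, 9], while
// uniform_rand<uint32_t>(0, rng) always yields 0.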
template <typename IntType = uint64_t>
static inline IntType uniform_rand(IntType range, StressTest::Rng& rng) {
if (range == static_cast<IntType>(0)) {
return range;
}
return uniform_rand_range<IntType>(static_cast<IntType>(0), range - 1, rng);
}
class VmStressTest;
// VM Stresser
//
// The current stress test runs multiple independent test instances which get randomly
// initialized and torn down over time. Each creates a single pager vmo and hands it to a
// pool of worker threads. Some of the worker threads randomly commit/decommit/read/write/map/unmap
// the vmo. The rest of the worker threads randomly service pager requests or randomly supply
// their own 'prefetch' pages. This is intended to pick out any internal races with the
// VMO/VMAR/Pager system.
//
// Currently does not validate that any given operation was successfully performed, only
// that the APIs do not return an error (or crash).
//
// Will evolve over time to use cloned vmos.
class VmStressTest : public StressTest {
public:
VmStressTest() = default;
virtual ~VmStressTest() = default;
virtual zx_status_t Start();
virtual zx_status_t Stop();
virtual const char* name() const { return "VM Stress"; }
zx::unowned_resource IommuResource() { return zx::unowned_resource{iommu_resource_}; }
private:
int test_thread();
std::atomic<bool> shutdown_{false};
thrd_t test_thread_;
} vmstress;
class TestInstance {
public:
TestInstance(VmStressTest* test, uint64_t instance_id)
: test_(test), test_instance_id_(instance_id) {}
virtual ~TestInstance() {}
virtual zx_status_t Start() = 0;
virtual zx_status_t Stop() = 0;
zx::unowned_resource IommuResource() { return test_->IommuResource(); }
protected:
// TODO: scale based on the number of cores in the system and/or command line arg
static constexpr uint64_t kNumThreads = 6;
template <typename... Args>
void Printf(const char* str, Args... args) const {
test_->Printf(str, args...);
}
template <typename... Args>
void PrintfAlways(const char* str, Args... args) const {
test_->PrintfAlways(str, args...);
}
std::mt19937_64 RngGen() { return test_->RngGen(); }
VmStressTest* const test_;
const uint64_t test_instance_id_;
};
class SingleVmoTestInstance : public TestInstance {
public:
SingleVmoTestInstance(VmStressTest* test, uint64_t instance_id, bool use_pager, bool trap_dirty,
uint64_t vmo_size)
: TestInstance(test, instance_id),
use_pager_(use_pager),
trap_dirty_(trap_dirty),
vmo_size_(vmo_size) {}
zx_status_t Start() final;
zx_status_t Stop() final;
private:
int vmo_thread();
int pager_thread();
void CheckVmoThreadError(zx_status_t status, const char* error);
const bool use_pager_;
const bool trap_dirty_;
const uint64_t vmo_size_;
static constexpr uint64_t kNumVmoThreads = 3;
thrd_t threads_[kNumThreads];
zx::thread thread_handles_[kNumThreads];
// Array used for storing vmo mappings. All mappings are cleaned up by the test thread,
// as vmo threads will sometimes crash if the instance is torn down during a page fault.
std::atomic<uint32_t> vmo_thread_idx_{0};
uintptr_t ptrs_[kNumThreads] = {};
fbl::Array<uint8_t> bufs_[kNumThreads] = {};
// Vector of page requests shared by all pager threads of the instance, to allow
// requests to be serviced out-of-order.
fbl::Mutex mtx_;
fbl::Vector<zx_packet_page_request_t> requests_ __TA_GUARDED(mtx_);
// Flag used to signal shutdown to worker threads.
std::atomic<bool> shutdown_{false};
// Counter that allows the last pager thread to clean up the pager itself.
std::atomic<uint32_t> pager_thread_count_{kNumThreads - kNumVmoThreads};
// Debug states to help diagnose failures.
std::atomic<bool> debug_pager_detached_{false};
std::atomic<bool> debug_pager_reset_{false};
std::atomic<bool> debug_pager_threads_created_{false};
// If using a pager, event to signal when the VMO threads can start generating page requests.
zx::event pager_threads_ready_;
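// The vmo under test, the pager that backs it (when use_pager_ is set), and the port on which
// pager requests for it arrive.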
zx::vmo vmo_{};
zx::pager pager_;
zx::port port_;
};
int SingleVmoTestInstance::vmo_thread() {
zx_status_t status;
uint64_t idx = vmo_thread_idx_++;
// allocate a local buffer
const size_t buf_size = zx_system_get_page_size() * 16;
bufs_[idx] = fbl::Array<uint8_t>(new uint8_t[buf_size], buf_size);
const fbl::Array<uint8_t>& buf = bufs_[idx];
auto rng = RngGen();
// local helper routines to calculate a random range within a vmo and
// a range appropriate to read into the local buffer above
auto rand_vmo_range = [this, &rng](uint64_t* out_offset, uint64_t* out_size) {
*out_offset = uniform_rand(vmo_size_, rng);
*out_size = std::min(uniform_rand(vmo_size_, rng), vmo_size_ - *out_offset);
};
auto rand_buffer_range = [this, &rng, buf_size](uint64_t* out_offset, uint64_t* out_size) {
*out_size = uniform_rand(buf_size, rng);
*out_offset = uniform_rand(vmo_size_ - *out_size, rng);
};
ZX_ASSERT(buf_size < vmo_size_);
if (use_pager_) {
// Wait for the pager threads to be created so that page requests can be serviced. Otherwise, we
// will block until the pager threads come up, which can be a while on a heavily loaded system
// (which is true for these stress tests), and could even cause us to hit the pager wait timeout
// and error out or crash.
status = pager_threads_ready_.wait_one(ZX_USER_SIGNAL_0, zx::time::infinite(), nullptr);
ZX_ASSERT_MSG(status == ZX_OK, "event wait failed with %s\n", zx_status_get_string(status));
}
while (!shutdown_.load()) {
uint64_t off, len;
int r = uniform_rand(100, rng);
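// The case ranges below weight the operations: 5% commit, 15% map/unmap, 15% vmo_read,
// 15% vmo_write, 25% mapped read, 25% mapped write.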
switch (r) {
case 0 ... 4: // commit a range of the vmo
Printf("c");
rand_vmo_range(&off, &len);
status = vmo_.op_range(ZX_VMO_OP_COMMIT, off, len, nullptr, 0);
CheckVmoThreadError(status, "Failed to commit range");
break;
case 5 ... 19:
if (ptrs_[idx]) {
// unmap the vmo if it is already mapped
Printf("u");
status = zx::vmar::root_self()->unmap(ptrs_[idx], vmo_size_);
CheckVmoThreadError(status, "failed to unmap range");
ptrs_[idx] = 0;
}
// map it somewhere
Printf("m");
status = zx::vmar::root_self()->map(
ZX_VM_PERM_READ | ZX_VM_PERM_WRITE | (use_pager_ ? ZX_VM_ALLOW_FAULTS : 0), 0, vmo_, 0,
vmo_size_, ptrs_ + idx);
CheckVmoThreadError(status, "failed to map range");
break;
case 20 ... 34:
// read from a random range of the vmo
Printf("r");
rand_buffer_range(&off, &len);
status = vmo_.read(buf.data(), off, len);
CheckVmoThreadError(status, "error reading from vmo");
break;
case 35 ... 49:
// write to a random range of the vmo
Printf("w");
rand_buffer_range(&off, &len);
status = vmo_.write(buf.data(), off, len);
CheckVmoThreadError(status, "error writing to vmo");
break;
case 50 ... 74:
// read from a random range of the vmo via a direct memory reference
if (ptrs_[idx]) {
Printf("R");
rand_buffer_range(&off, &len);
memcpy(buf.data(), reinterpret_cast<const void*>(ptrs_[idx] + off), len);
}
break;
case 75 ... 99:
// write to a random range of the vmo via a direct memory reference
if (ptrs_[idx]) {
Printf("W");
rand_buffer_range(&off, &len);
memcpy(reinterpret_cast<void*>(ptrs_[idx] + off), buf.data(), len);
}
break;
}
fflush(stdout);
}
return 0;
}
void SingleVmoTestInstance::CheckVmoThreadError(zx_status_t status, const char* error) {
// Ignore errors while shutting down, since they're almost certainly due to the
// pager disappearing.
if (!shutdown_ && status != ZX_OK) {
fprintf(stderr, "[test instance 0x%zx]: %s, error %d\n", test_instance_id_, error, status);
if (use_pager_) {
fprintf(stderr,
"[test instance 0x%zx]: pager debug state: thread count %d, created %d, detached %d, "
"reset %d\n",
test_instance_id_, pager_thread_count_.load(), debug_pager_threads_created_.load(),
debug_pager_detached_.load(), debug_pager_reset_.load());
for (size_t i = kNumVmoThreads; i < kNumThreads; i++) {
zx_info_thread_t info;
uint64_t actual, actual_count;
status = zx_object_get_info(thread_handles_[i].get(), ZX_INFO_THREAD, &info, sizeof(info),
&actual, &actual_count);
if (status != ZX_OK) {
fprintf(stderr,
"[test instance 0x%zx]: ZX_INFO_THREAD on pager thread %zu failed with %s\n",
test_instance_id_, i - kNumVmoThreads, zx_status_get_string(status));
} else {
fprintf(
stderr,
"[test instance 0x%zx]: pager thread %zu: state 0x%x, wait_exception_channel_type "
"0x%x\n",
test_instance_id_, i - kNumVmoThreads, info.state, info.wait_exception_channel_type);
}
}
}
}
}
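// Returns true if the thread backing |handle| is currently blocked waiting on a pager request.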
static bool is_thread_blocked(zx_handle_t handle) {
zx_info_thread_t info;
uint64_t actual, actual_count;
ZX_ASSERT(zx_object_get_info(handle, ZX_INFO_THREAD, &info, sizeof(info), &actual,
&actual_count) == ZX_OK);
return info.state == ZX_THREAD_STATE_BLOCKED_PAGER;
}
int SingleVmoTestInstance::pager_thread() {
zx_status_t status;
uint64_t vmo_page_count = vmo_size_ / zx_system_get_page_size();
ZX_ASSERT(vmo_page_count > 0);
auto supply_pages = [this](uint64_t off, uint64_t len) {
zx::vmo tmp_vmo;
zx_status_t status = zx::vmo::create(len, 0, &tmp_vmo);
if (status != ZX_OK) {
fprintf(stderr, "[test instance 0x%zx]: failed to create tmp vmo, error %d (%s)\n",
test_instance_id_, status, zx_status_get_string(status));
return;
}
status = tmp_vmo.op_range(ZX_VMO_OP_COMMIT, 0, len, nullptr, 0);
if (status != ZX_OK) {
fprintf(stderr, "[test instance 0x%zx]: failed to commit tmp vmo, error %d (%s)\n",
test_instance_id_, status, zx_status_get_string(status));
return;
}
// SingleVmoTestInstance doesn't currently resize the VMO, so this should work.
status = pager_.supply_pages(vmo_, off, len, tmp_vmo, 0);
if (status != ZX_OK) {
fprintf(stderr, "[test instance 0x%zx]: failed to supply pages %d, error %d (%s)\n",
test_instance_id_, pager_.get(), status, zx_status_get_string(status));
return;
}
};
auto dirty_pages = [this](uint64_t off, uint64_t len) {
// ZX_PAGER_OP_DIRTY is not supported if the VMO wasn't created with ZX_VMO_TRAP_DIRTY.
ZX_ASSERT(trap_dirty_);
zx_status_t status = pager_.op_range(ZX_PAGER_OP_DIRTY, vmo_, off, len, 0);
// ZX_ERR_NOT_FOUND is an acceptable error status as pages that have yet to be dirtied are clean
// and can get evicted.
if (status != ZX_OK && status != ZX_ERR_NOT_FOUND) {
fprintf(stderr, "[test instance 0x%zx]: failed to dirty pages %d, error %d (%s)\n",
test_instance_id_, pager_.get(), status, zx_status_get_string(status));
return;
}
};
auto writeback_pages = [this](uint64_t off, uint64_t len) {
zx_status_t status = pager_.op_range(ZX_PAGER_OP_WRITEBACK_BEGIN, vmo_, off, len, 0);
if (status != ZX_OK) {
fprintf(stderr, "[test instance 0x%zx]: failed to begin writeback %d, error %d (%s)\n",
test_instance_id_, pager_.get(), status, zx_status_get_string(status));
return;
}
status = pager_.op_range(ZX_PAGER_OP_WRITEBACK_END, vmo_, off, len, 0);
if (status != ZX_OK) {
fprintf(stderr, "[test instance 0x%zx]: failed to end writeback %d, error %d (%s)\n",
test_instance_id_, pager_.get(), status, zx_status_get_string(status));
return;
}
};
auto rng = RngGen();
#if VMSTRESS_DEBUG
// Counters to log pager thread progress for debugging stalls.
zx::duration logging_interval = zx::duration(ZX_MIN(2));
zx::time time_prev = zx::clock::get_monotonic();
uint64_t ops_per_interval = 0;
#endif // VMSTRESS_DEBUG
while (!shutdown_.load()) {
#if VMSTRESS_DEBUG
++ops_per_interval;
zx::time time_now = zx::clock::get_monotonic();
if (time_now > time_prev + logging_interval) {
fprintf(
stderr, "[test instance 0x%zx]: pager ops in last %" PRIi64 " minute(s): %" PRIu64 "\n",
test_instance_id_, (time_now - time_prev) / zx::duration(ZX_MIN(1)), ops_per_interval);
time_prev = time_now;
ops_per_interval = 0;
}
#endif // VMSTRESS_DEBUG
zx::vmo tmp_vmo;
uint64_t off, size;
zx::time deadline;
int r = uniform_rand<int>(100, rng);
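// The case ranges below weight the operations: 4% supply, 2% dirty, 45% read the port,
// 40% fulfill a queued request, 9% writeback.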
switch (r) {
case 0 ... 3: // supply a random range of pages
{
off = uniform_rand(vmo_page_count, rng);
size = std::min(uniform_rand(vmo_page_count, rng), vmo_page_count - off);
supply_pages(off * zx_system_get_page_size(), size * zx_system_get_page_size());
break;
}
case 4 ... 5: // dirty a random range of pages
{
if (trap_dirty_) {
off = uniform_rand(vmo_page_count, rng);
size = std::min(uniform_rand(vmo_page_count, rng), vmo_page_count - off);
dirty_pages(off * zx_system_get_page_size(), size * zx_system_get_page_size());
}
break;
}
case 6 ... 50: // read from the port
{
fbl::AutoLock lock(&mtx_);
if (requests_.size() == kNumVmoThreads) {
break;
} else {
// We still need to at least query the port if all vmo threads are
// blocked, in case we need to read the last thread's packet.
deadline = zx::time::infinite_past();
for (unsigned i = 0; i < kNumVmoThreads; i++) {
if (!is_thread_blocked(thread_handles_[i].get())) {
deadline = zx::clock::get_monotonic() + zx::msec(10);
break;
}
}
}
}
zx_port_packet_t packet;
status = port_.wait(deadline, &packet);
if (status != ZX_OK) {
if (status != ZX_ERR_TIMED_OUT) {
fprintf(stderr, "[test instance 0x%zx]: failed to read port, error %d (%s)\n",
test_instance_id_, status, zx_status_get_string(status));
}
} else if (packet.type != ZX_PKT_TYPE_PAGE_REQUEST ||
(packet.page_request.command != ZX_PAGER_VMO_READ &&
packet.page_request.command != ZX_PAGER_VMO_DIRTY)) {
fprintf(stderr, "[test instance 0x%zx]: unexpected packet, error %d %d\n",
test_instance_id_, packet.type, packet.page_request.command);
} else {
fbl::AutoLock lock(&mtx_);
requests_.push_back(packet.page_request);
}
break;
case 51 ... 90: // fulfill a random request
zx_packet_page_request_t req;
{
fbl::AutoLock lock(&mtx_);
if (requests_.is_empty()) {
break;
}
off = uniform_rand(requests_.size(), rng);
req = requests_.erase(off);
}
if (req.command == ZX_PAGER_VMO_READ) {
supply_pages(req.offset, req.length);
} else if (req.command == ZX_PAGER_VMO_DIRTY) {
dirty_pages(req.offset, req.length);
}
break;
case 91 ... 99: // writeback a random range
{
off = uniform_rand(vmo_page_count, rng);
size = std::min(uniform_rand(vmo_page_count, rng), vmo_page_count - off);
writeback_pages(off * zx_system_get_page_size(), size * zx_system_get_page_size());
break;
}
}
fflush(stdout);
}
// Have the last pager thread tear down the pager. Randomly either detach the vmo (and
// close the pager after all test threads are done) or immediately close the pager handle.
if (--pager_thread_count_ == 0) {
if (uniform_rand(2, rng)) {
status = pager_.detach_vmo(vmo_);
if (status != ZX_OK) {
fprintf(stderr, "[test instance 0x%zx]: failed to detach vmo, error %d (%s)\n",
test_instance_id_, status, zx_status_get_string(status));
}
debug_pager_detached_ = true;
} else {
pager_.reset();
debug_pager_reset_ = true;
}
}
return 0;
}
zx_status_t SingleVmoTestInstance::Start() {
auto status = zx::port::create(0, &port_);
if (status != ZX_OK) {
return status;
}
if (use_pager_) {
status = zx::pager::create(0, &pager_);
if (status != ZX_OK) {
return status;
}
uint32_t options = 0;
if (trap_dirty_) {
options |= ZX_VMO_TRAP_DIRTY;
}
// create a test vmo
status = pager_.create_vmo(options, port_, 0, vmo_size_, &vmo_);
if (status != ZX_OK) {
return status;
}
// create an event to signal when the VMO threads can start generating requests.
status = zx::event::create(0, &pager_threads_ready_);
} else {
status = zx::vmo::create(vmo_size_, 0, &vmo_);
}
if (status != ZX_OK) {
return status;
}
// create a pile of threads
auto worker = [](void* arg) -> int {
return static_cast<SingleVmoTestInstance*>(arg)->vmo_thread();
};
auto pager_worker = [](void* arg) -> int {
return static_cast<SingleVmoTestInstance*>(arg)->pager_thread();
};
for (uint32_t i = 0; i < std::size(threads_); i++) {
// vmo threads need to come first, since the pager workers need to reference
// the vmo worker thread handles.
bool is_vmo_worker = i < kNumVmoThreads || !use_pager_;
thrd_create_with_name(threads_ + i, is_vmo_worker ? worker : pager_worker, this,
is_vmo_worker ? "vmstress_worker" : "pager_worker");
zx::unowned_thread unowned(thrd_get_zx_handle(threads_[i]));
ZX_ASSERT(unowned->duplicate(ZX_RIGHT_SAME_RIGHTS, thread_handles_ + i) == ZX_OK);
}
if (use_pager_) {
debug_pager_threads_created_ = true;
status = pager_threads_ready_.signal(0, ZX_USER_SIGNAL_0);
ZX_ASSERT_MSG(status == ZX_OK, "event signal failed with %s\n", zx_status_get_string(status));
}
return ZX_OK;
}
zx_status_t SingleVmoTestInstance::Stop() {
zx::port port;
zx::port::create(0, &port);
std::array<zx::channel, kNumVmoThreads> channels;
if (use_pager_) {
// We need to handle potential crashes in the vmo threads when the pager is torn down. Since
// not all threads will actually crash, we can't stop handling crashes until all threads
// have terminated.
// TODO: Note that these crashes may produce visible output in the system logs, and this
// shutdown should maybe be restructured to prevent that.
for (unsigned i = 0; i < kNumVmoThreads; i++) {
zx_status_t status = thread_handles_[i].create_exception_channel(0, &channels[i]);
ZX_ASSERT(status == ZX_OK);
status = channels[i].wait_async(port, i, ZX_CHANNEL_READABLE | ZX_CHANNEL_PEER_CLOSED, 0);
ZX_ASSERT(status == ZX_OK);
}
}
shutdown_.store(true);
if (use_pager_) {
uint64_t running_count = kNumVmoThreads;
while (running_count) {
zx_port_packet_t packet;
ZX_ASSERT(port.wait(zx::time::infinite(), &packet) == ZX_OK);
if (packet.signal.observed & ZX_CHANNEL_READABLE) {
const zx::channel& channel = channels[packet.key];
zx_exception_info_t exception_info;
zx::exception exception;
ZX_ASSERT(channel.read(0, &exception_info, exception.reset_and_get_address(),
sizeof(exception_info), 1, nullptr, nullptr) == ZX_OK);
zx::thread& thrd = thread_handles_[packet.key];
zx_exception_report_t report;
ZX_ASSERT(thrd.get_info(ZX_INFO_THREAD_EXCEPTION_REPORT, &report, sizeof(report), NULL,
NULL) == ZX_OK);
ZX_ASSERT(report.header.type == ZX_EXCP_FATAL_PAGE_FAULT);
// thrd_exit takes a parameter, but we don't actually read it when we join
zx_thread_state_general_regs_t regs;
ZX_ASSERT(thrd.read_state(ZX_THREAD_STATE_GENERAL_REGS, &regs, sizeof(regs)) == ZX_OK);
#if defined(__x86_64__)
regs.rip = reinterpret_cast<uintptr_t>(thrd_exit);
#else
regs.pc = reinterpret_cast<uintptr_t>(thrd_exit);
#endif
ZX_ASSERT(thrd.write_state(ZX_THREAD_STATE_GENERAL_REGS, &regs, sizeof(regs)) == ZX_OK);
uint32_t exception_state = ZX_EXCEPTION_STATE_HANDLED;
ZX_ASSERT(exception.set_property(ZX_PROP_EXCEPTION_STATE, &exception_state,
sizeof(exception_state)) == ZX_OK);
ZX_ASSERT(channel.wait_async(port, packet.key, ZX_CHANNEL_READABLE | ZX_CHANNEL_PEER_CLOSED,
0) == ZX_OK);
} else {
running_count--;
}
}
}
for (unsigned i = 0; i < std::size(threads_); i++) {
thrd_join(threads_[i], nullptr);
}
for (unsigned i = 0; i < (use_pager_ ? kNumVmoThreads : kNumThreads); i++) {
if (ptrs_[i]) {
zx::vmar::root_self()->unmap(ptrs_[i], vmo_size_);
}
}
return ZX_OK;
}
// This test case randomly creates vmos and COW clones, randomly writes into the vmos,
// and performs basic COW integrity checks.
//
// Each created vmo has a 32-bit id. These ids are monotonically increasing. Each vmo has
// its own 32-bit op-id, which is incremented on each write operation. These two 32-bit ids
// are combined into a single 64-bit id which uniquely identifies every write operation. The
// test uses these 64-bit ids to perform various COW integrity checks which are documented
// in more detail within the test implementation.
//
// This test generally does not handle id overflow because the random teardown of various parts
// of the test makes the chance of overflow vanishingly small.
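// For example, since op-ids start at 1, the third write ever performed on the vmo with id 7 is
// recorded in that page as the 64-bit value (3ull << 32) | 7.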
class CowCloneTestInstance : public TestInstance {
public:
CowCloneTestInstance(VmStressTest* test, uint64_t instance_id)
: TestInstance(test, instance_id) {}
zx_status_t Start() final;
zx_status_t Stop() final;
private:
int op_thread();
// Aggregate for holding the test info associated with a single vmo.
struct TestData : public fbl::RefCounted<struct test_data> {
TestData(uint32_t id, uint32_t data_idx, zx::vmo test_vmo, uint32_t pc, uint32_t offset_page,
uintptr_t mapped_ptr, fbl::RefPtr<struct TestData> p, uint32_t clone_start_op,
uint32_t clone_end_op)
: vmo_id(id),
idx(data_idx),
vmo(std::move(test_vmo)),
page_count(pc),
offset_page_idx(offset_page),
ptr(mapped_ptr),
parent(std::move(p)),
parent_clone_start_op_id(clone_start_op),
parent_clone_end_op_id(clone_end_op) {}
// An identifier for the vmo.
const uint32_t vmo_id;
// The index of the test data in the test_datas_ array.
const uint32_t idx;
// The vmo under test.
zx::vmo vmo;
// The number of pages in the vmo.
const uint32_t page_count;
// The page offset into the parent where this vmo starts. This has no
// meaning if this vmo has no parent.
const uint32_t offset_page_idx;
// The pointer to the vmo mapping.
const uintptr_t ptr;
// A pointer to the TestData struct which holds the parent of |vmo|, or
// null if |vmo| has no parent.
//
// Note that this reference does not keep the parent->vmo handle from being closed.
const fbl::RefPtr<struct TestData> parent;
// The id of the parent TestData at the beginning and end of the clone operation
// which created this vmo. Used for integrity checks.
const uint32_t parent_clone_start_op_id;
const uint32_t parent_clone_end_op_id;
// This can technically overflow, but the chance of a VMO living
// long enough for that to happen is astronomically low.
std::atomic<uint32_t> next_op_id = 1;
};
// Debug function for printing information associated with a particular access operation.
void DumpTestVmoAccessInfo(const fbl::RefPtr<TestData>& vmo, uint32_t page_index, uint64_t val);
static constexpr uint64_t kMaxTestVmos = 32;
static constexpr uint64_t kMaxVmoPageCount = 128;
struct {
fbl::RefPtr<struct TestData> vmo;
// Shared mutex which protects |vmo|. The mutex is taken in exclusive mode
// when creating/destroying the |vmo| in this slot. It is taken in shared
// mode when writing or when creating a clone based on this slot.
std::shared_mutex mtx;
} test_datas_[kMaxTestVmos];
// Helper function that creates a new test vmo that will be inserted at |idx| in test_datas_.
fbl::RefPtr<TestData> CreateTestVmo(uint32_t idx, StressTest::Rng& rng);
// Helper function that performs a write operation on |test_data|, which is currently
// at index |idx| in test_datas_.
bool TestVmoWrite(uint32_t idx, const fbl::RefPtr<TestData>& test_data, StressTest::Rng& rng);
thrd_t threads_[kNumThreads] = {};
std::atomic<bool> shutdown_{false};
// Id counter for vmos.
std::atomic<uint32_t> next_vmo_id_{1};
// Limit for next_vmo_id_ to prevent overflow concerns.
static constexpr uint32_t kMaxVmoId = UINT32_MAX - kNumThreads;
static uint32_t get_op_id(uint64_t full_id) { return static_cast<uint32_t>(full_id >> 32); }
static uint32_t get_vmo_id(uint64_t full_id) { return full_id & 0xffffffff; }
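// The 64-bit id packs the vmo id into the low 32 bits and the op id into the high 32 bits,
// e.g. make_full_id(0x2, 0x5) == 0x0000'0005'0000'0002.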
static uint64_t make_full_id(uint32_t vmo_id, uint32_t op_id) {
return vmo_id | (static_cast<uint64_t>(op_id) << 32);
}
};
zx_status_t CowCloneTestInstance::Start() {
for (unsigned i = 0; i < kNumThreads; i++) {
auto fn = [](void* arg) -> int { return static_cast<CowCloneTestInstance*>(arg)->op_thread(); };
thrd_create_with_name(threads_ + i, fn, this, "op_worker");
}
return ZX_OK;
}
zx_status_t CowCloneTestInstance::Stop() {
shutdown_.store(true);
bool success = true;
for (unsigned i = 0; i < kNumThreads; i++) {
int32_t res;
thrd_join(threads_[i], &res);
success &= (res == 0);
}
for (unsigned i = 0; i < kMaxTestVmos; i++) {
if (test_datas_[i].vmo) {
zx::vmar::root_self()->unmap(test_datas_[i].vmo->ptr,
test_datas_[i].vmo->page_count * zx_system_get_page_size());
}
}
if (!success) {
PrintfAlways("Test failure, hanging to preserve state\n");
zx_nanosleep(ZX_TIME_INFINITE);
}
return ZX_OK;
}
void CowCloneTestInstance::DumpTestVmoAccessInfo(const fbl::RefPtr<TestData>& vmo,
uint32_t page_index, uint64_t val) {
PrintfAlways("Got value %lx (%x)\n", val, page_index);
zx_info_vmo_t info;
ZX_ASSERT(vmo->vmo.get_info(ZX_INFO_VMO, &info, sizeof(info), nullptr, nullptr) == ZX_OK);
PrintfAlways("koid=%lx(%lu)\n", info.koid, info.koid);
PrintfAlways("vmo ids are: ");
auto cur = vmo;
while (cur) {
PrintfAlways("%x ", cur->vmo_id);
cur = cur->parent;
}
PrintfAlways("\n");
}
fbl::RefPtr<CowCloneTestInstance::TestData> CowCloneTestInstance::CreateTestVmo(
uint32_t idx, StressTest::Rng& rng) {
uint32_t parent_idx = uniform_rand<uint32_t>(kMaxTestVmos, rng);
auto& parent_vmo = test_datas_[parent_idx];
zx::vmo vmo;
fbl::RefPtr<struct TestData> parent;
uint32_t parent_clone_start_op_id;
uint32_t parent_clone_end_op_id;
uint32_t page_count = uniform_rand_range<uint32_t>(1, kMaxVmoPageCount, rng);
uint32_t page_offset = 0;
if (parent_idx != idx) {
if (!parent_vmo.mtx.try_lock_shared()) {
// If something has an exclusive lock on the target vmo,
// then just abort the operation.
return nullptr;
}
if (parent_vmo.vmo) {
parent = parent_vmo.vmo;
page_offset = uniform_rand(parent->page_count, rng);
parent_clone_start_op_id = parent->next_op_id.load();
zx_status_t status =
parent->vmo.create_child(ZX_VMO_CHILD_SNAPSHOT, page_offset * zx_system_get_page_size(),
page_count * zx_system_get_page_size(), &vmo);
ZX_ASSERT_MSG(status == ZX_OK, "Failed to clone vmo %d", status);
parent_clone_end_op_id = parent->next_op_id.load();
} else {
// There's no parent, so we're just going to create a new vmo.
parent = nullptr;
}
parent_vmo.mtx.unlock_shared();
} else {
// We're creating a vmo at |idx|, so there isn't a vmo there now.
parent = nullptr;
}
if (!parent) {
parent_clone_start_op_id = parent_clone_end_op_id = 0;
zx_status_t status = zx::vmo::create(page_count * zx_system_get_page_size(), 0, &vmo);
ZX_ASSERT_MSG(status == ZX_OK, "Failed to create vmo %d", status);
}
uintptr_t ptr;
zx::vmar::root_self()->map(ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0, vmo, 0,
page_count * zx_system_get_page_size(), &ptr);
uint32_t vmo_id = next_vmo_id_.fetch_add(1);
// The chance that an individual instance lives this long is vanishingly small, and it
// would take a long time. So just abort the test so we don't have to deal with it.
ZX_ASSERT(vmo_id < kMaxVmoId);
auto res = fbl::MakeRefCounted<TestData>(vmo_id, idx, std::move(vmo), page_count, page_offset,
ptr, std::move(parent), parent_clone_start_op_id,
parent_clone_end_op_id);
ZX_ASSERT(res);
return res;
}
bool CowCloneTestInstance::TestVmoWrite(uint32_t idx, const fbl::RefPtr<TestData>& test_data,
StressTest::Rng& rng) {
uint32_t page_idx = uniform_rand(test_data->page_count, rng);
auto p = reinterpret_cast<std::atomic_uint64_t*>(test_data->ptr +
page_idx * zx_system_get_page_size());
// We want the ids to be monotonically increasing. To prevent races between two
// threads at the same location from mixing up the order of their op-ids, do a cmpxchg
// and regenerate the op-id if we see a race.
uint64_t old = p->load();
uint32_t my_op_id = test_data->next_op_id.fetch_add(1);
uint64_t desired = make_full_id(test_data->vmo_id, my_op_id);
while (!p->compare_exchange_strong(old, desired)) {
my_op_id = test_data->next_op_id.fetch_add(1);
desired = make_full_id(test_data->vmo_id, my_op_id);
}
uint32_t write_vmo_id = get_vmo_id(old);
if (write_vmo_id == test_data->vmo_id) {
// If the vmo id is for this vmo, then the old op id must be
// less than whatever we wrote.
if (get_op_id(old) < get_op_id(desired)) {
return true;
} else {
PrintfAlways("Got high op id for current vmo\n");
DumpTestVmoAccessInfo(test_data, page_idx, old);
return false;
}
} else if (write_vmo_id == 0) {
// Nothing has ever written to the page.
if (old == 0) {
return true;
} else {
PrintfAlways("Got non-zero op id for zero vmo id\n");
DumpTestVmoAccessInfo(test_data, page_idx, old);
return false;
}
}
// Look up the parent chain for the vmo which is responsible for writing
// the old data that we saw.
auto cur = test_data;
uint32_t parent_idx = page_idx;
while (cur != nullptr) {
// cur isn't responsible for writing the data, so it must have an ancestor that did it.
ZX_ASSERT(cur->parent);
parent_idx += cur->offset_page_idx;
// The index we're inspecting lies past the end of the parent, which means
// it was initialized to 0 in cur and we somehow didn't see the vmo originally
// responsible for the write.
if (parent_idx >= cur->parent->page_count) {
PrintfAlways("Parent search overflow\n");
DumpTestVmoAccessInfo(test_data, page_idx, old);
return false;
}
if (cur->parent->vmo_id != write_vmo_id) {
// No match, so continue up the chain.
cur = cur->parent;
continue;
}
// The op id we saw must be smaller than the next op id at the time of the clone op.
if (get_op_id(old) >= cur->parent_clone_end_op_id) {
PrintfAlways("Got op-id from after clone operation\n");
DumpTestVmoAccessInfo(test_data, page_idx, old);
return false;
}
// It's possible that the parent vmo has already been destroyed, so
// lock its index and check if it's what we expect.
auto& maybe_parent = test_datas_[cur->parent->idx];
if (cur->parent->idx != cur->idx && maybe_parent.mtx.try_lock_shared()) {
if (maybe_parent.vmo == cur->parent) {
auto val = reinterpret_cast<std::atomic_uint64_t*>(maybe_parent.vmo->ptr +
parent_idx * zx_system_get_page_size())
->load();
// If the clone sees a particular write_vmo_id, then that means the VMO
// with the associated id wrote to that address before the clone operation.
// Once that happens, the write ID that ancestor sees can't change.
// Furthermore, the op ID can only increase.
if (get_vmo_id(val) != write_vmo_id || (get_op_id(val) < get_op_id(old))) {
DumpTestVmoAccessInfo(test_data, page_idx, old);
DumpTestVmoAccessInfo(maybe_parent.vmo, parent_idx, val);
shutdown_.store(true);
maybe_parent.mtx.unlock_shared();
return false;
}
}
maybe_parent.mtx.unlock_shared();
}
break;
}
if (cur == nullptr) {
// We somehow didn't find what performed the write.
PrintfAlways("Parent search failure\n");
DumpTestVmoAccessInfo(test_data, page_idx, old);
return false;
}
return true;
}
int CowCloneTestInstance::op_thread() {
auto rng = RngGen();
while (!shutdown_.load()) {
uint32_t idx = uniform_rand<uint32_t>(kMaxTestVmos, rng);
auto& test_data = test_datas_[idx];
uint32_t rand_op = uniform_rand(1000, rng);
// 0 -> 13: create vmo
// 14: no-op
// 15 -> 19: destroy vmo
// 20 -> 999: random write
if (rand_op < 20) {
test_data.mtx.lock();
if (rand_op < 14 && test_data.vmo == nullptr) {
test_data.vmo = CreateTestVmo(idx, rng);
} else if (rand_op >= 15 && test_data.vmo != nullptr) {
for (unsigned i = 0; i < test_data.vmo->page_count; i++) {
auto val = reinterpret_cast<std::atomic_uint64_t*>(test_data.vmo->ptr +
i * zx_system_get_page_size())
->load();
// vmo ids are monotonically increasing, so we shouldn't see
// any ids greater than the current vmo's.
if (get_vmo_id(val) > test_data.vmo->vmo_id) {
DumpTestVmoAccessInfo(test_data.vmo, i, val);
shutdown_.store(true);
test_data.mtx.unlock();
return -1;
}
}
zx::vmar::root_self()->unmap(test_data.vmo->ptr,
test_data.vmo->page_count * zx_system_get_page_size());
test_data.vmo->vmo.reset();
test_data.vmo = nullptr;
}
test_data.mtx.unlock();
} else {
test_data.mtx.lock_shared();
if (test_data.vmo != nullptr) {
if (!TestVmoWrite(idx, test_data.vmo, rng)) {
test_data.mtx.unlock_shared();
return -1;
}
}
test_data.mtx.unlock_shared();
}
}
return 0;
}
// This test instance runs multiple VMOs across multiple threads and tries to trigger unusual
// race conditions and kernel failures that come from mixing parallelism of all kinds of operations.
// The trade-off is that the test almost never knows what the outcome of an operation should be, so
// it only catches bugs where we can ultimately trip a kernel assert or something similar.
class MultiVmoTestInstance : public TestInstance {
public:
MultiVmoTestInstance(VmStressTest* test, uint64_t instance_id, uint64_t mem_limit)
: TestInstance(test, instance_id),
memory_limit_pages_(mem_limit / zx_system_get_page_size()),
// Scale our maximum threads to ensure that if all threads allocate a full-size vmo (via
// copy-on-write or otherwise) we won't exceed our memory limit.
max_threads_(std::min(memory_limit_pages_ / kMaxVmoPages, kMaxThreads)) {}
zx_status_t Start() override {
// If max threads was calculated to be smaller than low threads then we really don't have
// much memory. Don't try to recover from this case; just fail.
if (max_threads_ < low_threads_) {
PrintfAlways("Not enough free memory to run test instance\n");
return ZX_ERR_NO_MEMORY;
}
if (shutdown_) {
return ZX_ERR_INTERNAL;
}
zx::unowned_resource iommu_resource = IommuResource();
if (*iommu_resource) {
zx_iommu_desc_dummy_t desc;
zx_status_t result =
zx::iommu::create(*iommu_resource, ZX_IOMMU_TYPE_DUMMY, &desc, sizeof(desc), &iommu_);
if (result != ZX_OK) {
return result;
}
result = zx::bti::create(iommu_, 0, 0xdeadbeef, &bti_);
if (result != ZX_OK) {
return result;
}
}
auto rng = RngGen();
spawn_root_vmo(rng);
return ZX_OK;
}
zx_status_t Stop() override {
// Signal shutdown and wait for everyone. It's possible for living_threads_ to increase after
// shutdown_ is set if a living thread creates another thread. This is fine since it means
// living_threads_ goes from a non-zero value to a non-zero value, but it will never go from 0
// to non-zero.
shutdown_ = true;
while (living_threads_ > 0) {
zx::nanosleep(zx::deadline_after(zx::msec(500)));
}
return ZX_OK;
}
uint64_t get_max_threads() const { return max_threads_; }
private:
void spawn_root_vmo(StressTest::Rng& rng) {
zx::vmo vmo;
bool reliable_mappings = true;
uint64_t vmo_size = uniform_rand(kMaxVmoPages, rng) * zx_system_get_page_size();
// Mix in contiguous VMOs as a significant fraction, since physical page loaning and borrowing
// needs to get covered heavily. Also, some other VMO operations work differently on
// contiguous VMOs.
if (bti_ && uniform_rand(2, rng) == 0) {
zx_status_t result = zx::vmo::create_contiguous(bti_, vmo_size, 0, &vmo);
if (result != ZX_OK) {
return;
}
is_contiguous_ = true;
} else {
uint32_t options = 0;
// Skew away from resizable VMOs as they are not common and many operations don't work.
if (uniform_rand(4, rng) == 0) {
options |= ZX_VMO_RESIZABLE;
reliable_mappings = false;
}
if (uniform_rand(2, rng) == 0) {
zx::pager pager;
zx::port port;
zx_status_t result = zx::pager::create(0, &pager);
ZX_ASSERT(result == ZX_OK);
result = zx::port::create(0, &port);
ZX_ASSERT(result == ZX_OK);
if (uniform_rand(2, rng) == 0) {
options |= ZX_VMO_TRAP_DIRTY;
}
result = pager.create_vmo(options, port, 0, vmo_size, &vmo);
ZX_ASSERT(result == ZX_OK);
// Randomly discard reliable mappings even though not resizable to give the pager a chance
// to generate faults on non-resizable vmos.
if (reliable_mappings && uniform_rand(4, rng) == 0) {
reliable_mappings = false;
}
// Force spin up the pager thread as it's required to ensure the VMO threads do not block
// forever.
zx::vmo dup_vmo;
result = vmo.duplicate(ZX_RIGHT_SAME_RIGHTS, &dup_vmo);
ZX_ASSERT(result == ZX_OK);
// Stash the pager handle value to be passed to the writeback thread below before
// transferring ownership to the pager thread. A pager handle cannot be duplicated, so we
// cannot create another handle for the writeback thread to use. We also cannot make the
// pager a class member because we might spawn another root VMO which will take a new pager.
const zx_handle_t pager_handle = pager.get();
if (!make_thread([this, pager = std::move(pager), port = std::move(port),
vmo = std::move(dup_vmo)]() mutable {
pager_thread(std::move(pager), std::move(port), std::move(vmo));
})) {
// If the pager thread couldn't spin up, bail right now and don't make the client thread, as
// that client thread will either block or hard crash; either scenario will make that
// thread unrecoverable.
return;
}
// Now that the pager thread has been successfully created, spin up the writeback thread
// as well.
result = vmo.duplicate(ZX_RIGHT_SAME_RIGHTS, &dup_vmo);
ZX_ASSERT(result == ZX_OK);
make_thread([this, vmo = std::move(dup_vmo), pager_handle]() mutable {
writeback_thread(pager_handle, std::move(vmo));
});
} else {
zx_status_t result = zx::vmo::create(vmo_size, options, &vmo);
ZX_ASSERT(result == ZX_OK);
}
}
auto ops = make_ops(rng);
make_thread([this, vmo = std::move(vmo), ops = std::move(ops), reliable_mappings]() mutable {
op_thread(std::move(vmo), std::move(ops), reliable_mappings);
});
}
// TODO: pager_thread currently just fulfills any page faults correctly. This should be expanded
// to detach the vmo, fail ranges, pre-supply pages, etc.
void pager_thread(zx::pager pager, zx::port port, zx::vmo vmo) {
// To exit the pager thread we need to know once we have the only reference to the vmo. This
// requires two things: our vmo handle must be the only handle to that vmo, and the vmo must have
// no children. The first condition has no signal, so until we know we have the only handle we
// will use timed waits and poll. Once we are the solo owner, tracked in this variable, we
// will be able to use the zero children signal.
bool solo_owner = false;
while (1) {
zx_port_packet_t packet;
zx_status_t result =
port.wait(solo_owner ? zx::time::infinite() : zx::deadline_after(zx::msec(100)), &packet);
if (result == ZX_ERR_TIMED_OUT) {
zx_info_handle_count_t info;
result = vmo.get_info(ZX_INFO_HANDLE_COUNT, &info, sizeof(info), nullptr, nullptr);
ZX_ASSERT(result == ZX_OK);
// Check if we have the only handle
if (info.handle_count == 1) {
// Start watching for the zero children signal.
result = vmo.wait_async(port, 1, ZX_VMO_ZERO_CHILDREN, 0);
ZX_ASSERT(result == ZX_OK);
solo_owner = true;
}
continue;
}
if (packet.key == 1) {
ZX_ASSERT(solo_owner);
zx_pager_vmo_stats_t stats;
zx_status_t status = zx_pager_query_vmo_stats(
pager.get(), vmo.get(), ZX_PAGER_RESET_VMO_STATS, &stats, sizeof(stats));
ZX_ASSERT_MSG(status == ZX_OK, "Failed to query VMO stats %s\n",
zx_status_get_string(status));
const bool was_modified = (stats.modified == ZX_PAGER_VMO_STATS_MODIFIED);
// Writeback any dirty pages before exiting.
zx_vmo_dirty_range_t ranges[10];
size_t actual = 0, avail = 0;
size_t vmo_size;
status = vmo.get_size(&vmo_size);
ZX_ASSERT_MSG(status == ZX_OK, "Failed to get VMO size %s\n", zx_status_get_string(status));
uint64_t start = 0, len = vmo_size;
do {
status = zx_pager_query_dirty_ranges(pager.get(), vmo.get(), start, len, &ranges[0],
sizeof(ranges), &actual, &avail);
ZX_ASSERT_MSG(status == ZX_OK, "Failed to query dirty ranges %s\n",
zx_status_get_string(status));
if (actual == 0) {
break;
}
for (size_t i = 0; i < actual; i++) {
status = zx_pager_op_range(pager.get(), ZX_PAGER_OP_WRITEBACK_BEGIN, vmo.get(),
ranges[i].offset, ranges[i].length, ranges[i].options);
ZX_ASSERT_MSG(status == ZX_OK, "Failed to begin writeback %s\n",
zx_status_get_string(status));
status = zx_pager_op_range(pager.get(), ZX_PAGER_OP_WRITEBACK_END, vmo.get(),
ranges[i].offset, ranges[i].length, 0);
ZX_ASSERT_MSG(status == ZX_OK, "Failed to end writeback %s\n",
zx_status_get_string(status));
}
uint64_t new_start = ranges[actual - 1].offset + ranges[actual - 1].length;
len -= (new_start - start);
start = new_start;
} while (avail > actual);
ZX_ASSERT(avail == actual);
status = zx_pager_query_dirty_ranges(pager.get(), vmo.get(), 0, vmo_size, nullptr, 0,
nullptr, &avail);
ZX_ASSERT_MSG(avail == 0, "Found %zu dirty ranges after writeback\n", avail);
// No one should have modified the VMO since the last time we reset modified.
status = zx_pager_query_vmo_stats(pager.get(), vmo.get(), ZX_PAGER_RESET_VMO_STATS, &stats,
sizeof(stats));
ZX_ASSERT_MSG(status == ZX_OK, "Failed to query VMO stats %s\n",
zx_status_get_string(status));
ZX_ASSERT_MSG(stats.modified == 0, "VMO was modified (prev query: %d)\n", was_modified);
// No children, and we have the only handle. Done.
break;
}
ZX_ASSERT(packet.key == 0);
ZX_ASSERT(packet.type == ZX_PKT_TYPE_PAGE_REQUEST);
if (packet.page_request.command == ZX_PAGER_VMO_COMPLETE) {
// VMO is finished, so we have nothing to do. Technically since we have a handle to the vmo
// this case will never happen.
break;
} else if (packet.page_request.command != ZX_PAGER_VMO_READ &&
packet.page_request.command != ZX_PAGER_VMO_DIRTY) {
PrintfAlways("Unknown page_request command %d\n", packet.page_request.command);
return;
}
// No matter what we decide to do we *MUST* ensure we also fulfill the page fault in some way,
// otherwise we risk blocking the faulting thread (and also ourselves) forever. Above all we
// must guarantee that the op_thread can progress to the point of closing the VMO such that
// we end up with the only VMO handle.
zx::vmo aux_vmo;
if (packet.page_request.command == ZX_PAGER_VMO_READ) {
if (zx::vmo::create(packet.page_request.length, 0, &aux_vmo) != ZX_OK) {
PrintfAlways("Failed to create VMO of length %" PRIu64 " to fulfill page fault\n",
packet.page_request.length);
return;
}
}
do {
// Get the latest VMO size; a cached value could get out of sync because we may intentionally
// call resize from multiple threads concurrently.
uint64_t vmo_size;
zx_status_t get_size_status = vmo.get_size(&vmo_size);
ZX_ASSERT(get_size_status == ZX_OK);
if (packet.page_request.offset >= vmo_size) {
// Nothing to fulfill since the requested range has been seen to be entirely outside the
// VMO after the request was created by the kernel.
result = ZX_OK;
break;
}
ZX_ASSERT(packet.page_request.offset < vmo_size);
uint64_t trimmed_length =
std::min(packet.page_request.length, vmo_size - packet.page_request.offset);
if (packet.page_request.command == ZX_PAGER_VMO_READ) {
ZX_ASSERT(aux_vmo.is_valid());
result = pager.supply_pages(vmo, packet.page_request.offset, trimmed_length, aux_vmo, 0);
} else {
ZX_ASSERT(packet.page_request.command == ZX_PAGER_VMO_DIRTY);
result =
pager.op_range(ZX_PAGER_OP_DIRTY, vmo, packet.page_request.offset, trimmed_length, 0);
// ZX_ERR_NOT_FOUND is an acceptable error status as pages that have yet to be dirtied are
// clean and can get evicted.
if (result == ZX_ERR_NOT_FOUND) {
result = ZX_OK;
}
}
// If the underlying VMO was resized then it's possible the destination offset is now out of
// range. This is okay and we can try again if necessary to fulfill the pages that aren't
// beyond the VMO size. In any other case something has gone horribly wrong.
} while (result == ZX_ERR_OUT_OF_RANGE);
if (result != ZX_OK) {
PrintfAlways("Failed to %s pages: %d\n",
packet.page_request.command == ZX_PAGER_VMO_READ ? "supply" : "dirty", result);
return;
}
}
}
void writeback_thread(zx_handle_t pager, zx::vmo vmo) {
uint64_t total_attempts = 0, successful_attempts = 0;
while (!shutdown_) {
// If there are only two handles remaining to the VMO, these will belong to the pager thread
// and the writeback thread, i.e. no op_threads will access the VMO anymore. Tear down the
// writeback thread too so that the pager thread can clean up. This ensures that no threads
// corresponding to this VMO are left lingering behind and our spawn_root_vmo logic in
// make_thread works as intended, and creates new VMOs (and new pagers) and new threads for
// those VMOs.
zx_info_handle_count_t info;
zx_status_t status =
vmo.get_info(ZX_INFO_HANDLE_COUNT, &info, sizeof(info), nullptr, nullptr);
ZX_ASSERT(status == ZX_OK);
if (info.handle_count == 2) {
break;
}
// Ensure we can query VMO stats successfully.
zx_pager_vmo_stats_t stats;
status = zx_pager_query_vmo_stats(pager, vmo.get(), 0, &stats, sizeof(stats));
ZX_ASSERT_MSG(status == ZX_OK, "Failed to query VMO stats %s\n",
zx_status_get_string(status));
const bool was_modified = (stats.modified == ZX_PAGER_VMO_STATS_MODIFIED);
status = zx_pager_query_vmo_stats(pager, vmo.get(), ZX_PAGER_RESET_VMO_STATS, &stats,
sizeof(stats));
ZX_ASSERT_MSG(status == ZX_OK, "Failed to query VMO stats %s\n",
zx_status_get_string(status));
// The first query did not request a reset, so if the VMO was modified before it should be
// modified now as well. If the VMO was not previously modified though, it's possible for it
// to have gotten modified between the two calls, so we cannot make any assertions in that
// case. For the same reason we cannot check that the modified state is cleared after the
// second query.
ZX_ASSERT_MSG(!was_modified || stats.modified == ZX_PAGER_VMO_STATS_MODIFIED,
"VMO modified state: %u (prev: %d)\n", stats.modified, was_modified);
zx_vmo_dirty_range_t ranges[10];
size_t actual = 0, avail = 0;
size_t vmo_size;
status = vmo.get_size(&vmo_size);
ZX_ASSERT_MSG(status == ZX_OK, "Failed to get VMO size %s\n", zx_status_get_string(status));
uint64_t start = 0, remaining_len = vmo_size;
// Make a single pass over the entire VMO during each writeback attempt. The goal is to write
// back as much as possible without having to keep retrying in the face of competing writes
// and resizes. It is fine if the size we queried above becomes stale; the loop will ignore
// any OUT_OF_RANGE errors in the case of a resize down, and a larger size will be written
// back in the next attempt.
do {
status = zx_pager_query_dirty_ranges(pager, vmo.get(), start, remaining_len, &ranges[0],
sizeof(ranges), &actual, &avail);
ZX_ASSERT_MSG(status == ZX_OK || status == ZX_ERR_OUT_OF_RANGE,
"Failed to query dirty ranges %s\n", zx_status_get_string(status));
// Nothing more to do if we could not query dirty ranges successfully. The VMO was resized
// down from under us. Retry in the next iteration of the outer loop.
if (status == ZX_ERR_OUT_OF_RANGE) {
break;
}
// No (more) dirty ranges.
if (avail == 0) {
remaining_len = 0;
break;
}
ZX_ASSERT(actual > 0);
uint64_t new_start = 0;  // Initialized in case the first writeback op below fails out of range.
for (size_t i = 0; i < actual; i++) {
status = zx_pager_op_range(pager, ZX_PAGER_OP_WRITEBACK_BEGIN, vmo.get(),
ranges[i].offset, ranges[i].length, ranges[i].options);
ZX_ASSERT_MSG(status == ZX_OK || status == ZX_ERR_OUT_OF_RANGE,
"Failed to begin writeback %s\n", zx_status_get_string(status));
if (status == ZX_ERR_OUT_OF_RANGE) {
break;
}
status = zx_pager_op_range(pager, ZX_PAGER_OP_WRITEBACK_END, vmo.get(), ranges[i].offset,
ranges[i].length, 0);
ZX_ASSERT_MSG(status == ZX_OK || status == ZX_ERR_OUT_OF_RANGE,
"Failed to end writeback %s\n", zx_status_get_string(status));
if (status == ZX_ERR_OUT_OF_RANGE) {
break;
}
new_start = ranges[i].offset + ranges[i].length;
}
if (new_start < ranges[actual - 1].offset + ranges[actual - 1].length) {
break;
}
remaining_len -= (new_start - start);
start = new_start;
} while (remaining_len > 0);
if (remaining_len == 0) {
successful_attempts++;
}
total_attempts++;
// The rate of writeback is kept relatively high to stress the system.
zx::nanosleep(zx::deadline_after(zx::msec(200)));
}
if (successful_attempts != total_attempts) {
PrintfAlways("Successful writeback attempts: %lu (out of %lu)\n", successful_attempts,
total_attempts);
}
}
// This is the main function that performs continuous operations on a vmo. It may try to spawn
// additional threads for parallelism, but they all share the same op counter to prevent any
// particular vmo hierarchy from living 'forever' by spawning new children all the time.
void op_thread(zx::vmo vmo, std::shared_ptr<std::atomic<uint64_t>> op_count,
bool reliable_mappings) {
auto rng = RngGen();
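// Optional pinned-memory token and mapping owned by this op thread; both are released by the
// deferred cleanup below when the thread exits.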
zx::pmt pmt;
std::optional<cpp20::span<uint8_t>> mapping;
auto unmap_mapping = [&mapping]() {
if (auto span = mapping) {
zx::vmar::root_self()->unmap(reinterpret_cast<uintptr_t>(span->data()), span->size_bytes());
mapping = std::nullopt;
}
};
auto cleanup = fit::defer([&unmap_mapping, &pmt]() {
unmap_mapping();
if (pmt) {
pmt.unpin();
}
});
// Query for the current size of the vmo. This could change due to other threads with handles to
// this vmo calling set-size, but should ensure a decent hit rate of random range operations.
uint64_t vmo_size;
if (vmo.get_size(&vmo_size) != ZX_OK) {
vmo_size = kMaxVmoPages * zx_system_get_page_size();
}
while (!shutdown_ && op_count->fetch_add(1) < kMaxOps) {
// Produce a random offset and size up front since many ops will need it.
uint64_t op_off, op_size;
random_off_size(rng, vmo_size, &op_off, &op_size);
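// Pick one of 141 outcomes; the case ranges below weight the operations, e.g. commit and
// decommit each get 20/141 (~14%) while most other operations get 10/141 (~7%).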
switch (uniform_rand(141, rng)) {
case 0: // give up early
Printf("G");
return;
break;
case 1 ... 10: { // duplicate
Printf("D");
zx::vmo dup_vmo;
zx_status_t result = vmo.duplicate(ZX_RIGHT_SAME_RIGHTS, &dup_vmo);
if (result != ZX_OK) {
PrintfAlways("vmo.duplicate() failed - result: %d\n", result);
}
ZX_ASSERT(result == ZX_OK);
make_thread(
[this, dup = std::move(dup_vmo), ops = op_count, reliable_mappings]() mutable {
op_thread(std::move(dup), std::move(ops), reliable_mappings);
});
break;
}
case 11 ... 20: { // read
Printf("R");
std::vector<uint8_t> buffer;
bool use_map = false;
if (mapping.has_value() && uniform_rand(2, rng) == 0) {
op_off = uniform_rand(mapping.value().size_bytes(), rng);
op_size = uniform_rand(mapping.value().size_bytes() - op_off, rng);
use_map = true;
}
// Avoid ubsan complaints by making the size at least 1, so that &buffer[start] works
// below.
buffer.resize(std::max(1ul, op_size));
// pre-commit some portion of the buffer
const size_t end = uniform_rand(op_size, rng);
const size_t start = uniform_rand(op_size, rng);
memset(&buffer[start], 42, end - std::min(end, start));
if (use_map) {
memcpy(buffer.data(), &mapping.value()[op_off], op_size);
} else {
vmo.read(buffer.data(), op_off, op_size);
}
break;
}
case 21 ... 30: { // write
Printf("W");
std::vector<uint8_t> buffer;
bool use_map = false;
if (mapping.has_value() && uniform_rand(2, rng) == 0) {
op_off = uniform_rand(mapping.value().size_bytes(), rng);
op_size = uniform_rand(mapping.value().size_bytes() - op_off, rng);
use_map = true;
}
// Avoid ubsan complaints by making the size at least 1, so that &buffer[start] works
// below.
buffer.resize(std::max(1ul, op_size));
// write some portion of the buffer with 'random' data.
const size_t end = uniform_rand(op_size, rng);
const size_t start = uniform_rand(op_size, rng);
memset(&buffer[start], 42, end - std::min(end, start));
if (use_map) {
memcpy(&mapping.value()[op_off], buffer.data(), op_size);
} else {
vmo.write(buffer.data(), op_off, op_size);
}
break;
}
case 31 ... 40: // vmo_set_size
Printf("S");
vmo.set_size(uniform_rand(kMaxVmoPages * zx_system_get_page_size(), rng));
break;
case 81 ... 100: // commit
Printf("c");
vmo.op_range(ZX_VMO_OP_COMMIT, op_off, op_size, nullptr, 0);
break;
case 101 ... 120: // decommit
Printf("d");
vmo.op_range(ZX_VMO_OP_DECOMMIT, op_off, op_size, nullptr, 0);
break;
case 121 ... 130: { // vmo_op_range (other than COMMIT/DECOMMIT which have their own cases)
Printf("O");
static const uint32_t ops[] = {ZX_VMO_OP_ZERO,
ZX_VMO_OP_LOCK,
ZX_VMO_OP_UNLOCK,
ZX_VMO_OP_CACHE_SYNC,
ZX_VMO_OP_CACHE_INVALIDATE,
ZX_VMO_OP_CACHE_CLEAN,
ZX_VMO_OP_CACHE_CLEAN_INVALIDATE,
ZX_VMO_OP_DONT_NEED,
ZX_VMO_OP_TRY_LOCK};
vmo.op_range(ops[uniform_rand(std::size(ops), rng)], op_off, op_size, nullptr, 0);
break;
}
case 131 ... 140: { // transfer_data
Printf("T");
// Create a second VMO that will be used as either the source or the destination for a
// data transfer.
zx::vmo vmo2;
uint64_t vmo2_num_pages = uniform_rand(kMaxVmoPages, rng);
zx::vmo::create(vmo2_num_pages * zx_system_get_page_size(), 0, &vmo2);
// Compute the maximum number of pages we could transfer between these two VMOs, then
// pick a random number of pages below that threshold.
uint64_t vmo_num_pages = vmo_size / zx_system_get_page_size();
uint64_t max_num_transfer_pages =
vmo2_num_pages > vmo_num_pages ? vmo_num_pages : vmo2_num_pages;
uint64_t num_transfer_pages = uniform_rand(max_num_transfer_pages, rng);
// Compute possible offsets within the source and destination VMOs.
uint64_t max_offset_pages = max_num_transfer_pages - num_transfer_pages;
uint64_t src_offset_pages = uniform_rand(max_offset_pages, rng);
uint64_t dst_offset_pages = uniform_rand(max_offset_pages, rng);
// Translate all of our sizes and offsets, which are in units of PAGE_SIZE, to units of
// bytes.
uint64_t src_offset = src_offset_pages * zx_system_get_page_size();
uint64_t dst_offset = dst_offset_pages * zx_system_get_page_size();
uint64_t transfer_size = num_transfer_pages * zx_system_get_page_size();
// Transfer data from vmo to vmo2 on even random numbers, and from vmo2 to vmo on odd
// random numbers.
if (uniform_rand(100, rng) % 2) {
vmo.transfer_data(0, dst_offset, transfer_size, &vmo2, src_offset);
} else {
vmo2.transfer_data(0, dst_offset, transfer_size, &vmo, src_offset);
}
break;
}
case 41 ... 50: { // vmo_set_cache_policy
Printf("P");
static const uint32_t policies[] = {ZX_CACHE_POLICY_CACHED,
// TODO(https://fxbug.dev/42081323): On arm64, unaligned access to Device memory generates
// an alignment exception. Our current memcpy/memset implementations are designed to work
// on Normal memory and will result in unaligned access if the pointer arguments are not
// aligned. We should fix vmstress to never issue unaligned memcpy/memset to Device
// memory.
#if !defined(__aarch64__)
ZX_CACHE_POLICY_UNCACHED,
ZX_CACHE_POLICY_UNCACHED_DEVICE,
ZX_CACHE_POLICY_WRITE_COMBINING
#endif
};
vmo.set_cache_policy(policies[uniform_rand(std::size(policies), rng)]);
break;
}
case 51 ... 60: { // vmo_create_child
Printf("C");
static const uint32_t type[] = {
ZX_VMO_CHILD_SNAPSHOT, ZX_VMO_CHILD_SNAPSHOT_AT_LEAST_ON_WRITE, ZX_VMO_CHILD_SLICE,
ZX_VMO_CHILD_REFERENCE, ZX_VMO_CHILD_SNAPSHOT_MODIFIED};
uint32_t options = type[uniform_rand(std::size(type), rng)];
bool child_reliable_mappings = reliable_mappings;
if (uniform_rand(3, rng) == 0) {
options |= ZX_VMO_CHILD_RESIZABLE;
child_reliable_mappings = false;
}
if (uniform_rand(4, rng)) {
options |= ZX_VMO_CHILD_NO_WRITE;
}
zx::vmo child;
if (vmo.create_child(options, op_off, op_size, &child) == ZX_OK) {
make_thread([this, child = std::move(child), ops = op_count,
child_reliable_mappings]() mutable {
op_thread(std::move(child), std::move(ops), child_reliable_mappings);
});
}
break;
}
case 61 ... 70: { // vmar_map/unmap
// If reliable mappings is true it means we know that no one else is going to mess with
// the VMO in a way that would cause access to a valid mapping to generate a fault.
// Generally this means that the VMO is not resizable.
Printf("V");
if (reliable_mappings) {
if (!mapping.has_value() || uniform_rand(2, rng) == 0) {
uint32_t options = ZX_VM_PERM_READ | ZX_VM_PERM_WRITE;
if (uniform_rand(2, rng) == 0) {
options |= ZX_VM_MAP_RANGE;
}
zx_info_vmo_t info;
ZX_ASSERT(vmo.get_info(ZX_INFO_VMO, &info, sizeof(info), nullptr, nullptr) == ZX_OK);
if (info.flags & ZX_INFO_VMO_PAGER_BACKED) {
options |= ZX_VM_ALLOW_FAULTS;
}
zx_vaddr_t addr;
// Currently fault prevention isn't enforced in mappings and so we must be *very*
// careful not to map outside the actual range of the vmo.
if (op_off + op_size <= vmo_size &&
zx::vmar::root_self()->map(options, 0, vmo, op_off, op_size, &addr) == ZX_OK) {
unmap_mapping();
mapping = cpp20::span<uint8_t>{reinterpret_cast<uint8_t*>(addr), op_size};
}
} else {
unmap_mapping();
}
}
break;
}
case 71 ... 80: { // bti_pin/bti_unpin
Printf("I");
if (bti_) {
if (pmt || uniform_rand(2, rng) == 0) {
zx::pmt new_pmt;
// One physical address slot per page being pinned.
std::vector<zx_paddr_t> paddrs(op_size / zx_system_get_page_size(), 0);
if (bti_.pin(ZX_BTI_PERM_READ | ZX_BTI_PERM_WRITE, vmo, op_off, op_size,
paddrs.data(), paddrs.size(), &new_pmt) == ZX_OK) {
if (pmt) {
pmt.unpin();
}
pmt = std::move(new_pmt);
}
} else {
if (pmt) {
pmt.unpin();
}
}
}
break;
}
}
}
if (!shutdown_) {
// Achieved max ops.
Printf("M");
}
}
static void random_off_size(StressTest::Rng& rng, uint64_t vmo_size, uint64_t* off_out,
uint64_t* size_out) {
// When calculating out of bounds values, pick a limit that still gives a chance to be in bounds.
constexpr uint64_t kOobLimitPages = kMaxVmoPages * 2;
// We don't want a uniform distribution of offsets and sizes as a lot of interesting things
// happen with page alignment and entire vmo ranges.
switch (uniform_rand(5, rng)) {
case 0: // Anchor offset at 0
*off_out = 0;
break;
case 1: // Page aligned offset, in bounds
*off_out =
uniform_rand(vmo_size / zx_system_get_page_size(), rng) * zx_system_get_page_size();
break;
case 2: // Page aligned offset, out of bounds
*off_out = uniform_rand(kOobLimitPages, rng) * zx_system_get_page_size();
break;
case 3: // In bounds
*off_out = uniform_rand(vmo_size, rng);
break;
case 4: // Out of bounds
*off_out = uniform_rand(kOobLimitPages * zx_system_get_page_size(), rng);
break;
}
const uint64_t remaining = vmo_size - std::min(vmo_size, *off_out);
switch (uniform_rand(5, rng)) {
case 0: // Maximum remaining vmo size
*size_out = remaining;
break;
case 1: // In range page aligned size
*size_out =
uniform_rand(remaining / zx_system_get_page_size(), rng) * zx_system_get_page_size();
break;
case 2: // Out of range page aligned size
*size_out = uniform_rand(kOobLimitPages, rng) * zx_system_get_page_size();
break;
case 3: // In range size
*size_out = uniform_rand(remaining, rng);
break;
case 4: // Out of range size
*size_out = uniform_rand(kOobLimitPages * zx_system_get_page_size(), rng);
break;
}
}
// This wrapper spawns a new thread to run F and automatically updates the living_threads_ count
// and spawns any new root threads should we start running low.
template <typename F>
bool make_thread(F func) {
uint64_t prev_count = living_threads_.fetch_add(1);
if (prev_count >= max_threads_) {
living_threads_.fetch_sub(1);
return false;
}
std::thread t{[this, func = std::move(func)]() mutable {
func();
// Spawn threads *before* decrementing our count, as the shutdown logic assumes that once
// shutdown_ is set and living_threads_ reaches 0, it will never increment again.
while (!shutdown_ && living_threads_ < low_threads_) {
auto rng = RngGen();
spawn_root_vmo(rng);
}
living_threads_.fetch_sub(1);
}};
t.detach();
return true;
}
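// Creates a shared op counter (randomly chosen up to kMaxOps) that is passed to every op thread
// working on the same vmo tree.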
std::shared_ptr<std::atomic<uint64_t>> make_ops(StressTest::Rng& rng) {
uint64_t start_ops = uniform_rand(kMaxOps, rng);
return std::make_shared<std::atomic<uint64_t>>(start_ops);
}
// To explore interesting scenarios, especially involving parallelism, we want to run every VMO
// tree for a decent number of ops, but not for too long, as at some point running longer is the
// same as just spawning a new tree. This number was chosen fairly arbitrarily, but given that all
// previous VM bugs had unit test reproductions of fewer than 20 ops, it seems reasonable.
static constexpr uint64_t kMaxOps = 4096;
// 128 pages in a vmo should be all we need to create sufficiently interesting hierarchies, so
// cap our usage there. This allows us to spin up more threads and copy-on-write hierarchies
// without worrying that they all commit and blow the memory limit.
static constexpr uint64_t kMaxVmoPages = 128;
// While it might be possible for max_threads_ to be arbitrarily high and still not exhaust free
// memory, cap the number so that we don't end up spinning an arbitrarily large number of threads
// on a system with large amounts of memory. An exceedingly large number of threads can slow down
// the overall system and cause test timeouts in certain environments (like in CQ, where the
// ssh-keep-alive does not get a chance to get scheduled and the ssh connection to the target gets
// dropped). The parallelism offered by 300 threads should be sufficiently interesting.
static constexpr uint64_t kMaxThreads = 300;
// This will be set to the total memory limit (in pages) that this test instance is constructed
// with. We should not spend more than that.
const uint64_t memory_limit_pages_;
// The maximum number of threads we can create that will not cause us to exceed our memory limit.
const uint64_t max_threads_;
// Generally we don't want too many threads, so set low_threads_ (the threshold at which we
// start spawning more root threads) to be fairly low. max_threads_ can be arbitrarily high,
// which allows our small number of root threads to (potentially) spin up a lot of parallelism.
const uint64_t low_threads_ = 8;
// Set to true when we are trying to shutdown.
std::atomic<bool> shutdown_ = false;
// Number of alive threads. Used to coordinate shutdown.
std::atomic<uint64_t> living_threads_ = 0;
// Valid if we got the iommu resource.
zx::iommu iommu_;
zx::bti bti_;
bool is_contiguous_ = false;
};
// Test thread which initializes/tears down TestInstances
int VmStressTest::test_thread() {
#if defined(__x86_64__)
constexpr uint64_t kMaxInstances = 8;
#else
// Scale down the number of instances on less powerful hardware.
constexpr uint64_t kMaxInstances = 4;
#endif
constexpr uint64_t kVariableInstances = kMaxInstances - 1;
std::unique_ptr<TestInstance> test_instances[kMaxInstances] = {};
const uint64_t free_bytes = kmem_stats_.free_bytes;
// Scale the size of the VMO we create based on the amount of free memory in the system.
// 1/64th of free memory generates a fairly sizeable vmo (16MB per 1GB).
const uint64_t vmo_test_size = free_bytes / 64 / kMaxInstances;
PrintfAlways("VM stress test: using vmo of size %" PRIu64 "\n", vmo_test_size);
uint64_t total_test_instances = 0;
// The MultiVmoTestInstance already spins up and tears down threads internally, so there is no
// benefit in also spinning up and tearing down the whole instance. Instead we run a single one
// as a static instance and randomize the others as variable instances. We give this instance a
// 'full slice' of free memory, as it is incredibly unlikely to ever allocate anywhere near that
// much.
auto multi_vmo = std::make_unique<MultiVmoTestInstance>(this, total_test_instances++,
free_bytes / kMaxInstances);
PrintfAlways("VM stress test: multi vmo instance max threads %lu\n",
multi_vmo->get_max_threads());
test_instances[kVariableInstances] = std::move(multi_vmo);
test_instances[kVariableInstances]->Start();
zx::time deadline = zx::clock::get_monotonic();
auto rng = RngGen();
size_t iter = 0;
size_t max_threads = 0;
while (!shutdown_.load()) {
iter++;
// Log the number of threads every so often. Serves as a hint for whether we are leaking
// threads. Does not need to be precise.
if (iter % 1000 == 0) {
size_t num_threads;
if (zx::process::self()->get_info(ZX_INFO_PROCESS_THREADS, nullptr, 0, nullptr,
&num_threads) == ZX_OK) {
max_threads = std::max(max_threads, num_threads);
PrintfAlways("Number of threads: %lu (peak %lu)\n", num_threads, max_threads);
}
}
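// Pick a random variable instance slot: if it is currently running, tear it down; otherwise
// spin up a new, randomly chosen, instance type in it.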
uint64_t r = uniform_rand(kVariableInstances, rng);
if (test_instances[r]) {
size_t num_threads;
if (zx::process::self()->get_info(ZX_INFO_PROCESS_THREADS, nullptr, 0, nullptr,
&num_threads) == ZX_OK) {
max_threads = std::max(max_threads, num_threads);
}
test_instances[r]->Stop();
test_instances[r].reset();
} else {
switch (uniform_rand(3, rng)) {
case 0:
if (uniform_rand(2, rng)) {
test_instances[r] = std::make_unique<SingleVmoTestInstance>(
this, total_test_instances++, true, true, vmo_test_size);
} else {
test_instances[r] = std::make_unique<SingleVmoTestInstance>(
this, total_test_instances++, true, false, vmo_test_size);
}
break;
case 1:
test_instances[r] = std::make_unique<SingleVmoTestInstance>(this, total_test_instances++,
false, false, vmo_test_size);
break;
case 2:
test_instances[r] = std::make_unique<CowCloneTestInstance>(this, total_test_instances++);
break;
}
if (test_instances[r]) {
ZX_ASSERT(test_instances[r]->Start() == ZX_OK);
}
}
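// Pace the create/teardown loop at kOpsPerSec iterations per second.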
constexpr uint64_t kOpsPerSec = 25;
deadline += zx::duration(ZX_SEC(1) / kOpsPerSec);
zx::nanosleep(deadline);
}
for (uint64_t i = 0; i < kMaxInstances; i++) {
if (test_instances[i]) {
test_instances[i]->Stop();
}
}
return 0;
}
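// Starts the test by spawning the control thread that creates and tears down test instances.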
zx_status_t VmStressTest::Start() {
auto test_worker = [](void* arg) -> int {
return static_cast<VmStressTest*>(arg)->test_thread();
};
thrd_create_with_name(&test_thread_, test_worker, this, "test_worker");
return ZX_OK;
}
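// Signals shutdown and waits for the control thread to stop all instances and exit.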
zx_status_t VmStressTest::Stop() {
shutdown_.store(true);
thrd_join(test_thread_, nullptr);
return ZX_OK;
}