// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <assert.h>
#include <errno.h>
#include <getopt.h>
#include <inttypes.h>
#include <lib/fit/defer.h>
#include <lib/stdcompat/span.h>
#include <lib/zx/bti.h>
#include <lib/zx/channel.h>
#include <lib/zx/clock.h>
#include <lib/zx/event.h>
#include <lib/zx/exception.h>
#include <lib/zx/iommu.h>
#include <lib/zx/pager.h>
#include <lib/zx/port.h>
#include <lib/zx/thread.h>
#include <lib/zx/vmar.h>
#include <lib/zx/vmo.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <threads.h>
#include <unistd.h>
#include <zircon/compiler.h>
#include <zircon/errors.h>
#include <zircon/status.h>
#include <zircon/syscalls-next.h>
#include <zircon/syscalls.h>
#include <zircon/syscalls/debug.h>
#include <zircon/syscalls/exception.h>
#include <zircon/syscalls/iommu.h>
#include <zircon/syscalls/port.h>
#include <zircon/threads.h>
#include <zircon/types.h>
#include <algorithm>
#include <array>
#include <atomic>
#include <iterator>
#include <memory>
#include <optional>
#include <shared_mutex>
#include <thread>
#include <fbl/algorithm.h>
#include <fbl/array.h>
#include <fbl/auto_lock.h>
#include <fbl/mutex.h>
#include <fbl/ref_counted.h>
#include <fbl/ref_ptr.h>
#include "stress_test.h"
#define VMSTRESS_DEBUG 0
// Helper to generate values in the full inclusive range [a,b].
template <typename IntType = uint64_t>
static inline IntType uniform_rand_range(IntType a, IntType b, StressTest::Rng& rng) {
return std::uniform_int_distribution<IntType>(a, b)(rng);
}
// Helper to generate values in the common range [0, max(1, range)). That is, a range of size 0 is
// considered valid and always generates the result 0.
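// For example, uniform_rand<uint32_t>(10, rng) yields a value in [0, 9], while
// uniform_rand<uint32_t>(0, rng) always yields 0.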
template <typename IntType = uint64_t>
static inline IntType uniform_rand(IntType range, StressTest::Rng& rng) {
if (range == static_cast<IntType>(0)) {
return range;
}
return uniform_rand_range<IntType>(static_cast<IntType>(0), range - 1, rng);
}
class VmStressTest;
// VM Stresser
//
// The current stress test runs multiple independent test instances which get randomly
// initialized and torn down over time. Each creates a single pager vmo and hands it to a
// pool of worker threads. Some of the worker threads randomly commit/decommit/read/write/map/unmap
// the vmo. The rest of the worker threads randomly service pager requests or randomly supply
// their own 'prefetch' pages. This is intended to pick out any internal races with the
// VMO/VMAR/Pager system.
//
// Currently does not validate that any given operation was successfully performed, only
// that the APIs do not return an error (or crash).
//
// Will evolve over time to use cloned vmos.
class VmStressTest : public StressTest {
public:
VmStressTest() = default;
virtual ~VmStressTest() = default;
virtual zx_status_t Start();
virtual zx_status_t Stop();
virtual const char* name() const { return "VM Stress"; }
zx::unowned_resource IommuResource() { return zx::unowned_resource{iommu_resource_}; }
private:
int test_thread();
std::atomic<bool> shutdown_{false};
thrd_t test_thread_;
} vmstress;
class TestInstance {
public:
TestInstance(VmStressTest* test, uint64_t instance_id)
: test_(test), test_instance_id_(instance_id) {}
virtual ~TestInstance() {}
virtual zx_status_t Start() = 0;
virtual zx_status_t Stop() = 0;
zx::unowned_resource IommuResource() { return test_->IommuResource(); }
protected:
// TODO: scale based on the number of cores in the system and/or command line arg
static constexpr uint64_t kNumThreads = 6;
template <typename... Args>
void Printf(const char* str, Args... args) const {
test_->Printf(str, args...);
}
template <typename... Args>
void PrintfAlways(const char* str, Args... args) const {
test_->PrintfAlways(str, args...);
}
std::mt19937_64 RngGen() { return test_->RngGen(); }
VmStressTest* const test_;
const uint64_t test_instance_id_;
};
class SingleVmoTestInstance : public TestInstance {
public:
SingleVmoTestInstance(VmStressTest* test, uint64_t instance_id, bool use_pager, bool trap_dirty,
uint64_t vmo_size)
: TestInstance(test, instance_id),
use_pager_(use_pager),
trap_dirty_(trap_dirty),
vmo_size_(vmo_size) {}
zx_status_t Start() final;
zx_status_t Stop() final;
private:
int vmo_thread();
int pager_thread();
void CheckVmoThreadError(zx_status_t status, const char* error);
const bool use_pager_;
const bool trap_dirty_;
const uint64_t vmo_size_;
static constexpr uint64_t kNumVmoThreads = 3;
thrd_t threads_[kNumThreads];
zx::thread thread_handles_[kNumThreads];
// Array used for storing vmo mappings. All mappings are cleaned up by the test thread,
// as vmo threads will sometimes crash if the instance is torn down during a page fault.
std::atomic<uint32_t> vmo_thread_idx_{0};
uintptr_t ptrs_[kNumThreads] = {};
fbl::Array<uint8_t> bufs_[kNumThreads] = {};
// Vector of page requests shared by all pager threads of the instance, to allow
// requests to be serviced out-of-order.
fbl::Mutex mtx_;
fbl::Vector<zx_packet_page_request_t> requests_ __TA_GUARDED(mtx_);
// Flag used to signal shutdown to worker threads.
std::atomic<bool> shutdown_{false};
// Counter that allows the last pager thread to clean up the pager itself.
std::atomic<uint32_t> pager_thread_count_{kNumThreads - kNumVmoThreads};
// Debug states to help diagnose failures.
std::atomic<bool> debug_pager_detached_{false};
std::atomic<bool> debug_pager_reset_{false};
std::atomic<bool> debug_pager_threads_created_{false};
// If using a pager, event to signal when the VMO threads can start generating page requests.
zx::event pager_threads_ready_;
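// The vmo under test, the pager that backs it (when use_pager_ is set), and the port on which
// pager requests for it arrive.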
zx::vmo vmo_{};
zx::pager pager_;
zx::port port_;
};
int SingleVmoTestInstance::vmo_thread() {
zx_status_t status;
uint64_t idx = vmo_thread_idx_++;
// allocate a local buffer
const size_t buf_size = zx_system_get_page_size() * 16;
bufs_[idx] = fbl::Array<uint8_t>(new uint8_t[buf_size], buf_size);
const fbl::Array<uint8_t>& buf = bufs_[idx];
auto rng = RngGen();
// local helper routines to calculate a random range within a vmo and
// a range appropriate to read into the local buffer above
auto rand_vmo_range = [this, &rng](uint64_t* out_offset, uint64_t* out_size) {
*out_offset = uniform_rand(vmo_size_, rng);
*out_size = std::min(uniform_rand(vmo_size_, rng), vmo_size_ - *out_offset);
};
auto rand_buffer_range = [this, &rng, buf_size](uint64_t* out_offset, uint64_t* out_size) {
*out_size = uniform_rand(buf_size, rng);
*out_offset = uniform_rand(vmo_size_ - *out_size, rng);
};
ZX_ASSERT(buf_size < vmo_size_);
if (use_pager_) {
// Wait for the pager threads to be created so that page requests can be serviced. Otherwise, we
// will block until the pager threads come up, which can be a while on a heavily loaded system
// (which is true for these stress tests), and could even cause us to hit the pager wait timeout
// and error out or crash.
status = pager_threads_ready_.wait_one(ZX_USER_SIGNAL_0, zx::time::infinite(), nullptr);
ZX_ASSERT_MSG(status == ZX_OK, "event wait failed with %s\n", zx_status_get_string(status));
}
while (!shutdown_.load()) {
uint64_t off, len;
int r = uniform_rand(100, rng);
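// The case ranges below weight the operations: 5% commit, 15% map/unmap, 15% vmo_read,
// 15% vmo_write, 25% mapped read, 25% mapped write.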
switch (r) {
case 0 ... 4: // commit a range of the vmo
Printf("c");
rand_vmo_range(&off, &len);
status = vmo_.op_range(ZX_VMO_OP_COMMIT, off, len, nullptr, 0);
CheckVmoThreadError(status, "Failed to commit range");
break;
case 5 ... 19:
if (ptrs_[idx]) {
// unmap the vmo if it is already mapped
Printf("u");
status = zx::vmar::root_self()->unmap(ptrs_[idx], vmo_size_);
CheckVmoThreadError(status, "failed to unmap range");
ptrs_[idx] = 0;
}
// map it somewhere
Printf("m");
status = zx::vmar::root_self()->map(
ZX_VM_PERM_READ | ZX_VM_PERM_WRITE | (use_pager_ ? ZX_VM_ALLOW_FAULTS : 0), 0, vmo_, 0,
vmo_size_, ptrs_ + idx);
CheckVmoThreadError(status, "failed to map range");
break;
case 20 ... 34:
// read from a random range of the vmo
Printf("r");
rand_buffer_range(&off, &len);
status = vmo_.read(buf.data(), off, len);
CheckVmoThreadError(status, "error reading from vmo");
break;
case 35 ... 49:
// write to a random range of the vmo
Printf("w");
rand_buffer_range(&off, &len);
status = vmo_.write(buf.data(), off, len);
CheckVmoThreadError(status, "error writing to vmo");
break;
case 50 ... 74:
// read from a random range of the vmo via a direct memory reference
if (ptrs_[idx]) {
Printf("R");
rand_buffer_range(&off, &len);
memcpy(buf.data(), reinterpret_cast<const void*>(ptrs_[idx] + off), len);
}
break;
case 75 ... 99:
// write to a random range of the vmo via a direct memory reference
if (ptrs_[idx]) {
Printf("W");
rand_buffer_range(&off, &len);
memcpy(reinterpret_cast<void*>(ptrs_[idx] + off), buf.data(), len);
}
break;
}
fflush(stdout);
}
return 0;
}
void SingleVmoTestInstance::CheckVmoThreadError(zx_status_t status, const char* error) {
// Ignore errors while shutting down, since they're almost certainly due to the
// pager disappearing.
if (!shutdown_ && status != ZX_OK) {
fprintf(stderr, "[test instance 0x%zx]: %s, error %d\n", test_instance_id_, error, status);
if (use_pager_) {
fprintf(stderr,
"[test instance 0x%zx]: pager debug state: thread count %d, created %d, detached %d, "
"reset %d\n",
test_instance_id_, pager_thread_count_.load(), debug_pager_threads_created_.load(),
debug_pager_detached_.load(), debug_pager_reset_.load());
for (size_t i = kNumVmoThreads; i < kNumThreads; i++) {
zx_info_thread_t info;
uint64_t actual, actual_count;
status = zx_object_get_info(thread_handles_[i].get(), ZX_INFO_THREAD, &info, sizeof(info),
&actual, &actual_count);
if (status != ZX_OK) {
fprintf(stderr,
"[test instance 0x%zx]: ZX_INFO_THREAD on pager thread %zu failed with %s\n",
test_instance_id_, i - kNumVmoThreads, zx_status_get_string(status));
} else {
fprintf(
stderr,
"[test instance 0x%zx]: pager thread %zu: state 0x%x, wait_exception_channel_type "
"0x%x\n",
test_instance_id_, i - kNumVmoThreads, info.state, info.wait_exception_channel_type);
}
}
}
}
}
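// Returns true if the thread backing |handle| is currently blocked waiting on a pager request.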
static bool is_thread_blocked(zx_handle_t handle) {
zx_info_thread_t info;
uint64_t actual, actual_count;
ZX_ASSERT(zx_object_get_info(handle, ZX_INFO_THREAD, &info, sizeof(info), &actual,
&actual_count) == ZX_OK);
return info.state == ZX_THREAD_STATE_BLOCKED_PAGER;
}
int SingleVmoTestInstance::pager_thread() {
zx_status_t status;
uint64_t vmo_page_count = vmo_size_ / zx_system_get_page_size();
ZX_ASSERT(vmo_page_count > 0);
auto supply_pages = [this](uint64_t off, uint64_t len) {
zx::vmo tmp_vmo;
zx_status_t status = zx::vmo::create(len, 0, &tmp_vmo);
if (status != ZX_OK) {
fprintf(stderr, "[test instance 0x%zx]: failed to create tmp vmo, error %d (%s)\n",
test_instance_id_, status, zx_status_get_string(status));
return;
}
status = tmp_vmo.op_range(ZX_VMO_OP_COMMIT, 0, len, nullptr, 0);
if (status != ZX_OK) {
fprintf(stderr, "[test instance 0x%zx]: failed to commit tmp vmo, error %d (%s)\n",
test_instance_id_, status, zx_status_get_string(status));
return;
}
// SingleVmoTestInstance doesn't currently resize the VMO, so this should work.
status = pager_.supply_pages(vmo_, off, len, tmp_vmo, 0);
if (status != ZX_OK) {
fprintf(stderr, "[test instance 0x%zx]: failed to supply pages %d, error %d (%s)\n",
test_instance_id_, pager_.get(), status, zx_status_get_string(status));
return;
}
};
auto dirty_pages = [this](uint64_t off, uint64_t len) {
// ZX_PAGER_OP_DIRTY is not supported if the VMO wasn't created with ZX_VMO_TRAP_DIRTY.
ZX_ASSERT(trap_dirty_);
zx_status_t status = pager_.op_range(ZX_PAGER_OP_DIRTY, vmo_, off, len, 0);
// ZX_ERR_NOT_FOUND is an acceptable error status as pages that have yet to be dirtied are clean
// and can get evicted.
if (status != ZX_OK && status != ZX_ERR_NOT_FOUND) {
fprintf(stderr, "[test instance 0x%zx]: failed to dirty pages %d, error %d (%s)\n",
test_instance_id_, pager_.get(), status, zx_status_get_string(status));
return;
}
};
auto writeback_pages = [this](uint64_t off, uint64_t len) {
zx_status_t status = pager_.op_range(ZX_PAGER_OP_WRITEBACK_BEGIN, vmo_, off, len, 0);
if (status != ZX_OK) {
fprintf(stderr, "[test instance 0x%zx]: failed to begin writeback %d, error %d (%s)\n",
test_instance_id_, pager_.get(), status, zx_status_get_string(status));
return;
}
status = pager_.op_range(ZX_PAGER_OP_WRITEBACK_END, vmo_, off, len, 0);
if (status != ZX_OK) {
fprintf(stderr, "[test instance 0x%zx]: failed to end writeback %d, error %d (%s)\n",
test_instance_id_, pager_.get(), status, zx_status_get_string(status));
return;
}
};
auto rng = RngGen();
#if VMSTRESS_DEBUG
// Counters to log pager thread progress for debugging stalls.
zx::duration logging_interval = zx::duration(ZX_MIN(2));
zx::time time_prev = zx::clock::get_monotonic();
uint64_t ops_per_interval = 0;
#endif // VMSTRESS_DEBUG
while (!shutdown_.load()) {
#if VMSTRESS_DEBUG
++ops_per_interval;
zx::time time_now = zx::clock::get_monotonic();
if (time_now > time_prev + logging_interval) {
fprintf(
stderr, "[test instance 0x%zx]: pager ops in last %" PRIi64 " minute(s): %" PRIu64 "\n",
test_instance_id_, (time_now - time_prev) / zx::duration(ZX_MIN(1)), ops_per_interval);
time_prev = time_now;
ops_per_interval = 0;
}
#endif // VMSTRESS_DEBUG
zx::vmo tmp_vmo;
uint64_t off, size;
zx::time deadline;
int r = uniform_rand<int>(100, rng);
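// The case ranges below weight the operations: 4% supply, 2% dirty, 45% read the port,
// 40% fulfill a queued request, 9% writeback.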
switch (r) {
case 0 ... 3: // supply a random range of pages
{
off = uniform_rand(vmo_page_count, rng);
size = std::min(uniform_rand(vmo_page_count, rng), vmo_page_count - off);
supply_pages(off * zx_system_get_page_size(), size * zx_system_get_page_size());
break;
}
case 4 ... 5: // dirty a random range of pages
{
if (trap_dirty_) {
off = uniform_rand(vmo_page_count, rng);
size = std::min(uniform_rand(vmo_page_count, rng), vmo_page_count - off);
dirty_pages(off * zx_system_get_page_size(), size * zx_system_get_page_size());
}
break;
}
case 6 ... 50: // read from the port
{
fbl::AutoLock lock(&mtx_);
if (requests_.size() == kNumVmoThreads) {
break;
} else {
// We still need to at least query the port if all vmo threads are
// blocked, in case we need to read the last thread's packet.
deadline = zx::time::infinite_past();
for (unsigned i = 0; i < kNumVmoThreads; i++) {
if (!is_thread_blocked(thread_handles_[i].get())) {
deadline = zx::clock::get_monotonic() + zx::msec(10);
break;
}
}
}
}
zx_port_packet_t packet;
status = port_.wait(deadline, &packet);
if (status != ZX_OK) {
if (status != ZX_ERR_TIMED_OUT) {
fprintf(stderr, "[test instance 0x%zx]: failed to read port, error %d (%s)\n",
test_instance_id_, status, zx_status_get_string(status));
}
} else if (packet.type != ZX_PKT_TYPE_PAGE_REQUEST ||
(packet.page_request.command != ZX_PAGER_VMO_READ &&
packet.page_request.command != ZX_PAGER_VMO_DIRTY)) {
fprintf(stderr, "[test instance 0x%zx]: unexpected packet, error %d %d\n",
test_instance_id_, packet.type, packet.page_request.command);
} else {
fbl::AutoLock lock(&mtx_);
requests_.push_back(packet.page_request);
}
break;
case 51 ... 90: // fulfill a random request
zx_packet_page_request_t req;
{
fbl::AutoLock lock(&mtx_);
if (requests_.is_empty()) {
break;
}
off = uniform_rand(requests_.size(), rng);
req = requests_.erase(off);
}
if (req.command == ZX_PAGER_VMO_READ) {
supply_pages(req.offset, req.length);
} else if (req.command == ZX_PAGER_VMO_DIRTY) {
dirty_pages(req.offset, req.length);
}
break;
case 91 ... 99: // writeback a random range
{
off = uniform_rand(vmo_page_count, rng);
size = std::min(uniform_rand(vmo_page_count, rng), vmo_page_count - off);
writeback_pages(off * zx_system_get_page_size(), size * zx_system_get_page_size());
break;
}
}
fflush(stdout);
}
// Have the last pager thread tear down the pager. Randomly either detach the vmo (and
// close the pager after all test threads are done) or immediately close the pager handle.
if (--pager_thread_count_ == 0) {
if (uniform_rand(2, rng)) {
status = pager_.detach_vmo(vmo_);
if (status != ZX_OK) {
fprintf(stderr, "[test instance 0x%zx]: failed to detach vmo, error %d (%s)\n",
test_instance_id_, status, zx_status_get_string(status));
}
debug_pager_detached_ = true;
} else {
pager_.reset();
debug_pager_reset_ = true;
}
}
return 0;
}
zx_status_t SingleVmoTestInstance::Start() {
auto status = zx::port::create(0, &port_);
if (status != ZX_OK) {
return status;
}
if (use_pager_) {
status = zx::pager::create(0, &pager_);
if (status != ZX_OK) {
return status;
}
uint32_t options = 0;
if (trap_dirty_) {
options |= ZX_VMO_TRAP_DIRTY;
}
// create a test vmo
status = pager_.create_vmo(options, port_, 0, vmo_size_, &vmo_);
if (status != ZX_OK) {
return status;
}
// create an event to signal when the VMO threads can start generating requests.
status = zx::event::create(0, &pager_threads_ready_);
} else {
status = zx::vmo::create(vmo_size_, 0, &vmo_);
}
if (status != ZX_OK) {
return status;
}
// create a pile of threads
auto worker = [](void* arg) -> int {
return static_cast<SingleVmoTestInstance*>(arg)->vmo_thread();
};
auto pager_worker = [](void* arg) -> int {
return static_cast<SingleVmoTestInstance*>(arg)->pager_thread();
};
for (uint32_t i = 0; i < std::size(threads_); i++) {
// vmo threads need to come first, since the pager workers need to reference
// the vmo worker thread handles.
bool is_vmo_worker = i < kNumVmoThreads || !use_pager_;
thrd_create_with_name(threads_ + i, is_vmo_worker ? worker : pager_worker, this,
is_vmo_worker ? "vmstress_worker" : "pager_worker");
zx::unowned_thread unowned(thrd_get_zx_handle(threads_[i]));
ZX_ASSERT(unowned->duplicate(ZX_RIGHT_SAME_RIGHTS, thread_handles_ + i) == ZX_OK);
}
if (use_pager_) {
debug_pager_threads_created_ = true;
status = pager_threads_ready_.signal(0, ZX_USER_SIGNAL_0);
ZX_ASSERT_MSG(status == ZX_OK, "event signal failed with %s\n", zx_status_get_string(status));
}
return ZX_OK;
}
zx_status_t SingleVmoTestInstance::Stop() {
zx::port port;
zx::port::create(0, &port);
std::array<zx::channel, kNumVmoThreads> channels;
if (use_pager_) {
// We need to handle potential crashes in the vmo threads when the pager is torn down. Since
// not all threads will actually crash, we can't stop handling crashes until all threads
// have terminated.
// TODO: Note that these crashes may produce visible output in the system logs, and this
// shutdown should maybe be restructured to prevent that.
for (unsigned i = 0; i < kNumVmoThreads; i++) {
zx_status_t status = thread_handles_[i].create_exception_channel(0, &channels[i]);
ZX_ASSERT(status == ZX_OK);
status = channels[i].wait_async(port, i, ZX_CHANNEL_READABLE | ZX_CHANNEL_PEER_CLOSED, 0);
ZX_ASSERT(status == ZX_OK);
}
}
shutdown_.store(true);
if (use_pager_) {
uint64_t running_count = kNumVmoThreads;
while (running_count) {
zx_port_packet_t packet;
ZX_ASSERT(port.wait(zx::time::infinite(), &packet) == ZX_OK);
if (packet.signal.observed & ZX_CHANNEL_READABLE) {
const zx::channel& channel = channels[packet.key];
zx_exception_info_t exception_info;
zx::exception exception;
ZX_ASSERT(channel.read(0, &exception_info, exception.reset_and_get_address(),
sizeof(exception_info), 1, nullptr, nullptr) == ZX_OK);
zx::thread& thrd = thread_handles_[packet.key];
zx_exception_report_t report;
ZX_ASSERT(thrd.get_info(ZX_INFO_THREAD_EXCEPTION_REPORT, &report, sizeof(report), NULL,
NULL) == ZX_OK);
ZX_ASSERT(report.header.type == ZX_EXCP_FATAL_PAGE_FAULT);
// thrd_exit takes a parameter, but we don't actually read it when we join
zx_thread_state_general_regs_t regs;
ZX_ASSERT(thrd.read_state(ZX_THREAD_STATE_GENERAL_REGS, &regs, sizeof(regs)) == ZX_OK);
#if defined(__x86_64__)
regs.rip = reinterpret_cast<uintptr_t>(thrd_exit);
#else
regs.pc = reinterpret_cast<uintptr_t>(thrd_exit);
#endif
ZX_ASSERT(thrd.write_state(ZX_THREAD_STATE_GENERAL_REGS, &regs, sizeof(regs)) == ZX_OK);
uint32_t exception_state = ZX_EXCEPTION_STATE_HANDLED;
ZX_ASSERT(exception.set_property(ZX_PROP_EXCEPTION_STATE, &exception_state,
sizeof(exception_state)) == ZX_OK);
ZX_ASSERT(channel.wait_async(port, packet.key, ZX_CHANNEL_READABLE | ZX_CHANNEL_PEER_CLOSED,
0) == ZX_OK);
} else {
running_count--;
}
}
}
for (unsigned i = 0; i < std::size(threads_); i++) {
thrd_join(threads_[i], nullptr);
}
for (unsigned i = 0; i < (use_pager_ ? kNumVmoThreads : kNumThreads); i++) {
if (ptrs_[i]) {
zx::vmar::root_self()->unmap(ptrs_[i], vmo_size_);
}
}
return ZX_OK;
}
// This test case randomly creates vmos and COW clones, randomly writes into the vmos,
// and performs basic COW integrity checks.
//
// Each created vmo has a 32-bit id. These ids are monotonically increasing. Each vmo has
// its own 32-bit op-id, which is incremented on each write operation. These two 32-bit ids
// are combined into a single 64-bit id which uniquely identifies every write operation. The
// test uses these 64-bit ids to perform various COW integrity checks which are documented
// in more detail within the test implementation.
//
// This test generally does not handle id overflow because the random teardown of various parts
// of the test makes the chance of overflow vanishingly small.
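// For example, since op-ids start at 1, the third write ever performed on the vmo with id 7 is
// recorded in that page as the 64-bit value (3ull << 32) | 7.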
class CowCloneTestInstance : public TestInstance {
public:
CowCloneTestInstance(VmStressTest* test, uint64_t instance_id)
: TestInstance(test, instance_id) {}
zx_status_t Start() final;
zx_status_t Stop() final;
private:
int op_thread();
// Aggregate for holding the test info associated with a single vmo.
struct TestData : public fbl::RefCounted<struct test_data> {
TestData(uint32_t id, uint32_t data_idx, zx::vmo test_vmo, uint32_t pc, uint32_t offset_page,
uintptr_t mapped_ptr, fbl::RefPtr<struct TestData> p, uint32_t clone_start_op,
uint32_t clone_end_op)
: vmo_id(id),
idx(data_idx),
vmo(std::move(test_vmo)),
page_count(pc),
offset_page_idx(offset_page),
ptr(mapped_ptr),
parent(std::move(p)),
parent_clone_start_op_id(clone_start_op),
parent_clone_end_op_id(clone_end_op) {}
// An identifier for the vmo.
const uint32_t vmo_id;
// The index of the test data in the test_datas_ array.
const uint32_t idx;
// The vmo under test.
zx::vmo vmo;
// The number of pages in the vmo.
const uint32_t page_count;
// The page offset into the parent where this vmo starts. This has no
// meaning if this vmo has no parent.
const uint32_t offset_page_idx;
// The pointer to the vmo mapping.
const uintptr_t ptr;
// A pointer to the TestData struct which holds the parent of |vmo|, or
// null if |vmo| has no parent.
//
// Note that this reference does not keep the parent->vmo handle from being closed.
const fbl::RefPtr<struct TestData> parent;
// The id of the parent TestData at the beginning and end of the clone operation
// which created this vmo. Used for integrity checks.
const uint32_t parent_clone_start_op_id;
const uint32_t parent_clone_end_op_id;
// This can technically overflow, but the chance of a VMO living
// long enough for that to happen is astronomically low.
std::atomic<uint32_t> next_op_id = 1;
};
// Debug function for printing information associated with a particular access operation.
void DumpTestVmoAccessInfo(const fbl::RefPtr<TestData>& vmo, uint32_t page_index, uint64_t val);
static constexpr uint64_t kMaxTestVmos = 32;
static constexpr uint64_t kMaxVmoPageCount = 128;
struct {
fbl::RefPtr<struct TestData> vmo;
// Shared mutex which protects |vmo|. The mutex is taken in exclusive mode
// when creating/destroying the |vmo| in this slot. It is taken in shared
// mode when writing or when creating a clone based on this slot.
std::shared_mutex mtx;
} test_datas_[kMaxTestVmos];
// Helper function that creates a new test vmo that will be inserted at |idx| in test_datas_.
fbl::RefPtr<TestData> CreateTestVmo(uint32_t idx, StressTest::Rng& rng);
// Helper function that performs a write operation on |test_data|, which is currently
// at index |idx| in test_datas_.
bool TestVmoWrite(uint32_t idx, const fbl::RefPtr<TestData>& test_data, StressTest::Rng& rng);
thrd_t threads_[kNumThreads] = {};
std::atomic<bool> shutdown_{false};
// Id counter for vmos.
std::atomic<uint32_t> next_vmo_id_{1};
// Limit for next_vmo_id_ to prevent overflow concerns.
static constexpr uint32_t kMaxVmoId = UINT32_MAX - kNumThreads;
static uint32_t get_op_id(uint64_t full_id) { return static_cast<uint32_t>(full_id >> 32); }
static uint32_t get_vmo_id(uint64_t full_id) { return full_id & 0xffffffff; }
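// The 64-bit id packs the vmo id into the low 32 bits and the op id into the high 32 bits,
// e.g. make_full_id(0x2, 0x5) == 0x0000'0005'0000'0002.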
static uint64_t make_full_id(uint32_t vmo_id, uint32_t op_id) {
return vmo_id | (static_cast<uint64_t>(op_id) << 32);
}
};
zx_status_t CowCloneTestInstance::Start() {
for (unsigned i = 0; i < kNumThreads; i++) {
auto fn = [](void* arg) -> int { return static_cast<CowCloneTestInstance*>(arg)->op_thread(); };
thrd_create_with_name(threads_ + i, fn, this, "op_worker");
}
return ZX_OK;
}
zx_status_t CowCloneTestInstance::Stop() {
shutdown_.store(true);
bool success = true;
for (unsigned i = 0; i < kNumThreads; i++) {
int32_t res;
thrd_join(threads_[i], &res);
success &= (res == 0);
}
for (unsigned i = 0; i < kMaxTestVmos; i++) {
if (test_datas_[i].vmo) {
zx::vmar::root_self()->unmap(test_datas_[i].vmo->ptr,
test_datas_[i].vmo->page_count * zx_system_get_page_size());
}
}
if (!success) {
PrintfAlways("Test failure, hanging to preserve state\n");
zx_nanosleep(ZX_TIME_INFINITE);
}
return ZX_OK;
}
void CowCloneTestInstance::DumpTestVmoAccessInfo(const fbl::RefPtr<TestData>& vmo,
uint32_t page_index, uint64_t val) {
PrintfAlways("Got value %lx (%x)\n", val, page_index);
zx_info_vmo_t info;
ZX_ASSERT(vmo->vmo.get_info(ZX_INFO_VMO, &info, sizeof(info), nullptr, nullptr) == ZX_OK);
PrintfAlways("koid=%lx(%lu)\n", info.koid, info.koid);
PrintfAlways("vmo ids are: ");
auto cur = vmo;
while (cur) {
PrintfAlways("%x ", cur->vmo_id);
cur = cur->parent;
}
PrintfAlways("\n");
}
fbl::RefPtr<CowCloneTestInstance::TestData> CowCloneTestInstance::CreateTestVmo(
uint32_t idx, StressTest::Rng& rng) {
uint32_t parent_idx = uniform_rand<uint32_t>(kMaxTestVmos, rng);
auto& parent_vmo = test_datas_[parent_idx];
zx::vmo vmo;
fbl::RefPtr<struct TestData> parent;
uint32_t parent_clone_start_op_id;
uint32_t parent_clone_end_op_id;
uint32_t page_count = uniform_rand_range<uint32_t>(1, kMaxVmoPageCount, rng);
uint32_t page_offset = 0;
if (parent_idx != idx) {
if (!parent_vmo.mtx.try_lock_shared()) {
// If something has an exclusive lock on the target vmo,
// then just abort the operation.
return nullptr;
}
if (parent_vmo.vmo) {
parent = parent_vmo.vmo;
page_offset = uniform_rand(parent->page_count, rng);
parent_clone_start_op_id = parent->next_op_id.load();
zx_status_t status =
parent->vmo.create_child(ZX_VMO_CHILD_SNAPSHOT, page_offset * zx_system_get_page_size(),
page_count * zx_system_get_page_size(), &vmo);
ZX_ASSERT_MSG(status == ZX_OK, "Failed to clone vmo %d", status);
parent_clone_end_op_id = parent->next_op_id.load();
} else {
// There's no parent, so we're just going to create a new vmo.
parent = nullptr;
}
parent_vmo.mtx.unlock_shared();
} else {
// We're creating a vmo at |idx|, so there isn't a vmo there now.
parent = nullptr;
}
if (!parent) {
parent_clone_start_op_id = parent_clone_end_op_id = 0;
zx_status_t status = zx::vmo::create(page_count * zx_system_get_page_size(), 0, &vmo);
ZX_ASSERT_MSG(status == ZX_OK, "Failed to create vmo %d", status);
}
uintptr_t ptr;
zx::vmar::root_self()->map(ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0, vmo, 0,
page_count * zx_system_get_page_size(), &ptr);
uint32_t vmo_id = next_vmo_id_.fetch_add(1);
// The chance that an individual instance lives this long is vanishingly small, and it
// would take a long time. So just abort the test so we don't have to deal with it.
ZX_ASSERT(vmo_id < kMaxVmoId);
auto res = fbl::MakeRefCounted<TestData>(vmo_id, idx, std::move(vmo), page_count, page_offset,
ptr, std::move(parent), parent_clone_start_op_id,
parent_clone_end_op_id);
ZX_ASSERT(res);
return res;
}
bool CowCloneTestInstance::TestVmoWrite(uint32_t idx, const fbl::RefPtr<TestData>& test_data,
StressTest::Rng& rng) {
uint32_t page_idx = uniform_rand(test_data->page_count, rng);
auto p = reinterpret_cast<std::atomic_uint64_t*>(test_data->ptr +
page_idx * zx_system_get_page_size());
// We want the ids to be monotonically increasing. To prevent races between two
// threads at the same location from mixing up the order of their op-ids, do a cmpxchg
// and regenerate the op-id if we see a race.
uint64_t old = p->load();
uint32_t my_op_id = test_data->next_op_id.fetch_add(1);
uint64_t desired = make_full_id(test_data->vmo_id, my_op_id);
while (!p->compare_exchange_strong(old, desired)) {
my_op_id = test_data->next_op_id.fetch_add(1);
desired = make_full_id(test_data->vmo_id, my_op_id);
}
uint32_t write_vmo_id = get_vmo_id(old);
if (write_vmo_id == test_data->vmo_id) {
// If the vmo id is for this vmo, then the old op id must be
// less than whatever we wrote.
if (get_op_id(old) < get_op_id(desired)) {
return true;
} else {
PrintfAlways("Got high op id for current vmo\n");
DumpTestVmoAccessInfo(test_data, page_idx, old);
return false;
}
} else if (write_vmo_id == 0) {
// Nothing has ever written to the page.
if (old == 0) {
return true;
} else {
PrintfAlways("Got non-zero op id for zero vmo id\n");
DumpTestVmoAccessInfo(test_data, page_idx, old);
return false;
}
}
// Look up the parent chain for the vmo which is responsible for writing
// the old data that we saw.
auto cur = test_data;
uint32_t parent_idx = page_idx;
while (cur != nullptr) {
// cur isn't responsible for writing the data, so it must have an ancestor that did it.
ZX_ASSERT(cur->parent);
parent_idx += cur->offset_page_idx;
// The index we're inspecting lies past the end of the parent, which means
// it was initialized to 0 in cur and we somehow didn't see the vmo originally
// responsible for the write.
if (parent_idx >= cur->parent->page_count) {
PrintfAlways("Parent search overflow\n");
DumpTestVmoAccessInfo(test_data, page_idx, old);
return false;
}
if (cur->parent->vmo_id != write_vmo_id) {
// No match, so continue up the chain.
cur = cur->parent;
continue;
}
// The op id we saw must be smaller than the next op id at the time of the clone op.
if (get_op_id(old) >= cur->parent_clone_end_op_id) {
PrintfAlways("Got op-id from after clone operation\n");
DumpTestVmoAccessInfo(test_data, page_idx, old);
return false;
}
// It's possible that the parent vmo has already been destroyed, so
// lock its index and check if it's what we expect.
auto& maybe_parent = test_datas_[cur->parent->idx];
if (cur->parent->idx != cur->idx && maybe_parent.mtx.try_lock_shared()) {
if (maybe_parent.vmo == cur->parent) {
auto val = reinterpret_cast<std::atomic_uint64_t*>(maybe_parent.vmo->ptr +
parent_idx * zx_system_get_page_size())
->load();
// If the clone sees a particular write_vmo_id, then that means the VMO
// with the associated id wrote to that address before the clone operation.
// Once that happens, the write ID that ancestor sees can't change.
// Furthermore, the op ID can only increase.
if (get_vmo_id(val) != write_vmo_id || (get_op_id(val) < get_op_id(old))) {
DumpTestVmoAccessInfo(test_data, page_idx, old);
DumpTestVmoAccessInfo(maybe_parent.vmo, parent_idx, val);
shutdown_.store(true);
maybe_parent.mtx.unlock_shared();
return false;
}
}
maybe_parent.mtx.unlock_shared();
}
break;
}
if (cur == nullptr) {
// We somehow didn't find what performed the write.
PrintfAlways("Parent search failure\n");
DumpTestVmoAccessInfo(test_data, page_idx, old);
return false;
}
return true;
}
int CowCloneTestInstance::op_thread() {
auto rng = RngGen();
while (!shutdown_.load()) {
uint32_t idx = uniform_rand<uint32_t>(kMaxTestVmos, rng);
auto& test_data = test_datas_[idx];
uint32_t rand_op = uniform_rand(1000, rng);
// 0 -> 13: create vmo
// 14: no-op
// 15 -> 19: destroy vmo
// 20 -> 999: random write
if (rand_op < 20) {
test_data.mtx.lock();
if (rand_op < 14 && test_data.vmo == nullptr) {
test_data.vmo = CreateTestVmo(idx, rng);
} else if (rand_op >= 15 && test_data.vmo != nullptr) {
for (unsigned i = 0; i < test_data.vmo->page_count; i++) {
auto val = reinterpret_cast<std::atomic_uint64_t*>(test_data.vmo->ptr +
i * zx_system_get_page_size())
->load();
// vmo ids are monotonically increasing, so we shouldn't see
// any ids greater than the current vmo's.
if (get_vmo_id(val) > test_data.vmo->vmo_id) {
DumpTestVmoAccessInfo(test_data.vmo, i, val);
shutdown_.store(true);
test_data.mtx.unlock();
return -1;
}
}
zx::vmar::root_self()->unmap(test_data.vmo->ptr,
test_data.vmo->page_count * zx_system_get_page_size());
test_data.vmo->vmo.reset();
test_data.vmo = nullptr;
}
test_data.mtx.unlock();
} else {
test_data.mtx.lock_shared();
if (test_data.vmo != nullptr) {
if (!TestVmoWrite(idx, test_data.vmo, rng)) {
test_data.mtx.unlock_shared();
return -1;
}
}
test_data.mtx.unlock_shared();
}
}
return 0;
}
// This test instance runs multiple VMOs across multiple threads and tries to trigger unusual
// race conditions and kernel failures that come from mixing parallelism of all kinds of operations.
// The trade-off is that the test almost never knows what the outcome of an operation should be, so
// it only catches bugs where we can ultimately trip a kernel assert or something similar.
class MultiVmoTestInstance : public TestInstance {
public:
MultiVmoTestInstance(VmStressTest* test, uint64_t instance_id, uint64_t mem_limit)
: TestInstance(test, instance_id),
memory_limit_pages_(mem_limit / zx_system_get_page_size()),
// Scale our maximum threads to ensure that if all threads allocate a full-size vmo (via
// copy-on-write or otherwise) we won't exceed our memory limit.
max_threads_(std::min(memory_limit_pages_ / kMaxVmoPages, kMaxThreads)) {}
zx_status_t Start() override {
// If max threads was calculated to be smaller than low threads then we really don't have
// much memory. Don't try to recover from this case; just fail.
if (max_threads_ < low_threads_) {
PrintfAlways("Not enough free memory to run test instance\n");
return ZX_ERR_NO_MEMORY;
}
if (shutdown_) {
return ZX_ERR_INTERNAL;
}
zx::unowned_resource iommu_resource = IommuResource();
if (*iommu_resource) {
zx_iommu_desc_dummy_t desc;
zx_status_t result =
zx::iommu::create(*iommu_resource, ZX_IOMMU_TYPE_DUMMY, &desc, sizeof(desc), &iommu_);
if (result != ZX_OK) {
return result;
}
result = zx::bti::create(iommu_, 0, 0xdeadbeef, &bti_);
if (result != ZX_OK) {
return result;
}
}
auto rng = RngGen();
spawn_root_vmo(rng);
return ZX_OK;
}
zx_status_t Stop() override {
// Signal shutdown and wait for everyone. It's possible for living_threads_ to increase after
// shutdown_ is set if a living thread creates another thread. This is fine since it means
// living_threads_ goes from a non-zero value to a non-zero value, but it will never go from 0
// to non-zero.
shutdown_ = true;
while (living_threads_ > 0) {
zx::nanosleep(zx::deadline_after(zx::msec(500)));
}
return ZX_OK;
}
uint64_t get_max_threads() const { return max_threads_; }
private:
void spawn_root_vmo(StressTest::Rng& rng) {
zx::vmo vmo;
bool reliable_mappings = true;
uint64_t vmo_size = uniform_rand(kMaxVmoPages, rng) * zx_system_get_page_size();
// Mix in contiguous VMOs as a significant fraction, since physical page loaning and borrowing
// needs to get covered heavily. Also, some other VMO operations work differently on
// contiguous VMOs.
if (bti_ && uniform_rand(2, rng) == 0) {
zx_status_t result = zx::vmo::create_contiguous(bti_, vmo_size, 0, &vmo);
if (result != ZX_OK) {
return;
}
is_contiguous_ = true;
} else {
uint32_t options = 0;
// Skew away from resizable VMOs as they are not common and many operations don't work.
if (uniform_rand(4, rng) == 0) {
options |= ZX_VMO_RESIZABLE;
reliable_mappings = false;
}
if (uniform_rand(2, rng) == 0) {
zx::pager pager;
zx::port port;
zx_status_t result = zx::pager::create(0, &pager);
ZX_ASSERT(result == ZX_OK);
result = zx::port::create(0, &port);
ZX_ASSERT(result == ZX_OK);
if (uniform_rand(2, rng) == 0) {
options |= ZX_VMO_TRAP_DIRTY;
}
result = pager.create_vmo(options, port, 0, vmo_size, &vmo);
ZX_ASSERT(result == ZX_OK);
// Randomly discard reliable mappings even though not resizable to give the pager a chance
// to generate faults on non-resizable vmos.
if (reliable_mappings && uniform_rand(4, rng) == 0) {
reliable_mappings = false;
}
// Force spin up the pager thread as it's required to ensure the VMO threads do not block
// forever.
zx::vmo dup_vmo;
result = vmo.duplicate(ZX_RIGHT_SAME_RIGHTS, &dup_vmo);
ZX_ASSERT(result == ZX_OK);
// Stash the pager handle value to be passed to the writeback thread below before
// transferring ownership to the pager thread. A pager handle cannot be duplicated, so we
// cannot create another handle for the writeback thread to use. We also cannot make the
// pager a class member because we might spawn another root VMO which will take a new pager.
const zx_handle_t pager_handle = pager.get();
if (!make_thread([this, pager = std::move(pager), port = std::move(port),
vmo = std::move(dup_vmo)]() mutable {
pager_thread(std::move(pager), std::move(port), std::move(vmo));
})) {
// If the pager thread couldn't spin up, bail right now and don't make the client thread, as
// that client thread will either block or hard crash; either scenario will make that
// thread unrecoverable.
return;
}
// Now that the pager thread has been successfully created, spin up the writeback thread
// as well.
result = vmo.duplicate(ZX_RIGHT_SAME_RIGHTS, &dup_vmo);
ZX_ASSERT(result == ZX_OK);
make_thread([this, vmo = std::move(dup_vmo), pager_handle]() mutable {
writeback_thread(pager_handle, std::move(vmo));
});
} else {
zx_status_t result = zx::vmo::create(vmo_size, options, &vmo);
ZX_ASSERT(result == ZX_OK);
}
}
auto ops = make_ops(rng);
make_thread([this, vmo = std::move(vmo), ops = std::move(ops), reliable_mappings]() mutable {
op_thread(std::move(vmo), std::move(ops), reliable_mappings);
});
}
// TODO: pager_thread currently just fulfills any page faults correctly. This should be expanded
// to detach the vmo, fail ranges, pre-supply pages, etc.
void pager_thread(zx::pager pager, zx::port port, zx::vmo vmo) {
// To exit the pager thread we need to know once we have the only reference to the vmo. This
// requires two things: our vmo handle must be the only handle to that vmo, and the vmo must have
// no children. The first condition has no signal, so until we know we have the only handle we
// will use timed waits and poll. Once we are the solo owner, tracked in this variable, we
// will be able to use the zero children signal.
bool solo_owner = false;
while (1) {
zx_port_packet_t packet;
zx_status_t result =
port.wait(solo_owner ? zx::time::infinite() : zx::deadline_after(zx::msec(100)), &packet);
if (result == ZX_ERR_TIMED_OUT) {
zx_info_handle_count_t info;
result = vmo.get_info(ZX_INFO_HANDLE_COUNT, &info, sizeof(info), nullptr, nullptr);
ZX_ASSERT(result == ZX_OK);
// Check if we have the only handle
if (info.handle_count == 1) {
// Start watching for the zero children signal.
result = vmo.wait_async(port, 1, ZX_VMO_ZERO_CHILDREN, 0);
ZX_ASSERT(result == ZX_OK);
solo_owner = true;
}
continue;
}
if (packet.key == 1) {
ZX_ASSERT(solo_owner);
zx_pager_vmo_stats_t stats;
zx_status_t status = zx_pager_query_vmo_stats(
pager.get(), vmo.get(), ZX_PAGER_RESET_VMO_STATS, &stats, sizeof(stats));
ZX_ASSERT_MSG(status == ZX_OK, "Failed to query VMO stats %s\n",
zx_status_get_string(status));
const bool was_modified = (stats.modified == ZX_PAGER_VMO_STATS_MODIFIED);
// Writeback any dirty pages before exiting.
zx_vmo_dirty_range_t ranges[10];
size_t actual = 0, avail = 0;
size_t vmo_size;
status = vmo.get_size(&vmo_size);
ZX_ASSERT_MSG(status == ZX_OK, "Failed to get VMO size %s\n", zx_status_get_string(status));
uint64_t start = 0, len = vmo_size;
do {
status = zx_pager_query_dirty_ranges(pager.get(), vmo.get(), start, len, &ranges[0],
sizeof(ranges), &actual, &avail);
ZX_ASSERT_MSG(status == ZX_OK, "Failed to query dirty ranges %s\n",
zx_status_get_string(status));
if (actual == 0) {
break;
}
for (size_t i = 0; i < actual; i++) {
status = zx_pager_op_range(pager.get(), ZX_PAGER_OP_WRITEBACK_BEGIN, vmo.get(),
ranges[i].offset, ranges[i].length, ranges[i].options);
ZX_ASSERT_MSG(status == ZX_OK, "Failed to begin writeback %s\n",
zx_status_get_string(status));
status = zx_pager_op_range(pager.get(), ZX_PAGER_OP_WRITEBACK_END, vmo.get(),
ranges[i].offset, ranges[i].length, 0);
ZX_ASSERT_MSG(status == ZX_OK, "Failed to end writeback %s\n",
zx_status_get_string(status));
}
uint64_t new_start = ranges[actual - 1].offset + ranges[actual - 1].length;
len -= (new_start - start);
start = new_start;
} while (avail > actual);
ZX_ASSERT(avail == actual);
status = zx_pager_query_dirty_ranges(pager.get(), vmo.get(), 0, vmo_size, nullptr, 0,
nullptr, &avail);
ZX_ASSERT_MSG(avail == 0, "Found %zu dirty ranges after writeback\n", avail);
// No one should have modified the VMO since the last time we reset modified.
status = zx_pager_query_vmo_stats(pager.get(), vmo.get(), ZX_PAGER_RESET_VMO_STATS, &stats,
sizeof(stats));
ZX_ASSERT_MSG(status == ZX_OK, "Failed to query VMO stats %s\n",
zx_status_get_string(status));
ZX_ASSERT_MSG(stats.modified == 0, "VMO was modified (prev query: %d)\n", was_modified);
// No children, and we have the only handle. Done.
break;
}
ZX_ASSERT(packet.key == 0);
ZX_ASSERT(packet.type == ZX_PKT_TYPE_PAGE_REQUEST);
if (packet.page_request.command == ZX_PAGER_VMO_COMPLETE) {
// VMO is finished, so we have nothing to do. Technically since we have a handle to the vmo
// this case will never happen.
break;
} else if (packet.page_request.command != ZX_PAGER_VMO_READ &&
packet.page_request.command != ZX_PAGER_VMO_DIRTY) {
PrintfAlways("Unknown page_request command %d\n", packet.page_request.command);
return;
}
// No matter what we decide to do we *MUST* ensure we also fulfill the page fault in some way,
// otherwise we risk blocking the faulting thread (and also ourselves) forever. Above all we
// must guarantee that the op_thread can progress to the point of closing the VMO such that
// we end up with the only VMO handle.
zx::vmo aux_vmo;
if (packet.page_request.command == ZX_PAGER_VMO_READ) {
if (zx::vmo::create(packet.page_request.length, 0, &aux_vmo) != ZX_OK) {
PrintfAlways("Failed to create VMO of length %" PRIu64 " to fulfill page fault\n",
packet.page_request.length);
return;
}
}
do {
// Get the latest VMO size; a cached value could get out of sync because we may intentionally
// call resize from multiple threads concurrently.
uint64_t vmo_size;
zx_status_t get_size_status = vmo.get_size(&vmo_size);
ZX_ASSERT(get_size_status == ZX_OK);
if (packet.page_request.offset >= vmo_size) {
// Nothing to fulfill since the requested range has been seen to be entirely outside the
// VMO after the request was created by the kernel.
result = ZX_OK;
break;
}
ZX_ASSERT(packet.page_request.offset < vmo_size);
uint64_t trimmed_length =
std::min(packet.page_request.length, vmo_size - packet.page_request.offset);
if (packet.page_request.command == ZX_PAGER_VMO_READ) {
ZX_ASSERT(aux_vmo.is_valid());
result = pager.supply_pages(vmo, packet.page_request.offset, trimmed_length, aux_vmo, 0);
} else {
ZX_ASSERT(packet.page_request.command == ZX_PAGER_VMO_DIRTY);
result =
pager.op_range(ZX_PAGER_OP_DIRTY, vmo, packet.page_request.offset, trimmed_length, 0);
// ZX_ERR_NOT_FOUND is an acceptable error status as pages that have yet to be dirtied are
// clean and can get evicted.
if (result == ZX_ERR_NOT_FOUND) {
result = ZX_OK;
}
}
// If the underlying VMO was resized then it's possible the destination offset is now out of
// range. This is okay and we can try again if necessary to fulfill the pages that aren't
// beyond the VMO size. In any other case something has gone horribly wrong.
} while (result == ZX_ERR_OUT_OF_RANGE);
if (result != ZX_OK) {
PrintfAlways("Failed to %s pages: %d\n",
packet.page_request.command == ZX_PAGER_VMO_READ ? "supply" : "dirty", result);
return;
}
}
}
void writeback_thread(zx_handle_t pager, zx::vmo vmo) {
uint64_t total_attempts = 0, successful_attempts = 0;
while (!shutdown_) {
// If there are only two handles remaining to the VMO, these will belong to the pager thread
// and the writeback thread, i.e. no op_threads will access the VMO anymore. Tear down the
// writeback thread too so that the pager thread can clean up. This ensures that no threads
// corresponding to this VMO are left lingering behind and our spawn_root_vmo logic in
// make_thread works as intended, and creates new VMOs (and new pagers) and new threads for
// those VMOs.
zx_info_handle_count_t info;
zx_status_t status =
vmo.get_info(ZX_INFO_HANDLE_COUNT, &info, sizeof(info), nullptr, nullptr);
ZX_ASSERT(status == ZX_OK);
if (info.handle_count == 2) {
break;
}
// Ensure we can query VMO stats successfully.
zx_pager_vmo_stats_t stats;
status = zx_pager_query_vmo_stats(pager, vmo.get(), 0, &stats, sizeof(stats));
ZX_ASSERT_MSG(status == ZX_OK, "Failed to query VMO stats %s\n",
zx_status_get_string(status));
const bool was_modified = (stats.modified == ZX_PAGER_VMO_STATS_MODIFIED);
status = zx_pager_query_vmo_stats(pager, vmo.get(), ZX_PAGER_RESET_VMO_STATS, &stats,
sizeof(stats));
ZX_ASSERT_MSG(status == ZX_OK, "Failed to query VMO stats %s\n",
zx_status_get_string(status));
// The first query did not request a reset, so if the VMO was modified before it should be
// modified now as well. If the VMO was not previously modified though, it's possible for it
// to have gotten modified between the two calls, so we cannot make any assertions in that
// case. For the same reason we cannot check that the modified state is cleared after the
// second query.
ZX_ASSERT_MSG(!was_modified || stats.modified == ZX_PAGER_VMO_STATS_MODIFIED,
"VMO modified state: %u (prev: %d)\n", stats.modified, was_modified);
zx_vmo_dirty_range_t ranges[10];
size_t actual = 0, avail = 0;
size_t vmo_size;
status = vmo.get_size(&vmo_size);
ZX_ASSERT_MSG(status == ZX_OK, "Failed to get VMO size %s\n", zx_status_get_string(status));
uint64_t start = 0, remaining_len = vmo_size;
// Make a single pass over the entire VMO during each writeback attempt. The goal is to write
// back as much as possible without having to keep retrying in the face of competing writes
// and resizes. It is fine if the size we queried above becomes stale; the loop will ignore
// any OUT_OF_RANGE errors in the case of a resize down, and a larger size will be written
// back in the next attempt.
do {
status = zx_pager_query_dirty_ranges(pager, vmo.get(), start, remaining_len, &ranges[0],
sizeof(ranges), &actual, &avail);
ZX_ASSERT_MSG(status == ZX_OK || status == ZX_ERR_OUT_OF_RANGE,
"Failed to query dirty ranges %s\n", zx_status_get_string(status));
// Nothing more to do if we could not query dirty ranges successfully. The VMO was resized
// down from under us. Retry in the next iteration of the outer loop.
if (status == ZX_ERR_OUT_OF_RANGE) {
break;
}
// No (more) dirty ranges.
if (avail == 0) {
remaining_len = 0;
break;
}
ZX_ASSERT(actual > 0);
uint64_t new_start = 0;  // Initialized in case the first writeback op below fails out of range.
for (size_t i = 0; i < actual; i++) {
status = zx_pager_op_range(pager, ZX_PAGER_OP_WRITEBACK_BEGIN, vmo.get(),
ranges[i].offset, ranges[i].length, ranges[i].options);
ZX_ASSERT_MSG(status == ZX_OK || status == ZX_ERR_OUT_OF_RANGE,
"Failed to begin writeback %s\n", zx_status_get_string(status));
if (status == ZX_ERR_OUT_OF_RANGE) {
break;
}
status = zx_pager_op_range(pager, ZX_PAGER_OP_WRITEBACK_END, vmo.get(), ranges[i].offset,
ranges[i].length, 0);
ZX_ASSERT_MSG(status == ZX_OK || status == ZX_ERR_OUT_OF_RANGE,
"Failed to end writeback %s\n", zx_status_get_string(status));
if (status == ZX_ERR_OUT_OF_RANGE) {
break;
}
new_start = ranges[i].offset + ranges[i].length;
}
if (new_start < ranges[actual - 1].offset + ranges[actual - 1].length) {
break;
}
remaining_len -= (new_start - start);
start = new_start;
} while (remaining_len > 0);
if (remaining_len == 0) {
successful_attempts++;
}
total_attempts++;
// The rate of writeback is kept relatively high to stress the system.
zx::nanosleep(zx::deadline_after(zx::msec(200)));
}
if (successful_attempts != total_attempts) {
PrintfAlways("Successful writeback attempts: %lu (out of %lu)\n", successful_attempts,
total_attempts);
}
}
// This is the main function that performs continuous operations on a vmo. It may try to spawn
// additional threads for parallelism, but they all share the same op counter to prevent any
// particular vmo hierarchy from living 'forever' by spawning new children all the time.
void op_thread(zx::vmo vmo, std::shared_ptr<std::atomic<uint64_t>> op_count,
bool reliable_mappings) {
auto rng = RngGen();
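// Optional pinned-memory token and mapping owned by this op thread; both are released by the
// deferred cleanup below when the thread exits.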
zx::pmt pmt;
std::optional<cpp20::span<uint8_t>> mapping;
auto unmap_mapping = [&mapping]() {
if (auto span = mapping) {
zx::vmar::root_self()->unmap(reinterpret_cast<uintptr_t>(span->data()), span->size_bytes());
mapping = std::nullopt;
}
};
auto cleanup = fit::defer([&unmap_mapping, &pmt]() {
unmap_mapping();
if (pmt) {
pmt.unpin();
}
});
// Query for the current size of the vmo. This could change due to other threads with handles to
// this vmo calling set-size, but should ensure a decent hit rate of random range operations.
uint64_t vmo_size;
if (vmo.get_size(&vmo_size) != ZX_OK) {
vmo_size = kMaxVmoPages * zx_system_get_page_size();
}
while (!shutdown_ && op_count->fetch_add(1) < kMaxOps) {
// Produce a random offset and size up front since many ops will need it.
uint64_t op_off, op_size;
random_off_size(rng, vmo_size, &op_off, &op_size);
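// Pick one of 141 outcomes; the case ranges below weight the operations, e.g. commit and
// decommit each get 20/141 (~14%) while most other operations get 10/141 (~7%).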
switch (uniform_rand(141, rng)) {
case 0: // give up early
Printf("G");
return;
break;
case 1 ... 10: { // duplicate
Printf("D");
zx::vmo dup_vmo;
zx_status_t result = vmo.duplicate(ZX_RIGHT_SAME_RIGHTS, &dup_vmo);
if (result != ZX_OK) {
PrintfAlways("vmo.duplicate() failed - result: %d\n", result);
}
ZX_ASSERT(result == ZX_OK);
make_thread(
[this, dup = std::move(dup_vmo), ops = op_count, reliable_mappings]() mutable {
op_thread(std::move(dup), std::move(ops), reliable_mappings);
});
break;
}
case 11 ... 20: { // read
Printf("R");
std::vector<uint8_t> buffer;
bool use_map = false;
if (mapping.has_value() && uniform_rand(2, rng) == 0) {
op_off = uniform_rand(mapping.value().size_bytes(), rng);
op_size = uniform_rand(mapping.value().size_bytes() - op_off, rng);
use_map = true;
}
// Avoid ubsan complaints by making the size at least 1, so that &buffer[start] works
// below.
buffer.resize(std::max(1ul, op_size));
// pre-commit some portion of the buffer
const size_t end = uniform_rand(op_size, rng);
const size_t start = uniform_rand(op_size, rng);
memset(&buffer[start], 42, end - std::min(end, start));
if (use_map) {
memcpy(buffer.data(), &mapping.value()[op_off], op_size);
} else {
vmo.read(buffer.data(), op_off, op_size);
}
break;
}
case 21 ... 30: { // write
Printf("W");
std::vector<uint8_t> buffer;
bool use_map = false;
if (mapping.has_value() && uniform_rand(2, rng) == 0) {
op_off = uniform_rand(mapping.value().size_bytes(), rng);
op_size = uniform_rand(mapping.value().size_bytes() - op_off, rng);
use_map = true;
}
// Avoid ubsan complaints by making the size at least 1, so that &buffer[start] works
// below.
buffer.resize(std::max(1ul, op_size));
// write some portion of the buffer with 'random' data.
const size_t end = uniform_rand(op_size, rng);
const size_t start = uniform_rand(op_size, rng);
memset(&buffer[start], 42, end - std::min(end, start));
if (use_map) {
memcpy(&mapping.value()[op_off], buffer.data(), op_size);
} else {
vmo.write(buffer.data(), op_off, op_size);
}
break;
}
case 31 ... 40: // vmo_set_size
Printf("S");
vmo.set_size(uniform_rand(kMaxVmoPages * zx_system_get_page_size(), rng));
break;
case 81 ... 100: // commit
Printf("c");
vmo.op_range(ZX_VMO_OP_COMMIT, op_off, op_size, nullptr, 0);
break;
case 101 ... 120: // decommit
Printf("d");
vmo.op_range(ZX_VMO_OP_DECOMMIT, op_off, op_size, nullptr, 0);
break;
case 121 ... 130: { // vmo_op_range (other than COMMIT/DECOMMIT which have their own cases)
Printf("O");
static const uint32_t ops[] = {ZX_VMO_OP_ZERO,
ZX_VMO_OP_LOCK,
ZX_VMO_OP_UNLOCK,
ZX_VMO_OP_CACHE_SYNC,
ZX_VMO_OP_CACHE_INVALIDATE,
ZX_VMO_OP_CACHE_CLEAN,
ZX_VMO_OP_CACHE_CLEAN_INVALIDATE,
ZX_VMO_OP_DONT_NEED,
ZX_VMO_OP_TRY_LOCK};
vmo.op_range(ops[uniform_rand(std::size(ops), rng)], op_off, op_size, nullptr, 0);
break;
}
case 131 ... 140: { // transfer_data
Printf("T");
// Create a second VMO that will be used as either the source or the destination for a
// data transfer.
zx::vmo vmo2;
uint64_t vmo2_num_pages = uniform_rand(kMaxVmoPages, rng);
zx::vmo::create(vmo2_num_pages * zx_system_get_page_size(), 0, &vmo2);
// Compute the maximum number of pages we could transfer between these two VMOs, then
// pick a random number of pages below that threshold.
uint64_t vmo_num_pages = vmo_size / zx_system_get_page_size();
uint64_t max_num_transfer_pages =
vmo2_num_pages > vmo_num_pages ? vmo_num_pages : vmo2_num_pages;
uint64_t num_transfer_pages = uniform_rand(max_num_transfer_pages, rng);
// Compute possible offsets within the source and destination VMOs.
uint64_t max_offset_pages = max_num_transfer_pages - num_transfer_pages;
uint64_t src_offset_pages = uniform_rand(max_offset_pages, rng);
uint64_t dst_offset_pages = uniform_rand(max_offset_pages, rng);
// Translate all of our sizes and offsets, which are in units of PAGE_SIZE, to units of
// bytes.
uint64_t src_offset = src_offset_pages * zx_system_get_page_size();
uint64_t dst_offset = dst_offset_pages * zx_system_get_page_size();
uint64_t transfer_size = num_transfer_pages * zx_system_get_page_size();
// Transfer data from vmo to vmo2 on even random numbers, and from vmo2 to vmo on odd
// random numbers.
if (uniform_rand(100, rng) % 2) {
vmo.transfer_data(0, dst_offset, transfer_size, &vmo2, src_offset);
} else {
vmo2.transfer_data(0, dst_offset, transfer_size, &vmo, src_offset);
}
break;
}
case 41 ... 50: { // vmo_set_cache_policy
Printf("P");
static const uint32_t policies[] = {ZX_CACHE_POLICY_CACHED,
// TODO(https://fxbug.dev/42081323): On arm64, unaligned access to Device memory generates
// an alignment exception. Our current memcpy/memset implementations are designed to work
// on Normal memory and will result in unaligned access if the pointer arguments are not
// aligned. We should fix vmstress to never issue unaligned memcpy/memset to Device
// memory.
#if !defined(__aarch64__)
ZX_CACHE_POLICY_UNCACHED,
ZX_CACHE_POLICY_UNCACHED_DEVICE,
ZX_CACHE_POLICY_WRITE_COMBINING
#endif
};
vmo.set_cache_policy(policies[uniform_rand(std::size(policies), rng)]);
break;
}
case 51 ... 60: { // vmo_create_child
Printf("C");
static const uint32_t type[] = {
ZX_VMO_CHILD_SNAPSHOT, ZX_VMO_CHILD_SNAPSHOT_AT_LEAST_ON_WRITE, ZX_VMO_CHILD_SLICE,
ZX_VMO_CHILD_REFERENCE, ZX_VMO_CHILD_SNAPSHOT_MODIFIED};
uint32_t options = type[uniform_rand(std::size(type), rng)];
bool child_reliable_mappings = reliable_mappings;
if (uniform_rand(3, rng) == 0) {
options |= ZX_VMO_CHILD_RESIZABLE;
child_reliable_mappings = false;
}
if (uniform_rand(4, rng)) {
options |= ZX_VMO_CHILD_NO_WRITE;
}
zx::vmo child;
if (vmo.create_child(options, op_off, op_size, &child) == ZX_OK) {
make_thread([this, child = std::move(child), ops = op_count,
child_reliable_mappings]() mutable {
op_thread(std::move(child), std::move(ops), child_reliable_mappings);
});
}
break;
}
case 61 ... 70: { // vmar_map/unmap
// If reliable mappings is true it means we know that no one else is going to mess with
// the VMO in a way that would cause access to a valid mapping to generate a fault.
// Generally this means that the VMO is not resizable.
Printf("V");
if (reliable_mappings) {
if (!mapping.has_value() || uniform_rand(2, rng) == 0) {
uint32_t options = ZX_VM_PERM_READ | ZX_VM_PERM_WRITE;
if (uniform_rand(2, rng) == 0) {
options |= ZX_VM_MAP_RANGE;
}
zx_info_vmo_t info;
ZX_ASSERT(vmo.get_info(ZX_INFO_VMO, &info, sizeof(info), nullptr, nullptr) == ZX_OK);
if (info.flags & ZX_INFO_VMO_PAGER_BACKED) {
options |= ZX_VM_ALLOW_FAULTS;
}
zx_vaddr_t addr;
// Currently fault prevention isn't enforced in mappings and so we must be *very*
// careful not to map outside the actual range of the vmo.
if (op_off + op_size <= vmo_size &&
zx::vmar::root_self()->map(options, 0, vmo, op_off, op_size, &addr) == ZX_OK) {
unmap_mapping();
mapping = cpp20::span<uint8_t>{reinterpret_cast<uint8_t*>(addr), op_size};
}
} else {
unmap_mapping();
}
}
break;
}
case 71 ... 80: { // bti_pin/bti_unpin
Printf("I");
if (bti_) {
if (pmt || uniform_rand(2, rng) == 0) {
zx::pmt new_pmt;
// One physical address slot per page being pinned.
std::vector<zx_paddr_t> paddrs(op_size / zx_system_get_page_size(), 0);
if (bti_.pin(ZX_BTI_PERM_READ | ZX_BTI_PERM_WRITE, vmo, op_off, op_size,
paddrs.data(), paddrs.size(), &new_pmt) == ZX_OK) {
if (pmt) {
pmt.unpin();
}
pmt = std::move(new_pmt);
}
} else {
if (pmt) {
pmt.unpin();
}
}
}
break;
}
}
}
if (!shutdown_) {
// Achieved max ops.
Printf("M");
}
}
static void random_off_size(StressTest::Rng& rng, uint64_t vmo_size, uint64_t* off_out,
uint64_t* size_out) {
// When calculating out of bounds values, pick a limit that still gives a chance to be in bounds.
constexpr uint64_t kOobLimitPages = kMaxVmoPages * 2;
// We don't want a uniform distribution of offsets and sizes as a lot of interesting things
// happen with page alignment and entire vmo ranges.
switch (uniform_rand(5, rng)) {
case 0: // Anchor offset at 0
*off_out = 0;
break;
case 1: // Page aligned offset, in bounds
*off_out =
uniform_rand(vmo_size / zx_system_get_page_size(), rng) * zx_system_get_page_size();
break;
case 2: // Page aligned offset, out of bounds
*off_out = uniform_rand(kOobLimitPages, rng) * zx_system_get_page_size();
break;
case 3: // In bounds
*off_out = uniform_rand(vmo_size, rng);
break;
case 4: // Out of bounds
*off_out = uniform_rand(kOobLimitPages * zx_system_get_page_size(), rng);
break;
}
const uint64_t remaining = vmo_size - std::min(vmo_size, *off_out);
switch (uniform_rand(5, rng)) {
case 0: // Maximum remaining vmo size
*size_out = remaining;
break;
case 1: // In range page aligned size
*size_out =
uniform_rand(remaining / zx_system_get_page_size(), rng) * zx_system_get_page_size();
break;
case 2: // Out of range page aligned size
*size_out = uniform_rand(kOobLimitPages, rng) * zx_system_get_page_size();
break;
case 3: // In range size
*size_out = uniform_rand(remaining, rng);
break;
case 4: // Out of range size
*size_out = uniform_rand(kOobLimitPages * zx_system_get_page_size(), rng);
break;
}
}
// This wrapper spawns a new thread to run F and automatically updates the living_threads_ count
// and spawns any new root threads should we start running low.
template <typename F>
bool make_thread(F func) {
uint64_t prev_count = living_threads_.fetch_add(1);
if (prev_count >= max_threads_) {
living_threads_.fetch_sub(1);
return false;
}
std::thread t{[this, func = std::move(func)]() mutable {
func();
// Spawn threads *before* decrementing our count, as the shutdown logic assumes that once
// shutdown_ is set and living_threads_ reaches 0, it will never increment again.
while (!shutdown_ && living_threads_ < low_threads_) {
auto rng = RngGen();
spawn_root_vmo(rng);
}
living_threads_.fetch_sub(1);
}};
t.detach();
return true;
}
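// Creates a shared op counter (randomly chosen up to kMaxOps) that is passed to every op thread
// working on the same vmo tree.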
std::shared_ptr<std::atomic<uint64_t>> make_ops(StressTest::Rng& rng) {
uint64_t start_ops = uniform_rand(kMaxOps, rng);
return std::make_shared<std::atomic<uint64_t>>(start_ops);
}
// To explore interesting scenarios, especially involving parallelism, we want to run every VMO
// tree for a decent number of ops, but not for too long, as at some point running longer is the
// same as just spawning a new tree. This number was chosen fairly arbitrarily, but given that all
// previous VM bugs had unit test reproductions of fewer than 20 ops, it seems reasonable.
static constexpr uint64_t kMaxOps = 4096;
// 128 pages in a vmo should be all we need to create sufficiently interesting hierarchies, so
// cap our usage there. This allows us to spin up more threads and copy-on-write hierarchies
// without worrying that they all commit and blow the memory limit.
static constexpr uint64_t kMaxVmoPages = 128;
// While it might be possible for max_threads_ to be arbitrarily high and still not exhaust free
// memory, cap the number so that we don't end up spinning an arbitrarily large number of threads
// on a system with large amounts of memory. An exceedingly large number of threads can slow down
// the overall system and cause test timeouts in certain environments (like in CQ, where the
// ssh-keep-alive does not get a chance to get scheduled and the ssh connection to the target gets
// dropped). The parallelism offered by 300 threads should be sufficiently interesting.
static constexpr uint64_t kMaxThreads = 300;
// This will be set to the total memory limit (in pages) that this test instance is constructed
// with. We should not spend more than that.
const uint64_t memory_limit_pages_;
// The maximum number of threads we can create that will not cause us to exceed our memory limit.
const uint64_t max_threads_;
// Generally we don't want too many threads, so set low_threads_ (the threshold at which we
// start spawning more root threads) to be fairly low. max_threads_ can be arbitrarily high,
// which allows our small number of root threads to (potentially) spin up a lot of parallelism.
const uint64_t low_threads_ = 8;
// Set to true when we are trying to shutdown.
std::atomic<bool> shutdown_ = false;
// Number of alive threads. Used to coordinate shutdown.
std::atomic<uint64_t> living_threads_ = 0;
// Valid if we got the iommu resource.
zx::iommu iommu_;
zx::bti bti_;
bool is_contiguous_ = false;
};
// Test thread which initializes/tears down TestInstances
int VmStressTest::test_thread() {
#if defined(__x86_64__)
constexpr uint64_t kMaxInstances = 8;
#else
// Scale down the number of instances on less powerful hardware.
constexpr uint64_t kMaxInstances = 4;
#endif
constexpr uint64_t kVariableInstances = kMaxInstances - 1;
std::unique_ptr<TestInstance> test_instances[kMaxInstances] = {};
const uint64_t free_bytes = kmem_stats_.free_bytes;
// Scale the size of the VMO we create based on the amount of free memory in the system.
// 1/64th of free memory generates a fairly sizeable vmo (16MB per 1GB).
const uint64_t vmo_test_size = free_bytes / 64 / kMaxInstances;
PrintfAlways("VM stress test: using vmo of size %" PRIu64 "\n", vmo_test_size);
uint64_t total_test_instances = 0;
// The MultiVmoTestInstance already spins up and tears down threads internally, so there is no
// benefit in also spinning up and tearing down the whole instance. Instead we run a single one
// as a static instance and randomize the others as variable instances. We give this instance a
// 'full slice' of free memory, as it is incredibly unlikely to ever allocate anywhere near that
// much.
auto multi_vmo = std::make_unique<MultiVmoTestInstance>(this, total_test_instances++,
free_bytes / kMaxInstances);
PrintfAlways("VM stress test: multi vmo instance max threads %lu\n",
multi_vmo->get_max_threads());
test_instances[kVariableInstances] = std::move(multi_vmo);
test_instances[kVariableInstances]->Start();
zx::time deadline = zx::clock::get_monotonic();
auto rng = RngGen();
size_t iter = 0;
size_t max_threads = 0;
while (!shutdown_.load()) {
iter++;
// Log the number of threads every so often. Serves as a hint for whether we are leaking
// threads. Does not need to be precise.
if (iter % 1000 == 0) {
size_t num_threads;
if (zx::process::self()->get_info(ZX_INFO_PROCESS_THREADS, nullptr, 0, nullptr,
&num_threads) == ZX_OK) {
max_threads = std::max(max_threads, num_threads);
PrintfAlways("Number of threads: %lu (peak %lu)\n", num_threads, max_threads);
}
}
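// Pick a random variable instance slot: if it is currently running, tear it down; otherwise
// spin up a new, randomly chosen, instance type in it.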
uint64_t r = uniform_rand(kVariableInstances, rng);
if (test_instances[r]) {
size_t num_threads;
if (zx::process::self()->get_info(ZX_INFO_PROCESS_THREADS, nullptr, 0, nullptr,
&num_threads) == ZX_OK) {
max_threads = std::max(max_threads, num_threads);
}
test_instances[r]->Stop();
test_instances[r].reset();
} else {
switch (uniform_rand(3, rng)) {
case 0:
if (uniform_rand(2, rng)) {
test_instances[r] = std::make_unique<SingleVmoTestInstance>(
this, total_test_instances++, true, true, vmo_test_size);
} else {
test_instances[r] = std::make_unique<SingleVmoTestInstance>(
this, total_test_instances++, true, false, vmo_test_size);
}
break;
case 1:
test_instances[r] = std::make_unique<SingleVmoTestInstance>(this, total_test_instances++,
false, false, vmo_test_size);
break;
case 2:
test_instances[r] = std::make_unique<CowCloneTestInstance>(this, total_test_instances++);
break;
}
if (test_instances[r]) {
ZX_ASSERT(test_instances[r]->Start() == ZX_OK);
}
}
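// Pace the create/teardown loop at kOpsPerSec iterations per second.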
constexpr uint64_t kOpsPerSec = 25;
deadline += zx::duration(ZX_SEC(1) / kOpsPerSec);
zx::nanosleep(deadline);
}
for (uint64_t i = 0; i < kMaxInstances; i++) {
if (test_instances[i]) {
test_instances[i]->Stop();
}
}
return 0;
}
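// Starts the test by spawning the control thread that creates and tears down test instances.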
zx_status_t VmStressTest::Start() {
auto test_worker = [](void* arg) -> int {
return static_cast<VmStressTest*>(arg)->test_thread();
};
thrd_create_with_name(&test_thread_, test_worker, this, "test_worker");
return ZX_OK;
}
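// Signals shutdown and waits for the control thread to stop all instances and exit.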
zx_status_t VmStressTest::Stop() {
shutdown_.store(true);
thrd_join(test_thread_, nullptr);
return ZX_OK;
}