// Copyright 2017 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <lib/zx/vmar.h>
#include <lib/zx/vmo.h>
#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>
#include <fbl/string_printf.h>
#include <perftest/perftest.h>
#include "assert.h"
// This file contains various test cases that measure the cost of copying to/from a VMO, effectively
// measuring the cost of a memcpy() for different cases. These include:
// * Vmo/Read, Vmo/Write: cost of copying from/to a VMO with zx_vmo_read()/zx_vmo_write(). The
// operated-on VMO is already mapped, with page table entries populated.
// * Subcase: Vmo/Write/ZeroPage: cost of zx_vmo_write() when the memory being read from is the
// shared zero page (as implemented by the kernel).
// * VmoMap/Read, VmoMap/Write: cost of mapping a VMO and then copying it. The operated-on VMO
// already has its pages committed.
// * Subcase: "/Kernel" variants use zx_vmo_read()/zx_vmo_write() to copy to/from the VMO; other
// variants use memcpy() in userland.
// * Subcase: VmoMapRange: uses ZX_VM_MAP_RANGE so that the map operation pre-populates the page
// table entries for the mappings.
// * Vmo/Memcpy: cost of creating a VMO, mapping it, then copying data into it using memcpy().
// * Subcase: "/WithPrecommit" variants commit the mapped VMO's pages with ZX_VMAR_OP_COMMIT
// before issuing the memcpy.
// * Subcase: "/WithoutPrecommit" variants memcpy into the VMO without committing it first.
// * Vmo/TransferData: cost of zx_vmo_transfer_data().
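//
// Each test case is registered for a range of buffer sizes; RegisterVmoTest() at the bottom of
// this file expands a base name such as "Vmo/Read" into "Vmo/Read/4kbytes" through
// "Vmo/Read/2048kbytes".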
namespace {
// Measure the time taken to write or read a chunk of data to/from a VMO using the zx_vmo_write() or
// zx_vmo_read() syscalls respectively. If |do_write| and |zero_page| are true, this measures the
// time to do a zx_vmo_write() that copies from a buffer that maps to the kernel's shared zero page
// into the VMO. One reason to test this case is that it exercises a different code path in the
// kernel than distinct non-zero pages do. For multi-page buffers, it also reads from fewer
// distinct physical pages.
bool VmoReadOrWriteTest(perftest::RepeatState* state, uint32_t copy_size, bool do_write,
bool zero_page) {
if (zero_page) {
// This is the only meaningful combination. See comments where the test is registered below.
ZX_ASSERT(do_write);
}
zx::vmo vmo;
ASSERT_OK(zx::vmo::create(copy_size, 0, &vmo));
// Use a vmo as the buffer to read from / write to, so we can exactly control whether we're using
// distinct physical pages or the singleton zero page.
zx::vmo buffer_vmo;
ASSERT_OK(zx::vmo::create(copy_size, 0, &buffer_vmo));
zx_vaddr_t buffer_addr;
ASSERT_OK(zx::vmar::root_self()->map(ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0, buffer_vmo, 0,
copy_size, &buffer_addr));
  // If |zero_page| is false, memset the buffer to a non-zero value to make sure buffer_vmo's pages
  // are committed and not eligible for zero-page deduping; otherwise, leave the buffer untouched so
  // the kernel faults in the shared zero page as required.
  //
  // This changes the runtime of the vmo write below: when |zero_page| is true, the write only reads
  // from the buffer, so every lookup of a backing page in buffer_vmo resolves to the singleton zero
  // page, which the kernel handles with different logic than distinct physical pages.
if (!zero_page) {
memset(reinterpret_cast<void*>(buffer_addr), 0xa, copy_size);
}
// Write the VMO so that the pages are pre-committed. This matters
// more for the read case.
ASSERT_OK(vmo.write(reinterpret_cast<void*>(buffer_addr), 0, copy_size));
if (do_write) {
while (state->KeepRunning()) {
ASSERT_OK(vmo.write(reinterpret_cast<void*>(buffer_addr), 0, copy_size));
}
} else {
while (state->KeepRunning()) {
ASSERT_OK(vmo.read(reinterpret_cast<void*>(buffer_addr), 0, copy_size));
}
}
ASSERT_OK(zx::vmar::root_self()->unmap(buffer_addr, copy_size));
return true;
}
// Measure the time taken to map a VMO, write or read a chunk of data to/from it, and unmap it. The
// writing/reading is done either from userland using memcpy() (when user_memcpy=true) or by the
// kernel using zx_vmo_read()/zx_vmo_write() (when user_memcpy=false).
bool VmoReadOrWriteMapTestImpl(perftest::RepeatState* state, uint32_t copy_size, bool do_write,
int flags, bool user_memcpy) {
zx::vmo vmo;
ASSERT_OK(zx::vmo::create(copy_size, 0, &vmo));
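  // Userland buffer used as the source for writes and the destination for reads.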
std::vector<char> buffer(copy_size);
zx_vaddr_t mapped_addr;
zx::vmo vmo_buf;
if (!user_memcpy) {
// Create a temporary VMO that we can use to get the kernel to read/write our mapped memory.
ASSERT_OK(zx::vmo::create(copy_size, 0, &vmo_buf));
}
// Write the VMO so that the pages are pre-committed. This matters
// more for the read case.
ASSERT_OK(vmo.write(buffer.data(), 0, copy_size));
if (do_write) {
while (state->KeepRunning()) {
ASSERT_OK(zx::vmar::root_self()->map(ZX_VM_PERM_READ | ZX_VM_PERM_WRITE | flags, 0, vmo, 0,
copy_size, &mapped_addr));
if (user_memcpy) {
std::memcpy(reinterpret_cast<void*>(mapped_addr), buffer.data(), copy_size);
} else {
        // To write to the mapped-in portion we *read* from the temporary VMO.
ASSERT_OK(vmo_buf.read(reinterpret_cast<void*>(mapped_addr), 0, copy_size));
}
ASSERT_OK(zx::vmar::root_self()->unmap(mapped_addr, copy_size));
}
} else { // read
while (state->KeepRunning()) {
ASSERT_OK(zx::vmar::root_self()->map(ZX_VM_PERM_READ | ZX_VM_PERM_WRITE | flags, 0, vmo, 0,
copy_size, &mapped_addr));
if (user_memcpy) {
std::memcpy(buffer.data(), reinterpret_cast<void*>(mapped_addr), copy_size);
} else {
        // To read from the mapped-in portion we *write* it to the temporary VMO.
ASSERT_OK(vmo_buf.write(reinterpret_cast<void*>(mapped_addr), 0, copy_size));
}
ASSERT_OK(zx::vmar::root_self()->unmap(mapped_addr, copy_size));
}
}
return true;
}
bool VmoReadOrWriteMapTest(perftest::RepeatState* state, uint32_t copy_size, bool do_write,
bool user_memcpy) {
return VmoReadOrWriteMapTestImpl(state, copy_size, do_write, 0, user_memcpy);
}
bool VmoReadOrWriteMapRangeTest(perftest::RepeatState* state, uint32_t copy_size, bool do_write,
bool user_memcpy) {
return VmoReadOrWriteMapTestImpl(state, copy_size, do_write, ZX_VM_MAP_RANGE, user_memcpy);
}
// Measure the time taken to create a VMO, map it into the root VMAR, optionally commit the mapped
// pages, memcpy data into the VMO, then unmap and destroy it. This is an indirect way to measure
// the overhead induced by page faulting during a memcpy.
bool VmoMemcpyPrecommitTest(perftest::RepeatState* state, uint32_t size, bool precommit) {
state->DeclareStep("create_and_map_vmo");
if (precommit) {
state->DeclareStep("precommit");
}
state->DeclareStep("memcpy");
state->DeclareStep("unmap_and_destroy_vmo");
// Set up a source buffer and initialize it.
std::unique_ptr<char[]> src(new char[size]);
memset(src.get(), 0xff, size);
while (state->KeepRunning()) {
// Create and map the destination VMO.
zx::vmo dst_vmo;
ASSERT_OK(zx::vmo::create(size, 0, &dst_vmo));
zx_vaddr_t dst;
ASSERT_OK(
zx::vmar::root_self()->map(ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0, dst_vmo, 0, size, &dst));
state->NextStep();
// Commit the destination VMO if we're running the precommit case.
if (precommit) {
ASSERT_OK(zx::vmar::root_self()->op_range(ZX_VMAR_OP_COMMIT, dst, size, 0, 0));
state->NextStep();
}
// Memcpy from source into dst.
memcpy(reinterpret_cast<void*>(dst), src.get(), size);
state->NextStep();
// Unmap the destination VMO.
ASSERT_OK(zx::vmar::root_self()->unmap(dst, size));
}
return true;
}
// Measure the time taken to clone a vmo and destroy it. If map_size is non-zero, this function
// tests the case where the original vmo is mapped in chunks of map_size; otherwise it tests the
// case where the original vmo is not mapped.
bool VmoCloneTest(perftest::RepeatState* state, uint32_t copy_size, uint32_t map_size) {
if (map_size > 0) {
state->DeclareStep("map");
}
state->DeclareStep("clone");
state->DeclareStep("close");
if (map_size > 0) {
state->DeclareStep("unmap");
}
zx::vmo vmo;
ASSERT_OK(zx::vmo::create(copy_size, 0, &vmo));
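  // Commit the vmo's pages up front so that every measured iteration clones a fully populated vmo.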
ASSERT_OK(vmo.op_range(ZX_VMO_OP_COMMIT, 0, copy_size, nullptr, 0));
zx::vmar vmar;
zx_vaddr_t addr = 0;
  // Allocate a single vmar so that we have one contiguous reserved block even when mapping the vmo
  // in multiple chunks.
ASSERT_OK(zx::vmar::root_self()->allocate(
ZX_VM_CAN_MAP_SPECIFIC | ZX_VM_CAN_MAP_READ | ZX_VM_CAN_MAP_WRITE, 0, copy_size, &vmar,
&addr));
while (state->KeepRunning()) {
if (map_size > 0) {
zx_vaddr_t chunk_addr;
for (uint32_t off = 0; off < copy_size; off += map_size) {
ASSERT_OK(vmar.map(ZX_VM_MAP_RANGE | ZX_VM_PERM_READ | ZX_VM_SPECIFIC, off, vmo, off,
map_size, &chunk_addr));
}
state->NextStep();
}
zx::vmo clone;
ASSERT_OK(vmo.create_child(ZX_VMO_CHILD_SNAPSHOT, 0, copy_size, &clone));
state->NextStep();
clone.reset();
if (map_size > 0) {
state->NextStep();
ASSERT_OK(vmar.unmap(addr, copy_size));
}
}
return true;
}
// Measure the time taken to create a clone, map it, unmap it, and then destroy it.
bool VmoMapCloneTest(perftest::RepeatState* state, uint32_t copy_size) {
state->DeclareStep("clone");
state->DeclareStep("map");
state->DeclareStep("unmap");
state->DeclareStep("close");
zx::vmo vmo;
ASSERT_OK(zx::vmo::create(copy_size, 0, &vmo));
// Fully commit the parent vmo's pages, so that the clone mapping has backing pages to map in.
ASSERT_OK(vmo.op_range(ZX_VMO_OP_COMMIT, 0, copy_size, nullptr, 0));
while (state->KeepRunning()) {
zx::vmo clone;
ASSERT_OK(vmo.create_child(ZX_VMO_CHILD_SNAPSHOT, 0, copy_size, &clone));
state->NextStep();
zx_vaddr_t addr = 0;
// ZX_VM_MAP_RANGE will fully populate the mapping.
ASSERT_OK(zx::vmar::root_self()->map(ZX_VM_MAP_RANGE | ZX_VM_PERM_READ, 0, clone, 0, copy_size,
&addr));
state->NextStep();
ASSERT_OK(zx::vmar::root_self()->unmap(addr, copy_size));
state->NextStep();
clone.reset();
}
return true;
}
// Measure the time it takes to clone a vmo. Specifically, this measures:
// - Clone a vmo.
// - Read or write either the original vmo (do_target_clone=false) or the
// clone (do_target_clone=true).
// - For bidirectional clones, we don't expect varying do_target_clone to
// significantly affect performance.
// - do_full_op controls whether we read or write the whole vmo or just
// a subset of the pages, as the performance characteristics of a
// partially populated clone and a fully populated clone can differ.
// - Destroy the clone.
bool VmoCloneReadOrWriteTest(perftest::RepeatState* state, uint32_t copy_size, bool do_write,
bool do_target_clone, bool do_full_op) {
const size_t kPageSize = zx_system_get_page_size();
state->DeclareStep("clone");
state->DeclareStep(do_write ? "write" : "read");
state->DeclareStep("close");
zx::vmo vmo;
ASSERT_OK(zx::vmo::create(copy_size, 0, &vmo));
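  // Commit the parent vmo's pages so that clone reads return the parent's pages and clone writes
  // take the copy-on-write fork path, rather than operating against uncommitted zero pages.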
ASSERT_OK(vmo.op_range(ZX_VMO_OP_COMMIT, 0, copy_size, nullptr, 0));
std::vector<char> buffer(copy_size);
while (state->KeepRunning()) {
zx::vmo clone;
ASSERT_OK(vmo.create_child(ZX_VMO_CHILD_SNAPSHOT, 0, copy_size, &clone));
state->NextStep();
const zx::vmo& target = do_target_clone ? clone : vmo;
if (do_full_op) {
if (do_write) {
ASSERT_OK(target.write(buffer.data(), 0, copy_size));
} else {
ASSERT_OK(target.read(buffer.data(), 0, copy_size));
}
} else {
      // There's no special meaning behind the particular value of this
      // constant. It just needs to result in a handful of accesses to the
      // vmo without populating it too densely.
const uint64_t kWriteInterval = 8 * kPageSize;
for (uint64_t offset = 0; offset < copy_size; offset += kWriteInterval) {
if (do_write) {
ASSERT_OK(target.write(buffer.data(), offset, kPageSize));
} else {
ASSERT_OK(target.read(buffer.data(), offset, kPageSize));
}
}
}
state->NextStep();
// The clone goes out of scope and is implicitly closed.
}
return true;
}
// Measure the time taken to create, write to, and then read from a VMO on a single thread. This
// measures the performance of a brand new VMO's entire lifecycle up to data read completion. This
// test is useful because it is essentially what users of `fuchsia.mem.Data` or `fuchsia.mem.Buffer`
// must do on top of their default zx.channel write/read operations. Even though these operations
// are also tested separately, it's worth measuring them together because their combined performance
// behavior can differ.
//
// The zx_vmo_write() call will cause pages to be allocated in the VMO, and closing the VMO handle
// will free those pages.
bool VmoCreateWriteReadCloseTest(perftest::RepeatState* state, uint32_t copy_size) {
state->DeclareStep("create");
state->DeclareStep("write");
state->DeclareStep("read");
state->DeclareStep("close");
// Use a vmo as the buffer to read from / write to.
zx::vmo buffer_vmo;
ASSERT_OK(zx::vmo::create(copy_size, 0, &buffer_vmo));
zx_vaddr_t buffer_addr;
ASSERT_OK(zx::vmar::root_self()->map(ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0, buffer_vmo, 0,
copy_size, &buffer_addr));
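  // Populate the buffer's pages with non-zero data up front so that the measured loop doesn't
  // fault on the buffer.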
memset(reinterpret_cast<void*>(buffer_addr), 0xa, copy_size);
while (state->KeepRunning()) {
zx::vmo vmo;
ASSERT_OK(zx::vmo::create(copy_size, 0, &vmo));
state->NextStep();
ASSERT_OK(vmo.write(reinterpret_cast<void*>(buffer_addr), 0, copy_size));
state->NextStep();
ASSERT_OK(vmo.read(reinterpret_cast<void*>(buffer_addr), 0, copy_size));
state->NextStep();
}
ASSERT_OK(zx::vmar::root_self()->unmap(buffer_addr, copy_size));
return true;
}
// Measure the time taken to transfer data between VMOs using zx_vmo_transfer_data().
bool VmoTransferDataTest(perftest::RepeatState* state, uint64_t transfer_size) {
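  // Each iteration moves the second half of src_vmo into the first half of dst_vmo.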
const uint64_t vmo_size = transfer_size * 2;
const uint64_t src_offset = transfer_size;
const uint64_t dst_offset = 0;
zx::vmo src_vmo;
ASSERT_OK(zx::vmo::create(vmo_size, 0, &src_vmo));
zx_vaddr_t buffer_addr;
ASSERT_OK(zx::vmar::root_self()->map(ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0, src_vmo, 0, vmo_size,
&buffer_addr));
zx::vmo dst_vmo;
ASSERT_OK(zx::vmo::create(vmo_size, 0, &dst_vmo));
state->DeclareStep("fill_source");
state->DeclareStep("transfer");
while (state->KeepRunning()) {
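    // zx_vmo_transfer_data() moves pages out of the source range, leaving it decommitted, so the
    // source must be refilled (and thereby recommitted) on every iteration.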
    memset(reinterpret_cast<void*>(buffer_addr), 'f', vmo_size);
state->NextStep();
ASSERT_OK(dst_vmo.transfer_data(0, dst_offset, transfer_size, &src_vmo, src_offset));
}
ASSERT_OK(zx::vmar::root_self()->unmap(buffer_addr, vmo_size));
return true;
}
template <typename Func, typename... Args>
void RegisterVmoTest(const char* name, Func fn, Args... args) {
for (unsigned size_in_kbytes : {4, 32, 128, 512, 2048}) {
auto full_name = fbl::StringPrintf("%s/%ukbytes", name, size_in_kbytes);
perftest::RegisterTest(full_name.c_str(), fn, size_in_kbytes * 1024, args...);
}
}
void RegisterTests() {
for (bool do_write : {false, true}) {
for (bool zero : {false, true}) {
      // The zero-page case for vmo read is not meaningful: only the first iteration would operate
      // on the zero page; the remaining iterations would use forked pages, which is equivalent to
      // the non-zero case. Skip this combination.
if (zero && !do_write) {
continue;
}
const char* rw = do_write ? "Write" : "Read";
const char* z = zero ? "/ZeroPage" : "";
auto rw_name = fbl::StringPrintf("Vmo/%s%s", rw, z);
RegisterVmoTest(rw_name.c_str(), VmoReadOrWriteTest, do_write, zero);
}
}
for (bool do_write : {false, true}) {
for (bool user_memcpy : {false, true}) {
const char* rw = do_write ? "Write" : "Read";
const char* user_kernel = user_memcpy ? "" : "/Kernel";
auto rw_name = fbl::StringPrintf("VmoMap/%s%s", rw, user_kernel);
RegisterVmoTest(rw_name.c_str(), VmoReadOrWriteMapTest, do_write, user_memcpy);
rw_name = fbl::StringPrintf("VmoMapRange/%s%s", rw, user_kernel);
RegisterVmoTest(rw_name.c_str(), VmoReadOrWriteMapRangeTest, do_write, user_memcpy);
}
}
for (bool precommit : {false, true}) {
const char* pc = precommit ? "WithPrecommit" : "WithoutPrecommit";
auto precommit_name = fbl::StringPrintf("Vmo/Memcpy/%s", pc);
RegisterVmoTest(precommit_name.c_str(), VmoMemcpyPrecommitTest, precommit);
}
for (bool map : {false, true}) {
auto clone_name = fbl::StringPrintf("Vmo/Clone%s", map ? "/MapParent" : "");
RegisterVmoTest(clone_name.c_str(), [map](perftest::RepeatState* state, uint32_t size) {
return VmoCloneTest(state, size, map ? size : 0);
});
}
for (unsigned map_chunk_kb : {4, 64, 2048, 32768}) {
constexpr uint32_t vmo_size_kb = 32768;
auto name = fbl::StringPrintf("Vmo/Clone/MapParent%usegments/%ukbytes",
vmo_size_kb / map_chunk_kb, vmo_size_kb);
perftest::RegisterTest(name.c_str(), VmoCloneTest, vmo_size_kb * 1024, map_chunk_kb * 1024);
}
auto name = fbl::StringPrintf("Vmo/MapClone");
RegisterVmoTest(name.c_str(), VmoMapCloneTest);
for (bool do_write : {false, true}) {
for (bool do_target_clone : {false, true}) {
for (bool do_full_op : {false, true}) {
const char* rw = do_write ? "Write" : "Read";
const char* target = do_target_clone ? "Clone" : "Orig";
const char* density = do_full_op ? "All" : "Some";
auto clone_rw_name = fbl::StringPrintf("Vmo/Clone/%s%s%s", rw, target, density);
RegisterVmoTest(clone_rw_name.c_str(), VmoCloneReadOrWriteTest, do_write, do_target_clone,
do_full_op);
}
}
}
name = fbl::StringPrintf("Vmo/CreateWriteReadClose");
RegisterVmoTest(name.c_str(), VmoCreateWriteReadCloseTest);
name = fbl::StringPrintf("Vmo/TransferData");
RegisterVmoTest(name.c_str(), VmoTransferDataTest);
}
PERFTEST_CTOR(RegisterTests)
} // namespace