blob: f2698af9be6fbb961b56a8339fc6e9db0275c871 [file] [log] [blame]
// Copyright 2019 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <algorithm>
#include <cstring>
#include <iostream>
#include <memory>
#include <random>
#include <vector>

#include <fbl/string_printf.h>
#include <perftest/perftest.h>
// These tests are very slow under ASAN under ARM64 KVM-QEMU, so disable
// them under ASAN in general (fxbug.dev/33182).
#if !__has_feature(address_sanitizer)
namespace {
// Assumed cache size, in MiB. RandomMemcpy sizes its access sequence so that
// the combined size of the blocks in the sequence exceeds this many MiB
// (subject to the kMaxAccessSequenceLen cap below).
constexpr size_t kCacheSizeMB = 16; // Larger than last-level cache on common CPUs.
// Size, in MiB, of the buffer used by every benchmark that RegisterTests()
// registers.
constexpr size_t kBufferSizeMB = 128;
// A pragmatic upper-bound on the length of a random access-sequence, to limit
// the amount of time we spend generating the sequence.
// * Per the Prng microbenchmarks, and accounting for the fact that we generate
//   two sequences of this length (one of sources, one of destinations), this
//   value should limit the sequence generation time for a single instance of
//   the RandomMemcpy benchmark to about 200 msec.
// * This does not affect results for the default invocations of the benchmarks,
//   as they only run for 1000 iterations.
constexpr size_t kMaxAccessSequenceLen = 100000;
// Measure the time taken to copy a randomly chosen block of |block_size_bytes|
// to a random destination, both within a buffer of |buffer_size_mb| MiB.
//
// Parameters:
// * |state|: perftest iteration controller; one block is moved for every
//   successful call to state->KeepRunning().
// * |block_size_bytes|: number of bytes moved per iteration. Must be non-zero
//   and strictly smaller than the buffer.
// * |buffer_size_mb|: size of the buffer, in MiB, that all sources and
//   destinations are drawn from.
//
// Returns false (after writing a message to stderr) when the configuration is
// invalid, and true once the benchmark loop has finished.
//
// See also:
// * bench_memcpy in //zircon, which runs in the kernel, with interrupts
//   disabled, and repeatedly copies the same source to the same destination
// * MemcpyTest in //zircon, which runs in userspace, and repeatedly copies the
//   same source to the same destination
bool RandomMemcpy(perftest::RepeatState* state, size_t block_size_bytes, size_t buffer_size_mb) {
  const size_t buffer_size_bytes = buffer_size_mb * 1024 * 1024;

  // A zero-sized block would divide by zero when the access-sequence length is
  // computed below, so reject it up front like any other bad configuration.
  if (block_size_bytes == 0) {
    std::cerr << "Invalid configuration: block_size_bytes must be non-zero" << std::endl;
    return false;
  }
  if (block_size_bytes >= buffer_size_bytes) {
    std::cerr << "Invalid configuration: block_size_bytes >= buffer_size_bytes ("
              << block_size_bytes << " >= " << buffer_size_bytes << ")" << std::endl;
    return false;
  }

  // Prepare the buffer. std::make_unique<char[]> already value-initializes the
  // array; the explicit memset is retained from the original code (presumably
  // to make sure every page has been written before timing starts).
  auto buf = std::make_unique<char[]>(buffer_size_bytes);
  memset(buf.get(), 0, buffer_size_bytes);

  // Choose enough blocks that their combined size exceeds the assumed cache
  // size, capped so that generating the sequence stays cheap.
  const size_t cache_size_bytes = kCacheSizeMB * 1024 * 1024;
  const size_t num_blocks_to_overflow_cache = cache_size_bytes / block_size_bytes + 1;
  const size_t access_sequence_len = std::min(num_blocks_to_overflow_cache, kMaxAccessSequenceLen);

  // Prepare the random source and destination addresses. Offsets are drawn
  // directly from std::random_device.
  std::random_device rand_dev;
  std::uniform_int_distribution rand_offset_gen(
      size_t(0), buffer_size_bytes - block_size_bytes  // Ensure end of block is within buffer.
  );
  char* const base = buf.get();
  const auto random_block_addr = [&] { return base + rand_offset_gen(rand_dev); };

  // All sources are generated before any destination, matching the original
  // order of random draws.
  std::vector<char*> src_addrs(access_sequence_len);
  std::vector<char*> dst_addrs(access_sequence_len);
  std::generate(src_addrs.begin(), src_addrs.end(), random_block_addr);
  std::generate(dst_addrs.begin(), dst_addrs.end(), random_block_addr);

  // Run the benchmark task, cycling through the pre-generated sequence.
  for (size_t i = 0; state->KeepRunning(); ++i) {
    const size_t slot = i % access_sequence_len;
    // The blocks might overlap, so we use memmove() instead of memcpy().
    memmove(dst_addrs[slot], src_addrs[slot], block_size_bytes);
  }
  return true;
}
void RegisterTest(size_t block_size_bytes, size_t buffer_size_mb) {
fbl::String test_name;
if (block_size_bytes < 1024) {
test_name =
fbl::StringPrintf("RandomMemcpy/%zubytes/%zuMbytes", block_size_bytes, buffer_size_mb);
} else if (block_size_bytes < 1024 * 1024) {
test_name = fbl::StringPrintf("RandomMemcpy/%zuKbytes/%zuMbytes", block_size_bytes / 1024,
buffer_size_mb);
} else {
test_name = fbl::StringPrintf("RandomMemcpy/%zuMbytes/%zuMbytes",
block_size_bytes / 1024 / 1024, buffer_size_mb);
}
perftest::RegisterTest(test_name.c_str(), RandomMemcpy, block_size_bytes, buffer_size_mb);
}
// Registers the full set of RandomMemcpy benchmarks: block sizes of
// 1/4/16/64/256 bytes, 1/4/16/64/256 KiB and 1/4/16 MiB, each run against a
// buffer of kBufferSizeMB MiB. Tests are registered in ascending block size.
void RegisterTests() {
  constexpr size_t kKiB = 1024;
  constexpr size_t kMiB = kKiB * kKiB;

  // Block sizes in bytes, listed in registration order.
  constexpr size_t kBlockSizesBytes[] = {
      1,        4,        16,        64,        256,         //
      1 * kKiB, 4 * kKiB, 16 * kKiB, 64 * kKiB, 256 * kKiB,  //
      1 * kMiB, 4 * kMiB, 16 * kMiB,
  };

  for (const size_t block_size_bytes : kBlockSizesBytes) {
    RegisterTest(block_size_bytes, kBufferSizeMB);
  }
}
// Hands RegisterTests to perftest (presumably so that it runs at startup; see
// perftest.h for the exact semantics of PERFTEST_CTOR).
PERFTEST_CTOR(RegisterTests)
} // namespace
#endif