blob: 09bf1db241c7f323351de50c14684724df675a78 [file] [log] [blame]
// Copyright 2023 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <lib/stdcompat/span.h>
#include <algorithm>
#include <cstring>
#include <iostream>
#include <random>
#include <vector>
#include <fbl/string_printf.h>
#include <perftest/perftest.h>
namespace {
// Assumed cache capacity in MiB. RandomSpanCopy derives the length of its
// access sequence from this value: before the kMaxAccessSequenceLen cap is
// applied, the blocks in one pass add up to more than this many bytes.
constexpr size_t kCacheSizeMB = 16; // Larger than last-level cache on common CPUs.
// Size in MiB of the working buffer handed to every benchmark that
// RegisterTests registers.
constexpr size_t kBufferSizeMB = 128;
// Upper bound on the number of precomputed source/destination address pairs
// that RandomSpanCopy cycles through.
constexpr size_t kMaxAccessSequenceLen = 100000;
// Measures the time taken to copy a randomly chosen block of |block_size_bytes|
// to a random destination, both within a buffer of |buffer_size_mb| MiB, using
// std::copy / std::copy_backward on cpp20::span iterators.
//
// All source and destination addresses are drawn before the benchmark loop
// starts, so the loop itself only builds the two spans and performs the copy.
//
// Returns false (after logging to stderr) when the configuration is invalid,
// i.e. |block_size_bytes| is zero or is not strictly smaller than the buffer.
// Returns true once |state| reports that the benchmark should stop running.
bool RandomSpanCopy(perftest::RepeatState* state, size_t block_size_bytes, size_t buffer_size_mb) {
  const size_t buffer_size_bytes = buffer_size_mb * 1024 * 1024;
  if (block_size_bytes == 0) {
    // A zero-sized block would divide by zero when sizing the access sequence
    // below, and there would be nothing to measure anyway.
    std::cerr << "Invalid configuration: block_size_bytes must be non-zero" << std::endl;
    return false;
  }
  if (block_size_bytes >= buffer_size_bytes) {
    std::cerr << "Invalid configuration: block_size_bytes >= buffer_size_bytes ("
              << block_size_bytes << " >= " << buffer_size_bytes << ")" << std::endl;
    return false;
  }

  // Prepare the buffer. Filling it at construction writes every byte once,
  // instead of zero-initialising the allocation and then overwriting it.
  std::vector<uint8_t> buf(buffer_size_bytes, uint8_t{0xAA});
  uint8_t* const base = buf.data();

  // Prepare the random source and destination addresses. The sequence is long
  // enough that one pass covers more bytes than the assumed cache size, but is
  // capped at kMaxAccessSequenceLen (presumably to bound setup time and memory).
  const size_t cache_size_bytes = kCacheSizeMB * 1024 * 1024;
  const size_t num_blocks_to_overflow_cache = cache_size_bytes / block_size_bytes + 1;
  const size_t access_sequence_len = std::min(num_blocks_to_overflow_cache, kMaxAccessSequenceLen);

  std::random_device rand_dev;
  std::uniform_int_distribution<size_t> rand_offset_gen(
      size_t{0}, buffer_size_bytes - block_size_bytes  // Ensure end of block is within buffer.
  );

  std::vector<uint8_t*> src_addrs(access_sequence_len);
  std::vector<uint8_t*> dst_addrs(access_sequence_len);
  for (size_t i = 0; i < access_sequence_len; ++i) {
    src_addrs[i] = base + rand_offset_gen(rand_dev);
    // Copying a range exactly onto itself violates the preconditions of both
    // std::copy and std::copy_backward, so re-draw the destination until it
    // differs from the source. The checks above guarantee that at least two
    // distinct offsets exist, so this loop terminates.
    do {
      dst_addrs[i] = base + rand_offset_gen(rand_dev);
    } while (dst_addrs[i] == src_addrs[i]);
  }

  // Run the benchmark task.
  for (size_t i = 0; state->KeepRunning(); ++i) {
    const size_t slot = i % access_sequence_len;
    cpp20::span<uint8_t> src(src_addrs[slot], block_size_bytes);
    cpp20::span<uint8_t> dst(dst_addrs[slot], block_size_bytes);
    // The blocks may overlap, so pick the copy direction that never overwrites
    // source bytes before they have been read. Equal addresses cannot occur
    // here because the setup above rejected them.
    if (dst.data() >= src.data()) {
      std::copy_backward(src.begin(), src.end(), dst.end());
    } else {
      std::copy(src.begin(), src.end(), dst.begin());
    }
  }
  return true;
}
void RegisterTest(size_t block_size_bytes, size_t buffer_size_mb) {
fbl::String test_name;
if (block_size_bytes < 1024) {
test_name = fbl::StringPrintf("Stdcompat/CopySpan/%zubytes/%zuMbytes", block_size_bytes,
buffer_size_mb);
} else if (block_size_bytes < 1024 * 1024) {
test_name = fbl::StringPrintf("Stdcompat/CopySpan/%zuKbytes/%zuMbytes", block_size_bytes / 1024,
buffer_size_mb);
} else {
test_name = fbl::StringPrintf("Stdcompat/CopySpan/%zuMbytes/%zuMbytes",
block_size_bytes / 1024 / 1024, buffer_size_mb);
}
perftest::RegisterTest(test_name.c_str(), RandomSpanCopy, block_size_bytes, buffer_size_mb);
}
// Registers the whole benchmark matrix: one test per block size listed below,
// each run against a buffer of kBufferSizeMB. Tests are registered in the order
// the sizes appear in the table (bytes first, then Kbytes, then Mbytes).
void RegisterTests() {
  constexpr size_t kKb = 1024;
  constexpr size_t kMb = kKb * kKb;
  constexpr size_t kBlockSizesBytes[] = {
      1,       4,       16,       64,       256,        // Plain bytes.
      1 * kKb, 4 * kKb, 16 * kKb, 64 * kKb, 256 * kKb,  // Kbytes.
      1 * kMb, 4 * kMb, 16 * kMb,                       // Mbytes.
  };
  for (const size_t block_size_bytes : kBlockSizesBytes) {
    RegisterTest(block_size_bytes, kBufferSizeMB);
  }
}
// Hands RegisterTests to perftest via its PERFTEST_CTOR macro (presumably so
// that it runs during static initialization; see perftest.h to confirm).
PERFTEST_CTOR(RegisterTests)
} // namespace