blob: 7ae04a4f6014c2dfd2c80f5cf4995de22bd2272c [file] [log] [blame]
// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <iostream>
#include <random>
#include <vector>
#include <fbl/intrusive_single_list.h>
#include <fbl/ref_counted.h>
#include <fbl/ref_ptr.h>
#include <fbl/slab_allocator.h>
#include <fbl/string_printf.h>
#include <perftest/perftest.h>
namespace {
/*** Common definitions ***/
// The motivation for multiple sizes is to quantify any scaling behavior with
// the size of the allocation.
constexpr size_t kSmallBlockSizeBytes = 64;
constexpr size_t kLargeBlockSizeBytes = 8192;
// This value must accommodate the maximal value of |total_size_kbytes| in
// RegisterRetainedMemTest().
constexpr size_t kLiveAllocLimitBytes = 32u * 1024 * 1024;
template <size_t Size>
class DataBuf {
public:
DataBuf() {
// We provide a user-defined default constructor that does nothing, to
// avoid the cost of zeroing out |data|.
//
// Rationale: in the absence of a user-defined constructor, expressions
// such as |DataBuf()| or |new DataBuf()| trigger value-initialization,
// which zeros out |data|. For details, see
// https://stackoverflow.com/a/2418195
}
private:
char data[Size];
};
template <typename AllocatorTraits>
class SlabDataBuf;
template <typename AllocatorTraits>
class SlabDataBuf : public fbl::SlabAllocated<AllocatorTraits>,
public fbl::RefCounted<SlabDataBuf<AllocatorTraits>>,
public fbl::SinglyLinkedListable<fbl::RefPtr<SlabDataBuf<AllocatorTraits>>> {
public:
SlabDataBuf() {
// As with DataBuf, we provide a user-defined default constructor that
// does nothing, to avoid the cost of zeroing out |data|.
//
// CAUTION: For reasons that are unclear, inheriting DataBuf (rather than
// declaring our own |data|) does not avoid the cost of zeroing out
// DataBuf::data, even though DataBuf has a user-defined default
// constructor.
}
private:
char data[AllocatorTraits::kUserBufSize];
};
/*** Static slab allocator definitions ***/
template <typename AllocatorTraits>
class StaticSlabAllocator;
template <size_t ObjSize, size_t SlabSize>
struct StaticSlabAllocatorTraits
: public fbl::StaticSlabAllocatorTraits<
fbl::RefPtr<SlabDataBuf<StaticSlabAllocatorTraits<ObjSize, SlabSize>>>, SlabSize> {
using AllocT = StaticSlabAllocator<StaticSlabAllocatorTraits<ObjSize, SlabSize>>;
static constexpr char kName[] = "SlabStatic";
static constexpr size_t kUserBufSize = ObjSize;
static constexpr size_t kSlabSizeKbytes = SlabSize / 1024;
static auto GetConfigAsString() {
return fbl::StringPrintf("%zubytes/%zuKbytes", kUserBufSize, kSlabSizeKbytes);
}
};
template <typename AllocatorTraits>
class StaticSlabAllocator {
public:
static constexpr size_t kUserBufSize = AllocatorTraits::kUserBufSize;
auto New() { return fbl::SlabAllocator<AllocatorTraits>::New(); }
};
using StaticSmallBlockAllocatorTraits =
StaticSlabAllocatorTraits<kSmallBlockSizeBytes, fbl::DEFAULT_SLAB_ALLOCATOR_SLAB_SIZE>;
using StaticLargeBlockAllocatorTraits =
StaticSlabAllocatorTraits<kLargeBlockSizeBytes, kLargeBlockSizeBytes * 205>;
static_assert(fbl::SlabAllocator<StaticLargeBlockAllocatorTraits>::AllocsPerSlab ==
fbl::SlabAllocator<StaticSmallBlockAllocatorTraits>::AllocsPerSlab,
"Please adjust the SLAB_SIZE parameter for "
"StaticLargeBlockAllocatorTraits, so that the StaticLargeBlockAllocator "
"amortizes malloc() calls over as many slab objects as the Static "
"SmallBlockAllocator.");
/*** Instanced slab allocator definitions ***/
template <typename AllocatorTraits>
class InstancedSlabAllocator;
template <size_t ObjSize, size_t SlabSize>
struct InstancedSlabAllocatorTraits
: public fbl::SlabAllocatorTraits<
fbl::RefPtr<SlabDataBuf<InstancedSlabAllocatorTraits<ObjSize, SlabSize>>>, SlabSize> {
using AllocT = InstancedSlabAllocator<InstancedSlabAllocatorTraits<ObjSize, SlabSize>>;
static constexpr char kName[] = "SlabInstanced";
static constexpr size_t kUserBufSize = ObjSize;
static constexpr size_t kSlabSizeKbytes = SlabSize / 1024;
static auto GetConfigAsString() {
return fbl::StringPrintf("%zubytes/%zuKbytes", kUserBufSize, kSlabSizeKbytes);
}
};
template <typename AllocatorTraits>
class InstancedSlabAllocator {
public:
static constexpr size_t kUserBufSize = AllocatorTraits::kUserBufSize;
InstancedSlabAllocator() : allocator_(kMaxSlabs) {}
auto New() { return allocator_.New(); }
private:
static constexpr size_t kMaxSizeBytes = kLiveAllocLimitBytes;
static constexpr size_t kMaxSlabs =
(kMaxSizeBytes / (fbl::SlabAllocator<AllocatorTraits>::AllocsPerSlab * kUserBufSize)) +
1 /* Round up */;
fbl::SlabAllocator<AllocatorTraits> allocator_;
};
using InstancedSmallBlockAllocatorTraits =
InstancedSlabAllocatorTraits<kSmallBlockSizeBytes, fbl::DEFAULT_SLAB_ALLOCATOR_SLAB_SIZE>;
using InstancedLargeBlockAllocatorTraits =
InstancedSlabAllocatorTraits<kLargeBlockSizeBytes, kLargeBlockSizeBytes * 187>;
static_assert(fbl::SlabAllocator<InstancedLargeBlockAllocatorTraits>::AllocsPerSlab ==
fbl::SlabAllocator<InstancedSmallBlockAllocatorTraits>::AllocsPerSlab,
"Please adjust the SLAB_SIZE parameter for "
"InstancedLargeBlockAllocatorTraits, so that the "
"InstancedLargeBlockAllocator amortizes malloc() calls over as many slab "
"objects as the InstancedSmallBlockAllocator.");
/*** Heap allocator definitions ***/
template <typename AllocatorTraits>
class HeapAllocator;
template <size_t ObjSize>
struct HeapAllocatorTraits {
using AllocT = HeapAllocator<HeapAllocatorTraits<ObjSize>>;
static constexpr char kName[] = "Malloc";
static constexpr size_t kUserBufSize = ObjSize;
static auto GetConfigAsString() { return fbl::StringPrintf("%zubytes", kUserBufSize); }
};
template <typename AllocatorTraits>
class HeapAllocator {
public:
static constexpr size_t kUserBufSize = AllocatorTraits::kUserBufSize;
auto New() { return std::make_unique<DataBuf<kUserBufSize>>(); }
};
using HeapSmallBlockAllocatorTraits = HeapAllocatorTraits<kSmallBlockSizeBytes>;
using HeapLargeBlockAllocatorTraits = HeapAllocatorTraits<kLargeBlockSizeBytes>;
/*** Benchmark code ***/
// Utility function called from RetainAndFreeOldest() and RetainAndFreeRandom().
template <typename AllocatorT>
bool RetainAndFree(const std::vector<size_t>& replacement_sequence, perftest::RepeatState* state) {
const size_t num_bufs_to_retain = replacement_sequence.size();
if (num_bufs_to_retain < 1) {
std::cerr << "Must retain at least 1 buffer" << std::endl;
return false;
}
// Populate an initial collection of buffers.
AllocatorT allocator;
std::vector<decltype(allocator.New())> retained_bufs(num_bufs_to_retain);
std::generate(retained_bufs.begin(), retained_bufs.end(), [&] { return allocator.New(); });
for (size_t i = 0; i < retained_bufs.size(); ++i) {
if (!retained_bufs[i]) {
std::cerr << "Failed to allocate buf " << i << " before benchmark loop" << std::endl;
return false;
}
}
// The benchmark task: replace a random existing buffer with a new one.
for (size_t i = 0; state->KeepRunning(); ++i) {
const size_t old_buf_index = replacement_sequence[i % num_bufs_to_retain];
const auto& buf = (retained_bufs[old_buf_index] = allocator.New());
if (!buf) {
std::cerr << "Failed to allocate " << allocator.kUserBufSize
<< " bytes at benchmark iteration " << i << std::endl;
return false;
}
}
return true;
}
// Measure the time taken to allocate and immediately free a block
// from a slab allocator. The block is allocated from the pool
// initialized by one of the DECLARE_STATIC_SLAB_ALLOCATOR_STORAGE
// statements at the end of this file. This benchmark represents (presumed)
// best-case behavior, as the memory pool should be unfragmented.
template <typename AllocatorT>
bool AllocAndFree(perftest::RepeatState* state) {
AllocatorT allocator;
while (state->KeepRunning()) {
auto buf = allocator.New();
perftest::DoNotOptimize(buf);
if (!buf) {
return false;
}
}
return true;
}
// Measure the time taken to free the oldest allocated block, and allocate a
// new one, using the slab allocator. The block is allocated from the pool
// initialized by one of the DECLARE_STATIC_SLAB_ALLOCATOR_STORAGE statements
// at the end of this file. This benchmark abstracts a network copy workload,
// when copying from a fast source, to a slow sink.
template <typename AllocatorT>
bool RetainAndFreeOldest(perftest::RepeatState* state, size_t num_bufs_to_retain) {
// Generate the sequence of indexes of buffers to replace.
std::vector<size_t> buf_to_free(num_bufs_to_retain);
std::iota(buf_to_free.begin(), buf_to_free.end(), 0);
return RetainAndFree<AllocatorT>(buf_to_free, state);
}
// Measure the time taken to free a random allocated block, and allocate a
// new one, using the slab allocator. The block is allocated from the pool
// initialized by one of the DECLARE_STATIC_SLAB_ALLOCATOR_STORAGE statements
// at the end of this file. This benchmark attempts to quantify the effects of
// memory fragmentation.
template <typename AllocatorT>
bool RetainAndFreeRandom(perftest::RepeatState* state, size_t num_bufs_to_retain) {
// Generate the sequence of indexes of buffers to replace.
std::vector<size_t> buf_to_free(num_bufs_to_retain);
std::random_device rand_dev;
std::iota(buf_to_free.begin(), buf_to_free.end(), 0);
std::shuffle(buf_to_free.begin(), buf_to_free.end(), rand_dev);
return RetainAndFree<AllocatorT>(buf_to_free, state);
}
/*** Linkage and instantiation ***/
using RetainedMemPerfTest = bool (*)(perftest::RepeatState* state, size_t num_bufs_to_retain);
template <typename AllocatorTraits, RetainedMemPerfTest PerfTest>
void RegisterTest(const char* bench_name) {
// The maximum value of 32768KB below was chosen empirically, as the point at
// which allocators started showing scaling behaviors on Eve.
for (size_t total_size_kbytes : {8, 32, 128, 512, 2048, 8192, 32768}) {
const size_t total_size_bytes = total_size_kbytes * 1024;
perftest::RegisterTest(
fbl::StringPrintf("MemAlloc/%s/%s/%s/%zuKbytes", AllocatorTraits::kName, bench_name,
AllocatorTraits::GetConfigAsString().c_str(), total_size_kbytes)
.c_str(),
PerfTest, total_size_bytes / AllocatorTraits::kUserBufSize);
}
}
using NoRetainedMemPerfTest = bool (*)(perftest::RepeatState* state);
template <typename AllocatorTraits, NoRetainedMemPerfTest PerfTest>
void RegisterTest(const char* bench_name) {
perftest::RegisterTest(fbl::StringPrintf("MemAlloc/%s/%s/%s", AllocatorTraits::kName, bench_name,
AllocatorTraits::GetConfigAsString().c_str())
.c_str(),
PerfTest);
}
#define REGISTER_PERF_TEST_INSTANCE(ALLOCATION_PATTERN, ALLOCATOR_TRAITS) \
RegisterTest<ALLOCATOR_TRAITS, ALLOCATION_PATTERN<ALLOCATOR_TRAITS::AllocT>>(#ALLOCATION_PATTERN);
#define REGISTER_PERF_TEST(ALLOCATION_PATTERN) \
do { \
REGISTER_PERF_TEST_INSTANCE(ALLOCATION_PATTERN, StaticSmallBlockAllocatorTraits); \
REGISTER_PERF_TEST_INSTANCE(ALLOCATION_PATTERN, StaticLargeBlockAllocatorTraits); \
REGISTER_PERF_TEST_INSTANCE(ALLOCATION_PATTERN, InstancedSmallBlockAllocatorTraits); \
REGISTER_PERF_TEST_INSTANCE(ALLOCATION_PATTERN, InstancedLargeBlockAllocatorTraits); \
REGISTER_PERF_TEST_INSTANCE(ALLOCATION_PATTERN, HeapSmallBlockAllocatorTraits); \
REGISTER_PERF_TEST_INSTANCE(ALLOCATION_PATTERN, HeapLargeBlockAllocatorTraits); \
} while (0)
void RegisterTests() {
REGISTER_PERF_TEST(AllocAndFree);
REGISTER_PERF_TEST(RetainAndFreeOldest);
REGISTER_PERF_TEST(RetainAndFreeRandom);
}
PERFTEST_CTOR(RegisterTests)
} // namespace
#define DECLARE_STATIC_STORAGE(SLAB_TRAITS) \
DECLARE_STATIC_SLAB_ALLOCATOR_STORAGE( \
SLAB_TRAITS, (kLiveAllocLimitBytes / (fbl::SlabAllocator<SLAB_TRAITS>::AllocsPerSlab * \
SLAB_TRAITS::kUserBufSize)) + \
1 /* Round up */);
DECLARE_STATIC_STORAGE(StaticSmallBlockAllocatorTraits)
DECLARE_STATIC_STORAGE(StaticLargeBlockAllocatorTraits)