blob: de1e189468e5ceed9283b98e473d1a890a4be13a [file] [edit]
/* benchmark_insert_string.cc -- benchmark insert_string variants
* Copyright (C) 2025 Nathan Moinvaziri
* For conditions of distribution and use, see copyright notice in zlib.h
*/
#include <limits.h>
#include <cstring>
#include <benchmark/benchmark.h>
extern "C" {
# include "zbuild.h"
# include "deflate.h"
# include "arch_functions.h"
# include "../test_cpu_features.h"
# include "insert_string_p.h"
}
#define MAX_WSIZE 32768
#define TEST_WINDOW_SIZE (MAX_WSIZE * 2)
typedef uint32_t (* quick_insert_string_cb)(deflate_state *const s, unsigned char *window, uint32_t str);
// Base class with common setup/teardown for both insert_string benchmarks
class insert_string_base: public benchmark::Fixture {
protected:
deflate_state *s;
unsigned char *window;
public:
void SetUp(const ::benchmark::State&) {
s = (deflate_state*)zng_alloc_aligned(sizeof(deflate_state), 64);
memset(s, 0, sizeof(deflate_state));
// Set up window parameters
s->w_size = MAX_WSIZE;
s->window_size = TEST_WINDOW_SIZE;
// Allocate window
s->window = (uint8_t*)zng_alloc_aligned(TEST_WINDOW_SIZE, 64);
window = s->window;
// Allocate hash tables
s->head = (Pos*)zng_alloc_aligned(HASH_SIZE * sizeof(Pos), 64);
s->prev = (Pos*)zng_alloc_aligned(MAX_WSIZE * sizeof(Pos), 64);
// Initialize hash tables
memset(s->head, 0, HASH_SIZE * sizeof(Pos));
memset(s->prev, 0, MAX_WSIZE * sizeof(Pos));
// Initialize rolling hash state for rolling variant
s->ins_h = 0;
// Fill window with deterministic data patterns
for (size_t i = 0; i < TEST_WINDOW_SIZE; i++) {
// Create patterns that will exercise the hash function well
window[i] = (uint8_t)((i * 17 + (i >> 4) * 31 + (i >> 8) * 13) & 0xFF);
}
}
void TearDown(const ::benchmark::State&) {
zng_free_aligned(s->window);
zng_free_aligned(s->head);
zng_free_aligned(s->prev);
zng_free_aligned(s);
}
};
class insert_string_bench: public insert_string_base {
public:
void Bench(benchmark::State& state, insert_string_cb insert_func) {
uint32_t str_pos = (uint32_t)state.range(0); // Starting position
uint32_t count = (uint32_t)state.range(1); // Number of strings to insert
// Ensure we don't go beyond window bounds
if (str_pos + count >= TEST_WINDOW_SIZE - 4) {
state.SkipWithError("Parameters exceed window size");
return;
}
for (auto _ : state) {
state.PauseTiming();
// Reset hash tables to ensure consistent starting state
memset(s->head, 0, HASH_SIZE * sizeof(Pos));
memset(s->prev, 0, MAX_WSIZE * sizeof(Pos));
s->ins_h = 0;
state.ResumeTiming();
// Benchmark the insert_string function
insert_func(s, window, str_pos, count);
}
}
};
#define BENCHMARK_INSERT_STRING(name, fptr, support_flag) \
BENCHMARK_DEFINE_F(insert_string_bench, name)(benchmark::State& state) { \
if (!(support_flag)) { \
state.SkipWithError("Function " #name " not supported"); \
} \
Bench(state, fptr); \
} \
BENCHMARK_REGISTER_F(insert_string_bench, name) \
->Args({100, 3}) /* Most common case */ \
->Args({100, 4}) \
->Args({100, 5}) \
->Args({100, 7}) \
->Args({100, 14}) /* Mid-range cluster */ \
->Args({100, 32}) /* Transition point */ \
->Args({100, 127}) /* Large cluster around powers of 2 */ \
->Args({100, 255}) /* Near maximum observed values */ \
->Unit(benchmark::kNanosecond);
// Benchmark the standard integer hash variant
BENCHMARK_INSERT_STRING(integer_hash, ::insert_string, 1);
// Benchmark the rolling hash variant
BENCHMARK_INSERT_STRING(rolling_hash, ::insert_string_roll, 1);
// Additional benchmark class for quick_insert_string functions
class quick_insert_string_bench: public insert_string_base {
public:
void Bench(benchmark::State& state, quick_insert_string_cb quick_insert_func) {
uint32_t start_pos = (uint32_t)state.range(0); // Starting position
uint32_t count = (uint32_t)state.range(1); // Number of insertions
if (start_pos + count >= TEST_WINDOW_SIZE - 4) {
state.SkipWithError("Parameters exceed window size");
return;
}
for (auto _ : state) {
state.PauseTiming();
// Reset hash tables
memset(s->head, 0, HASH_SIZE * sizeof(Pos));
memset(s->prev, 0, MAX_WSIZE * sizeof(Pos));
s->ins_h = 0;
state.ResumeTiming();
// Benchmark quick_insert_string (single insertions)
for (uint32_t i = 0; i < count; i++) {
uint32_t result = quick_insert_func(s, window, start_pos + i);
benchmark::DoNotOptimize(result);
}
}
}
};
#define BENCHMARK_QUICK_INSERT_STRING(name, fptr, support_flag) \
BENCHMARK_DEFINE_F(quick_insert_string_bench, name)(benchmark::State& state) { \
if (!(support_flag)) { \
state.SkipWithError("Function " #name " not supported"); \
} \
Bench(state, fptr); \
} \
BENCHMARK_REGISTER_F(quick_insert_string_bench, name) \
->Args({100, 1}) /* Single insertion (baseline) */ \
->Args({100, 100}) /* 100 insertions (measure amortized cost) */ \
->Args({16000, 100}) /* 100 insertions at mid window (different hash distribution) */ \
->Unit(benchmark::kNanosecond);
BENCHMARK_QUICK_INSERT_STRING(quick_integer_hash, ::quick_insert_string, 1);
BENCHMARK_QUICK_INSERT_STRING(quick_rolling_hash, ::quick_insert_string_roll, 1);