blob: d68e4aff3cc515d98021109f41e81aaa79cbfb99 [file] [log] [blame]
/*
* Copyright (C) 2015 Christopher Gilbert.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef BENCHPRESS_HPP
#define BENCHPRESS_HPP
#include <algorithm> // max, min
#include <atomic> // atomic_intmax_t
#include <chrono> // high_resolution_timer, duration
#include <functional> // function
#include <iomanip> // setw
#include <iostream> // cout
#include <regex> // regex, regex_match
#include <sstream> // stringstream
#include <string> // string
#include <thread> // thread
#include <vector> // vector
namespace benchpress {
/*
* The options class encapsulates all options for running benchmarks.
*
* When including benchpress, a main function can be emitted which includes a command-line parser for building an
* options object. However from time-to-time it may be necessary for the developer to have to build their own main
* stub and construct the options object manually.
*
* options opts;
* opts
* .bench(".*")
* .benchtime(1)
* .cpu(4);
*/
class options {
std::string d_bench;
size_t d_benchtime;
size_t d_cpu;
public:
options()
: d_bench(".*")
, d_benchtime(1)
, d_cpu(std::thread::hardware_concurrency())
{}
options& bench(const std::string& bench) {
d_bench = bench;
return *this;
}
options& benchtime(size_t benchtime) {
d_benchtime = benchtime;
return *this;
}
options& cpu(size_t cpu) {
d_cpu = cpu;
return *this;
}
std::string get_bench() const {
return d_bench;
}
size_t get_benchtime() const {
return d_benchtime;
}
size_t get_cpu() const {
return d_cpu;
}
};
class context;
/*
* The benchmark_info class is used to store a function name / pointer pair.
*
* benchmark_info bi("example", [](benchpress::context* b) {
* // benchmark function
* });
*/
class benchmark_info {
std::string d_name;
std::function<void(context*)> d_func;
public:
benchmark_info(std::string name, std::function<void(context*)> func)
: d_name(name)
, d_func(func)
{}
std::string get_name() const { return d_name; }
std::function<void(context*)> get_func() const { return d_func; }
};
/*
* The registration class is responsible for providing a single global point of reference for registering
* benchmark functions.
*
* registration::get_ptr()->register_benchmark(info);
*/
class registration {
static registration* d_this;
std::vector<benchmark_info> d_benchmarks;
public:
static registration* get_ptr() {
if (nullptr == d_this) {
d_this = new registration();
}
return d_this;
}
void register_benchmark(benchmark_info& info) {
d_benchmarks.push_back(info);
}
std::vector<benchmark_info> get_benchmarks() { return d_benchmarks; }
};
/*
* The auto_register class is a helper used to register benchmarks.
*/
class auto_register {
public:
auto_register(const std::string& name, std::function<void(context*)> func) {
benchmark_info info(name, func);
registration::get_ptr()->register_benchmark(info);
}
};
#define CONCAT(x, y) x ## y
#define CONCAT2(x, y) CONCAT(x, y)
// The BENCHMARK macro is a helper for creating benchmark functions and automatically registering them with the
// registration class.
#define BENCHMARK(x, f) benchpress::auto_register CONCAT2(register_, __LINE__)((x), (f));
// This macro will prevent the compiler from removing a redundant code path which has no side-effects.
#define DISABLE_REDUNDANT_CODE_OPT() { asm(""); }
/*
* The result class is responsible for producing a printable string representation of a benchmark run.
*/
class result {
size_t d_num_iterations;
std::chrono::nanoseconds d_duration;
size_t d_num_bytes;
public:
result(size_t num_iterations, std::chrono::nanoseconds duration, size_t num_bytes)
: d_num_iterations(num_iterations)
, d_duration(duration)
, d_num_bytes(num_bytes)
{}
size_t get_ns_per_op() const {
if (d_num_iterations <= 0) {
return 0;
}
return d_duration.count() / d_num_iterations;
}
double get_mb_per_s() const {
if (d_num_iterations <= 0 || d_duration.count() <= 0 || d_num_bytes <= 0) {
return 0;
}
return ((double(d_num_bytes) * double(d_num_iterations) / double(1e6)) /
double(std::chrono::duration_cast<std::chrono::seconds>(d_duration).count()));
}
std::string to_string() const {
std::stringstream tmp;
tmp << std::setw(12) << std::right << d_num_iterations;
size_t npo = get_ns_per_op();
tmp << std::setw(12) << std::right << npo << std::setw(0) << " ns/op";
double mbs = get_mb_per_s();
if (mbs > 0.0) {
tmp << std::setw(12) << std::right << mbs << std::setw(0) << " MB/s";
}
return std::string(tmp.str());
}
};
/*
* The parallel_context class is responsible for providing a thread-safe context for parallel benchmark code.
*/
class parallel_context {
std::atomic_intmax_t d_num_iterations;
public:
parallel_context(size_t num_iterations)
: d_num_iterations(num_iterations)
{}
bool next() {
return (d_num_iterations.fetch_sub(1) > 0);
}
};
/*
* The context class is responsible for providing an interface for capturing benchmark metrics to benchmark functions.
*/
class context {
bool d_timer_on;
std::chrono::high_resolution_clock::time_point d_start;
std::chrono::nanoseconds d_duration;
std::chrono::seconds d_benchtime;
size_t d_num_iterations;
size_t d_num_threads;
size_t d_num_bytes;
benchmark_info d_benchmark;
public:
context(const benchmark_info& info, const options& opts)
: d_timer_on(false)
, d_start()
, d_duration()
, d_benchtime(std::chrono::seconds(opts.get_benchtime()))
, d_num_iterations(1)
, d_num_threads(opts.get_cpu())
, d_num_bytes(0)
, d_benchmark(info)
{}
size_t num_iterations() const { return d_num_iterations; }
void set_num_threads(size_t n) { d_num_threads = n; }
size_t num_threads() const { return d_num_threads; }
void start_timer() {
if (!d_timer_on) {
d_start = std::chrono::high_resolution_clock::now();
d_timer_on = true;
}
}
void stop_timer() {
if (d_timer_on) {
d_duration += std::chrono::high_resolution_clock::now() - d_start;
d_timer_on = false;
}
}
void reset_timer() {
if (d_timer_on) {
d_start = std::chrono::high_resolution_clock::now();
}
d_duration = std::chrono::nanoseconds::zero();
}
void set_bytes(int64_t bytes) { d_num_bytes = bytes; }
size_t get_ns_per_op() {
if (d_num_iterations <= 0) {
return 0;
}
return d_duration.count() / d_num_iterations;
}
void run_n(size_t n) {
d_num_iterations = n;
reset_timer();
start_timer();
d_benchmark.get_func()(this);
stop_timer();
}
void run_parallel(std::function<void(parallel_context*)> f) {
parallel_context pc(d_num_iterations);
std::vector<std::thread> threads;
for (size_t i = 0; i < d_num_threads; ++i) {
threads.push_back(std::thread([&pc,&f]() -> void {
f(&pc);
}));
}
for(auto& thread : threads){
thread.join();
}
}
result run() {
size_t n = 1;
run_n(n);
while (d_duration < d_benchtime && n < 1e9) {
size_t last = n;
if (get_ns_per_op() == 0) {
n = 1e9;
} else {
n = d_duration.count() / get_ns_per_op();
}
n = std::max(std::min(n+n/2, 100*last), last+1);
n = round_up(n);
run_n(n);
}
return result(n, d_duration, d_num_bytes);
}
private:
template<typename T>
T round_down_10(T n) {
int tens = 0;
while (n > 10) {
n /= 10;
tens++;
}
int result = 1;
for (int i = 0; i < tens; ++i) {
result *= 10;
}
return result;
}
template<typename T>
T round_up(T n) {
T base = round_down_10(n);
if (n < (2 * base)) {
return 2 * base;
}
if (n < (5 * base)) {
return 5 * base;
}
return 10 * base;
}
};
/*
* The run_benchmarks function will run the registered benchmarks.
*/
void run_benchmarks(const options& opts) {
std::regex match_r(opts.get_bench());
auto benchmarks = registration::get_ptr()->get_benchmarks();
for (auto& info : benchmarks) {
if (std::regex_match(info.get_name(), match_r)) {
context c(info, opts);
auto r = c.run();
std::cout << std::setw(35) << std::left << info.get_name() << r.to_string() << std::endl;
}
}
}
} // namespace benchpress
/*
* If BENCHPRESS_CONFIG_MAIN is defined when the file is included then a main function will be emitted which provides a
* command-line parser and then executes run_benchmarks.
*/
#ifdef BENCHPRESS_CONFIG_MAIN
#include "cxxopts.hpp"
benchpress::registration* benchpress::registration::d_this;
int main(int argc, char** argv) {
std::chrono::high_resolution_clock::time_point bp_start = std::chrono::high_resolution_clock::now();
benchpress::options bench_opts;
try {
cxxopts::Options cmd_opts(argv[0], " - command line options");
cmd_opts.add_options()
("bench", "run benchmarks matching the regular expression", cxxopts::value<std::string>()
->default_value(".*"))
("benchtime", "run enough iterations of each benchmark to take t seconds", cxxopts::value<size_t>()
->default_value("1"))
("cpu", "specify the number of threads to use for parallel benchmarks", cxxopts::value<size_t>()
->default_value(std::to_string(std::thread::hardware_concurrency())))
("help", "print help")
;
cmd_opts.parse(argc, argv);
if (cmd_opts.count("help")) {
std::cout << cmd_opts.help({""}) << std::endl;
exit(0);
}
if (cmd_opts.count("bench")) {
bench_opts.bench(cmd_opts["bench"].as<std::string>());
}
if (cmd_opts.count("benchtime")) {
bench_opts.benchtime(cmd_opts["benchtime"].as<size_t>());
}
if (cmd_opts.count("cpu")) {
bench_opts.cpu(cmd_opts["cpu"].as<size_t>());
}
} catch (const cxxopts::OptionException& e) {
std::cout << "error parsing options: " << e.what() << std::endl;
exit(1);
}
benchpress::run_benchmarks(bench_opts);
float duration = std::chrono::duration_cast<std::chrono::milliseconds>(
std::chrono::high_resolution_clock::now() - bp_start
).count() / 1000.f;
std::cout << argv[0] << " " << duration << "s" << std::endl;
return 0;
}
#endif
#endif // BENCHPRESS_HPP