| /* |
| * Copyright (C) 2015 Christopher Gilbert. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a copy |
| * of this software and associated documentation files (the "Software"), to deal |
| * in the Software without restriction, including without limitation the rights |
| * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| * copies of the Software, and to permit persons to whom the Software is |
| * furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in all |
| * copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| #ifndef BENCHPRESS_HPP |
| #define BENCHPRESS_HPP |
| |
#include <algorithm> // max, min
#include <atomic> // atomic_intmax_t
#include <chrono> // high_resolution_clock, duration
#include <cstddef> // size_t
#include <cstdint> // int64_t
#include <functional> // function
#include <iomanip> // setw
#include <iostream> // cout
#include <regex> // regex, regex_match
#include <sstream> // stringstream
#include <string> // string
#include <thread> // thread
#include <utility> // move
#include <vector> // vector
| |
| namespace benchpress { |
| |
/*
 * The options class encapsulates all options for running benchmarks.
 *
 * When including benchpress, a main function can be emitted which includes a command-line parser for building an
 * options object. However, from time to time it may be necessary for the developer to build their own main stub
 * and construct the options object manually. Every setter returns a reference to the object it was called on, so
 * calls can be chained:
 *
 *     options opts;
 *     opts
 *         .bench(".*")
 *         .benchtime(1)
 *         .cpu(4);
 *
 * Defaults: bench = ".*", benchtime = 1, cpu = the detected hardware concurrency (at least 1).
 */
class options {
    std::string d_bench;     // regular expression selecting the benchmarks to run
    size_t      d_benchtime; // target run time of each benchmark, in seconds
    size_t      d_cpu;       // number of threads used by parallel benchmarks

    // std::thread::hardware_concurrency() is permitted to return 0 when the value cannot be determined. A default
    // of zero threads would make parallel benchmarks run no work at all, so fall back to a single thread.
    static size_t default_cpu() {
        const size_t detected = std::thread::hardware_concurrency();
        return detected == 0 ? 1 : detected;
    }

public:
    options()
        : d_bench(".*")
        , d_benchtime(1)
        , d_cpu(default_cpu())
    {}

    // Sets the regular expression that benchmark names must match in order to be run.
    options& bench(const std::string& bench) {
        d_bench = bench;
        return *this;
    }

    // Sets the target run time of each benchmark, in seconds.
    options& benchtime(size_t benchtime) {
        d_benchtime = benchtime;
        return *this;
    }

    // Sets the number of threads used by parallel benchmarks. The value is stored as given.
    options& cpu(size_t cpu) {
        d_cpu = cpu;
        return *this;
    }

    std::string get_bench() const {
        return d_bench;
    }

    size_t get_benchtime() const {
        return d_benchtime;
    }

    size_t get_cpu() const {
        return d_cpu;
    }
};
| |
class context;

/*
 * The benchmark_info class is used to store a function name / function object pair.
 *
 *     benchmark_info bi("example", [](benchpress::context* b) {
 *         // benchmark function
 *     });
 *
 * Both accessors return copies of the stored values.
 */
class benchmark_info {
    std::string                   d_name;
    std::function<void(context*)> d_func;

public:
    // The arguments are taken by value and moved into place, so passing a temporary costs no extra copy.
    benchmark_info(std::string name, std::function<void(context*)> func)
        : d_name(std::move(name))
        , d_func(std::move(func))
    {}

    std::string get_name() const { return d_name; }
    std::function<void(context*)> get_func() const { return d_func; }
};
| |
| /* |
| * The registration class is responsible for providing a single global point of reference for registering |
| * benchmark functions. |
| * |
| * registration::get_ptr()->register_benchmark(info); |
| */ |
| class registration { |
| static registration* d_this; |
| std::vector<benchmark_info> d_benchmarks; |
| |
| public: |
| static registration* get_ptr() { |
| if (nullptr == d_this) { |
| d_this = new registration(); |
| } |
| return d_this; |
| } |
| |
| void register_benchmark(benchmark_info& info) { |
| d_benchmarks.push_back(info); |
| } |
| |
| std::vector<benchmark_info> get_benchmarks() { return d_benchmarks; } |
| }; |
| |
| /* |
| * The auto_register class is a helper used to register benchmarks. |
| */ |
| class auto_register { |
| public: |
| auto_register(const std::string& name, std::function<void(context*)> func) { |
| benchmark_info info(name, func); |
| registration::get_ptr()->register_benchmark(info); |
| } |
| }; |
| |
// Token-pasting helpers. CONCAT2 adds a level of indirection so that macro arguments such as __LINE__ are expanded
// before they are pasted together.
#define CONCAT(x, y) x ## y
#define CONCAT2(x, y) CONCAT(x, y)

// The BENCHMARK macro is a helper for creating benchmark functions and automatically registering them with the
// registration class. It defines an auto_register object named register_<line number>; the object is declared
// static so that two translation units using BENCHMARK on the same line number do not define the same external
// symbol. The expansion already ends with a semicolon.
#define BENCHMARK(x, f) static benchpress::auto_register CONCAT2(register_, __LINE__)((x), (f));

// This macro will prevent the compiler from removing a redundant code path which has no side-effects. It expands
// to a block containing an empty inline-assembly statement.
#define DISABLE_REDUNDANT_CODE_OPT() { asm(""); }
| |
/*
 * The result class is responsible for producing a printable string representation of a benchmark run.
 *
 * It holds the number of iterations executed, the total measured duration of those iterations and the number of
 * bytes processed per iteration (0 when throughput is not being measured).
 */
class result {
    size_t                   d_num_iterations;
    std::chrono::nanoseconds d_duration;
    size_t                   d_num_bytes;

public:
    result(size_t num_iterations, std::chrono::nanoseconds duration, size_t num_bytes)
        : d_num_iterations(num_iterations)
        , d_duration(duration)
        , d_num_bytes(num_bytes)
    {}

    // Returns the average number of nanoseconds per iteration, rounded down. Returns 0 when there were no
    // iterations or the duration is not positive.
    size_t get_ns_per_op() const {
        if (d_num_iterations == 0 || d_duration.count() <= 0) {
            return 0;
        }
        // The count is known to be positive here, so the division can be done in unsigned arithmetic.
        return static_cast<size_t>(static_cast<unsigned long long>(d_duration.count()) / d_num_iterations);
    }

    // Returns the throughput in megabytes (10^6 bytes) per second, or 0 when there were no iterations, no bytes
    // were recorded or the duration is not positive.
    double get_mb_per_s() const {
        if (d_num_iterations == 0 || d_duration.count() <= 0 || d_num_bytes == 0) {
            return 0;
        }
        // Convert to fractional seconds. Truncating to whole seconds would divide by zero for runs shorter than
        // one second and over-report throughput for any run that is not a whole number of seconds.
        const double seconds   = std::chrono::duration<double>(d_duration).count();
        const double megabytes = double(d_num_bytes) * double(d_num_iterations) / 1e6;
        return megabytes / seconds;
    }

    // Formats the result as right-aligned 12-character columns: the iteration count, then "<n> ns/op", then
    // "<x> MB/s" when a non-zero throughput is available.
    std::string to_string() const {
        std::stringstream tmp;
        tmp << std::setw(12) << std::right << d_num_iterations;
        tmp << std::setw(12) << std::right << get_ns_per_op() << " ns/op";
        const double mbs = get_mb_per_s();
        if (mbs > 0.0) {
            tmp << std::setw(12) << std::right << mbs << " MB/s";
        }
        return tmp.str();
    }
};
| |
/*
 * The parallel_context class hands out a fixed budget of iterations to parallel benchmark code. The remaining
 * budget is held in an atomic counter, so next() may be called from several threads at once.
 */
class parallel_context {
    std::atomic_intmax_t d_num_iterations;

public:
    parallel_context(size_t num_iterations)
        : d_num_iterations(num_iterations)
    {}

    // Claims one iteration by atomically decrementing the counter. Returns true when the counter was still
    // positive before the decrement, i.e. when the caller should run another iteration.
    bool next() {
        return d_num_iterations-- > 0;
    }
};
| |
| /* |
| * The context class is responsible for providing an interface for capturing benchmark metrics to benchmark functions. |
| */ |
| class context { |
| bool d_timer_on; |
| std::chrono::high_resolution_clock::time_point d_start; |
| std::chrono::nanoseconds d_duration; |
| std::chrono::seconds d_benchtime; |
| size_t d_num_iterations; |
| size_t d_num_threads; |
| size_t d_num_bytes; |
| benchmark_info d_benchmark; |
| |
| public: |
| context(const benchmark_info& info, const options& opts) |
| : d_timer_on(false) |
| , d_start() |
| , d_duration() |
| , d_benchtime(std::chrono::seconds(opts.get_benchtime())) |
| , d_num_iterations(1) |
| , d_num_threads(opts.get_cpu()) |
| , d_num_bytes(0) |
| , d_benchmark(info) |
| {} |
| |
| size_t num_iterations() const { return d_num_iterations; } |
| |
| void set_num_threads(size_t n) { d_num_threads = n; } |
| size_t num_threads() const { return d_num_threads; } |
| |
| void start_timer() { |
| if (!d_timer_on) { |
| d_start = std::chrono::high_resolution_clock::now(); |
| d_timer_on = true; |
| } |
| } |
| void stop_timer() { |
| if (d_timer_on) { |
| d_duration += std::chrono::high_resolution_clock::now() - d_start; |
| d_timer_on = false; |
| } |
| } |
| void reset_timer() { |
| if (d_timer_on) { |
| d_start = std::chrono::high_resolution_clock::now(); |
| } |
| d_duration = std::chrono::nanoseconds::zero(); |
| } |
| |
| void set_bytes(int64_t bytes) { d_num_bytes = bytes; } |
| |
| size_t get_ns_per_op() { |
| if (d_num_iterations <= 0) { |
| return 0; |
| } |
| return d_duration.count() / d_num_iterations; |
| } |
| |
| void run_n(size_t n) { |
| d_num_iterations = n; |
| reset_timer(); |
| start_timer(); |
| d_benchmark.get_func()(this); |
| stop_timer(); |
| } |
| |
| void run_parallel(std::function<void(parallel_context*)> f) { |
| parallel_context pc(d_num_iterations); |
| std::vector<std::thread> threads; |
| for (size_t i = 0; i < d_num_threads; ++i) { |
| threads.push_back(std::thread([&pc,&f]() -> void { |
| f(&pc); |
| })); |
| } |
| for(auto& thread : threads){ |
| thread.join(); |
| } |
| } |
| |
| result run() { |
| size_t n = 1; |
| run_n(n); |
| while (d_duration < d_benchtime && n < 1e9) { |
| size_t last = n; |
| if (get_ns_per_op() == 0) { |
| n = 1e9; |
| } else { |
| n = d_duration.count() / get_ns_per_op(); |
| } |
| n = std::max(std::min(n+n/2, 100*last), last+1); |
| n = round_up(n); |
| run_n(n); |
| } |
| return result(n, d_duration, d_num_bytes); |
| } |
| |
| private: |
| template<typename T> |
| T round_down_10(T n) { |
| int tens = 0; |
| while (n > 10) { |
| n /= 10; |
| tens++; |
| } |
| int result = 1; |
| for (int i = 0; i < tens; ++i) { |
| result *= 10; |
| } |
| return result; |
| } |
| |
| template<typename T> |
| T round_up(T n) { |
| T base = round_down_10(n); |
| if (n < (2 * base)) { |
| return 2 * base; |
| } |
| if (n < (5 * base)) { |
| return 5 * base; |
| } |
| return 10 * base; |
| } |
| }; |
| |
| /* |
| * The run_benchmarks function will run the registered benchmarks. |
| */ |
| void run_benchmarks(const options& opts) { |
| std::regex match_r(opts.get_bench()); |
| auto benchmarks = registration::get_ptr()->get_benchmarks(); |
| for (auto& info : benchmarks) { |
| if (std::regex_match(info.get_name(), match_r)) { |
| context c(info, opts); |
| auto r = c.run(); |
| std::cout << std::setw(35) << std::left << info.get_name() << r.to_string() << std::endl; |
| } |
| } |
| } |
| |
| } // namespace benchpress |
| |
/*
 * If BENCHPRESS_CONFIG_MAIN is defined when the file is included then a main function will be emitted which provides a
 * command-line parser and then executes run_benchmarks.
 *
 * Recognised options are --bench, --benchtime, --cpu and --help. The program returns 0 on success (including
 * --help) and 1 when the command line cannot be parsed or the --bench expression is not a valid regular
 * expression; diagnostics are written to std::cerr. After the benchmarks have run, the program name and the total
 * elapsed time in seconds are printed.
 */
#ifdef BENCHPRESS_CONFIG_MAIN
#include "cxxopts.hpp"
benchpress::registration* benchpress::registration::d_this = nullptr;
int main(int argc, char** argv) {
    const std::chrono::high_resolution_clock::time_point bp_start = std::chrono::high_resolution_clock::now();
    benchpress::options bench_opts;
    try {
        cxxopts::Options cmd_opts(argv[0], " - command line options");
        cmd_opts.add_options()
            ("bench", "run benchmarks matching the regular expression", cxxopts::value<std::string>()
                ->default_value(".*"))
            ("benchtime", "run enough iterations of each benchmark to take t seconds", cxxopts::value<size_t>()
                ->default_value("1"))
            ("cpu", "specify the number of threads to use for parallel benchmarks", cxxopts::value<size_t>()
                ->default_value(std::to_string(std::thread::hardware_concurrency())))
            ("help", "print help")
        ;
        cmd_opts.parse(argc, argv);
        if (cmd_opts.count("help")) {
            std::cout << cmd_opts.help({""}) << std::endl;
            return 0;
        }
        // Only options that were actually given override the defaults held by bench_opts.
        if (cmd_opts.count("bench")) {
            bench_opts.bench(cmd_opts["bench"].as<std::string>());
        }
        if (cmd_opts.count("benchtime")) {
            bench_opts.benchtime(cmd_opts["benchtime"].as<size_t>());
        }
        if (cmd_opts.count("cpu")) {
            bench_opts.cpu(cmd_opts["cpu"].as<size_t>());
        }
    } catch (const cxxopts::OptionException& e) {
        std::cerr << "error parsing options: " << e.what() << std::endl;
        return 1;
    }
    try {
        benchpress::run_benchmarks(bench_opts);
    } catch (const std::regex_error& e) {
        // An invalid --bench expression would otherwise escape main and terminate the program without a message.
        std::cerr << "invalid benchmark expression: " << e.what() << std::endl;
        return 1;
    }
    const float duration = std::chrono::duration_cast<std::chrono::milliseconds>(
        std::chrono::high_resolution_clock::now() - bp_start
    ).count() / 1000.f;
    std::cout << argv[0] << " " << duration << "s" << std::endl;
    return 0;
}
#endif
| |
| #endif // BENCHPRESS_HPP |