// Source-viewer header (not part of the program): blob 5ce7f8a35e41e9266725fabcbb99b78e1049a241
#include <stdint.h>
#include <limits>
#include <memory>
#include <random>
#include <type_traits>
#include <utility>
#include "benchmark/benchmark.h"
// File-level pseudo-random generator shared by all data initialisation. It is
// default-constructed, so it starts from the engine's default seed.
static std::mt19937 rng;

/// Fill the first \p N elements of \p A with pseudo-random values drawn
/// uniformly from the full value range of \p Ty, using the file-level `rng`.
///
/// \tparam Ty integral element type of the array.
/// \param A   array to fill; must hold at least \p N elements.
/// \param N   number of elements to write.
template <typename Ty>
static void init_data(const std::unique_ptr<Ty[]> &A, unsigned N) {
  static_assert(std::is_integral<Ty>::value,
                "init_data only supports integral element types");
  // Draw from a 64-bit type with the same signedness as Ty. Using an unsigned
  // distribution for a signed Ty would turn the negative lower bound into a
  // huge positive value, making the lower bound exceed the upper bound, which
  // violates the precondition of std::uniform_int_distribution.
  using WideTy = typename std::conditional<std::is_signed<Ty>::value, int64_t,
                                           uint64_t>::type;
  std::uniform_int_distribution<WideTy> distrib(
      std::numeric_limits<Ty>::min(), std::numeric_limits<Ty>::max());
  for (unsigned i = 0; i < N; i++)
    A[i] = static_cast<Ty>(distrib(rng));
}
/// Kernel under measurement: performs the element-wise sum
/// A[J] = B[J] + C[J] + D[J] over \p TC elements, 1000 times in a row, moving
/// all four pointers between repetitions.
///
/// NOTE(review): judging by the name and the comments below, the inner loop is
/// meant to be vectorized behind runtime pointer-overlap checks whose cost is
/// what gets measured -- confirm against the benchmark's intent.
///
/// \param A    destination; advanced by \p Step elements after each of the
///             1000 outer iterations, so elements [0, 999 * Step + TC) are
///             written overall.
/// \param B    first source; advanced by one element per outer iteration, so
///             elements [0, 999 + TC) are read overall.
/// \param C    second source; advanced like \p B.
/// \param D    third source; advanced like \p B.
/// \param TC   trip count of the inner loop.
/// \param Step number of elements \p A moves forward per outer iteration.
static void vecWithRuntimeChecks4Pointers(uint32_t *A, uint32_t *B, uint32_t *C,
uint32_t *D, unsigned TC,
unsigned Step) {
// Prevent the unroller from interfering.
#pragma clang loop unroll(disable)
for (unsigned I = 0; I < 1000; I++) {
// Make sure the inner loop cannot be optimized out.
benchmark::ClobberMemory();
// Keep interleaving and unrolling off for the inner loop as well.
#pragma clang loop interleave_count(1)
#pragma clang loop unroll(disable)
for (unsigned J = 0; J < TC; ++J) {
A[J] = B[J] + C[J] + D[J];
}
// Make sure the runtime checks cannot be hoisted out of the outer loop.
// The destination moves by Step elements, every source by one element.
A += Step;
B++;
C++;
D++;
}
}
/// Call \p Callee with \p CallArgs from inside a function that is itself
/// compiled with optimizations disabled (`optnone`). Routing the call through
/// this wrapper blocks the compiler from optimizing the callee based on the
/// concrete arguments used at the call site.
template <typename Fn, typename... ArgTs>
__attribute__((optnone)) static void callThroughOptnone(Fn &&Callee,
                                                        ArgTs &&...CallArgs) {
  // Forward each argument with its original value category.
  Callee(std::forward<ArgTs>(CallArgs)...);
}
// Benchmark for when runtime checks are passing.
/// A, B, C and D are four back-to-back, non-overlapping windows of TC elements
/// inside one buffer, in increasing address order (A, then B, then C, then D).
/// All four pointers advance by one element per outer kernel iteration.
void benchVecWithRuntimeChecks4PointersAllDisjointIncreasing(
    benchmark::State &state) {
  unsigned TripCount = state.range(0);
  // Four windows of TripCount elements, plus room for the kernel's 1000 outer
  // iterations, each of which shifts every window forward by one element.
  unsigned NumElts = 4 * TripCount + 1000;
  std::unique_ptr<uint32_t[]> Buf(new uint32_t[NumElts]);
  init_data(Buf, NumElts);
  for (auto _ : state) {
    callThroughOptnone(vecWithRuntimeChecks4Pointers,
                       /*A=*/Buf.get(),
                       /*B=*/Buf.get() + TripCount,
                       /*C=*/Buf.get() + 2 * TripCount,
                       /*D=*/Buf.get() + 3 * TripCount,
                       /*TC=*/TripCount, /*Step=*/1);
    // Keep the results observable so the kernel call is not discarded.
    benchmark::DoNotOptimize(Buf);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(benchVecWithRuntimeChecks4PointersAllDisjointIncreasing)
    ->Arg(32)
    ->Arg(1000);
/// Same four non-overlapping TC-element windows as the "increasing" variant,
/// but handed to the kernel in decreasing address order: A is the highest
/// window and D the lowest.
void benchVecWithRuntimeChecks4PointersAllDisjointDecreasing(
    benchmark::State &state) {
  unsigned TripCount = state.range(0);
  // Four windows of TripCount elements, plus room for the kernel's 1000 outer
  // iterations, each of which shifts every window forward by one element.
  unsigned NumElts = 4 * TripCount + 1000;
  std::unique_ptr<uint32_t[]> Buf(new uint32_t[NumElts]);
  init_data(Buf, NumElts);
  for (auto _ : state) {
    callThroughOptnone(vecWithRuntimeChecks4Pointers,
                       /*A=*/Buf.get() + 3 * TripCount,
                       /*B=*/Buf.get() + 2 * TripCount,
                       /*C=*/Buf.get() + 1 * TripCount,
                       /*D=*/Buf.get(),
                       /*TC=*/TripCount, /*Step=*/1);
    // Keep the results observable so the kernel call is not discarded.
    benchmark::DoNotOptimize(Buf);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(benchVecWithRuntimeChecks4PointersAllDisjointDecreasing)
    ->Arg(32)
    ->Arg(1000);
/// D is passed as exactly the same pointer as A, so each inner iteration reads
/// and then overwrites the same element; B and C are separate windows located
/// TC and 2 * TC elements after the start of the buffer.
void benchVecWithRuntimeChecks4PointersDEqualsA(benchmark::State &state) {
  unsigned TripCount = state.range(0);
  // Same buffer size as the other benchmarks in this file.
  unsigned NumElts = 4 * TripCount + 1000;
  std::unique_ptr<uint32_t[]> Buf(new uint32_t[NumElts]);
  init_data(Buf, NumElts);
  for (auto _ : state) {
    callThroughOptnone(vecWithRuntimeChecks4Pointers,
                       /*A=*/Buf.get(),
                       /*B=*/Buf.get() + TripCount,
                       /*C=*/Buf.get() + 2 * TripCount,
                       /*D=*/Buf.get(),
                       /*TC=*/TripCount, /*Step=*/1);
    // Keep the results observable so the kernel call is not discarded.
    benchmark::DoNotOptimize(Buf);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(benchVecWithRuntimeChecks4PointersDEqualsA)->Arg(32)->Arg(1000);
// Benchmark for when runtime checks are failing.
/// D starts two elements before A, so the element stored through A[J] is the
/// one that D[J + 2] reads two inner iterations later. B and C are separate
/// windows starting 2 * TC and 3 * TC elements into the buffer.
void benchVecWithRuntimeChecks4PointersDBeforeA(benchmark::State &state) {
  unsigned TripCount = state.range(0);
  // Same buffer size as the other benchmarks in this file.
  unsigned NumElts = 4 * TripCount + 1000;
  std::unique_ptr<uint32_t[]> Buf(new uint32_t[NumElts]);
  init_data(Buf, NumElts);
  for (auto _ : state) {
    callThroughOptnone(vecWithRuntimeChecks4Pointers,
                       /*A=*/Buf.get() + 2,
                       /*B=*/Buf.get() + 2 * TripCount,
                       /*C=*/Buf.get() + 3 * TripCount,
                       /*D=*/Buf.get(),
                       /*TC=*/TripCount, /*Step=*/1);
    // Keep the results observable so the kernel call is not discarded.
    benchmark::DoNotOptimize(Buf);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(benchVecWithRuntimeChecks4PointersDBeforeA)->Arg(32)->Arg(1000);
/// D starts two elements after A, so D[J] reads the element that the store to
/// A[J + 2] overwrites two inner iterations later. B and C are separate
/// windows starting 2 * TC and 3 * TC elements into the buffer.
void benchVecWithRuntimeChecks4PointersDAfterA(benchmark::State &state) {
  unsigned TripCount = state.range(0);
  // Same buffer size as the other benchmarks in this file.
  unsigned NumElts = 4 * TripCount + 1000;
  std::unique_ptr<uint32_t[]> Buf(new uint32_t[NumElts]);
  init_data(Buf, NumElts);
  for (auto _ : state) {
    callThroughOptnone(vecWithRuntimeChecks4Pointers,
                       /*A=*/Buf.get(),
                       /*B=*/Buf.get() + 2 * TripCount,
                       /*C=*/Buf.get() + 3 * TripCount,
                       /*D=*/Buf.get() + 2,
                       /*TC=*/TripCount, /*Step=*/1);
    // Keep the results observable so the kernel call is not discarded.
    benchmark::DoNotOptimize(Buf);
    benchmark::ClobberMemory();
  }
}
BENCHMARK(benchVecWithRuntimeChecks4PointersDAfterA)->Arg(32)->Arg(1000);