// Repository-viewer residue (not part of the program): blob c1489cdb492c96ca55a0bc7f62bd43153b9bf854
#include <algorithm>
#include <chrono>
#include <iostream>
#include <functional>
#include <vector>
#include "compiler_specifics.h"
// Clock used for all timing in this benchmark.
// steady_clock is monotonic, so an interval computed from two now() calls can
// never be distorted by wall-clock adjustments. high_resolution_clock gives no
// such guarantee: on some standard libraries it is an alias for system_clock.
using Clock = std::chrono::steady_clock;
// Optimization barrier: makes the pointer `p` look "used" to the compiler.
// The asm template itself is empty; `p` is passed as a register input ("r"),
// and the "memory" clobber declares that the statement may read or write
// arbitrary memory, so the compiler cannot treat earlier stores as dead or
// keep memory values cached in registers across this point.
// Uses GCC/Clang extended-asm syntax.
void CompilerOpaqueUse(void* p) {
asm volatile ("" :: "r"(p) : "memory");
}
// Invokes `f` exactly once and returns the time that elapsed between the
// Clock reading taken just before the call and the one taken just after it.
Clock::duration Runtime(std::function<void()> f) {
  const Clock::time_point began = Clock::now();
  f();
  const Clock::time_point ended = Clock::now();
  return ended - began;
}
// Converts a Clock duration to a whole number of microseconds
// (duration_cast truncates any sub-microsecond remainder toward zero).
int64_t Microseconds(const Clock::duration &d) {
  using std::chrono::microseconds;
  const microseconds whole_us = std::chrono::duration_cast<microseconds>(d);
  return whole_us.count();
}
// Stores `val` into each of the `size` ints starting at `buffer`.
// This is the operation timed by TestOne.
// NOTE(review): SAFESIDE_NEVER_INLINE is not defined in this file; presumably
// it comes from compiler_specifics.h and prevents inlining so that the fill
// remains a real call from the timing loop -- confirm against that header.
SAFESIDE_NEVER_INLINE
void Fill(int* buffer, int size, int val) {
std::fill(buffer, buffer + size, val);
// An explicit optimization barrier on `buffer` after the fill seems
// unnecessary here, so it is left disabled:
// CompilerOpaqueUse(buffer);
}
// Times filling the `size` ints at `buffer` with `val`.
//
// The identical timed pass is executed several times in a row. Only the
// duration of the last pass is returned; the preceding passes are warm-up.
Clock::duration TestOne(int* buffer, int size, int val) {
  constexpr int kWarmupPasses = 5;   // avoid transient effects, improve consistency
  constexpr int kRoundsPerPass = 1;  // fills per timed pass, to amplify the result

  // Worth further investigation: at -O3, calling the separate Fill() function
  // here works and shows a difference of 10%+ when writing 0, whereas doing
  //   std::fill(buffer, buffer + size, val);
  //   CompilerOpaqueUse(buffer);
  // directly takes *longer* and shows no obvious difference when writing 0.
  // Part of it is probably that CompilerOpaqueUse includes a "memory" clobber.
  const auto timed_pass = [&]() {
    for (int round = 0; round != kRoundsPerPass; ++round) {
      Fill(buffer, size, val);
    }
  };

  // Warm-up: run the pass and throw the measurements away.
  for (int pass = 0; pass != kWarmupPasses; ++pass) {
    Runtime(timed_pass);
  }

  // The one measurement that is actually reported.
  return Runtime(timed_pass);
}
void Test() {
// the behavior should appear for any buffer larger than L2
// see `sudo lshw` or /sys/devices/system/cpu/cpu0/cache/index2/size
int buffer_bytes = 2 * 1024 * 1024;
std::vector<int> buffer(buffer_bytes / sizeof(int));
std::vector<Clock::duration> d0s, d1s, d2s;
int samples = 30; // reduce variation
for (int i = 0; i < samples; ++i) {
d0s.push_back(TestOne(buffer.data(), buffer.size(), 0));
// Use two non-zero values so we can also show the amount of variation/
// jitter between runs that we *always* expect to act the same.
d1s.push_back(TestOne(buffer.data(), buffer.size(), 1));
d2s.push_back(TestOne(buffer.data(), buffer.size(), 2));
}
std::sort(d0s.begin(), d0s.end());
std::sort(d1s.begin(), d1s.end());
std::sort(d2s.begin(), d2s.end());
std::cout << "Fill with 0: " << Microseconds(d0s[samples/2]) << "us"
<< std::endl;
std::cout << "Fill with 1: " << Microseconds(d1s[samples/2]) << "us"
<< std::endl;
std::cout << "Fill with 2: " << Microseconds(d2s[samples/2]) << "us"
<< std::endl;
}
// Program entry point: runs the fill benchmark once and exits successfully.
int main(int argc, char* argv[]) {
  // No command-line options are consumed; mark the parameters as
  // intentionally unused.
  static_cast<void>(argc);
  static_cast<void>(argv);

  Test();
  return 0;
}