Merge pull request #224 from zabereer/donotoptimize_with_test_merged
Force DoNotOptimize operand to memory for both gcc and clang
diff --git a/AUTHORS b/AUTHORS
index 7ddffd8..0f93e01 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -8,6 +8,7 @@
#
# Please keep the list sorted.
+Albert Pretorius <pretoalb@gmail.com>
Arne Beer <arne@twobeer.de>
Christopher Seymour <chris.j.seymour@hotmail.com>
David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index af40292..4bff126 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -22,6 +22,7 @@
#
# Please keep the list sorted.
+Albert Pretorius <pretoalb@gmail.com>
Arne Beer <arne@twobeer.de>
Billy Robert O'Neal III <billy.oneal@gmail.com> <bion@microsoft.com>
Chris Kennelly <ckennelly@google.com> <ckennelly@ckennelly.com>
diff --git a/include/benchmark/benchmark_api.h b/include/benchmark/benchmark_api.h
index 9bf38a8..4167553 100644
--- a/include/benchmark/benchmark_api.h
+++ b/include/benchmark/benchmark_api.h
@@ -210,20 +210,18 @@
// expression from being optimized away by the compiler. This function is
// intended to add little to no overhead.
// See: http://stackoverflow.com/questions/28287064
-#if defined(__clang__) && defined(__GNUC__)
+#if defined(__GNUC__)
// TODO(ericwf): Clang has a bug where it tries to always use a register
// even if value must be stored in memory. This causes codegen to fail.
// To work around this we remove the "r" modifier so the operand is always
// loaded into memory.
+// GCC also has a bug where it complains about inconsistent operand constraints
+// when "+rm" is used for a type larger than can fit in a register or two.
+// For now force the operand to memory for both GCC and Clang.
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
// Empty asm statement taking `value` as a read-write ("+") memory ("m")
// operand; the const_cast is needed because an output operand must be
// a non-const lvalue.
asm volatile("" : "+m" (const_cast<Tp&>(value)));
}
-#elif defined(__GNUC__)
-template <class Tp>
-inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
- asm volatile("" : "+rm" (const_cast<Tp&>(value)));
-}
#else
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 7d75c11..247c630 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -39,6 +39,9 @@
compile_benchmark_test(skip_with_error_test)
add_test(skip_with_error_test skip_with_error_test --benchmark_min_time=0.01)
+compile_benchmark_test(donotoptimize_test)
+add_test(donotoptimize_test donotoptimize_test --benchmark_min_time=0.01)
+
compile_benchmark_test(fixture_test)
add_test(fixture_test fixture_test --benchmark_min_time=0.01)
diff --git a/test/donotoptimize_test.cc b/test/donotoptimize_test.cc
new file mode 100644
index 0000000..e4453fb
--- /dev/null
+++ b/test/donotoptimize_test.cc
@@ -0,0 +1,36 @@
+#include "benchmark/benchmark.h"
+
+#include <cstdint>
+
+namespace {  // file-local helper used by main() below
+#if defined(__GNUC__)  // attribute syntax below is a GNU extension
+ std::uint64_t double_up(const std::uint64_t x) __attribute__ ((const));  // separate declaration that carries the attribute
+#endif
+ std::uint64_t double_up(const std::uint64_t x) {  // returns twice its argument
+ return x * 2;
+ }
+}  // namespace
+
+int main(int, char*[]) {
+  // This test only verifies that DoNotOptimize() compiles for a range of
+  // operand types; it asserts nothing about runtime values.
+
+  // Arrays are zero-initialized so no indeterminate bytes are handed over.
+  char buffer8[8] = {};
+  benchmark::DoNotOptimize(buffer8);
+  char buffer20[20] = {};
+  benchmark::DoNotOptimize(buffer20);
+  char buffer1024[1024] = {};
+  benchmark::DoNotOptimize(buffer1024);
+  benchmark::DoNotOptimize(&buffer1024[0]);  // pointer to the first element
+
+  int x = 123;
+  benchmark::DoNotOptimize(x);        // named int
+  benchmark::DoNotOptimize(&x);       // address of an int
+  benchmark::DoNotOptimize(x += 42);  // result of a compound assignment
+
+  // Return value of a helper declared __attribute__((const)) under GCC.
+  benchmark::DoNotOptimize(double_up(x));
+
+  return 0;
+}