Merge pull request #224 from zabereer/donotoptimize_with_test_merged

Force DoNotOptimize operand to memory for both gcc and clang
diff --git a/AUTHORS b/AUTHORS
index 7ddffd8..0f93e01 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -8,6 +8,7 @@
 #
 # Please keep the list sorted.
 
+Albert Pretorius <pretoalb@gmail.com>
 Arne Beer <arne@twobeer.de>
 Christopher Seymour <chris.j.seymour@hotmail.com>
 David Coeurjolly <david.coeurjolly@liris.cnrs.fr>
diff --git a/CONTRIBUTORS b/CONTRIBUTORS
index af40292..4bff126 100644
--- a/CONTRIBUTORS
+++ b/CONTRIBUTORS
@@ -22,6 +22,7 @@
 #
 # Please keep the list sorted.
 
+Albert Pretorius <pretoalb@gmail.com>
 Arne Beer <arne@twobeer.de>
 Billy Robert O'Neal III <billy.oneal@gmail.com> <bion@microsoft.com>
 Chris Kennelly <ckennelly@google.com> <ckennelly@ckennelly.com>
diff --git a/include/benchmark/benchmark_api.h b/include/benchmark/benchmark_api.h
index 9bf38a8..4167553 100644
--- a/include/benchmark/benchmark_api.h
+++ b/include/benchmark/benchmark_api.h
@@ -210,20 +210,18 @@
 // expression from being optimized away by the compiler. This function is
 // intented to add little to no overhead.
 // See: http://stackoverflow.com/questions/28287064
-#if defined(__clang__) && defined(__GNUC__)
+#if defined(__GNUC__)
 // TODO(ericwf): Clang has a bug where it tries to always use a register
 // even if value must be stored in memory. This causes codegen to fail.
 // To work around this we remove the "r" modifier so the operand is always
 // loaded into memory.
+// GCC also has a bug where it complains about inconsistent operand constraints
+// when "+rm" is used for a type too large to fit in one or two registers.
+// For now, force the operand to memory for both GCC and Clang.
 template <class Tp>
 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
     asm volatile("" : "+m" (const_cast<Tp&>(value)));
 }
-#elif defined(__GNUC__)
-template <class Tp>
-inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
-    asm volatile("" : "+rm" (const_cast<Tp&>(value)));
-}
 #else
 template <class Tp>
 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 7d75c11..247c630 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -39,6 +39,9 @@
 compile_benchmark_test(skip_with_error_test)
 add_test(skip_with_error_test skip_with_error_test --benchmark_min_time=0.01)
 
+compile_benchmark_test(donotoptimize_test)
+add_test(donotoptimize_test donotoptimize_test --benchmark_min_time=0.01)
+
 compile_benchmark_test(fixture_test)
 add_test(fixture_test fixture_test --benchmark_min_time=0.01)
 
diff --git a/test/donotoptimize_test.cc b/test/donotoptimize_test.cc
new file mode 100644
index 0000000..e4453fb
--- /dev/null
+++ b/test/donotoptimize_test.cc
@@ -0,0 +1,36 @@
+#include "benchmark/benchmark.h"
+
+#include <cstdint>
+
+namespace {
+#if defined(__GNUC__)  // the attribute syntax below is a GNU extension
+  std::uint64_t double_up(const std::uint64_t x) __attribute__ ((const));
+#endif  // defined(__GNUC__)
+  std::uint64_t double_up(const std::uint64_t x) {
+    return x * 2;  // result depends only on x, matching the const attribute
+  }
+}  // namespace
+
+int main(int, char*[]) {
+  // Compile-only check that DoNotOptimize() accepts arrays, pointers, lvalues
+  // and rvalues. Buffers are zero-initialized since the GCC/Clang "+m" asm
+  // operand is read as well as written; indeterminate storage can draw warnings.
+  char buffer8[8] = "";
+  benchmark::DoNotOptimize(buffer8);
+
+  char buffer20[20] = "";
+  benchmark::DoNotOptimize(buffer20);
+
+  char buffer1024[1024] = "";
+  benchmark::DoNotOptimize(buffer1024);
+  benchmark::DoNotOptimize(&buffer1024[0]);
+
+  int x = 123;
+  benchmark::DoNotOptimize(x);
+  benchmark::DoNotOptimize(&x);
+  benchmark::DoNotOptimize(x += 42);
+
+  benchmark::DoNotOptimize(double_up(x));
+
+  return 0;
+}