Add SkipWithError(...) function.
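
Calling State::SkipWithError("msg") skips the remainder of the current
benchmark run and reports the error message in place of timing results.
It can be invoked before, inside, or after the KeepRunning() loop; once
called, the next call to KeepRunning() returns false. In threaded
benchmarks the first error reported wins, and the erroring thread is
removed from the timer barrier so the remaining threads are not blocked.

A minimal sketch of intended usage (ResourceIsAvailable() is a
placeholder for whatever precondition the benchmark needs):

    static void BM_example(benchmark::State& state) {
      if (!ResourceIsAvailable()) {
        state.SkipWithError("resource is not available");
      }
      while (state.KeepRunning()) {  // returns false immediately on error
        // ... measured work ...
      }
    }
    BENCHMARK(BM_example);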
diff --git a/include/benchmark/benchmark_api.h b/include/benchmark/benchmark_api.h
index 2ded481..654986d 100644
--- a/include/benchmark/benchmark_api.h
+++ b/include/benchmark/benchmark_api.h
@@ -235,22 +235,27 @@
 // benchmark to use.
 class State {
 public:
-  State(size_t max_iters, bool has_x, int x, bool has_y, int y, int thread_i, int n_threads);
+  State(size_t max_iters, bool has_x, int x, bool has_y, int y,
+        int thread_i, int n_threads);
 
   // Returns true iff the benchmark should continue through another iteration.
   // NOTE: A benchmark may not return from the test until KeepRunning() has
   // returned false.
   bool KeepRunning() {
     if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
-        ResumeTiming();
+        assert(!finished_);
         started_ = true;
+        ResumeTiming();
     }
     bool const res = total_iterations_++ < max_iterations;
     if (BENCHMARK_BUILTIN_EXPECT(!res, false)) {
-        assert(started_);
-        PauseTiming();
+        assert(started_ && (!finished_ || error_occurred_));
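+        // After an error, timing for this thread has already been torn down
+        // by SkipWithError(), and PauseTiming() would CHECK-fail, so it is
+        // skipped here.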
+        if (!error_occurred_) {
+            PauseTiming();
+        }
         // Total iterations now is one greater than max iterations. Fix this.
         total_iterations_ = max_iterations;
+        finished_ = true;
     }
     return res;
   }
@@ -283,6 +288,8 @@
   // within each benchmark iteration, if possible.
   void ResumeTiming();
 
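+  // Skip the rest of this benchmark run and report 'msg' as the reason.
+  // Once called, the next call to KeepRunning() returns false. If several
+  // threads report an error, only the first message is kept.
+  // REQUIRES: 'msg' is non-null.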
+  void SkipWithError(const char* msg);
+
   // REQUIRES: called exactly once per iteration of the KeepRunning loop.
   // Set the manually measured time for this benchmark iteration, which
   // is used instead of automatically measured time if UseManualTime() was
@@ -371,6 +378,7 @@
 
 private:
   bool started_;
+  bool finished_;
   size_t total_iterations_;
 
   bool has_range_x_;
@@ -383,6 +391,9 @@
   size_t items_processed_;
 
 public:
+  // FIXME: Make this private somehow.
+  bool error_occurred_;
+public:
   // Index of the executing thread. Values from [0, threads).
   const int thread_index;
   // Number of threads concurrently executing the benchmark.
diff --git a/include/benchmark/reporter.h b/include/benchmark/reporter.h
index aaf5fbf..ac222e4 100644
--- a/include/benchmark/reporter.h
+++ b/include/benchmark/reporter.h
@@ -42,6 +42,7 @@
 
   struct Run {
     Run() :
+      error_occurred(false),
       iterations(1),
       time_unit(kNanosecond),
       real_accumulated_time(0),
@@ -52,6 +53,9 @@
 
     std::string benchmark_name;
     std::string report_label;  // Empty if not set by benchmark.
+    bool error_occurred;
+    std::string error_message;
+
     int64_t iterations;
     TimeUnit time_unit;
     double real_accumulated_time;
diff --git a/src/benchmark.cc b/src/benchmark.cc
index 3d12d28..5d8f279 100644
--- a/src/benchmark.cc
+++ b/src/benchmark.cc
@@ -112,6 +112,10 @@
     return &label;
 }
 
+// Global variable so that a benchmark can report an error as a
+// human-readable string. If error_message is null, no error occurred.
+static std::atomic<const char*> error_message = ATOMIC_VAR_INIT(nullptr);
+
 // TODO(ericwf): support MallocCounter.
 //static benchmark::MallocCounter *benchmark_mc;
 
@@ -126,6 +130,7 @@
  public:
   TimerManager(int num_threads, Notification* done)
       : num_threads_(num_threads),
+        running_threads_(num_threads),
         done_(done),
         running_(false),
         real_time_used_(0),
@@ -133,7 +138,8 @@
         manual_time_used_(0),
         num_finalized_(0),
         phase_number_(0),
-        entered_(0) {
+        entered_(0)
+  {
   }
 
   // Called by each thread
@@ -196,6 +202,16 @@
     }
   }
 
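+  // Called by a thread that errors out of the benchmark loop via
+  // State::SkipWithError(). Stops the timer if this was the last running
+  // thread; otherwise wakes any threads blocked in Barrier() so they can
+  // observe the departure.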
+  void RemoveErroredThread() EXCLUDES(lock_) {
+    MutexLock ml(lock_);
+    const bool last_thread = --running_threads_ == 0;
+    if (last_thread && running_) {
+      InternalStop();
+    } else if (!last_thread) {
+      phase_condition_.notify_all();
+    }
+  }
+
   // REQUIRES: timer is not running
   double real_time_used() EXCLUDES(lock_) {
     MutexLock l(lock_);
@@ -221,6 +237,7 @@
   Mutex lock_;
   Condition phase_condition_;
   int num_threads_;
+  int running_threads_;
   Notification* done_;
 
   bool running_;                // Is the timer running
@@ -252,22 +269,27 @@
   // entered the barrier.  Returns iff this is the last thread to
   // enter the barrier.
   bool Barrier(MutexLock& ml) REQUIRES(lock_) {
-    CHECK_LT(entered_, num_threads_);
+    CHECK_LT(entered_, running_threads_);
     entered_++;
-    if (entered_ < num_threads_) {
+    if (entered_ < running_threads_) {
       // Wait for all threads to enter
       int phase_number_cp = phase_number_;
       auto cb = [this, phase_number_cp]() {
-        return this->phase_number_ > phase_number_cp;
+        return this->phase_number_ > phase_number_cp ||
+               entered_ == running_threads_; // A thread has aborted in error
       };
-      phase_condition_.wait(ml.native_handle(), cb);
-      return false;  // I was not the last one
-    } else {
-      // Last thread has reached the barrier
-      phase_number_++;
-      entered_ = 0;
-      return true;
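+      // The wait is satisfied either because the phase advanced (the normal
+      // case) or because an erroring thread left through
+      // RemoveErroredThread(), making this thread the last one at the
+      // barrier; in that case fall through and advance the phase ourselves.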
+      while (true) {
+        phase_condition_.wait(ml.native_handle(), cb);
+        if (phase_number_ > phase_number_cp)
+          return false;
+        else if (running_threads_ == entered_)
+          break;
+      }
     }
+    // Last thread has reached the barrier
+    phase_number_++;
+    entered_ = 0;
+    return true;
   }
 };
 
@@ -702,6 +724,7 @@
         MutexLock l(GetBenchmarkLock());
         GetReportLabel()->clear();
       }
+      error_message = nullptr;
 
       Notification done;
       timer_manager = std::unique_ptr<TimerManager>(new TimerManager(b.threads, &done));
@@ -747,40 +770,48 @@
         MutexLock l(GetBenchmarkLock());
         label = *GetReportLabel();
       }
+      const char* error_msg = error_message;
 
       const double min_time = !IsZero(b.min_time) ? b.min_time
                                                   : FLAGS_benchmark_min_time;
 
       // If this was the first run, was elapsed time or cpu time large enough?
       // If this is not the first run, go with the current value of iter.
-      if ((i > 0) ||
+      if ((i > 0) || (error_msg != nullptr) ||
           (iters >= kMaxIterations) ||
           (seconds >= min_time) ||
           (real_accumulated_time >= 5*min_time)) {
-        double bytes_per_second = 0;
-        if (total.bytes_processed > 0 && seconds > 0.0) {
-          bytes_per_second = (total.bytes_processed / seconds);
-        }
-        double items_per_second = 0;
-        if (total.items_processed > 0 && seconds > 0.0) {
-          items_per_second = (total.items_processed / seconds);
-        }
 
         // Create report about this benchmark run.
         BenchmarkReporter::Run report;
         report.benchmark_name = b.name;
+        report.error_occurred = error_msg != nullptr;
+        report.error_message = error_msg != nullptr ? error_msg : "";
         report.report_label = label;
         // Report the total iterations across all threads.
         report.iterations = static_cast<int64_t>(iters) * b.threads;
         report.time_unit = b.time_unit;
-        if (b.use_manual_time) {
-          report.real_accumulated_time = manual_accumulated_time;
-        } else {
-          report.real_accumulated_time = real_accumulated_time;
+
+        if (!report.error_occurred) {
+
+          double bytes_per_second = 0;
+          if (total.bytes_processed > 0 && seconds > 0.0) {
+            bytes_per_second = (total.bytes_processed / seconds);
+          }
+          double items_per_second = 0;
+          if (total.items_processed > 0 && seconds > 0.0) {
+            items_per_second = (total.items_processed / seconds);
+          }
+
+          if (b.use_manual_time) {
+            report.real_accumulated_time = manual_accumulated_time;
+          } else {
+            report.real_accumulated_time = real_accumulated_time;
+          }
+          report.cpu_accumulated_time = cpu_accumulated_time;
+          report.bytes_per_second = bytes_per_second;
+          report.items_per_second = items_per_second;
         }
-        report.cpu_accumulated_time = cpu_accumulated_time;
-        report.bytes_per_second = bytes_per_second;
-        report.items_per_second = items_per_second;
         reports.push_back(report);
         break;
       }
@@ -815,10 +846,11 @@
 
 State::State(size_t max_iters, bool has_x, int x, bool has_y, int y,
              int thread_i, int n_threads)
-    : started_(false), total_iterations_(0),
+    : started_(false), finished_(false), total_iterations_(0),
       has_range_x_(has_x), range_x_(x),
       has_range_y_(has_y), range_y_(y),
       bytes_processed_(0), items_processed_(0),
+      error_occurred_(false),
       thread_index(thread_i),
       threads(n_threads),
       max_iterations(max_iters)
@@ -830,14 +862,26 @@
 void State::PauseTiming() {
   // Add in time accumulated so far
   CHECK(running_benchmark);
+  CHECK(started_ && !finished_ && !error_occurred_);
   timer_manager->StopTimer();
 }
 
 void State::ResumeTiming() {
   CHECK(running_benchmark);
+  CHECK(started_ && !finished_);
   timer_manager->StartTimer();
 }
 
+void State::SkipWithError(const char* msg) {
+  CHECK(msg);
+  error_occurred_ = true;
+  // Keep only the first error reported across all threads. Use the strong
+  // CAS: this is a one-shot exchange, and the weak form may fail spuriously
+  // and drop the message.
+  const char* expected_no_error_msg = nullptr;
+  error_message.compare_exchange_strong(expected_no_error_msg, msg);
+  started_ = finished_ = true;
+  total_iterations_ = max_iterations;
+  timer_manager->RemoveErroredThread();
+}
+
 void State::SetIterationTime(double seconds)
 {
   CHECK(running_benchmark);
diff --git a/src/check.h b/src/check.h
index d2c1fda..cb49c49 100644
--- a/src/check.h
+++ b/src/check.h
@@ -10,6 +10,18 @@
 namespace benchmark {
 namespace internal {
 
+typedef void(AbortHandlerT)();
+
+inline AbortHandlerT*& GetAbortHandler() {
+    static AbortHandlerT* handler = &std::abort;
+    return handler;
+}
+
+BENCHMARK_NORETURN inline void CallAbortHandler() {
+    GetAbortHandler()();
+    std::abort(); // fallback to enforce noreturn
+}
+
 // CheckHandler is the class constructed by failing CHECK macros. CheckHandler
 // will log information about the failures and abort when it is destructed.
 class CheckHandler {
@@ -25,13 +37,13 @@
     return log_;
   }
 
-  BENCHMARK_NORETURN ~CheckHandler() {
+  BENCHMARK_NORETURN ~CheckHandler() noexcept(false) {
       log_ << std::endl;
-      std::abort();
+      CallAbortHandler();
   }
 
-  CheckHandler & operator=(const CheckHandler&) = delete;
-
-  CheckHandler(const CheckHandler&) = delete;
-
+  CheckHandler & operator=(const CheckHandler&) = delete;
+  CheckHandler(const CheckHandler&) = delete;
   CheckHandler() = delete;
 private:
   std::ostream& log_;
diff --git a/src/console_reporter.cc b/src/console_reporter.cc
index 56bd3ce..786d030 100644
--- a/src/console_reporter.cc
+++ b/src/console_reporter.cc
@@ -16,6 +16,7 @@
 
 #include <cstdint>
 #include <cstdio>
+#include <algorithm>
 #include <iostream>
 #include <string>
 #include <tuple>
@@ -65,7 +66,11 @@
     PrintRunData(run);
   }
 
-  if (reports.size() < 2) {
+  auto error_count = std::count_if(
+      reports.begin(), reports.end(),
+      [](Run const& run) { return run.error_occurred; });
+
+  if (reports.size() - static_cast<size_t>(error_count) < 2) {
     // We don't report aggregated data if there was a single run.
     return;
   }
@@ -80,6 +85,14 @@
 }
 
 void ConsoleReporter::PrintRunData(const Run& result) {
+  ColorPrintf(COLOR_GREEN, "%-*s ",
+              name_field_width_, result.benchmark_name.c_str());
+
+  if (result.error_occurred) {
+    ColorPrintf(COLOR_RED, "ERROR OCCURRED: '%s'", result.error_message.c_str());
+    ColorPrintf(COLOR_DEFAULT, "\n");
+    return;
+  }
   // Format bytes per second
   std::string rate;
   if (result.bytes_per_second > 0) {
@@ -97,9 +110,6 @@
   const char* timeLabel;
   std::tie(timeLabel, multiplier) = GetTimeUnitAndMultiplier(result.time_unit);
 
-  ColorPrintf(COLOR_GREEN, "%-*s ",
-              name_field_width_, result.benchmark_name.c_str());
-
   if (result.iterations == 0) {
     ColorPrintf(COLOR_YELLOW, "%10.0f %s %10.0f %s ",
                 result.real_accumulated_time * multiplier,
diff --git a/src/json_reporter.cc b/src/json_reporter.cc
index 7ed141f..84e2e28 100644
--- a/src/json_reporter.cc
+++ b/src/json_reporter.cc
@@ -137,6 +137,14 @@
     out << indent
         << FormatKV("name", run.benchmark_name)
         << ",\n";
+    if (run.error_occurred) {
+        out << indent
+            << FormatKV("error_occurred", run.error_occurred)
+            << ",\n";
+        out << indent
+            << FormatKV("error_message", run.error_message)
+            << ",\n";
+    }
     out << indent
         << FormatKV("iterations", run.iterations)
         << ",\n";
diff --git a/src/reporter.cc b/src/reporter.cc
index 036546e..a23c756 100644
--- a/src/reporter.cc
+++ b/src/reporter.cc
@@ -39,6 +39,8 @@
   for (Run const& run : reports) {
     CHECK_EQ(reports[0].benchmark_name, run.benchmark_name);
     CHECK_EQ(run_iterations, run.iterations);
+    if (run.error_occurred)
+      continue;
     real_accumulated_time_stat +=
         Stat1_d(run.real_accumulated_time/run.iterations, run.iterations);
     cpu_accumulated_time_stat +=
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index a10a53a..efbe7bb 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -36,6 +36,12 @@
 compile_benchmark_test(basic_test)
 add_test(basic_benchmark basic_test --benchmark_min_time=0.01)
 
+compile_benchmark_test(diagnostics_test)
+add_test(diagnostics_test diagnostics_test --benchmark_min_time=0.01)
+
+compile_benchmark_test(skip_with_error_test)
+add_test(skip_with_error_test skip_with_error_test --benchmark_min_time=0.01)
+
 compile_benchmark_test(fixture_test)
 add_test(fixture_test fixture_test --benchmark_min_time=0.01)
 
diff --git a/test/diagnostics_test.cc b/test/diagnostics_test.cc
new file mode 100644
index 0000000..60fa3b1
--- /dev/null
+++ b/test/diagnostics_test.cc
@@ -0,0 +1,61 @@
+// Testing:
+//   State::PauseTiming()
+//   State::ResumeTiming()
+// Test that the CHECKs within these functions diagnose the case where they
+// are called outside of the KeepRunning() loop.
+//
+// NOTE: Users should NOT include or use src/check.h. This is only done in
+// order to test library internals.
+
+#include "benchmark/benchmark_api.h"
+#include "../src/check.h"
+#include <stdexcept>
+#include <cstdlib>
+
+#if defined(__GNUC__) && !defined(__EXCEPTIONS)
+#define TEST_HAS_NO_EXCEPTIONS
+#endif
+
+void TestHandler() {
+#ifndef TEST_HAS_NO_EXCEPTIONS
+  throw std::logic_error("");
+#else
+  std::abort();
+#endif
+}
+
+void try_invalid_pause_resume(benchmark::State& state) {
+#if !defined(NDEBUG) && !defined(TEST_HAS_NO_EXCEPTIONS)
+  try {
+    state.PauseTiming();
+    std::abort();
+  } catch (std::logic_error const&) {}
+  try {
+    state.ResumeTiming();
+    std::abort();
+  } catch (std::logic_error const&) {}
+#else
+  (void)state; // avoid unused warning
+#endif
+}
+
+void BM_diagnostic_test(benchmark::State& state) {
+  static bool called_once = false;
+
+  if (called_once == false) try_invalid_pause_resume(state);
+
+  while (state.KeepRunning()) {
+    benchmark::DoNotOptimize(state.iterations());
+  }
+
+  if (called_once == false) try_invalid_pause_resume(state);
+
+  called_once = true;
+}
+BENCHMARK(BM_diagnostic_test);
+
+int main(int argc, char** argv) {
+  benchmark::internal::GetAbortHandler() = &TestHandler;
+  benchmark::Initialize(&argc, argv);
+  benchmark::RunSpecifiedBenchmarks();
+}
diff --git a/test/skip_with_error_test.cc b/test/skip_with_error_test.cc
new file mode 100644
index 0000000..dafbd64
--- /dev/null
+++ b/test/skip_with_error_test.cc
@@ -0,0 +1,161 @@
+
+#undef NDEBUG
+#include "benchmark/benchmark.h"
+#include "../src/check.h" // NOTE: check.h is for internal use only!
+#include <cassert>
+#include <vector>
+
+namespace {
+
+class TestReporter : public benchmark::ConsoleReporter {
+ public:
+  virtual bool ReportContext(const Context& context) {
+    return ConsoleReporter::ReportContext(context);
+  }
+
+  virtual void ReportRuns(const std::vector<Run>& report) {
+    all_runs_.insert(all_runs_.end(), begin(report), end(report));
+    ConsoleReporter::ReportRuns(report);
+  }
+
+  TestReporter() {}
+  virtual ~TestReporter() {}
+
+  mutable std::vector<Run> all_runs_;
+};
+
+struct TestCase {
+  std::string name;
+  bool error_occurred;
+  std::string error_message;
+
+  typedef benchmark::BenchmarkReporter::Run Run;
+
+  void CheckRun(Run const& run) const {
+    CHECK(name == run.benchmark_name)
+        << "expected " << name << " got " << run.benchmark_name;
+    CHECK(error_occurred == run.error_occurred);
+    CHECK(error_message == run.error_message);
+    if (error_occurred) {
+      //CHECK(run.iterations == 0);
+    } else {
+      CHECK(run.iterations != 0);
+    }
+  }
+};
+
+std::vector<TestCase> ExpectedResults;
+
+int AddCases(const char* base_name, std::initializer_list<TestCase> const& v) {
+  for (auto TC : v) {
+    TC.name = base_name + TC.name;
+    ExpectedResults.push_back(std::move(TC));
+  }
+  return 0;
+}
+
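+// ADD_CASES registers the expected (name suffix, error_occurred,
+// error_message) tuples at static-initialization time; main() checks them
+// against the runs collected by TestReporter.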
+#define CONCAT(x, y) CONCAT2(x, y)
+#define CONCAT2(x, y) x##y
+#define ADD_CASES(...) \
+int CONCAT(dummy, __LINE__) = AddCases(__VA_ARGS__)
+
+}  // end namespace
+
+
+void BM_error_before_running(benchmark::State& state) {
+  state.SkipWithError("error message");
+  while (state.KeepRunning()) {
+    assert(false);
+  }
+}
+BENCHMARK(BM_error_before_running);
+ADD_CASES("BM_error_before_running",
+          {{"", true, "error message"}});
+
+void BM_error_during_running(benchmark::State& state) {
+  bool first_iter = true;
+  while (state.KeepRunning()) {
+    if (state.range_x() == 1 && state.thread_index <= (state.threads / 2)) {
+      assert(first_iter);
+      first_iter = false;
+      state.SkipWithError("error message");
+    } else {
+      state.PauseTiming();
+      state.ResumeTiming();
+    }
+  }
+}
+BENCHMARK(BM_error_during_running)->Arg(1)->Arg(2)->ThreadRange(1, 8);
+ADD_CASES(
+    "BM_error_during_running",
+    {{"/1/threads:1", true, "error message"},
+     {"/1/threads:2", true, "error message"},
+     {"/1/threads:4", true, "error message"},
+     {"/1/threads:8", true, "error message"},
+     {"/2/threads:1", false, ""},
+     {"/2/threads:2", false, ""},
+     {"/2/threads:4", false, ""},
+     {"/2/threads:8", false, ""}}
+);
+
+void BM_error_after_running(benchmark::State& state) {
+  while (state.KeepRunning()) {
+    benchmark::DoNotOptimize(state.iterations());
+  }
+  if (state.thread_index <= (state.threads / 2))
+    state.SkipWithError("error message");
+}
+BENCHMARK(BM_error_after_running)->ThreadRange(1, 8);
+ADD_CASES(
+    "BM_error_after_running",
+    {{"/threads:1", true, "error message"},
+     {"/threads:2", true, "error message"},
+     {"/threads:4", true, "error message"},
+     {"/threads:8", true, "error message"}}
+);
+
+void BM_error_while_paused(benchmark::State& state) {
+  bool first_iter = true;
+  while (state.KeepRunning()) {
+    if (state.range_x() == 1 && state.thread_index <= (state.threads / 2)) {
+      assert(first_iter);
+      first_iter = false;
+      state.PauseTiming();
+      state.SkipWithError("error message");
+    } else {
+      state.PauseTiming();
+      state.ResumeTiming();
+    }
+  }
+}
+BENCHMARK(BM_error_while_paused)->Arg(1)->Arg(2)->ThreadRange(1, 8);
+ADD_CASES(
+    "BM_error_while_paused",
+    {{"/1/threads:1", true, "error message"},
+     {"/1/threads:2", true, "error message"},
+     {"/1/threads:4", true, "error message"},
+     {"/1/threads:8", true, "error message"},
+     {"/2/threads:1", false, ""},
+     {"/2/threads:2", false, ""},
+     {"/2/threads:4", false, ""},
+     {"/2/threads:8", false, ""}}
+);
+
+
+int main(int argc, char* argv[]) {
+  benchmark::Initialize(&argc, argv);
+
+  TestReporter test_reporter;
+  benchmark::RunSpecifiedBenchmarks(&test_reporter);
+
+  typedef benchmark::BenchmarkReporter::Run Run;
+  auto EB = ExpectedResults.begin();
+
+  for (Run const& run : test_reporter.all_runs_) {
+    assert(EB != ExpectedResults.end());
+    EB->CheckRun(run);
+    ++EB;
+  }
+  assert(EB == ExpectedResults.end());
+
+  return 0;
+}