Add an additional parameter for time units
diff --git a/include/benchmark/benchmark_api.h b/include/benchmark/benchmark_api.h
index 7a42025..251fd59 100644
--- a/include/benchmark/benchmark_api.h
+++ b/include/benchmark/benchmark_api.h
@@ -216,6 +216,13 @@
 }
 #endif
 
+// TimeUnit is passed to a benchmark in order to specify the order of magnitude
+// for the measured time.
+enum TimeUnit {
+  kNanosecond,   // printed as "ns" by the console reporter; the default unit
+  kMicrosecond,  // printed as "us"
+  kMillisecond   // printed as "ms"
+};
 
 // State is passed to a running Benchmark and contains state for the
 // benchmark to use.
@@ -390,6 +397,9 @@
   // REQUIRES: The function passed to the constructor must accept an arg1.
   Benchmark* Arg(int x);
 
+  // Set the time unit used when this benchmark's times are reported.
+  Benchmark* Unit(TimeUnit unit);
+
   // Run this benchmark once for a number of values picked from the
   // range [start..limit].  (start and limit are always picked.)
   // REQUIRES: The function passed to the constructor must accept an arg1.
@@ -534,6 +544,7 @@
 // Old-style macros
 #define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
 #define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->ArgPair((a1), (a2))
+#define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t))
 #define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
 #define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
   BENCHMARK(n)->RangePair((l1), (h1), (l2), (h2))
diff --git a/include/benchmark/reporter.h b/include/benchmark/reporter.h
index f2a8dc2..9c4a69d 100644
--- a/include/benchmark/reporter.h
+++ b/include/benchmark/reporter.h
@@ -36,13 +36,12 @@
 
     // The number of chars in the longest benchmark name.
     size_t name_field_width;
-    // The time unit for displayed execution time.
-    std::string time_unit;
   };
 
   struct Run {
     Run() :
       iterations(1),
+      time_unit(kNanosecond),
       real_accumulated_time(0),
       cpu_accumulated_time(0),
       bytes_per_second(0),
@@ -52,6 +51,7 @@
     std::string benchmark_name;
     std::string report_label;  // Empty if not set by benchmark.
     int64_t iterations;
+    TimeUnit time_unit;
     double real_accumulated_time;
     double cpu_accumulated_time;
 
@@ -86,17 +86,22 @@
     static void ComputeStats(std::vector<Run> const& reports, Run* mean, Run* stddev);
 };
 
+typedef std::pair<const char*,double> TimeUnitMultiplier;
+
 // Simple reporter that outputs benchmark data to the console. This is the
 // default reporter used by RunSpecifiedBenchmarks().
 class ConsoleReporter : public BenchmarkReporter {
  public:
   virtual bool ReportContext(const Context& context);
   virtual void ReportRuns(const std::vector<Run>& reports);
-protected:
+
+ protected:
   virtual void PrintRunData(const Run& report);
 
+ private:
+  TimeUnitMultiplier getTimeUnitAndMultiplier(TimeUnit unit);
+
   size_t name_field_width_;
-  std::string time_unit_;
 };
 
 class JSONReporter : public BenchmarkReporter {
diff --git a/src/benchmark.cc b/src/benchmark.cc
index dd37202..cb55f96 100644
--- a/src/benchmark.cc
+++ b/src/benchmark.cc
@@ -64,10 +64,6 @@
              "The number of runs of each benchmark. If greater than 1, the "
              "mean and standard deviation of the runs will be reported.");
 
-DEFINE_string(benchmark_time_unit, "ns",
-              "The time unit to use for console output. Valid values are "
-              "'ns', or 'ms'.");
-
 DEFINE_string(benchmark_format, "tabular",
               "The format to use for console output. Valid values are "
               "'tabular', 'json', or 'csv'.");
@@ -265,6 +261,7 @@
   int            arg1;
   bool           has_arg2;
   int            arg2;
+  TimeUnit       time_unit;
   bool           use_real_time;
   double         min_time;
   int            threads;    // Number of concurrent threads to use
@@ -298,6 +295,7 @@
   ~BenchmarkImp();
 
   void Arg(int x);
+  void Unit(TimeUnit unit);
   void Range(int start, int limit);
   void DenseRange(int start, int limit);
   void ArgPair(int start, int limit);
@@ -317,6 +315,7 @@
   std::string name_;
   int arg_count_;
   std::vector< std::pair<int, int> > args_;  // Args for all benchmark runs
+  TimeUnit time_unit_;
   double min_time_;
   bool use_real_time_;
   std::vector<int> thread_counts_;
@@ -376,6 +375,7 @@
         instance.arg1 = args.first;
         instance.has_arg2 = family->arg_count_ == 2;
         instance.arg2 = args.second;
+        instance.time_unit = family->time_unit_;
         instance.min_time = family->min_time_;
         instance.use_real_time = family->use_real_time_;
         instance.threads = num_threads;
@@ -410,7 +410,7 @@
 }
 
 BenchmarkImp::BenchmarkImp(const char* name)
-    : name_(name), arg_count_(-1),
+    : name_(name), arg_count_(-1), time_unit_(kNanosecond),
       min_time_(0.0), use_real_time_(false) {
 }
 
@@ -423,6 +423,10 @@
   args_.emplace_back(x, -1);
 }
 
+void BenchmarkImp::Unit(TimeUnit unit) {
+  time_unit_ = unit;  // later copied into every instance of this family
+}
+
 void BenchmarkImp::Range(int start, int limit) {
   CHECK(arg_count_ == -1 || arg_count_ == 1);
   arg_count_ = 1;
@@ -535,6 +539,11 @@
   return this;
 }
 
+Benchmark* Benchmark::Unit(TimeUnit unit) {
+  imp_->Unit(unit);  // the implementation object stores the unit
+  return this;       // returned so calls can be chained: ->Arg(..)->Unit(..)
+}
+
 Benchmark* Benchmark::Range(int start, int limit) {
   imp_->Range(start, limit);
   return this;
@@ -703,6 +712,7 @@
         report.report_label = label;
         // Report the total iterations across all threads.
         report.iterations = static_cast<int64_t>(iters) * b.threads;
+        report.time_unit = b.time_unit;
         report.real_accumulated_time = real_accumulated_time;
         report.cpu_accumulated_time = cpu_accumulated_time;
         report.bytes_per_second = bytes_per_second;
@@ -783,7 +793,7 @@
   }
 }
 
-void RunMatchingBenchmarks(const std::string& spec, const std::string& timeUnit,
+void RunMatchingBenchmarks(const std::string& spec,
                            BenchmarkReporter* reporter) {
   CHECK(reporter != nullptr);
   if (spec.empty()) return;
@@ -808,7 +818,6 @@
 
   context.cpu_scaling_enabled = CpuScalingEnabled();
   context.name_field_width = name_field_width;
-  context.time_unit = timeUnit;
 
   if (reporter->ReportContext(context)) {
     for (const auto& benchmark : benchmarks) {
@@ -843,7 +852,6 @@
     internal::PrintBenchmarkList();
     return;
   }
-  std::string timeUnit = FLAGS_benchmark_time_unit;
   std::string spec = FLAGS_benchmark_filter;
   if (spec.empty() || spec == "all")
     spec = ".";  // Regexp that matches all benchmarks
@@ -853,7 +861,7 @@
     default_reporter = internal::GetDefaultReporter();
     reporter = default_reporter.get();
   }
-  internal::RunMatchingBenchmarks(spec, timeUnit, reporter);
+  internal::RunMatchingBenchmarks(spec, reporter);
   reporter->Finalize();
 }
 
@@ -866,7 +874,6 @@
           "          [--benchmark_filter=<regex>]\n"
           "          [--benchmark_min_time=<min_time>]\n"
           "          [--benchmark_repetitions=<num_repetitions>]\n"
-          "          [--benchmark_time_unit=<ns|ms>]\n"
           "          [--benchmark_format=<tabular|json|csv>]\n"
           "          [--color_print={true|false}]\n"
           "          [--v=<verbosity>]\n");
@@ -885,8 +892,6 @@
                         &FLAGS_benchmark_min_time) ||
         ParseInt32Flag(argv[i], "benchmark_repetitions",
                        &FLAGS_benchmark_repetitions) ||
-        ParseStringFlag(argv[i], "benchmark_time_unit",
-                        &FLAGS_benchmark_time_unit) ||
         ParseStringFlag(argv[i], "benchmark_format",
                         &FLAGS_benchmark_format) ||
         ParseBoolFlag(argv[i], "color_print",
@@ -901,11 +906,6 @@
     }
   }
 
-  if (FLAGS_benchmark_time_unit != "ns" &&
-      FLAGS_benchmark_time_unit != "ms") {
-    PrintUsageAndExit();
-  }
-
   if (FLAGS_benchmark_format != "tabular" &&
       FLAGS_benchmark_format != "json" &&
       FLAGS_benchmark_format != "csv") {
diff --git a/src/console_reporter.cc b/src/console_reporter.cc
index 6af5157..c07ed5a 100644
--- a/src/console_reporter.cc
+++ b/src/console_reporter.cc
@@ -18,6 +18,7 @@
 #include <cstdio>
 #include <iostream>
 #include <string>
+#include <tuple>
 #include <vector>
 
 #include "check.h"
@@ -29,7 +30,6 @@
 
 bool ConsoleReporter::ReportContext(const Context& context) {
   name_field_width_ = context.name_field_width;
-  time_unit_ = context.time_unit;
 
   std::cerr << "Run on (" << context.num_cpus << " X " << context.mhz_per_cpu
             << " MHz CPU " << ((context.num_cpus > 1) ? "s" : "") << ")\n";
@@ -47,11 +47,9 @@
                "affected.\n";
 #endif
 
-  std::string timeLabel = "Time(" + time_unit_ + ")";
-  std::string cpuLabel = "CPU(" + time_unit_ + ")";
-  int output_width = fprintf(stdout, "%-*s %10s %10s %10s\n",
+  int output_width = fprintf(stdout, "%-*s %13s %13s %10s\n",
                              static_cast<int>(name_field_width_), "Benchmark",
-                             timeLabel.c_str(), cpuLabel.c_str(), "Iterations");
+                             "Time", "CPU", "Iterations");
   std::cout << std::string(output_width - 1, '-') << "\n";
 
   return true;
@@ -95,21 +93,26 @@
                    " items/s");
   }
 
-  double const multiplier = time_unit_ == "ns" ? 1e9 : 1e3; // nano second or
-                                                            // millis multiplier
+  double multiplier;
+  const char* timeLabel;
+  std::tie(timeLabel, multiplier) = getTimeUnitAndMultiplier(result.time_unit);
 
   ColorPrintf(COLOR_GREEN, "%-*s ",
               name_field_width_, result.benchmark_name.c_str());
   if (result.iterations == 0) {
-    ColorPrintf(COLOR_YELLOW, "%10.0f %10.0f ",
+    ColorPrintf(COLOR_YELLOW, "%10.0f %s %10.0f %s ",
                 result.real_accumulated_time * multiplier,
-                result.cpu_accumulated_time * multiplier);
+                timeLabel,
+                result.cpu_accumulated_time * multiplier,
+                timeLabel);
   } else {
-    ColorPrintf(COLOR_YELLOW, "%10.0f %10.0f ",
+    ColorPrintf(COLOR_YELLOW, "%10.0f %s %10.0f %s ",
                 (result.real_accumulated_time * multiplier) /
                     (static_cast<double>(result.iterations)),
+                timeLabel,
                 (result.cpu_accumulated_time * multiplier) /
-                    (static_cast<double>(result.iterations)));
+                    (static_cast<double>(result.iterations)),
+                timeLabel);
   }
   ColorPrintf(COLOR_CYAN, "%10lld", result.iterations);
   ColorPrintf(COLOR_DEFAULT, "%*s %*s %s\n",
@@ -118,4 +121,16 @@
               result.report_label.c_str());
 }
 
+// Returns the label printed for `unit` paired with the factor applied to the
+// accumulated times before printing; any other value is treated as "ns".
+TimeUnitMultiplier ConsoleReporter::getTimeUnitAndMultiplier(TimeUnit unit) {
+  if (unit == kMillisecond) {
+    return TimeUnitMultiplier("ms", 1e3);
+  }
+  if (unit == kMicrosecond) {
+    return TimeUnitMultiplier("us", 1e6);
+  }
+  return TimeUnitMultiplier("ns", 1e9);
+}
+
 }  // end namespace benchmark
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index a10a53a..196c0ed 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,5 +1,8 @@
 # Enable the tests
 
+# Allow the source files to find headers in src/
+include_directories(${PROJECT_SOURCE_DIR}/src)
+
 find_package(Threads REQUIRED)
 
 set(CXX03_FLAGS "${CMAKE_CXX_FLAGS}")
diff --git a/test/options_test.cc b/test/options_test.cc
index d4c682d..47563fa 100644
--- a/test/options_test.cc
+++ b/test/options_test.cc
@@ -1,11 +1,22 @@
 #include "benchmark/benchmark_api.h"
+#include "sleep.h"
 
 void BM_basic(benchmark::State& state) {
   while (state.KeepRunning()) {
   }
 }
+
+void BM_basic_slow(benchmark::State& state) {
+  while (state.KeepRunning()) {
+    benchmark::SleepForMilliseconds(state.range_x());  // sleep range_x() ms
+  }
+}
+
 BENCHMARK(BM_basic);
 BENCHMARK(BM_basic)->Arg(42);
+BENCHMARK(BM_basic_slow)->Arg(10)->Unit(benchmark::kNanosecond);
+BENCHMARK(BM_basic_slow)->Arg(100)->Unit(benchmark::kMicrosecond);
+BENCHMARK(BM_basic_slow)->Arg(1000)->Unit(benchmark::kMillisecond);
 BENCHMARK(BM_basic)->Range(1, 8);
 BENCHMARK(BM_basic)->DenseRange(10, 15);
 BENCHMARK(BM_basic)->ArgPair(42, 42);