Add a target for fuzzers like libFuzzer and AFL.

Fixes #64.

Change-Id: If4f351c6642385308577b73c25ea1f1436214f58
Reviewed-on: https://code-review.googlesource.com/4851
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/Makefile b/Makefile
index 49a42a3..4a6d0c2 100644
--- a/Makefile
+++ b/Makefile
@@ -224,6 +224,14 @@
 	@mkdir -p obj/test
 	$(CXX) -o $@ obj/re2/testing/regexp_benchmark.o $(TESTOFILES) obj/util/benchmark.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
 
+# re2_fuzzer is a target for fuzzers like libFuzzer and AFL. Built this way, it
+# uses the stand-in driver util/fuzz.cc merely to check that the target builds
+# and runs on fixed inputs. For real fuzzing, refer to the documentation for
+# libFuzzer (llvm.org/docs/LibFuzzer.html) and AFL (lcamtuf.coredump.cx/afl/).
+obj/test/re2_fuzzer: obj/libre2.a obj/re2/fuzzing/re2_fuzzer.o obj/util/fuzz.o
+	@mkdir -p obj/test
+	$(CXX) -o $@ obj/re2/fuzzing/re2_fuzzer.o obj/util/fuzz.o obj/libre2.a $(RE2_LDFLAGS) $(LDFLAGS)
+
 ifdef REBUILD_TABLES
 re2/perl_groups.cc: re2/make_perl_groups.pl
 	perl $< > $@
@@ -274,6 +282,8 @@
 
 benchmark: obj/test/regexp_benchmark
 
+fuzz: obj/test/re2_fuzzer  # Builds the binary only; no recipe here runs it.
+
 install: obj/libre2.a obj/so/libre2.$(SOEXT)
 	mkdir -p $(DESTDIR)$(includedir)/re2 $(DESTDIR)$(libdir)/pkgconfig
 	$(INSTALL_DATA) $(INSTALL_HFILES) $(DESTDIR)$(includedir)/re2
diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc
new file mode 100644
index 0000000..a8eb1cd
--- /dev/null
+++ b/re2/fuzzing/re2_fuzzer.cc
@@ -0,0 +1,98 @@
+// Copyright 2016 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+
+#include "re2/re2.h"
+#include "util/logging.h"
+
+using re2::FLAGS_minloglevel;
+using re2::StringPiece;
+using std::string;
+
+// NOT static, NOT signed: presumably so the writes are kept and += may wrap.
+uint8_t dummy = 0;  // Only ever accumulated into, by Test() below.
+
+void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) {
+  RE2 re(pattern, options);  // Built once; every call below reuses it.
+  if (!re.ok())  // Skip inputs for which the RE2 object is not usable.
+    return;
+
+  StringPiece sp1, sp2, sp3, sp4;  // Outputs for FullMatch(); sp1, sp2 reused.
+  string s1, s2, s3, s4;  // Outputs for PartialMatch(); s3, s4 reused below.
+  int i1, i2, i3, i4;  // Outputs for Consume().
+  double d1, d2, d3, d4;  // Outputs for FindAndConsume().
+
+  RE2::FullMatch(text, re, &sp1, &sp2, &sp3, &sp4);
+  RE2::PartialMatch(text, re, &s1, &s2, &s3, &s4);
+
+  sp1 = sp2 = text;  // One copy per call; the input is passed by pointer.
+  RE2::Consume(&sp1, re, &i1, &i2, &i3, &i4);
+  RE2::FindAndConsume(&sp2, re, &d1, &d2, &d3, &d4);
+
+  s3 = s4 = text.ToString();  // Likewise, one string per Replace call.
+  RE2::Replace(&s3, re, "");
+  RE2::GlobalReplace(&s4, re, "");
+
+  // Exercise some other API functionality, accumulating the results in dummy.
+  dummy += re.NumberOfCapturingGroups();
+  dummy += RE2::QuoteMeta(pattern).size();
+}
+
+// Entry point for libFuzzer: runs Test() on data, whole and split; returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  if (size == 0)  // Nothing to test.
+    return 0;
+
+  // Suppress logging below FATAL severity.
+  FLAGS_minloglevel = 3;
+
+  // The one-at-a-time hash by Bob Jenkins; its low bits pick the options below.
+  uint32_t hash = 0;
+  for (size_t i = 0; i < size; i++) {
+    hash += data[i];
+    hash += (hash << 10);
+    hash ^= (hash >> 6);
+  }
+  hash += (hash << 3);
+  hash ^= (hash >> 11);
+  hash += (hash << 15);
+
+  RE2::Options options;  // One hash bit per option, so options vary with input.
+  options.set_log_errors(false);
+  options.set_encoding(hash & 1 ? RE2::Options::EncodingLatin1
+                                : RE2::Options::EncodingUTF8);
+  options.set_posix_syntax(hash & 2);
+  options.set_longest_match(hash & 4);
+  options.set_literal(hash & 8);
+  options.set_never_nl(hash & 16);
+  options.set_dot_nl(hash & 32);
+  options.set_never_capture(hash & 64);
+  options.set_case_sensitive(hash & 128);
+  options.set_perl_classes(hash & 256);
+  options.set_word_boundary(hash & 512);
+  options.set_one_line(hash & 1024);
+
+  const char* ptr = reinterpret_cast<const char*>(data);
+  int len = static_cast<int>(size);  // NOTE(review): narrows size_t to int.
+
+  StringPiece pattern(ptr, len);
+  StringPiece text(ptr, len);
+  Test(pattern, options, text);  // Whole input as both pattern and text.
+
+  for (int i = 2; i <= 4; i++) {  // Then split at 1/2, 1/3 and 1/4 of len.
+    if (len < i)  // Too short: the pattern part would be empty.
+      break;
+
+    int frac = len / i;
+    pattern.set(ptr, frac);  // Leading frac bytes form the pattern...
+    text.set(ptr + frac, len - frac);  // ...and the remainder is the text.
+    Test(pattern, options, text);
+  }
+
+  return 0;
+}
diff --git a/util/fuzz.cc b/util/fuzz.cc
new file mode 100644
index 0000000..9cac118
--- /dev/null
+++ b/util/fuzz.cc
@@ -0,0 +1,21 @@
+// Copyright 2016 The RE2 Authors.  All Rights Reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+// Entry point for libFuzzer.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size);
+
+int main(int argc, char** argv) {  // Stand-in driver; argc, argv are unused.
+  uint8_t data[32];
+  for (int i = 0; i < 32; i++) {  // 32 rounds, each with a fresh 32-byte input.
+    for (int j = 0; j < 32; j++) {
+      data[j] = random() & 0xFF;  // NOTE(review): random() is never seeded here.
+    }
+    LLVMFuzzerTestOneInput(data, 32);  // Return value is ignored.
+  }
+  return 0;
+}
diff --git a/util/test.cc b/util/test.cc
index b0167e7..0a751fe 100644
--- a/util/test.cc
+++ b/util/test.cc
@@ -23,7 +23,7 @@
   tests[ntests++].name = name;
 }
 
-int main(int argc, char **argv) {
+int main(int argc, char** argv) {
   for (int i = 0; i < ntests; i++) {
     printf("%s\n", tests[i].name);
     tests[i].fn();