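Further reduce the maximum repeat count when fuzzing.

Replace the compile-time kMaxRepeat constant in re2/parse.cc (100 under
FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION, 1000 otherwise) with a runtime
limit that defaults to 1000 and can be changed through
Regexp::FUZZING_ONLY_set_maximum_repeat_count(); the fuzzer now sets it
to 10. Also rename Prog::TEST_dfa_should_bail_when_slow() to
Prog::TESTING_ONLY_set_dfa_should_bail_when_slow() so that both hooks
follow the same naming pattern.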
Change-Id: Icd0a5ce36d7b131996da18d7186605fdaaa28357
Reviewed-on: https://code-review.googlesource.com/c/re2/+/59090
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/dfa.cc b/re2/dfa.cc
index f292ff1..583303e 100644
--- a/re2/dfa.cc
+++ b/re2/dfa.cc
@@ -56,6 +56,10 @@
// Controls whether the DFA should bail out early if the NFA would be faster.
static bool dfa_should_bail_when_slow = true;
+void Prog::TESTING_ONLY_set_dfa_should_bail_when_slow(bool b) {
+ dfa_should_bail_when_slow = b;
+}
+
// Changing this to true compiles in prints that trace execution of the DFA.
// Generates a lot of output -- only useful for debugging.
static const bool ExtraDebug = false;
@@ -1966,10 +1970,6 @@
return GetDFA(kind)->BuildAllStates(cb);
}
-void Prog::TEST_dfa_should_bail_when_slow(bool b) {
- dfa_should_bail_when_slow = b;
-}
-
// Computes min and max for matching string.
// Won't return strings bigger than maxlen.
bool DFA::PossibleMatchRange(std::string* min, std::string* max, int maxlen) {
diff --git a/re2/fuzzing/re2_fuzzer.cc b/re2/fuzzing/re2_fuzzer.cc
index 8306f88..af1129f 100644
--- a/re2/fuzzing/re2_fuzzer.cc
+++ b/re2/fuzzing/re2_fuzzer.cc
@@ -12,6 +12,7 @@
#include "re2/prefilter.h"
#include "re2/re2.h"
+#include "re2/regexp.h"
using re2::StringPiece;
@@ -50,6 +51,10 @@
if (backslash_p > 1)
return;
+ // The default is 1000. Even 100 turned out to be too generous
+ // for fuzzing, empirically speaking, so let's try 10 instead.
+ re2::Regexp::FUZZING_ONLY_set_maximum_repeat_count(10);
+
RE2 re(pattern, options);
if (!re.ok())
return;
diff --git a/re2/parse.cc b/re2/parse.cc
index 3bba613..87ff2ca 100644
--- a/re2/parse.cc
+++ b/re2/parse.cc
@@ -44,12 +44,12 @@
namespace re2 {
-// Reduce the maximum repeat count by an order of magnitude when fuzzing.
-#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
-static const int kMaxRepeat = 100;
-#else
-static const int kMaxRepeat = 1000;
-#endif
+// Controls the maximum repeat count permitted by the parser.
+static int maximum_repeat_count = 1000;
+
+void Regexp::FUZZING_ONLY_set_maximum_repeat_count(int i) {
+ maximum_repeat_count = i;
+}
// Regular expression parse state.
// The list of parsed regexps so far is maintained as a vector of
@@ -568,7 +568,9 @@
bool Regexp::ParseState::PushRepetition(int min, int max,
const StringPiece& s,
bool nongreedy) {
- if ((max != -1 && max < min) || min > kMaxRepeat || max > kMaxRepeat) {
+ if ((max != -1 && max < min) ||
+ min > maximum_repeat_count ||
+ max > maximum_repeat_count) {
status_->set_code(kRegexpRepeatSize);
status_->set_error_arg(s);
return false;
@@ -591,7 +593,7 @@
stacktop_ = re;
if (min >= 2 || max >= 2) {
RepetitionWalker w;
- if (w.Walk(stacktop_, kMaxRepeat) == 0) {
+ if (w.Walk(stacktop_, maximum_repeat_count) == 0) {
status_->set_code(kRegexpRepeatSize);
status_->set_error_arg(s);
return false;
diff --git a/re2/prog.h b/re2/prog.h
index f125641..8ca9880 100644
--- a/re2/prog.h
+++ b/re2/prog.h
@@ -310,10 +310,6 @@
// FOR TESTING OR EXPERIMENTAL PURPOSES ONLY.
int BuildEntireDFA(MatchKind kind, const DFAStateCallback& cb);
- // Controls whether the DFA should bail out early if the NFA would be faster.
- // FOR TESTING ONLY.
- static void TEST_dfa_should_bail_when_slow(bool b);
-
// Compute bytemap.
void ComputeByteMap();
@@ -402,6 +398,10 @@
// Computes hints for ByteRange instructions in [begin, end).
void ComputeHints(std::vector<Inst>* flat, int begin, int end);
+ // Controls whether the DFA should bail out early if the NFA would be faster.
+ // FOR TESTING ONLY.
+ static void TESTING_ONLY_set_dfa_should_bail_when_slow(bool b);
+
private:
friend class Compiler;
diff --git a/re2/regexp.h b/re2/regexp.h
index 61882b5..2f40642 100644
--- a/re2/regexp.h
+++ b/re2/regexp.h
@@ -449,6 +449,10 @@
// regardless of the return value.
bool RequiredPrefixForAccel(std::string* prefix, bool* foldcase);
+ // Controls the maximum repeat count permitted by the parser.
+ // FOR FUZZING ONLY.
+ static void FUZZING_ONLY_set_maximum_repeat_count(int i);
+
private:
// Constructor allocates vectors as appropriate for operator.
explicit Regexp(RegexpOp op, ParseFlags parse_flags);
diff --git a/re2/testing/dfa_test.cc b/re2/testing/dfa_test.cc
index 9e15a41..842daaf 100644
--- a/re2/testing/dfa_test.cc
+++ b/re2/testing/dfa_test.cc
@@ -143,7 +143,7 @@
// NFA implementation instead. (The DFA loses its speed advantage
// if it can't get a good cache hit rate.)
// Tell the DFA to trudge along instead.
- Prog::TEST_dfa_should_bail_when_slow(false);
+ Prog::TESTING_ONLY_set_dfa_should_bail_when_slow(false);
state_cache_resets = 0;
search_failures = 0;
@@ -194,7 +194,7 @@
re->Decref();
// Reset to original behaviour.
- Prog::TEST_dfa_should_bail_when_slow(true);
+ Prog::TESTING_ONLY_set_dfa_should_bail_when_slow(true);
ASSERT_GT(state_cache_resets, 0);
ASSERT_EQ(search_failures, 0);
}
@@ -218,7 +218,7 @@
}
TEST(Multithreaded, SearchDFA) {
- Prog::TEST_dfa_should_bail_when_slow(false);
+ Prog::TESTING_ONLY_set_dfa_should_bail_when_slow(false);
state_cache_resets = 0;
search_failures = 0;
@@ -259,7 +259,7 @@
re->Decref();
// Reset to original behaviour.
- Prog::TEST_dfa_should_bail_when_slow(true);
+ Prog::TESTING_ONLY_set_dfa_should_bail_when_slow(true);
ASSERT_GT(state_cache_resets, 0);
ASSERT_EQ(search_failures, 0);
}