Precompute bit_state_text_max_size during compilation.
Change-Id: I293d8229b74486e4196eb1a3c8e455b3ae2f098c
Reviewed-on: https://code-review.googlesource.com/c/re2/+/59410
Reviewed-by: Perry Lorier <perryl@google.com>
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/prog.cc b/re2/prog.cc
index 396b46c..55dc105 100644
--- a/re2/prog.cc
+++ b/re2/prog.cc
@@ -118,6 +118,7 @@
prefix_foldcase_(false),
prefix_size_(0),
list_count_(0),
+ bit_state_text_max_size_(0),
dfa_mem_(0),
dfa_first_(NULL),
dfa_longest_(NULL) {
@@ -640,6 +641,11 @@
for (int i = 0; i < list_count_; ++i)
list_heads_[flatmap[i]] = i;
}
+
+ // BitState allocates a bitmap of size list_count_ * (text.size()+1)
+ // for tracking pairs of possibilities that it has already explored.
+ const size_t kBitStateBitmapMaxSize = 256*1024; // max size in bits
+ bit_state_text_max_size_ = kBitStateBitmapMaxSize / list_count_ - 1;
}
void Prog::MarkSuccessors(SparseArray<int>* rootmap,
diff --git a/re2/prog.h b/re2/prog.h
index e701e8c..4af012a 100644
--- a/re2/prog.h
+++ b/re2/prog.h
@@ -207,6 +207,7 @@
int list_count() { return list_count_; }
int inst_count(InstOp op) { return inst_count_[op]; }
uint16_t* list_heads() { return list_heads_.data(); }
+ size_t bit_state_text_max_size() { return bit_state_text_max_size_; }
int64_t dfa_mem() { return dfa_mem_; }
void set_dfa_mem(int64_t dfa_mem) { dfa_mem_ = dfa_mem; }
bool anchor_start() { return anchor_start_; }
@@ -429,10 +430,11 @@
};
};
- int list_count_; // count of lists (see above)
- int inst_count_[kNumInst]; // count of instructions by opcode
- PODArray<uint16_t> list_heads_; // sparse array enumerating list heads
- // not populated if size_ is overly large
+ int list_count_; // count of lists (see above)
+ int inst_count_[kNumInst]; // count of instructions by opcode
+ PODArray<uint16_t> list_heads_; // sparse array enumerating list heads
+ // not populated if size_ is overly large
+ size_t bit_state_text_max_size_; // upper bound (inclusive) on text.size()
PODArray<Inst> inst_; // pointer to instruction array
PODArray<uint8_t> onepass_nodes_; // data for OnePass nodes
diff --git a/re2/re2.cc b/re2/re2.cc
index 150c5ad..c027133 100644
--- a/re2/re2.cc
+++ b/re2/re2.cc
@@ -690,15 +690,9 @@
if (options_.longest_match())
kind = Prog::kLongestMatch;
- bool can_one_pass = (is_one_pass_ && ncap <= Prog::kMaxOnePassCapture);
-
- // BitState allocates a bitmap of size prog_->list_count() * text.size().
- // It also allocates a stack of 3-word structures which could potentially
- // grow as large as prog_->list_count() * text.size(), but in practice is
- // much smaller.
- const int kMaxBitStateBitmapSize = 256*1024; // bitmap size <= max (bits)
+ bool can_one_pass = is_one_pass_ && ncap <= Prog::kMaxOnePassCapture;
bool can_bit_state = prog_->CanBitState();
- size_t bit_state_text_max = kMaxBitStateBitmapSize / prog_->list_count();
+ size_t bit_state_text_max_size = prog_->bit_state_text_max_size();
#ifdef RE2_HAVE_THREAD_LOCAL
hooks::context = this;
@@ -805,7 +799,8 @@
skipped_test = true;
break;
}
- if (can_bit_state && text.size() <= bit_state_text_max && ncap > 1) {
+ if (can_bit_state && text.size() <= bit_state_text_max_size &&
+ ncap > 1) {
skipped_test = true;
break;
}
@@ -852,7 +847,7 @@
LOG(ERROR) << "SearchOnePass inconsistency";
return false;
}
- } else if (can_bit_state && subtext1.size() <= bit_state_text_max) {
+ } else if (can_bit_state && subtext1.size() <= bit_state_text_max_size) {
if (!prog_->SearchBitState(subtext1, text, anchor,
kind, submatch, ncap)) {
if (!skipped_test && options_.log_errors())