Allow BitState to use memchr(3) to find the first byte quickly.
Change-Id: I0e06144ebff0c77ce4000b9787de130bea9e083b
Reviewed-on: https://code-review.googlesource.com/4541
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/bitstate.cc b/re2/bitstate.cc
index 661a8ca..53fa219 100644
--- a/re2/bitstate.cc
+++ b/re2/bitstate.cc
@@ -345,6 +345,15 @@
return TrySearch(prog_->start(), text.begin());
}
+ // Shallow first byte analysis. We could go deeper.
+ int fb = -1;
+ Prog::Inst* ip = prog_->inst(prog_->start());
+ if (ip->opcode() == kInstByteRange &&
+ ip->lo() == ip->hi() &&
+ !(ip->foldcase() && 'a' <= ip->lo() && ip->lo() <= 'z') &&
+ ip->last())
+ fb = ip->lo();
+
// Unanchored search, starting from each possible text position.
// Notice that we have to try the empty string at the end of
// the text, so the loop condition is p <= text.end(), not p < text.end().
@@ -352,6 +361,13 @@
// but we are not clearing visited_ between calls to TrySearch,
// so no work is duplicated and it ends up still being linear.
for (const char* p = text.begin(); p <= text.end(); p++) {
+ // Try to use memchr to find the first byte quickly.
+ if (fb >= 0 && p < text.end() && (p[0] & 0xFF) != fb) {
+ p = reinterpret_cast<const char*>(memchr(p, fb, text.end() - p));
+ if (p == NULL)
+ p = text.end();
+ }
+
cap_[0] = p;
if (TrySearch(prog_->start(), p)) // Match must be leftmost; done.
return true;