Simplify some of the OnePass code.
Change-Id: I9713a93602a63f2e93700eb33b9e65126e61b699
Reviewed-on: https://code-review.googlesource.com/5391
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/onepass.cc b/re2/onepass.cc
index 7a29cfc..da90a86 100644
--- a/re2/onepass.cc
+++ b/re2/onepass.cc
@@ -191,13 +191,10 @@
cap[i] = p;
}
-// Compute a node pointer.
-// Basically (OneState*)(nodes + statesize*nodeindex)
-// but the version with the C++ casts overflows 80 characters (and is ugly).
-static inline OneState* IndexToNode(volatile uint8* nodes, int statesize,
+// Computes the OneState* for the given nodeindex.
+static inline OneState* IndexToNode(uint8* nodes, int statesize,
int nodeindex) {
- return reinterpret_cast<OneState*>(
- const_cast<uint8*>(nodes + statesize*nodeindex));
+ return reinterpret_cast<OneState*>(nodes + statesize*nodeindex);
}
bool Prog::SearchOnePass(const StringPiece& text,
@@ -233,13 +230,10 @@
if (anchor_end())
kind = kFullMatch;
- // State and act are marked volatile to
- // keep the compiler from re-ordering the
- // memory accesses walking over the NFA.
- // This is worth about 5%.
- volatile OneState* state = onepass_start_;
- volatile uint8* nodes = onepass_nodes_;
- volatile uint32 statesize = onepass_statesize_;
+ uint8* nodes = onepass_nodes_;
+ int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32);
+ // start() is always mapped to the zeroth OneState.
+ OneState* state = IndexToNode(nodes, statesize, 0);
uint8* bytemap = bytemap_;
const char* bp = text.begin();
const char* ep = text.end();
@@ -374,7 +368,7 @@
// Constructs and saves corresponding one-pass NFA on success.
bool Prog::IsOnePass() {
if (did_onepass_)
- return onepass_start_ != NULL;
+ return onepass_nodes_ != NULL;
did_onepass_ = true;
if (start() == 0) // no match
@@ -385,7 +379,7 @@
// Limit max node count to 65000 as a conservative estimate to
// avoid overflowing 16-bit node index in encoding.
int maxnodes = 2 + inst_count(kInstByteRange);
- int statesize = sizeof(OneState) + bytemap_range_*sizeof(uint32);
+ int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32);
if (maxnodes >= 65000 || dfa_mem_ / 4 / statesize < maxnodes)
return false;
@@ -609,8 +603,6 @@
dfa_mem_ -= nalloc*statesize;
onepass_nodes_ = new uint8[nalloc*statesize];
memmove(onepass_nodes_, nodes.data(), nalloc*statesize);
- onepass_statesize_ = statesize;
- onepass_start_ = IndexToNode(onepass_nodes_, onepass_statesize_, 0);
delete[] stack;
delete[] nodebyid;
diff --git a/re2/prog.cc b/re2/prog.cc
index ab90377..5d8dd6c 100644
--- a/re2/prog.cc
+++ b/re2/prog.cc
@@ -102,13 +102,12 @@
bytemap_range_(0),
first_byte_(-1),
flags_(0),
- onepass_statesize_(0),
+ list_count_(0),
inst_(NULL),
+ onepass_nodes_(NULL),
dfa_first_(NULL),
dfa_longest_(NULL),
- dfa_mem_(0),
- onepass_nodes_(NULL),
- onepass_start_(NULL) {
+ dfa_mem_(0) {
}
Prog::~Prog() {
diff --git a/re2/prog.h b/re2/prog.h
index 8499851..61467c5 100644
--- a/re2/prog.h
+++ b/re2/prog.h
@@ -353,22 +353,19 @@
int bytemap_range_; // bytemap_[x] < bytemap_range_
int first_byte_; // required first byte for match, or -1 if none
int flags_; // regexp parse flags
- int onepass_statesize_; // byte size of each OneState* node
int list_count_; // count of lists (see above)
int inst_count_[kNumInst]; // count of instructions by opcode
Inst* inst_; // pointer to instruction array
+ uint8* onepass_nodes_; // data for OnePass nodes
Mutex dfa_mutex_; // Protects dfa_first_, dfa_longest_
std::atomic<DFA*> dfa_first_; // DFA cached for kFirstMatch
std::atomic<DFA*> dfa_longest_; // DFA cached for kLongestMatch and kFullMatch
int64 dfa_mem_; // Maximum memory for DFAs.
- uint8 bytemap_[256]; // map from input bytes to byte classes
-
- uint8* onepass_nodes_; // data for OnePass nodes
- OneState* onepass_start_; // start node for OnePass program
+ uint8 bytemap_[256]; // map from input bytes to byte classes
std::once_flag first_byte_once_;