Simplify some of the OnePass code. Change-Id: I9713a93602a63f2e93700eb33b9e65126e61b699 Reviewed-on: https://code-review.googlesource.com/5391 Reviewed-by: Paul Wankadia <junyer@google.com>

commit: 8f8bb44754f8ca4401033a16f5e24c8e7d1c127a [log] [tgz]
author: Paul Wankadia <junyer@google.com> Tue Jul 26 20:24:50 2016 +1000
committer: Paul Wankadia <junyer@google.com> Tue Jul 26 10:27:25 2016 +0000
tree: ec32248fd41c92245642ed364586426dcbce5f0d
parent: 004b150574442fc48582c40c2be769530fe4dda5 [diff]
diff --git a/re2/onepass.cc b/re2/onepass.cc
index 7a29cfc..da90a86 100644
--- a/re2/onepass.cc
+++ b/re2/onepass.cc

@@ -191,13 +191,10 @@
       cap[i] = p;
 }
 
-// Compute a node pointer.
-// Basically (OneState*)(nodes + statesize*nodeindex)
-// but the version with the C++ casts overflows 80 characters (and is ugly).
-static inline OneState* IndexToNode(volatile uint8* nodes, int statesize,
+// Computes the OneState* for the given nodeindex.
+static inline OneState* IndexToNode(uint8* nodes, int statesize,
                                     int nodeindex) {
-  return reinterpret_cast<OneState*>(
-    const_cast<uint8*>(nodes + statesize*nodeindex));
+  return reinterpret_cast<OneState*>(nodes + statesize*nodeindex);
 }
 
 bool Prog::SearchOnePass(const StringPiece& text,
@@ -233,13 +230,10 @@
   if (anchor_end())
     kind = kFullMatch;
 
-  // State and act are marked volatile to
-  // keep the compiler from re-ordering the
-  // memory accesses walking over the NFA.
-  // This is worth about 5%.
-  volatile OneState* state = onepass_start_;
-  volatile uint8* nodes = onepass_nodes_;
-  volatile uint32 statesize = onepass_statesize_;
+  uint8* nodes = onepass_nodes_;
+  int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32);
+  // start() is always mapped to the zeroth OneState.
+  OneState* state = IndexToNode(nodes, statesize, 0);
   uint8* bytemap = bytemap_;
   const char* bp = text.begin();
   const char* ep = text.end();
@@ -374,7 +368,7 @@
 // Constructs and saves corresponding one-pass NFA on success.
 bool Prog::IsOnePass() {
   if (did_onepass_)
-    return onepass_start_ != NULL;
+    return onepass_nodes_ != NULL;
   did_onepass_ = true;
 
   if (start() == 0)  // no match
@@ -385,7 +379,7 @@
   // Limit max node count to 65000 as a conservative estimate to
   // avoid overflowing 16-bit node index in encoding.
   int maxnodes = 2 + inst_count(kInstByteRange);
-  int statesize = sizeof(OneState) + bytemap_range_*sizeof(uint32);
+  int statesize = sizeof(OneState) + bytemap_range()*sizeof(uint32);
   if (maxnodes >= 65000 || dfa_mem_ / 4 / statesize < maxnodes)
     return false;
 
@@ -609,8 +603,6 @@
   dfa_mem_ -= nalloc*statesize;
   onepass_nodes_ = new uint8[nalloc*statesize];
   memmove(onepass_nodes_, nodes.data(), nalloc*statesize);
-  onepass_statesize_ = statesize;
-  onepass_start_ = IndexToNode(onepass_nodes_, onepass_statesize_, 0);
 
   delete[] stack;
   delete[] nodebyid;

diff --git a/re2/prog.cc b/re2/prog.cc
index ab90377..5d8dd6c 100644
--- a/re2/prog.cc
+++ b/re2/prog.cc

@@ -102,13 +102,12 @@
     bytemap_range_(0),
     first_byte_(-1),
     flags_(0),
-    onepass_statesize_(0),
+    list_count_(0),
     inst_(NULL),
+    onepass_nodes_(NULL),
     dfa_first_(NULL),
     dfa_longest_(NULL),
-    dfa_mem_(0),
-    onepass_nodes_(NULL),
-    onepass_start_(NULL) {
+    dfa_mem_(0) {
 }
 
 Prog::~Prog() {

diff --git a/re2/prog.h b/re2/prog.h
index 8499851..61467c5 100644
--- a/re2/prog.h
+++ b/re2/prog.h

@@ -353,22 +353,19 @@
   int bytemap_range_;       // bytemap_[x] < bytemap_range_
   int first_byte_;          // required first byte for match, or -1 if none
   int flags_;               // regexp parse flags
-  int onepass_statesize_;   // byte size of each OneState* node
 
   int list_count_;            // count of lists (see above)
   int inst_count_[kNumInst];  // count of instructions by opcode
 
   Inst* inst_;              // pointer to instruction array
+  uint8* onepass_nodes_;    // data for OnePass nodes
 
   Mutex dfa_mutex_;    // Protects dfa_first_, dfa_longest_
   std::atomic<DFA*> dfa_first_;     // DFA cached for kFirstMatch
   std::atomic<DFA*> dfa_longest_;   // DFA cached for kLongestMatch and kFullMatch
   int64 dfa_mem_;      // Maximum memory for DFAs.
 
-  uint8 bytemap_[256];       // map from input bytes to byte classes
-
-  uint8* onepass_nodes_;     // data for OnePass nodes
-  OneState* onepass_start_;  // start node for OnePass program
+  uint8 bytemap_[256];      // map from input bytes to byte classes
 
   std::once_flag first_byte_once_;
commit	8f8bb44754f8ca4401033a16f5e24c8e7d1c127a	[log] [tgz]
author	Paul Wankadia <junyer@google.com>	Tue Jul 26 20:24:50 2016 +1000
committer	Paul Wankadia <junyer@google.com>	Tue Jul 26 10:27:25 2016 +0000
tree	ec32248fd41c92245642ed364586426dcbce5f0d
parent	004b150574442fc48582c40c2be769530fe4dda5 [diff]