Improve the bytemap computation one more time.

Change-Id: Id157eef2a18d02c1e4d0ad689d090826f13a8cc0
Reviewed-on: https://code-review.googlesource.com/5252
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/prog.cc b/re2/prog.cc
index f5a15da..17c387b 100644
--- a/re2/prog.cc
+++ b/re2/prog.cc
@@ -353,7 +353,7 @@
   vector<int> colors_;
   int nextcolor_;
   vector<pair<int, int>> colormap_;
-  vector<pair<int, int>> batch_;
+  vector<pair<int, int>> ranges_;
 
   DISALLOW_COPY_AND_ASSIGN(ByteMapBuilder);
 };
@@ -370,12 +370,12 @@
   if (lo == 0 && hi == 255)
     return;
 
-  batch_.emplace_back(lo, hi);
+  ranges_.emplace_back(lo, hi);
 }
 
 void ByteMapBuilder::Merge() {
-  for (vector<pair<int, int>>::const_iterator it = batch_.begin();
-       it != batch_.end();
+  for (vector<pair<int, int>>::const_iterator it = ranges_.begin();
+       it != ranges_.end();
        ++it) {
     int lo = it->first-1;
     int hi = it->second;
@@ -401,7 +401,7 @@
     }
   }
   colormap_.clear();
-  batch_.clear();
+  ranges_.clear();
 }
 
 void ByteMapBuilder::Build(uint8* bytemap, int* bytemap_range) {
@@ -466,6 +466,12 @@
         if (foldlo <= foldhi)
           builder.Mark(foldlo + 'A' - 'a', foldhi + 'A' - 'a');
       }
+      // If this Inst is not the last Inst in its list AND the next Inst is
+      // also a ByteRange AND the Insts have the same out, defer the merge.
+      if (!ip->last() &&
+          inst(id+1)->opcode() == kInstByteRange &&
+          ip->out() == inst(id+1)->out())
+        continue;
       builder.Merge();
     } else if (ip->opcode() == kInstEmptyWidth) {
       if (ip->empty() & (kEmptyBeginLine|kEmptyEndLine) &&
diff --git a/re2/testing/compile_test.cc b/re2/testing/compile_test.cc
index 7897b38..cd8406d 100644
--- a/re2/testing/compile_test.cc
+++ b/re2/testing/compile_test.cc
@@ -164,7 +164,7 @@
   DumpByteMap(".", Regexp::PerlX|Regexp::Latin1, &bytemap);
   EXPECT_EQ("[00-09] -> 0\n"
             "[0a-0a] -> 1\n"
-            "[0b-ff] -> 2\n",
+            "[0b-ff] -> 0\n",
             bytemap);
 }
 
@@ -176,9 +176,9 @@
   EXPECT_EQ("[00-2f] -> 0\n"
             "[30-39] -> 1\n"
             "[3a-40] -> 0\n"
-            "[41-46] -> 2\n"
+            "[41-46] -> 1\n"
             "[47-60] -> 0\n"
-            "[61-66] -> 2\n"
+            "[61-66] -> 1\n"
             "[67-ff] -> 0\n",
             bytemap);
 
@@ -195,13 +195,11 @@
             "[7b-ff] -> 0\n",
             bytemap);
 
-  // FIXME: The ASCII case-folding optimization creates too many byte classes!
+  // Bug in the ASCII case-folding optimization created too many byte classes.
   DumpByteMap("[^_]", Regexp::LikePerl|Regexp::Latin1, &bytemap);
-  EXPECT_EQ("[00-40] -> 0\n"
-            "[41-5a] -> 1\n"
-            "[5b-5e] -> 0\n"
-            "[5f-5f] -> 2\n"
-            "[60-ff] -> 3\n",
+  EXPECT_EQ("[00-5e] -> 0\n"
+            "[5f-5f] -> 1\n"
+            "[60-ff] -> 0\n",
             bytemap);
 }
 
@@ -215,17 +213,17 @@
   DumpByteMap(".", Regexp::PerlX, &bytemap);
   EXPECT_EQ("[00-09] -> 0\n"
             "[0a-0a] -> 1\n"
-            "[0b-7f] -> 2\n"
-            "[80-8f] -> 3\n"
-            "[90-9f] -> 4\n"
-            "[a0-bf] -> 5\n"
+            "[0b-7f] -> 0\n"
+            "[80-8f] -> 2\n"
+            "[90-9f] -> 3\n"
+            "[a0-bf] -> 4\n"
             "[c0-c1] -> 1\n"
-            "[c2-df] -> 6\n"
-            "[e0-e0] -> 7\n"
-            "[e1-ef] -> 8\n"
-            "[f0-f0] -> 9\n"
-            "[f1-f3] -> 10\n"
-            "[f4-f4] -> 11\n"
+            "[c2-df] -> 5\n"
+            "[e0-e0] -> 6\n"
+            "[e1-ef] -> 7\n"
+            "[f0-f0] -> 8\n"
+            "[f1-f3] -> 9\n"
+            "[f4-f4] -> 10\n"
             "[f5-ff] -> 1\n",
             bytemap);
 }