Improve the bytemap computation one more time.
Change-Id: Id157eef2a18d02c1e4d0ad689d090826f13a8cc0
Reviewed-on: https://code-review.googlesource.com/5252
Reviewed-by: Paul Wankadia <junyer@google.com>
diff --git a/re2/prog.cc b/re2/prog.cc
index f5a15da..17c387b 100644
--- a/re2/prog.cc
+++ b/re2/prog.cc
@@ -353,7 +353,7 @@
vector<int> colors_;
int nextcolor_;
vector<pair<int, int>> colormap_;
- vector<pair<int, int>> batch_;
+ vector<pair<int, int>> ranges_;
DISALLOW_COPY_AND_ASSIGN(ByteMapBuilder);
};
@@ -370,12 +370,12 @@
if (lo == 0 && hi == 255)
return;
- batch_.emplace_back(lo, hi);
+ ranges_.emplace_back(lo, hi);
}
void ByteMapBuilder::Merge() {
- for (vector<pair<int, int>>::const_iterator it = batch_.begin();
- it != batch_.end();
+ for (vector<pair<int, int>>::const_iterator it = ranges_.begin();
+ it != ranges_.end();
++it) {
int lo = it->first-1;
int hi = it->second;
@@ -401,7 +401,7 @@
}
}
colormap_.clear();
- batch_.clear();
+ ranges_.clear();
}
void ByteMapBuilder::Build(uint8* bytemap, int* bytemap_range) {
@@ -466,6 +466,12 @@
if (foldlo <= foldhi)
builder.Mark(foldlo + 'A' - 'a', foldhi + 'A' - 'a');
}
+ // If this Inst is not the last Inst in its list AND the next Inst is
+ // also a ByteRange AND the Insts have the same out, defer the merge.
+ if (!ip->last() &&
+ inst(id+1)->opcode() == kInstByteRange &&
+ ip->out() == inst(id+1)->out())
+ continue;
builder.Merge();
} else if (ip->opcode() == kInstEmptyWidth) {
if (ip->empty() & (kEmptyBeginLine|kEmptyEndLine) &&
diff --git a/re2/testing/compile_test.cc b/re2/testing/compile_test.cc
index 7897b38..cd8406d 100644
--- a/re2/testing/compile_test.cc
+++ b/re2/testing/compile_test.cc
@@ -164,7 +164,7 @@
DumpByteMap(".", Regexp::PerlX|Regexp::Latin1, &bytemap);
EXPECT_EQ("[00-09] -> 0\n"
"[0a-0a] -> 1\n"
- "[0b-ff] -> 2\n",
+ "[0b-ff] -> 0\n",
bytemap);
}
@@ -176,9 +176,9 @@
EXPECT_EQ("[00-2f] -> 0\n"
"[30-39] -> 1\n"
"[3a-40] -> 0\n"
- "[41-46] -> 2\n"
+ "[41-46] -> 1\n"
"[47-60] -> 0\n"
- "[61-66] -> 2\n"
+ "[61-66] -> 1\n"
"[67-ff] -> 0\n",
bytemap);
@@ -195,13 +195,11 @@
"[7b-ff] -> 0\n",
bytemap);
- // FIXME: The ASCII case-folding optimization creates too many byte classes!
+ // Bug in the ASCII case-folding optimization created too many byte classes.
DumpByteMap("[^_]", Regexp::LikePerl|Regexp::Latin1, &bytemap);
- EXPECT_EQ("[00-40] -> 0\n"
- "[41-5a] -> 1\n"
- "[5b-5e] -> 0\n"
- "[5f-5f] -> 2\n"
- "[60-ff] -> 3\n",
+ EXPECT_EQ("[00-5e] -> 0\n"
+ "[5f-5f] -> 1\n"
+ "[60-ff] -> 0\n",
bytemap);
}
@@ -215,17 +213,17 @@
DumpByteMap(".", Regexp::PerlX, &bytemap);
EXPECT_EQ("[00-09] -> 0\n"
"[0a-0a] -> 1\n"
- "[0b-7f] -> 2\n"
- "[80-8f] -> 3\n"
- "[90-9f] -> 4\n"
- "[a0-bf] -> 5\n"
+ "[0b-7f] -> 0\n"
+ "[80-8f] -> 2\n"
+ "[90-9f] -> 3\n"
+ "[a0-bf] -> 4\n"
"[c0-c1] -> 1\n"
- "[c2-df] -> 6\n"
- "[e0-e0] -> 7\n"
- "[e1-ef] -> 8\n"
- "[f0-f0] -> 9\n"
- "[f1-f3] -> 10\n"
- "[f4-f4] -> 11\n"
+ "[c2-df] -> 5\n"
+ "[e0-e0] -> 6\n"
+ "[e1-ef] -> 7\n"
+ "[f0-f0] -> 8\n"
+ "[f1-f3] -> 9\n"
+ "[f4-f4] -> 10\n"
"[f5-ff] -> 1\n",
bytemap);
}