[tools][symbolize] Make text parser more robust Currently text will never parse past "{{{" or "\033[". This causes issues if you have partial versions of either of these embedded in otherwise meaningful lines. So we'd like to not give up on the whole line just because one part looks a bit funny. This change makes text consume everything up to a regular expression which matches the parts of text that can actually be parsed. Change-Id: I5ed22d6d8cf5cc53fe5dcc5687ec87e13e9144e9

commit: 74fc0330a89229fd7016f8aeff8440a676bf36fe [log] [tgz]
author: Jake Ehrlich <jakehehrlich@google.com> Fri May 18 12:05:16 2018 -0700
committer: Jake Ehrlich <jakehehrlich@google.com> Fri May 18 15:59:27 2018 -0700
tree: afc07d0aa6a3e234e01989ce559edbc3cfbecce3
parent: fa12438b6e168011e6bd677036778c2b7cf62a21 [diff]
diff --git a/symbolize/filter_test.go b/symbolize/filter_test.go
index ea40b66..6b524da 100644
--- a/symbolize/filter_test.go
+++ b/symbolize/filter_test.go

@@ -111,9 +111,9 @@
 func TestMalformed(t *testing.T) {
 	// Parse a bad line
 	line := ParseLine("\033[1m Error at {{{pc:0x123879c0")
-
-	if line != nil {
-		t.Error("expected", nil, "got", line)
+	// Malformed lines should still parse
+	if line == nil {
+		t.Error("expected", "not nil", "got", line)
 	}
 }
 

diff --git a/symbolize/parser.go b/symbolize/parser.go
index d8988a8..9e460ae 100644
--- a/symbolize/parser.go
+++ b/symbolize/parser.go

@@ -10,11 +10,9 @@
 	"fmt"
 	"io"
 	"regexp"
-	"strings"
 )
 
 const (
-	elemPrefix   string = "{{{"
 	elemSuffix   string = "}}}"
 	colorPrefix  string = "\033["
 	modulePrefix string = "{{{module:"
@@ -23,21 +21,18 @@
 	btPrefix     string = "{{{bt:"
 )
 
-func findIndex(s ParserState, sub string) int {
-	idx := strings.Index(string(s), sub)
-	if idx == -1 {
-		return len(s)
-	}
-	return idx
-}
+var (
+	endTextRegex      = regexp.MustCompile("({{{.*}}})|(\033\\[[0-9]+m)")
+	beginLogLineRegex = regexp.MustCompile(`\[[0-9]+\.[0-9]+\] [0-9]+\.[0-9]+>`)
+)
 
 func ParseText(b *ParserState) interface{} {
-	idx := findIndex(*b, elemPrefix)
-	idx2 := findIndex(*b, colorPrefix)
-	if idx2 < idx {
-		idx = idx2
+	var idx int
+	if loc := endTextRegex.FindStringIndex(string(*b)); loc != nil {
+		idx = loc[0]
+	} else {
+		idx = len(*b)
 	}
-
 	if idx == 0 {
 		return nil
 	}
@@ -189,7 +184,6 @@
 	out := make(chan InputLine)
 	// This is not used for demuxing. It is a human readable line number.
 	var lineno uint64 = 1
-	re := regexp.MustCompile(`\[[0-9]+\.[0-9]+\] [0-9]+\.[0-9]+>`)
 	go func() {
 		defer close(out)
 		scanner := bufio.NewScanner(reader)
@@ -202,7 +196,7 @@
 			text := ParserState(scanner.Text())
 			b := &text
 			// Get the dummyText and needed text.
-			locs := re.FindStringIndex(string(text))
+			locs := beginLogLineRegex.FindStringIndex(string(text))
 			if locs == nil {
 				// This means the whole thing is dummy text.
 				var line InputLine

diff --git a/symbolize/parserstate.go b/symbolize/parserstate.go
index bb8a57f..2dfd2e4 100644
--- a/symbolize/parserstate.go
+++ b/symbolize/parserstate.go

@@ -29,17 +29,6 @@
 	return false
 }
 
-// This is like before but does not consume the expected delimiter
-func (b *ParserState) onlyBefore(what string) (string, error) {
-	idx := strings.Index(string(*b), what)
-	if idx == -1 {
-		return "", fmt.Errorf("expected '%s'", what)
-	}
-	str := (*b)[:idx]
-	*b = (*b)[idx:]
-	return string(str), nil
-}
-
 func (b *ParserState) before(what string) (string, error) {
 	idx := strings.Index(string(*b), what)
 	if idx == -1 {

diff --git a/symbolize/parserstate_test.go b/symbolize/parserstate_test.go
index 638ca3f..f1a8728 100644
--- a/symbolize/parserstate_test.go
+++ b/symbolize/parserstate_test.go

@@ -38,30 +38,6 @@
 	}
 }
 
-func TestOnlyBefore(t *testing.T) {
-	buf := ParserState("this is a test")
-	v1, err := buf.onlyBefore(" ")
-	if err != nil {
-		t.Error(err)
-	}
-	if v1 != "this" {
-		t.Error("expected", "this", "got", v1)
-	}
-	if string(buf) != " is a test" {
-		t.Error("expected", " is a test", "got", string(buf))
-	}
-	// The following is just to consume the trailing space
-	buf.before(" ")
-
-	_, err = buf.onlyBefore("#")
-	if err == nil {
-		t.Error("expected an error but got none")
-	}
-	if string(buf) != "is a test" {
-		t.Error("input consumed when it should not have been")
-	}
-}
-
 func TestBefore(t *testing.T) {
 	buf := ParserState("this is a test")
 	v1, err := buf.before(" ")
commit	74fc0330a89229fd7016f8aeff8440a676bf36fe	[log] [tgz]
author	Jake Ehrlich <jakehehrlich@google.com>	Fri May 18 12:05:16 2018 -0700
committer	Jake Ehrlich <jakehehrlich@google.com>	Fri May 18 15:59:27 2018 -0700
tree	afc07d0aa6a3e234e01989ce559edbc3cfbecce3
parent	fa12438b6e168011e6bd677036778c2b7cf62a21 [diff]