[tap] Implement TAP parser
This lets infra tools consume the output of //tools/testrunner. The
parser does not yet support YAML documents, directives after the plan
line, etc. We can add those features as needed.
Change-Id: Ia2fa9962dbd90bdd4d3d1e69eafc93255f735e09
diff --git a/tap/parser.go b/tap/parser.go
new file mode 100644
index 0000000..a4d6b13
--- /dev/null
+++ b/tap/parser.go
@@ -0,0 +1,224 @@
+// Copyright 2019 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package tap
+
+import (
+ "errors"
+ "fmt"
+ "log"
+ "strconv"
+ "strings"
+
+ "fuchsia.googlesource.com/tools/tap/tokenizer"
+)
+
+// Parse parses the given input into a Document. The input is allowed to contain
+// garbage lines; the parser will skip them and parse as much of the input as possible.
+// The only exception is that the first line of input must be a TAP version header of
+// the form "TAP version XXX".
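+//
+// A minimal usage sketch (field names as defined below in tap.go):
+//
+//	doc, _ := Parse([]byte("TAP version 13\n1..1\nok 1 - passed\n"))
+//	// doc.Version == 13; doc.Plan == Plan{Start: 1, End: 1}
+//	// doc.TestLines[0].Description == "- passed"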
+func Parse(input []byte) (*Document, error) {
+ output := make(chan *Document)
+ go parse(tokenizer.NewTokenStream(input), output)
+ return <-output, nil
+}
+
+// state represents a parser state. Each state takes the current stream of input tokens
+// and the current Document and attempts to parse the next line of input. A state must
+// return the next state to use, even when an error is encountered. If nil is returned,
+// parsing stops.
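+// For example, parseVersion returns parseNextLine once the version header has
+// been consumed, and discardLine returns parseNextLine after skipping a line.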
+type state func(*tokenizer.TokenStream, *Document) (state, error)
+
+// parse parses a Document from the given Token stream. The result is emitted on the
+// output channel.
+func parse(tokens *tokenizer.TokenStream, output chan<- *Document) {
+ document := &Document{}
+
+ for state := parseVersion; state != nil; {
+ next, err := state(tokens, document)
+ if err != nil {
+ // Garbage lines are allowed; treat errors as non-fatal.
+ log.Println(err)
+ }
+ state = next
+ }
+
+ output <- document
+}
+
+// discardLine is a parser state that throws away every token until a newline or EOF.
+func discardLine(tokens *tokenizer.TokenStream, _ *Document) (state, error) {
+ for {
+ token := tokens.Peek()
+ switch {
+ case token.Type == tokenizer.TypeEOF:
+ return nil, nil
+ case token.Type != tokenizer.TypeNewline:
+ tokens.Next()
+ default:
+ tokens.Next() // Skip the newline.
+ return parseNextLine, nil
+ }
+ }
+}
+
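+// parseNextLine selects the next parser state based on the first token of the line.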
+func parseNextLine(tokens *tokenizer.TokenStream, doc *Document) (state, error) {
+ if tokens.Peek().Type == tokenizer.TypeEOF {
+ return nil, nil
+ }
+
+ if tokens.Peek().Type == tokenizer.TypeNumber {
+ return parsePlan, nil
+ }
+
+ if tokens.Peek().Value == "ok" || tokens.Peek().Value == "not" {
+ return parseTestLine, nil
+ }
+
+ return parseNextLine, unexpectedTokenError("one of 'ok', 'not' or a number", tokens.Next())
+}
+
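+// parseVersion parses a TAP version header of the form "TAP version XXX".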
+func parseVersion(tokens *tokenizer.TokenStream, doc *Document) (state, error) {
+ token := tokens.Next()
+ if token.Value != "TAP" {
+ return nil, unexpectedTokenError("'TAP'", token)
+ }
+
+ token = tokens.Next()
+ if token.Value != "version" {
+ return nil, unexpectedTokenError("'version'", token)
+ }
+
+ token = tokens.Next()
+ if token.Type != tokenizer.TypeNumber {
+ return nil, unexpectedTokenError("a version number", token)
+ }
+
+ version, err := strconv.ParseInt(token.Value, 10, 64)
+ if err != nil {
+ return nil, parserError(err.Error())
+ }
+
+ doc.Version = Version(version)
+ return parseNextLine, eat(tokens, tokenizer.TypeNewline)
+}
+
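+// parsePlan parses a plan line of the form "N..M".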
+func parsePlan(tokens *tokenizer.TokenStream, doc *Document) (state, error) {
+ if doc.Plan.Start != 0 || doc.Plan.End != 0 {
+ return discardLine, errors.New("plan has already been parsed")
+ }
+
+ token := tokens.Peek()
+ if token.Type != tokenizer.TypeNumber {
+ return discardLine, unexpectedTokenError("a number", token)
+ }
+
+ start, err := strconv.ParseInt(tokens.Next().Value, 10, 64)
+ if err != nil {
+ return discardLine, parserError(err.Error())
+ }
+
+ if err := eat(tokens, tokenizer.TypeDot); err != nil {
+ return discardLine, err
+ }
+
+ if err := eat(tokens, tokenizer.TypeDot); err != nil {
+ return discardLine, err
+ }
+
+ token = tokens.Peek()
+ if token.Type != tokenizer.TypeNumber {
+ return discardLine, unexpectedTokenError("a number > 1", token)
+ }
+
+ end, err := strconv.ParseInt(tokens.Next().Value, 10, 64)
+ if err != nil {
+ return discardLine, parserError(err.Error())
+ }
+
+ doc.Plan = Plan{Start: int(start), End: int(end)}
+ return parseNextLine, eat(tokens, tokenizer.TypeNewline)
+}
+
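+// parseTestLine parses a line of the form
+// "[not] ok [number] [description] [# directive explanation]".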
+func parseTestLine(tokens *tokenizer.TokenStream, doc *Document) (state, error) {
+ var testLine TestLine
+
+ // Parse test status.
+ token := tokens.Next()
+ switch token.Value {
+ case "not":
+ testLine.Ok = false
+ token = tokens.Next()
+ if token.Value != "ok" {
+ return discardLine, unexpectedTokenError("'ok'", token)
+ }
+ case "ok":
+ testLine.Ok = true
+ default:
+ return discardLine, unexpectedTokenError("'ok' or 'not ok'", token)
+ }
+
+ // Parse optional test number.
+ testLine.Count = len(doc.TestLines) + 1
+ if tokens.Peek().Type == tokenizer.TypeNumber {
+ count, err := strconv.ParseInt(tokens.Next().Value, 10, 64)
+ if err != nil {
+ return discardLine, parserError(err.Error())
+ }
+ testLine.Count = int(count)
+ }
+
+ // Parse optional description
+ var description []string
+ for tokens.Peek().Type == tokenizer.TypeText {
+ description = append(description, tokens.Next().Value)
+ }
+ testLine.Description = strings.Join(description, " ")
+
+ // Move to the next line if there's no directive.
+ if tokens.Peek().Type != tokenizer.TypePound {
+ doc.TestLines = append(doc.TestLines, testLine)
+ return discardLine, nil
+ }
+ tokens.Next() // Consume the '#'.
+
+ // Parse optional directive.
+ token = tokens.Next()
+ switch token.Value {
+ case "TODO":
+ testLine.Directive = Todo
+ case "SKIP":
+ testLine.Directive = Skip
+ default:
+ return discardLine, unexpectedTokenError("a directive", token)
+ }
+
+ // Parse explanation.
+ var explanation []string
+ for tokens.Peek().Type == tokenizer.TypeText {
+ explanation = append(explanation, tokens.Next().Value)
+ }
+ testLine.Explanation = strings.Join(explanation, " ")
+
+ doc.TestLines = append(doc.TestLines, testLine)
+ return parseNextLine, eat(tokens, tokenizer.TypeNewline)
+}
+
+// eat consumes the next token from the stream iff its type matches typ. If the types
+// are different, an error is returned.
+func eat(tokens *tokenizer.TokenStream, typ tokenizer.TokenType) error {
+ token := tokens.Peek()
+ if token.Type != typ {
+ return unexpectedTokenError(string(typ), token)
+ }
+ tokens.Next()
+ return nil
+}
+
+func unexpectedTokenError(wanted string, token tokenizer.Token) error {
+ return parserError("got %q but wanted %s", token, wanted)
+}
+
+func parserError(format string, args ...interface{}) error {
+ return fmt.Errorf("parse error: "+format, args...)
+}
diff --git a/tap/parser_test.go b/tap/parser_test.go
new file mode 100644
index 0000000..9cfd75e
--- /dev/null
+++ b/tap/parser_test.go
@@ -0,0 +1,137 @@
+// Copyright 2019 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package tap
+
+import (
+ "reflect"
+ "strings"
+ "testing"
+)
+
+func TestParse(t *testing.T) {
+ tests := []struct {
+ name string
+ input string
+ expected *Document
+ }{
+ {
+ name: "version",
+ input: strings.TrimSpace(`
+ TAP version 13
+ `),
+ expected: &Document{
+ Version: 13,
+ },
+ },
+ {
+ name: "version and plan",
+ input: strings.TrimSpace(`
+TAP version 13
+1..2
+`),
+ expected: &Document{
+ Version: 13,
+ Plan: Plan{Start: 1, End: 2},
+ },
+ },
+ {
+ name: "version and plan and test lines.",
+ input: strings.TrimSpace(`
+TAP version 13
+1..4
+ok 1 - This test passed
+ok 2 # TODO this test is disabled
+not ok 3 - This test failed
+ok 4 - This test passed also # TODO congratulate the author
+`),
+ expected: &Document{
+ Version: 13,
+ Plan: Plan{Start: 1, End: 4},
+ TestLines: []TestLine{
+ {Ok: true, Count: 1, Description: "- This test passed"},
+ {Ok: true, Count: 2, Directive: Todo, Explanation: "this test is disabled"},
+ {Ok: false, Count: 3, Description: "- This test failed"},
+ {Ok: true, Count: 4, Description: "- This test passed also", Directive: Todo, Explanation: "congratulate the author"},
+ },
+ },
+ },
+ {
+ name: "version and test lines with plan at the end",
+ input: strings.TrimSpace(`
+TAP version 13
+ok 1 - This test passed
+ok 2 # TODO this test is disabled
+not ok 3 - This test failed
+1..3
+ `),
+ expected: &Document{
+ Version: 13,
+ Plan: Plan{
+ Start: 1,
+ End: 3,
+ },
+ TestLines: []TestLine{
+ {Ok: true, Count: 1, Description: "- This test passed"},
+ {Ok: true, Count: 2, Directive: Todo, Explanation: "this test is disabled"},
+ {Ok: false, Count: 3, Description: "- This test failed"},
+ },
+ },
+ },
+ {
+ name: "with garbage output interleaved",
+ input: strings.TrimSpace(`
+TAP version 13
+ERROR: segfault at 0x33123. print stackdump;
+0x00001fff: 0x88881
+0x00001ffe: 0x88881
+0x00001ffd: 0x88881
+1..3
+0x00001ffc: 0x88881
+ok 1 - This test passed
+ok 2 # TODO this test is disabled
+exiting
+not ok 3 - This test failed
+ `),
+ expected: &Document{
+ Version: 13,
+ Plan: Plan{
+ Start: 1,
+ End: 3,
+ },
+ TestLines: []TestLine{
+ {Ok: true, Count: 1, Description: "- This test passed"},
+ {Ok: true, Count: 2, Directive: Todo, Explanation: "this test is disabled"},
+ {Ok: false, Count: 3, Description: "- This test failed"},
+ },
+ },
+ },
+ {
+ name: "line that looks like test plan, but is not",
+ input: strings.TrimSpace(`
+TAP version 13
+1..
+not ok 3 - This test failed
+ `),
+ expected: &Document{
+ Version: 13,
+ TestLines: []TestLine{
+ {Ok: false, Count: 3, Description: "- This test failed"},
+ },
+ },
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ doc, err := Parse([]byte(tt.input))
+ if err != nil {
+ t.Fatal(err)
+ }
+ if !reflect.DeepEqual(doc, tt.expected) {
+ t.Errorf("got\n%+v\nbut wanted\n%+v", doc, tt.expected)
+ }
+ })
+ }
+}
diff --git a/tap/tap.go b/tap/tap.go
new file mode 100644
index 0000000..0b89a7c
--- /dev/null
+++ b/tap/tap.go
@@ -0,0 +1,133 @@
+// Copyright 2019 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package tap
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "strings"
+)
+
+// Type describes the type of TAP Object returned by the Parser.
+type Type int
+
+const (
+ // VersionType is the type returned by a Version Object.
+ VersionType Type = iota
+
+ // PlanType is the type returned by a Plan Object.
+ PlanType
+
+ // TestLineType is the type returned by a TestLine.
+ TestLineType
+)
+
+// Object is a TAP element such as a test line, plan, version header, or Yaml block.
+type Object interface {
+ Type() Type
+}
+
+// Document represents a complete TAP document.
+type Document struct {
+ Version Version
+ Plan Plan
+ TestLines []TestLine
+}
+
+// WriteTo writes this document to the given Writer as a formatted TAP output stream.
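+//
+// A small usage sketch (doc is a *Document):
+//
+//	var buf bytes.Buffer
+//	n, err := doc.WriteTo(&buf)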
+func (d *Document) WriteTo(w io.Writer) (int64, error) {
+ n, err := w.Write([]byte(d.format()))
+ return int64(n), err
+}
+
+// format renders this document as though it were a TAP output stream.
+func (d *Document) format() string {
+ output := new(bytes.Buffer)
+ output.WriteString(fmt.Sprintf("TAP version %d\n", d.Version))
+ output.WriteString(fmt.Sprintf("%d..%d\n", d.Plan.Start, d.Plan.End))
+
+ for _, line := range d.TestLines {
+ var parts []string
+ ok := "ok"
+ if !line.Ok {
+ ok = "not ok"
+ }
+ parts = append(parts, ok)
+
+ if line.Count != 0 {
+ parts = append(parts, fmt.Sprintf("%d", line.Count))
+ }
+
+ if line.Description != "" {
+ parts = append(parts, line.Description)
+ }
+
+ switch line.Directive {
+ case Todo:
+ parts = append(parts, "# TODO", line.Explanation)
+ case Skip:
+ parts = append(parts, "# SKIP", line.Explanation)
+ }
+
+ output.WriteString(strings.Join(parts, " ") + "\n")
+ }
+
+ return output.String()
+}
+
+// Version represents a TAP version line.
+type Version int
+
+// Type implements Object.
+func (v Version) Type() Type {
+ return VersionType
+}
+
+func (v Version) String() string {
+ return fmt.Sprintf("TAP version %d", v)
+}
+
+// Plan represents a TAP plan line.
+type Plan struct {
+ Start int
+ End int
+}
+
+// Type implements Object.
+func (p Plan) Type() Type {
+ return PlanType
+}
+
+func (p Plan) String() string {
+ return fmt.Sprintf("%d..%d", p.Start, p.End)
+}
+
+// Directive represents a TAP directive (TODO|SKIP|<none>)
+type Directive int
+
+// Valid Tap directives.
+const (
+ None Directive = iota
+ Todo
+ Skip
+)
+
+// TestLine represents a TAP test line beginning with "ok" or "not ok".
+//
+// TODO: Support inline YAML documents.
+type TestLine struct {
+ Ok bool
+ Count int
+ Description string
+ Directive Directive
+ Explanation string
+ Diagnostic string
+}
+
+// Type implements Object.
+func (t TestLine) Type() Type {
+ return TestLineType
+}
diff --git a/tap/tokenizer.go b/tap/tokenizer.go
deleted file mode 100644
index b226550..0000000
--- a/tap/tokenizer.go
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2019 The Fuchsia Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-package tap
-
-// Directive is a TAP directive (TODO|SKIP|<none>)
-type Directive int
-
-// Valid Tap directives.
-const (
- None Directive = iota
- Todo
- Skip
-)
diff --git a/tap/tokenizer/lexer.go b/tap/tokenizer/lexer.go
new file mode 100644
index 0000000..3761352
--- /dev/null
+++ b/tap/tokenizer/lexer.go
@@ -0,0 +1,210 @@
+// Copyright 2019 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package tokenizer
+
+import (
+ "log"
+ "strconv"
+ "unicode/utf8"
+)
+
+// TokenType describes the category a Token belongs to.
+type TokenType string
+
+// TokenType values recognized by the lexer.
+const (
+ TypePound TokenType = "POUND" // '#'
+ TypeNumber TokenType = "NUMBER" // A number
+ TypeText TokenType = "TEXT" // Catch-all type
+ TypeDot TokenType = "DOT" // '.'
+ TypeNewline TokenType = "NEWLINE" // '\n'
+ TypeEOF TokenType = "EOF" // Pseudo token to signal the end of input.
+)
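+
+// As an illustration, the line "1..4\n" tokenizes as
+// NUMBER("1") DOT DOT NUMBER("4") NEWLINE, and "ok 2 # SKIP\n" as
+// TEXT("ok") NUMBER("2") POUND TEXT("SKIP") NEWLINE; spaces are skipped.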
+
+// Token represents some atomic TAP output string.
+type Token struct {
+ Type TokenType
+ Value string
+}
+
+// Tokenize generates a channel of Tokens read from the given input.
+func Tokenize(input []byte) <-chan Token {
+ l := &lexer{
+ input: input,
+ Tokens: make(chan Token, 1),
+ }
+ go l.run()
+ return l.Tokens
+}
+
+// EOFToken is emitted to signal the end of input.
+func EOFToken() Token {
+ return Token{
+ Type: TypeEOF,
+ Value: "",
+ }
+}
+
+// eof is the rune returned by next when the end of input has been reached.
+const eof = rune(-1)
+
+// state represents a lexical analysis state. Each state accepts a lexer as input and
+// returns the next lexer state. If the output state is nil, lexing stops.
+type state func(*lexer) state
+
+// lexer manages the position of a lexical analysis on some TAP output string.
+type lexer struct {
+ input []byte
+ start int
+ pos int
+ width int
+ Tokens chan Token
+}
+
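+// run drives the lexer state machine until a state returns nil, then closes Tokens.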
+func (l *lexer) run() {
+ for state := lexAny; state != nil; {
+ state = state(l)
+ }
+ close(l.Tokens)
+}
+
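+// emit sends the text consumed since the last emit as a Token of type t.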
+func (l *lexer) emit(t TokenType) {
+ l.Tokens <- Token{Type: t, Value: string(l.input[l.start:l.pos])}
+ l.start = l.pos
+}
+
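+// next consumes and returns the next rune in the input, skipping over invalid
+// UTF-8 sequences. It returns eof once the input is exhausted.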
+func (l *lexer) next() rune {
+ if l.pos >= len(l.input) {
+ l.width = 0
+ return eof
+ }
+
+ // Read the next rune, skipping over all invalid utf8 sequences.
+ var rn rune
+ rn, l.width = utf8.DecodeRune(l.input[l.pos:])
+ for rn == utf8.RuneError && l.pos < len(l.input) {
+ log.Printf("invalid UTF-8 found at pos %d:\n\n%s", l.pos, string(l.input))
+ l.pos++
+ rn, l.width = utf8.DecodeRune(l.input[l.pos:])
+ }
+ l.pos += l.width
+ return rn
+}
+
+// lexeme returns the lexeme at the current position, or eof if the input has
+// been exhausted.
+func (l *lexer) lexeme() lexeme {
+ if l.pos >= len(l.input) {
+ return lexeme(eof)
+ }
+ return lexeme(l.input[l.pos])
+}
+
+// lexAny is the lexer start state. Its job is to put the lexer into the proper state
+// according to the next input rune. Other states should return to this state after
+// emitting their lexemes. They should also not consume runes using l.next() immediately
+// before entering this state.
+func lexAny(l *lexer) state {
+ lxm := l.lexeme()
+ if lxm.isEOF() {
+ l.emit(TypeEOF)
+ return nil
+ }
+
+ l.start = l.pos
+
+ switch {
+ case lxm.isNewline():
+ l.next()
+ l.emit(TypeNewline)
+ return lexAny
+ case lxm.isDot():
+ l.next()
+ l.emit(TypeDot)
+ return lexAny
+ case lxm.isPound():
+ l.next()
+ l.emit(TypePound)
+ return lexAny
+ case lxm.isSpace():
+ // Skip all spaces.
+ for l.lexeme().isSpace() {
+ l.next()
+ }
+ l.start = l.pos
+ return lexAny
+ case lxm.isDigit():
+ return lexNumber
+ }
+
+ return lexText
+}
+
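+// lexNumber scans a run of digits and emits them as a single NUMBER token.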
+func lexNumber(l *lexer) state {
+ for {
+ lxm := l.lexeme()
+ if lxm.isEOF() || !lxm.isDigit() {
+ l.emit(TypeNumber)
+ return lexAny
+ }
+
+ if l.next() == eof {
+ break
+ }
+ }
+
+ // Reached EOF
+ if l.pos > l.start {
+ l.emit(TypeNumber)
+ }
+
+ l.emit(TypeEOF)
+ return nil
+}
+
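+// lexText scans a run of text runes and emits them as a single TEXT token.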
+func lexText(l *lexer) state {
+ for l.next() != eof {
+ lxm := l.lexeme()
+ if lxm.isNonText() {
+ l.emit(TypeText)
+ return lexAny
+ }
+ }
+
+ // Reached EOF
+ if l.pos > l.start {
+ l.emit(TypeText)
+ }
+
+ l.emit(TypeEOF)
+ return nil
+}
+
+type lexeme rune
+
+func (l lexeme) isSpace() bool {
+ return l == ' ' || l == '\r'
+}
+
+func (l lexeme) isNewline() bool {
+ return l == '\n'
+}
+
+func (l lexeme) isDigit() bool {
+ _, err := strconv.Atoi(string(l))
+ return err == nil
+}
+
+func (l lexeme) isDot() bool {
+ return l == '.'
+}
+
+func (l lexeme) isPound() bool {
+ return l == '#'
+}
+
+func (l lexeme) isEOF() bool {
+ return rune(l) == eof
+}
+
+func (l lexeme) isNonText() bool {
+ return l.isEOF() || l.isSpace() || l.isNewline() || l.isDigit() || l.isDot() || l.isPound()
+}
diff --git a/tap/tokenizer/stream.go b/tap/tokenizer/stream.go
new file mode 100644
index 0000000..fd14d91
--- /dev/null
+++ b/tap/tokenizer/stream.go
@@ -0,0 +1,56 @@
+// Copyright 2019 The Fuchsia Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package tokenizer
+
+// NewTokenStream creates a stream of Token values read from input.
+func NewTokenStream(input []byte) *TokenStream {
+ return &TokenStream{
+ stream: Tokenize(input),
+ }
+}
+
+// TokenStream is a read-only queue of Token values. The next Token in the stream can be
+// consumed by calling Next(). The next token can be observed without being consumed by
+// calling Peek().
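+//
+// A typical consumption loop (a sketch):
+//
+//	s := NewTokenStream(input)
+//	for s.Peek().Type != TypeEOF {
+//		tok := s.Next()
+//		// Handle tok.
+//	}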
+type TokenStream struct {
+ eof bool
+ lookahead *Token
+ stream <-chan Token
+}
+
+// Next consumes the next token in the stream.
+func (s *TokenStream) Next() Token {
+ if s.eof {
+ return EOFToken()
+ }
+
+ next := new(Token)
+ if s.lookahead == nil {
+ *next = <-s.stream
+ } else {
+ next = s.lookahead
+ s.lookahead = nil
+ }
+
+ if next.Type == TypeEOF {
+ s.eof = true
+ }
+
+ return *next
+}
+
+// Peek returns a read-only copy of the next token in the stream, without consuming it.
+func (s *TokenStream) Peek() Token {
+ if s.eof {
+ return EOFToken()
+ }
+
+ if s.lookahead == nil {
+ s.lookahead = new(Token)
+ *s.lookahead = <-s.stream
+ }
+
+ return *s.lookahead
+}