blob: 6b5429a8dcbd7335080ed3c31e975ee16032f071 [file] [log] [blame]
// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package commentparser
import (
"fmt"
"reflect"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/google/licenseclassifier/commentparser/language"
)
// Fixture text shared by the test tables below.
const (
	// singleLineText is the body used for one-line comment fixtures.
	singleLineText = "single line text"

	// multilineText is a three-line body; every line, including the last,
	// is terminated by a newline.
	multilineText = "first line of text\n" +
		"second line of text\n" +
		"third line of text\n"
)
// TestCommentParser_Lex runs Parse over small source snippets in a range of
// languages and checks that the returned comments carry the expected start
// line, end line, and text. Each table entry runs as its own subtest, named
// after its description, so a single case can be selected with -run.
func TestCommentParser_Lex(t *testing.T) {
	tests := []struct {
		description string
		lang        language.Language
		source      string
		want        Comments
	}{
		{
			description: "BCPL Single Line Comments",
			lang:        language.Go,
			source:      fmt.Sprintf("//%s\n", singleLineText),
			want: []*Comment{
				{StartLine: 1, EndLine: 1, Text: singleLineText},
			},
		},
		{
			// The backquoted string spans three lines and must not hide
			// the comment that follows it.
			description: "Go Comment With Multiline String",
			lang:        language.Go,
			source:      fmt.Sprintf("var a = `A\nmultiline\\x20\nstring`\n//%s\n", singleLineText),
			want: []*Comment{
				{StartLine: 4, EndLine: 4, Text: singleLineText},
			},
		},
		{
			// A triple-quoted string on the right-hand side of an
			// assignment is not expected to be reported as a comment.
			description: "Python Multiline String",
			lang:        language.Python,
			source:      fmt.Sprintf("#%s\n\n\n\nx = '''this is a multiline\nstring'''", singleLineText),
			want: []*Comment{
				{StartLine: 1, EndLine: 1, Text: singleLineText},
			},
		},
		{
			description: "Python module-level Docstring #1",
			lang:        language.Python,
			source:      fmt.Sprintf("'''%s'''\nimport foo", multilineText),
			want: []*Comment{
				{StartLine: 1, EndLine: 4, Text: multilineText},
			},
		},
		{
			description: "Python module-level Docstring #2",
			lang:        language.Python,
			source:      fmt.Sprintf("#!/usr/bin/python\n'''%s'''\nimport foo", multilineText),
			want: []*Comment{
				{StartLine: 1, EndLine: 1, Text: "!/usr/bin/python"},
				{StartLine: 2, EndLine: 5, Text: multilineText},
			},
		},
		{
			// Only include docstrings that start at the beginning of a line
			description: "Python module-level Docstring #3",
			lang:        language.Python,
			source:      "'''zero1'''\n '''one'''\n '''two'''\n'''zero2'''",
			want: []*Comment{
				{StartLine: 1, EndLine: 1, Text: "zero1"},
				{StartLine: 4, EndLine: 4, Text: "zero2"},
			},
		},
		{
			// The unbalanced quote inside the tr argument must not
			// produce any comment beyond the one on the first line.
			description: "TR Command String",
			lang:        language.Python,
			source: fmt.Sprintf(`#%s
AUTH= \
| tr '"\n' \
| base64 -w
`, singleLineText),
			want: []*Comment{
				{StartLine: 1, EndLine: 1, Text: singleLineText},
			},
		},
		{
			description: "Lisp Single Line Comments",
			lang:        language.Clojure,
			source:      fmt.Sprintf(";%s\n", singleLineText),
			want: []*Comment{
				{StartLine: 1, EndLine: 1, Text: singleLineText},
			},
		},
		{
			description: "Shell Single Line Comments",
			lang:        language.Shell,
			source:      fmt.Sprintf("#%s\n", singleLineText),
			want: []*Comment{
				{StartLine: 1, EndLine: 1, Text: singleLineText},
			},
		},
		{
			description: "BCPL Multiline Comments",
			lang:        language.C,
			source:      fmt.Sprintf("/*%s*/\n", multilineText),
			want: []*Comment{
				{StartLine: 1, EndLine: 4, Text: multilineText},
			},
		},
		{
			description: "BCPL Multiline Comments no terminating newline",
			lang:        language.C,
			source:      fmt.Sprintf("/*%s*/", multilineText),
			want: []*Comment{
				{StartLine: 1, EndLine: 4, Text: multilineText},
			},
		},
		{
			// The inner /* ... */ pair is expected to stay part of the
			// outer comment's text.
			description: "Nested Multiline Comments",
			lang:        language.Swift,
			source:      "/*a /*\n nested\n*/\n comment\n*/\n",
			want: []*Comment{
				{StartLine: 1, EndLine: 5, Text: "a /*\n nested\n*/\n comment\n"},
			},
		},
		{
			description: "Ruby Multiline Comments",
			lang:        language.Ruby,
			source:      fmt.Sprintf("=begin\n%s=end\n", multilineText),
			want: []*Comment{
				{StartLine: 1, EndLine: 5, Text: "\n" + multilineText},
			},
		},
		{
			description: "Multiple Single Line Comments",
			lang:        language.Shell,
			source: `# First line
# Second line
# Third line
`,
			want: []*Comment{
				{StartLine: 1, EndLine: 1, Text: " First line"},
				{StartLine: 2, EndLine: 2, Text: " Second line"},
				{StartLine: 3, EndLine: 3, Text: " Third line"},
			},
		},
		{
			description: "Mixed Multiline / Single Line Comments",
			lang:        language.C,
			source: `/*
* The first multiline line.
* The second multiline line.
*/
// The first single line comment.
// The second single line comment.
`,
			want: []*Comment{
				{
					StartLine: 1,
					EndLine:   4,
					Text: `
* The first multiline line.
* The second multiline line.
`,
				},
				{StartLine: 5, EndLine: 5, Text: " The first single line comment."},
				{StartLine: 6, EndLine: 6, Text: " The second single line comment."},
			},
		},
		{
			// Neither the leading '#' line nor the apostrophe in "don't"
			// should yield a comment or mask the real one on line 3.
			description: "HTML-like comments and quotes",
			lang:        language.HTML,
			source: `# This is an important topic
I don't want to go on all day here! <-- notice the quote in there!
<!-- Well, maybe I do... -->
`,
			want: []*Comment{
				{StartLine: 3, EndLine: 3, Text: " Well, maybe I do... "},
			},
		},
		{
			// The double quote inside the regex literal must not be
			// treated as the start of a string.
			description: "JavaScript regex",
			lang:        language.JavaScript,
			source: `var re = /hello"world/;
// the comment
`,
			want: []*Comment{
				{StartLine: 2, EndLine: 2, Text: " the comment"},
			},
		},
		{
			description: "Perl regex",
			lang:        language.Perl,
			source: `if (/hello"world/) {
# the comment
print "Yo!"
}
`,
			want: []*Comment{
				{StartLine: 2, EndLine: 2, Text: " the comment"},
			},
		},
		{
			description: "SQL using MySQL-style comments",
			lang:        language.SQL,
			source: `/*
* The first multiline line.
* The second multiline line.
*/
# The first single line comment.
# The second single line comment.
`,
			want: []*Comment{
				{
					StartLine: 1,
					EndLine:   4,
					Text: `
* The first multiline line.
* The second multiline line.
`,
				},
				{StartLine: 5, EndLine: 5, Text: " The first single line comment."},
				{StartLine: 6, EndLine: 6, Text: " The second single line comment."},
			},
		},
		{
			description: "SQL using ANSI-style comments",
			lang:        language.SQL,
			source: `-- The first single line comment.
/*
* The first multiline line.
* The second multiline line.
*/
-- The second single line comment.
`,
			want: []*Comment{
				{StartLine: 1, EndLine: 1, Text: " The first single line comment."},
				{
					StartLine: 2,
					EndLine:   5,
					Text: `
* The first multiline line.
* The second multiline line.
`,
				},
				{StartLine: 6, EndLine: 6, Text: " The second single line comment."},
			},
		},
		{
			description: "Matlab language - Single Line Comments",
			lang:        language.ObjectiveC, // Matlab has same extension as Objective-C.
			source: `% Copyright 2017 Yoyodyne Inc.
clear;
close all;
`,
			want: []*Comment{
				{StartLine: 1, EndLine: 1, Text: " Copyright 2017 Yoyodyne Inc."},
			},
		},
		{
			description: "Matlab language - Multi-Line Comments",
			lang:        language.ObjectiveC, // Matlab has same extension as Objective-C.
			source: `%{ Multiline comment start.
Second line of multiline comment.
%}
clear;
close all;
`,
			want: []*Comment{
				{
					StartLine: 1,
					EndLine:   3,
					Text: ` Multiline comment start.
Second line of multiline comment.
`,
				},
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.description, func(t *testing.T) {
			got := Parse([]byte(tt.source), tt.lang)
			// cmp.Diff is empty exactly when cmp.Equal reports true, so it
			// serves as both the check and the failure report.
			if diff := cmp.Diff(tt.want, got); diff != "" {
				t.Errorf("Parse() returned unexpected comments (-want +got):\n%s", diff)
			}
		})
	}
}
// TestCommentParser_ChunkIterator checks how Comments.ChunkIterator groups
// comments into chunks. The table expects single-line comments on consecutive
// lines to share a chunk, a gap in line numbers to start a new chunk, and
// every multi-line comment to form a chunk of its own. Each table entry runs
// as its own subtest, named after its description.
func TestCommentParser_ChunkIterator(t *testing.T) {
	tests := []struct {
		description string
		comments    Comments
		want        []Comments
	}{
		{
			description: "Empty Comments",
			comments:    Comments{},
			want:        nil,
		},
		{
			description: "Single Line Comment Chunk",
			comments: Comments{
				{StartLine: 1, EndLine: 1, Text: "Block 1 line 1"},
				{StartLine: 2, EndLine: 2, Text: "Block 1 line 2"},
			},
			want: []Comments{{
				{StartLine: 1, EndLine: 1, Text: "Block 1 line 1"},
				{StartLine: 2, EndLine: 2, Text: "Block 1 line 2"},
			}},
		},
		{
			description: "Multiline Comment Chunk",
			comments: Comments{{
				StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3",
			}},
			want: []Comments{{{
				StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3",
			}}},
		},
		{
			description: "Multiple Single Line Comment Chunks",
			comments: Comments{
				{StartLine: 1, EndLine: 1, Text: "Block 1 line 1"},
				{StartLine: 2, EndLine: 2, Text: "Block 1 line 2"},
				{StartLine: 4, EndLine: 4, Text: "Block 2 line 1"},
				{StartLine: 5, EndLine: 5, Text: "Block 2 line 2"},
			},
			want: []Comments{
				{
					{StartLine: 1, EndLine: 1, Text: "Block 1 line 1"},
					{StartLine: 2, EndLine: 2, Text: "Block 1 line 2"},
				},
				{
					{StartLine: 4, EndLine: 4, Text: "Block 2 line 1"},
					{StartLine: 5, EndLine: 5, Text: "Block 2 line 2"},
				},
			},
		},
		{
			description: "Multiple Multiline Comment Chunks",
			comments: Comments{
				{StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"},
				{StartLine: 4, EndLine: 6, Text: "Multiline 1\n2\n3"},
			},
			want: []Comments{
				{{StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"}},
				{{StartLine: 4, EndLine: 6, Text: "Multiline 1\n2\n3"}},
			},
		},
		{
			description: "Multiline and Single Line Comment Chunks",
			comments: Comments{
				{StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"},
				{StartLine: 4, EndLine: 4, Text: "Block 2 line 1"},
				{StartLine: 5, EndLine: 5, Text: "Block 2 line 2"},
			},
			want: []Comments{
				{
					{StartLine: 1, EndLine: 3, Text: "Multiline 1\n2\n3"},
				},
				{
					{StartLine: 4, EndLine: 4, Text: "Block 2 line 1"},
					{StartLine: 5, EndLine: 5, Text: "Block 2 line 2"},
				},
			},
		},
		{
			description: "Mixed Multiline / Single Line Comments",
			comments: []*Comment{
				{StartLine: 1, EndLine: 1, Text: " The first single line comment."},
				{StartLine: 2, EndLine: 2, Text: " The second single line comment."},
				{StartLine: 4, EndLine: 7, Text: "\n * The first multiline line.\n * The second multiline line.\n"},
			},
			want: []Comments{
				{
					{StartLine: 1, EndLine: 1, Text: " The first single line comment."},
					{StartLine: 2, EndLine: 2, Text: " The second single line comment."},
				},
				{
					{StartLine: 4, EndLine: 7, Text: "\n * The first multiline line.\n * The second multiline line.\n"},
				},
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.description, func(t *testing.T) {
			// Drain the iterator completely before checking anything so the
			// loop never abandons it part-way through.
			var got []Comments
			for chunk := range tt.comments.ChunkIterator() {
				got = append(got, chunk)
			}

			// A count mismatch makes index-wise comparison meaningless, so
			// stop this subtest here.
			if len(got) != len(tt.want) {
				t.Fatalf("ChunkIterator() yielded %d chunks, want %d", len(got), len(tt.want))
			}
			for i := range got {
				if !reflect.DeepEqual(got[i], tt.want[i]) {
					t.Errorf("ChunkIterator() chunk %d = %+v, want %+v", i, got[i], tt.want[i])
				}
			}
		})
	}
}